From 8199d3a79c224bbe5943fa08684e1f93a17881b0 Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Wed, 30 Mar 2005 13:34:31 -0800 Subject: [PATCH 001/584] [PATCH] A new 10GB Ethernet Driver by Chelsio Communications A Linux driver for the Chelsio 10Gb Ethernet Network Controller by Chelsio (http://www.chelsio.com). This driver supports the Chelsio N210 NIC and is backward compatible with the Chelsio N110 model 10Gb NICs. It supports AMD64, EM64T and x86 systems. Signed-off-by: Tina Yang Signed-off-by: Scott Bardone Signed-off-by: Christoph Lameter Adrian said: - my3126.c is unused (because t1_my3126_ops isn't used anywhere) - what are the EXTRA_CFLAGS in drivers/net/chelsio/Makefile for? - $(cxgb-y) in drivers/net/chelsio/Makefile seems to be unneeded - completely unused global functions: - espi.c: t1_espi_get_intr_counts - sge.c: t1_sge_get_intr_counts - the following functions can be made static: - sge.c: t1_espi_workaround - sge.c: t1_sge_tx - subr.c: __t1_tpi_read - subr.c: __t1_tpi_write - subr.c: t1_wait_op_done shemminger said: The performance recommendations in cxgb.txt are common to all fast devices, and should be in one file rather than just for this device. I would rather see ip-sysctl.txt updated or a new file on tuning recommendations started. Some of them have consequences that aren't documented well. For example, turning off TCP timestamps risks data corruption from sequence wrap. A new driver shouldn't need so may #ifdef's unless you want to putit on older vendor versions of 2.4 Some accessor and wrapper functions like: t1_pci_read_config_4 adapter_name t1_malloc are just annoying noise. Why have useless dead code like: /* Interrupt handler */ +static int pm3393_interrupt_handler(struct cmac *cmac) +{ + u32 master_intr_status; +/* + 1. Read master interrupt register. + 2. Read BLOCK's interrupt status registers. + 3. Handle BLOCK interrupts. +*/ Jeff said: step 1: kill all the OS wrappers. 
And do you really need hooks for multiple MACs, when only one MAC is really supported? Typically these hooks are at a higher level anyway -- struct net_device. From: Christoph Lameter Signed-off-by: Christoph Lameter Signed-off-by: Andrew Morton diff -puN /dev/null Documentation/networking/cxgb.txt --- Documentation/networking/cxgb.txt | 322 +++++ drivers/net/Kconfig | 19 + drivers/net/Makefile | 1 + drivers/net/chelsio/Makefile | 12 + drivers/net/chelsio/ch_ethtool.h | 102 ++ drivers/net/chelsio/common.h | 269 +++++ drivers/net/chelsio/cphy.h | 150 +++ drivers/net/chelsio/cpl5_cmd.h | 145 +++ drivers/net/chelsio/cxgb2.c | 1231 +++++++++++++++++++ drivers/net/chelsio/cxgb2.h | 122 ++ drivers/net/chelsio/elmer0.h | 157 +++ drivers/net/chelsio/espi.c | 386 ++++++ drivers/net/chelsio/espi.h | 67 ++ drivers/net/chelsio/gmac.h | 133 +++ drivers/net/chelsio/mv88x201x.c | 258 ++++ drivers/net/chelsio/osdep.h | 169 +++ drivers/net/chelsio/pm3393.c | 831 +++++++++++++ drivers/net/chelsio/regs.h | 453 +++++++ drivers/net/chelsio/sge.c | 1451 +++++++++++++++++++++++ drivers/net/chelsio/sge.h | 79 ++ drivers/net/chelsio/subr.c | 831 +++++++++++++ drivers/net/chelsio/suni1x10gexp_regs.h | 221 ++++ drivers/net/chelsio/tp.c | 188 +++ drivers/net/chelsio/tp.h | 110 ++ 24 files changed, 7707 insertions(+) create mode 100644 Documentation/networking/cxgb.txt create mode 100644 drivers/net/chelsio/Makefile create mode 100644 drivers/net/chelsio/ch_ethtool.h create mode 100644 drivers/net/chelsio/common.h create mode 100644 drivers/net/chelsio/cphy.h create mode 100644 drivers/net/chelsio/cpl5_cmd.h create mode 100644 drivers/net/chelsio/cxgb2.c create mode 100644 drivers/net/chelsio/cxgb2.h create mode 100644 drivers/net/chelsio/elmer0.h create mode 100644 drivers/net/chelsio/espi.c create mode 100644 drivers/net/chelsio/espi.h create mode 100644 drivers/net/chelsio/gmac.h create mode 100644 drivers/net/chelsio/mv88x201x.c create mode 100644 drivers/net/chelsio/osdep.h create mode 
100644 drivers/net/chelsio/pm3393.c create mode 100644 drivers/net/chelsio/regs.h create mode 100644 drivers/net/chelsio/sge.c create mode 100644 drivers/net/chelsio/sge.h create mode 100644 drivers/net/chelsio/subr.c create mode 100644 drivers/net/chelsio/suni1x10gexp_regs.h create mode 100644 drivers/net/chelsio/tp.c create mode 100644 drivers/net/chelsio/tp.h diff --git a/Documentation/networking/cxgb.txt b/Documentation/networking/cxgb.txt new file mode 100644 index 000000000000..9f2eb646c6f5 --- /dev/null +++ b/Documentation/networking/cxgb.txt @@ -0,0 +1,322 @@ + Chelsio N210 10Gb Ethernet Network Controller + + Driver Release Notes for Linux + + Version 2.1.0 + + March 8, 2005 + +CONTENTS +======== + INTRODUCTION + FEATURES + PERFORMANCE + DRIVER MESSAGES + KNOWN ISSUES + SUPPORT + + +INTRODUCTION +============ + + This document describes the Linux driver for Chelsio 10Gb Ethernet Network + Controller. This driver supports the Chelsio N210 NIC and is backward + compatible with the Chelsio N110 model 10Gb NICs. This driver supports AMD64, + EM64T, and x86 systems. + + +FEATURES +======== + + Adaptive Interrupts (adaptive-rx) + --------------------------------- + + This feature provides an adaptive algorithm that adjusts the interrupt + coalescing parameters, allowing the driver to dynamically adapt the latency + settings to achieve the highest performance during various types of network + load. + + The interface used to control this feature is ethtool. Please see the + ethtool manpage for additional usage information. + + By default, adaptive-rx is disabled. + To enable adaptive-rx: + + ethtool -C <interface> adaptive-rx on + + To disable adaptive-rx, use ethtool: + + ethtool -C <interface> adaptive-rx off + + After disabling adaptive-rx, the timer latency value will be set to 50us. 
+ You may set the timer latency after disabling adaptive-rx: + + ethtool -C <interface> rx-usecs <microseconds> + + An example to set the timer latency value to 100us on eth0: + + ethtool -C eth0 rx-usecs 100 + + You may also provide a timer latency value while disabling adaptive-rx: + + ethtool -C <interface> adaptive-rx off rx-usecs <microseconds> + + If adaptive-rx is disabled and a timer latency value is specified, the timer + will be set to the specified value until changed by the user or until + adaptive-rx is enabled. + + To view the status of the adaptive-rx and timer latency values: + + ethtool -c <interface> + + + TCP Segmentation Offloading (TSO) Support + ----------------------------------------- + + This feature, also known as "large send", enables a system's protocol stack + to offload portions of outbound TCP processing to a network interface card + thereby reducing system CPU utilization and enhancing performance. + + The interface used to control this feature is ethtool version 1.8 or higher. + Please see the ethtool manpage for additional usage information. + + By default, TSO is enabled. + To disable TSO: + + ethtool -K <interface> tso off + + To enable TSO: + + ethtool -K <interface> tso on + + To view the status of TSO: + + ethtool -k <interface> + + +PERFORMANCE +=========== + + The following information is provided as an example of how to change system + parameters for "performance tuning" and what value to use. You may or may not + want to change these system parameters, depending on your server/workstation + application. Doing so is not warranted in any way by Chelsio Communications, + and is done at "YOUR OWN RISK". Chelsio will not be held responsible for loss + of data or damage to equipment. + + Your distribution may have a different way of doing things, or you may prefer + a different method. These commands are shown only to provide an example of + what to do and are by no means definitive. + + Making any of the following system changes will only last until you reboot + your system. 
You may want to write a script that runs at boot-up which + includes the optimal settings for your system. + + Setting PCI Latency Timer: + setpci -d 1425:* 0x0c.l=0x0000F800 + + Disabling TCP timestamp: + sysctl -w net.ipv4.tcp_timestamps=0 + + Disabling SACK: + sysctl -w net.ipv4.tcp_sack=0 + + Setting TCP read buffers (min/default/max): + sysctl -w net.ipv4.tcp_rmem="10000000 10000000 10000000" + + Setting TCP write buffers (min/default/max): + sysctl -w net.ipv4.tcp_wmem="10000000 10000000 10000000" + + Setting TCP buffer space (min/pressure/max): + sysctl -w net.ipv4.tcp_mem="10000000 10000000 10000000" + + Setting large number of incoming connection requests (2.6.x only): + sysctl -w net.ipv4.tcp_max_syn_backlog=3000 + + Setting maximum receive socket buffer size: + sysctl -w net.core.rmem_max=524287 + + Setting maximum send socket buffer size: + sysctl -w net.core.wmem_max=524287 + + Setting default receive socket buffer size: + sysctl -w net.core.rmem_default=524287 + + Setting default send socket buffer size: + sysctl -w net.core.wmem_default=524287 + + Setting maximum option memory buffers: + sysctl -w net.core.optmem_max=524287 + + Setting maximum backlog (# of unprocessed packets before kernel drops): + sysctl -w net.core.netdev_max_backlog=300000 + + Set smp_affinity (on a multiprocessor system) to a single CPU: + echo 00000001 > /proc/irq/<irq_number>/smp_affinity + + TCP window size for single connections: + The receive buffer (RX_WINDOW) size must be at least as large as the + Bandwidth-Delay Product of the communication link between the sender and + receiver. Due to the variations of RTT, you may want to increase the buffer + size up to 2 times the Bandwidth-Delay Product. Reference page 289 of + "TCP/IP Illustrated, Volume 1, The Protocols" by W. Richard Stevens. 
+ At 10Gb speeds, use the following formula: + RX_WINDOW >= 1.25MBytes * RTT(in milliseconds) + Example for RTT with 100us: RX_WINDOW = (1,250,000 * 0.1) = 125,000 + RX_WINDOW sizes of 256KB - 512KB should be sufficient. + Setting the min, default, and max receive buffer (RX_WINDOW) size: + sysctl -w net.ipv4.tcp_rmem="<min> <default> <max>" + + TCP window size for multiple connections: + The receive buffer (RX_WINDOW) size may be calculated the same as single + connections, but should be divided by the number of connections. The + smaller window prevents congestion and facilitates better pacing, + especially if/when MAC level flow control does not work well or when it is + not supported on the machine. Experimentation may be necessary to attain + the correct value. This method is provided as a starting point for the + correct receive buffer size. + Setting the min, max, and default receive buffer (RX_WINDOW) size is + performed in the same manner as single connection. + + +DRIVER MESSAGES +=============== + + The following messages are the most common messages logged by syslog. These + may be found in /var/log/messages. + + Driver up: + Chelsio Network Driver - version 2.1.0 + + NIC detected: + eth#: Chelsio N210 1x10GBaseX NIC (rev #), PCIX 133MHz/64-bit + + Link up: + eth#: link is up at 10 Gbps, full duplex + + Link down: + eth#: link is down + + +KNOWN ISSUES +============ + + These issues have been identified during testing. The following information + is provided as a workaround to the problem. In some cases, this problem is + inherent to Linux or to a particular Linux Distribution and/or hardware + platform. + + 1. Large number of TCP retransmits on a multiprocessor (SMP) system. + + On a system with multiple CPUs, the interrupt (IRQ) for the network + controller may be bound to more than one CPU. This will cause TCP + retransmits if the packet data were to be split across different CPUs + and re-assembled in a different order than expected. 
+ + To eliminate the TCP retransmits, set smp_affinity on the particular + interrupt to a single CPU. You can locate the interrupt (IRQ) used on + the N110/N210 by using ifconfig: + ifconfig <dev_name> | grep Interrupt + Set the smp_affinity to a single CPU: + echo 1 > /proc/irq/<irq_number>/smp_affinity + + It is highly suggested that you do not run the irqbalance daemon on your + system, as this will change any smp_affinity setting you have applied. + The irqbalance daemon runs on a 10 second interval and binds interrupts + to the least loaded CPU determined by the daemon. To disable this daemon: + chkconfig --level 2345 irqbalance off + + By default, some Linux distributions enable the kernel feature, + irqbalance, which performs the same function as the daemon. To disable + this feature, add the following line to your bootloader: + noirqbalance + + Example using the Grub bootloader: + title Red Hat Enterprise Linux AS (2.4.21-27.ELsmp) + root (hd0,0) + kernel /vmlinuz-2.4.21-27.ELsmp ro root=/dev/hda3 noirqbalance + initrd /initrd-2.4.21-27.ELsmp.img + + 2. After running insmod, the driver is loaded and the incorrect network + interface is brought up without running ifup. + + When using 2.4.x kernels, including RHEL kernels, the Linux kernel + invokes a script named "hotplug". This script is primarily used to + automatically bring up USB devices when they are plugged in, however, + the script also attempts to automatically bring up a network interface + after loading the kernel module. The hotplug script does this by scanning + the ifcfg-eth# config files in /etc/sysconfig/network-scripts, looking + for HWADDR=<mac_address>. + + If the hotplug script does not find the HWADDR within any of the + ifcfg-eth# files, it will bring up the device with the next available + interface name. If this interface is already configured for a different + network card, your new interface will have incorrect IP address and + network settings. 
+ + To solve this issue, you can add the HWADDR=<mac_address> key to the + interface config file of your network controller. + + To disable this "hotplug" feature, you may add the driver (module name) + to the "blacklist" file located in /etc/hotplug. It has been noted that + this does not work for network devices because the net.agent script + does not use the blacklist file. Simply remove, or rename, the net.agent + script located in /etc/hotplug to disable this feature. + + 3. Transport Protocol (TP) hangs when running heavy multi-connection traffic + on an AMD Opteron system with HyperTransport PCI-X Tunnel chipset. + + If your AMD Opteron system uses the AMD-8131 HyperTransport PCI-X Tunnel + chipset, you may experience the "133-Mhz Mode Split Completion Data + Corruption" bug identified by AMD while using a 133Mhz PCI-X card on the + PCI-X bus. + + AMD states, "Under highly specific conditions, the AMD-8131 PCI-X Tunnel + can provide stale data via split completion cycles to a PCI-X card that + is operating at 133 Mhz", causing data corruption. + + AMD provides three workarounds for this problem; however, Chelsio + recommends the first option for best performance with this bug: + + For 133Mhz secondary bus operation, limit the transaction length and + the number of outstanding transactions, via BIOS configuration + programming of the PCI-X card, to the following: + + Data Length (bytes): 2k + Total allowed outstanding transactions: 1 + + Please refer to AMD 8131-HT/PCI-X Errata 26310 Rev 3.08 August 2004, + section 56, "133-MHz Mode Split Completion Data Corruption" for more + details on this bug and workarounds suggested by AMD. + + +SUPPORT +======= + + If you have problems with the software or hardware, please contact our + customer support team via email at support@chelsio.com or check our website + at http://www.chelsio.com + +=============================================================================== + + Chelsio Communications + 370 San Aleso Ave. 
+ Suite 100 + Sunnyvale, CA 94085 + http://www.chelsio.com + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License, version 2, as +published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + +THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED +WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF +MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. + + Copyright (c) 2003-2005 Chelsio Communications. All rights reserved. + +=============================================================================== diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig index 3a0a55b62aaf..8a7928f1d579 100644 --- a/drivers/net/Kconfig +++ b/drivers/net/Kconfig @@ -2080,6 +2080,25 @@ endmenu menu "Ethernet (10000 Mbit)" depends on NETDEVICES && !UML +config CHELSIO_T1 + tristate "Chelsio 10Gb Ethernet support" + depends on PCI + help + This driver supports Chelsio N110 and N210 models 10Gb Ethernet + cards. More information about adapter features and performance + tuning is in . + + For general information about Chelsio and our products, visit + our website at . + + For customer support, please visit our customer support page at + . + + Please send feedback to . + + To compile this driver as a module, choose M here: the module + will be called cxgb. 
+ config IXGB tristate "Intel(R) PRO/10GbE support" depends on PCI diff --git a/drivers/net/Makefile b/drivers/net/Makefile index 6202b10dbb4d..1992166ffba9 100644 --- a/drivers/net/Makefile +++ b/drivers/net/Makefile @@ -9,6 +9,7 @@ endif obj-$(CONFIG_E1000) += e1000/ obj-$(CONFIG_IBM_EMAC) += ibm_emac/ obj-$(CONFIG_IXGB) += ixgb/ +obj-$(CONFIG_CHELSIO_T1) += chelsio/ obj-$(CONFIG_BONDING) += bonding/ obj-$(CONFIG_GIANFAR) += gianfar_driver.o diff --git a/drivers/net/chelsio/Makefile b/drivers/net/chelsio/Makefile new file mode 100644 index 000000000000..ff8c11b3a4e1 --- /dev/null +++ b/drivers/net/chelsio/Makefile @@ -0,0 +1,12 @@ +# +# Chelsio 10Gb NIC driver for Linux. +# + +obj-$(CONFIG_CHELSIO_T1) += cxgb.o + +EXTRA_CFLAGS += -I$(TOPDIR)/drivers/net/chelsio $(DEBUG_FLAGS) + + +cxgb-objs := cxgb2.o espi.o tp.o pm3393.o sge.o subr.o mv88x201x.o + + diff --git a/drivers/net/chelsio/ch_ethtool.h b/drivers/net/chelsio/ch_ethtool.h new file mode 100644 index 000000000000..c523d24836b5 --- /dev/null +++ b/drivers/net/chelsio/ch_ethtool.h @@ -0,0 +1,102 @@ +/***************************************************************************** + * * + * File: ch_ethtool.h * + * $Revision: 1.5 $ * + * $Date: 2005/03/23 07:15:58 $ * + * Description: * + * part of the Chelsio 10Gb Ethernet Driver. * + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of the GNU General Public License, version 2, as * + * published by the Free Software Foundation. * + * * + * You should have received a copy of the GNU General Public License along * + * with this program; if not, write to the Free Software Foundation, Inc., * + * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. * + * * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED * + * WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF * + * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. 
* + * * + * http://www.chelsio.com * + * * + * Copyright (c) 2003 - 2005 Chelsio Communications, Inc. * + * All rights reserved. * + * * + * Maintainers: maintainers@chelsio.com * + * * + * Authors: Dimitrios Michailidis * + * Tina Yang * + * Felix Marti * + * Scott Bardone * + * Kurt Ottaway * + * Frank DiMambro * + * * + * History: * + * * + ****************************************************************************/ + +#ifndef __CHETHTOOL_LINUX_H__ +#define __CHETHTOOL_LINUX_H__ + +/* TCB size in 32-bit words */ +#define TCB_WORDS (TCB_SIZE / 4) + +enum { + ETHTOOL_SETREG, + ETHTOOL_GETREG, + ETHTOOL_SETTPI, + ETHTOOL_GETTPI, + ETHTOOL_DEVUP, + ETHTOOL_GETMTUTAB, + ETHTOOL_SETMTUTAB, + ETHTOOL_GETMTU, + ETHTOOL_SET_PM, + ETHTOOL_GET_PM, + ETHTOOL_GET_TCAM, + ETHTOOL_SET_TCAM, + ETHTOOL_GET_TCB, + ETHTOOL_READ_TCAM_WORD, +}; + +struct ethtool_reg { + uint32_t cmd; + uint32_t addr; + uint32_t val; +}; + +struct ethtool_mtus { + uint32_t cmd; + uint16_t mtus[NMTUS]; +}; + +struct ethtool_pm { + uint32_t cmd; + uint32_t tx_pg_sz; + uint32_t tx_num_pg; + uint32_t rx_pg_sz; + uint32_t rx_num_pg; + uint32_t pm_total; +}; + +struct ethtool_tcam { + uint32_t cmd; + uint32_t tcam_size; + uint32_t nservers; + uint32_t nroutes; +}; + +struct ethtool_tcb { + uint32_t cmd; + uint32_t tcb_index; + uint32_t tcb_data[TCB_WORDS]; +}; + +struct ethtool_tcam_word { + uint32_t cmd; + uint32_t addr; + uint32_t buf[3]; +}; + +#define SIOCCHETHTOOL SIOCDEVPRIVATE +#endif diff --git a/drivers/net/chelsio/common.h b/drivers/net/chelsio/common.h new file mode 100644 index 000000000000..017684ff48dc --- /dev/null +++ b/drivers/net/chelsio/common.h @@ -0,0 +1,269 @@ +/***************************************************************************** + * * + * File: common.h * + * $Revision: 1.5 $ * + * $Date: 2005/03/23 07:41:27 $ * + * Description: * + * part of the Chelsio 10Gb Ethernet Driver. 
* + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of the GNU General Public License, version 2, as * + * published by the Free Software Foundation. * + * * + * You should have received a copy of the GNU General Public License along * + * with this program; if not, write to the Free Software Foundation, Inc., * + * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. * + * * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED * + * WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF * + * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. * + * * + * http://www.chelsio.com * + * * + * Copyright (c) 2003 - 2005 Chelsio Communications, Inc. * + * All rights reserved. * + * * + * Maintainers: maintainers@chelsio.com * + * * + * Authors: Dimitrios Michailidis * + * Tina Yang * + * Felix Marti * + * Scott Bardone * + * Kurt Ottaway * + * Frank DiMambro * + * * + * History: * + * * + ****************************************************************************/ + +#ifndef CHELSIO_COMMON_H +#define CHELSIO_COMMON_H + +#define DIMOF(x) (sizeof(x)/sizeof(x[0])) + +#define NMTUS 8 +#define MAX_NPORTS 4 +#define TCB_SIZE 128 + +enum { + CHBT_BOARD_7500, + CHBT_BOARD_8000, + CHBT_BOARD_CHT101, + CHBT_BOARD_CHT110, + CHBT_BOARD_CHT210, + CHBT_BOARD_CHT204, + CHBT_BOARD_N110, + CHBT_BOARD_N210, + CHBT_BOARD_COUGAR, + CHBT_BOARD_6800, + CHBT_BOARD_SIMUL +}; + +enum { + CHBT_TERM_FPGA, + CHBT_TERM_T1, + CHBT_TERM_T2, + CHBT_TERM_T3 +}; + +enum { + CHBT_MAC_CHELSIO_A, + CHBT_MAC_IXF1010, + CHBT_MAC_PM3393, + CHBT_MAC_VSC7321, + CHBT_MAC_DUMMY +}; + +enum { + CHBT_PHY_88E1041, + CHBT_PHY_88E1111, + CHBT_PHY_88X2010, + CHBT_PHY_XPAK, + CHBT_PHY_MY3126, + CHBT_PHY_DUMMY +}; + +enum { + PAUSE_RX = 1, + PAUSE_TX = 2, + PAUSE_AUTONEG = 4 +}; + +/* Revisions of T1 chip */ +#define TERM_T1A 0 +#define TERM_T1B 1 +#define TERM_T2 3 + +struct tp_params { + unsigned int pm_size; + unsigned int 
cm_size; + unsigned int pm_rx_base; + unsigned int pm_tx_base; + unsigned int pm_rx_pg_size; + unsigned int pm_tx_pg_size; + unsigned int pm_rx_num_pgs; + unsigned int pm_tx_num_pgs; + unsigned int use_5tuple_mode; +}; + +struct sge_params { + unsigned int cmdQ_size[2]; + unsigned int freelQ_size[2]; + unsigned int large_buf_capacity; + unsigned int rx_coalesce_usecs; + unsigned int last_rx_coalesce_raw; + unsigned int default_rx_coalesce_usecs; + unsigned int sample_interval_usecs; + unsigned int coalesce_enable; + unsigned int polling; +}; + +struct mc5_params { + unsigned int mode; /* selects MC5 width */ + unsigned int nservers; /* size of server region */ + unsigned int nroutes; /* size of routing region */ +}; + +/* Default MC5 region sizes */ +#define DEFAULT_SERVER_REGION_LEN 256 +#define DEFAULT_RT_REGION_LEN 1024 + +struct pci_params { + unsigned short speed; + unsigned char width; + unsigned char is_pcix; +}; + +struct adapter_params { + struct sge_params sge; + struct mc5_params mc5; + struct tp_params tp; + struct pci_params pci; + + const struct board_info *brd_info; + + unsigned short mtus[NMTUS]; + unsigned int nports; /* # of ethernet ports */ + unsigned int stats_update_period; + unsigned short chip_revision; + unsigned char chip_version; + unsigned char is_asic; +}; + +struct pci_err_cnt { + unsigned int master_parity_err; + unsigned int sig_target_abort; + unsigned int rcv_target_abort; + unsigned int rcv_master_abort; + unsigned int sig_sys_err; + unsigned int det_parity_err; + unsigned int pio_parity_err; + unsigned int wf_parity_err; + unsigned int rf_parity_err; + unsigned int cf_parity_err; +}; + +struct link_config { + unsigned int supported; /* link capabilities */ + unsigned int advertising; /* advertised capabilities */ + unsigned short requested_speed; /* speed user has requested */ + unsigned short speed; /* actual link speed */ + unsigned char requested_duplex; /* duplex user has requested */ + unsigned char duplex; /* actual link 
duplex */ + unsigned char requested_fc; /* flow control user has requested */ + unsigned char fc; /* actual link flow control */ + unsigned char autoneg; /* autonegotiating? */ +}; + +#define SPEED_INVALID 0xffff +#define DUPLEX_INVALID 0xff + +struct mdio_ops; +struct gmac; +struct gphy; + +struct board_info { + unsigned char board; + unsigned char port_number; + unsigned long caps; + unsigned char chip_term; + unsigned char chip_mac; + unsigned char chip_phy; + unsigned int clock_core; + unsigned int clock_mc3; + unsigned int clock_mc4; + unsigned int espi_nports; + unsigned int clock_cspi; + unsigned int clock_elmer0; + unsigned char mdio_mdien; + unsigned char mdio_mdiinv; + unsigned char mdio_mdc; + unsigned char mdio_phybaseaddr; + struct gmac *gmac; + struct gphy *gphy; + struct mdio_ops *mdio_ops; + const char *desc; +}; + +#include "osdep.h" + +#ifndef PCI_VENDOR_ID_CHELSIO +#define PCI_VENDOR_ID_CHELSIO 0x1425 +#endif + +extern struct pci_device_id t1_pci_tbl[]; + +static inline int t1_is_asic(const adapter_t *adapter) +{ + return adapter->params.is_asic; +} + +static inline int adapter_matches_type(const adapter_t *adapter, + int version, int revision) +{ + return adapter->params.chip_version == version && + adapter->params.chip_revision == revision; +} + +#define t1_is_T1B(adap) adapter_matches_type(adap, CHBT_TERM_T1, TERM_T1B) +#define is_T2(adap) adapter_matches_type(adap, CHBT_TERM_T2, TERM_T2) + +/* Returns true if an adapter supports VLAN acceleration and TSO */ +static inline int vlan_tso_capable(const adapter_t *adapter) +{ + return !t1_is_T1B(adapter); +} + +#define for_each_port(adapter, iter) \ + for (iter = 0; iter < (adapter)->params.nports; ++iter) + +#define board_info(adapter) ((adapter)->params.brd_info) +#define is_10G(adapter) (board_info(adapter)->caps & SUPPORTED_10000baseT_Full) + +static inline unsigned int core_ticks_per_usec(const adapter_t *adap) +{ + return board_info(adap)->clock_core / 1000000; +} + +int 
t1_tpi_write(adapter_t *adapter, u32 addr, u32 value); +int t1_tpi_read(adapter_t *adapter, u32 addr, u32 *value); + +void t1_interrupts_enable(adapter_t *adapter); +void t1_interrupts_disable(adapter_t *adapter); +void t1_interrupts_clear(adapter_t *adapter); +int elmer0_ext_intr_handler(adapter_t *adapter); +int t1_slow_intr_handler(adapter_t *adapter); + +int t1_link_start(struct cphy *phy, struct cmac *mac, struct link_config *lc); +const struct board_info *t1_get_board_info(unsigned int board_id); +const struct board_info *t1_get_board_info_from_ids(unsigned int devid, + unsigned short ssid); +int t1_seeprom_read(adapter_t *adapter, u32 addr, u32 *data); +int t1_get_board_rev(adapter_t *adapter, const struct board_info *bi, + struct adapter_params *p); +int t1_init_hw_modules(adapter_t *adapter); +int t1_init_sw_modules(adapter_t *adapter, const struct board_info *bi); +void t1_free_sw_modules(adapter_t *adapter); +void t1_fatal_err(adapter_t *adapter); +#endif + diff --git a/drivers/net/chelsio/cphy.h b/drivers/net/chelsio/cphy.h new file mode 100644 index 000000000000..1bc2248264c0 --- /dev/null +++ b/drivers/net/chelsio/cphy.h @@ -0,0 +1,150 @@ +/***************************************************************************** + * * + * File: cphy.h * + * $Revision: 1.4 $ * + * $Date: 2005/03/23 07:41:27 $ * + * Description: * + * part of the Chelsio 10Gb Ethernet Driver. * + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of the GNU General Public License, version 2, as * + * published by the Free Software Foundation. * + * * + * You should have received a copy of the GNU General Public License along * + * with this program; if not, write to the Free Software Foundation, Inc., * + * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
* + * * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED * + * WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF * + * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. * + * * + * http://www.chelsio.com * + * * + * Copyright (c) 2003 - 2005 Chelsio Communications, Inc. * + * All rights reserved. * + * * + * Maintainers: maintainers@chelsio.com * + * * + * Authors: Dimitrios Michailidis * + * Tina Yang * + * Felix Marti * + * Scott Bardone * + * Kurt Ottaway * + * Frank DiMambro * + * * + * History: * + * * + ****************************************************************************/ + +#ifndef CHELSIO_CPHY_H +#define CHELSIO_CPHY_H + +#include "common.h" + +struct mdio_ops { + void (*init)(adapter_t *adapter, const struct board_info *bi); + int (*read)(adapter_t *adapter, int phy_addr, int mmd_addr, + int reg_addr, unsigned int *val); + int (*write)(adapter_t *adapter, int phy_addr, int mmd_addr, + int reg_addr, unsigned int val); +}; + +/* PHY interrupt types */ +enum { + cphy_cause_link_change = 0x1, + cphy_cause_error = 0x2 +}; + +struct cphy; + +/* PHY operations */ +struct cphy_ops { + void (*destroy)(struct cphy *); + int (*reset)(struct cphy *, int wait); + + int (*interrupt_enable)(struct cphy *); + int (*interrupt_disable)(struct cphy *); + int (*interrupt_clear)(struct cphy *); + int (*interrupt_handler)(struct cphy *); + + int (*autoneg_enable)(struct cphy *); + int (*autoneg_disable)(struct cphy *); + int (*autoneg_restart)(struct cphy *); + + int (*advertise)(struct cphy *phy, unsigned int advertise_map); + int (*set_loopback)(struct cphy *, int on); + int (*set_speed_duplex)(struct cphy *phy, int speed, int duplex); + int (*get_link_status)(struct cphy *phy, int *link_ok, int *speed, + int *duplex, int *fc); +}; + +/* A PHY instance */ +struct cphy { + int addr; /* PHY address */ + adapter_t *adapter; /* associated adapter */ + struct cphy_ops *ops; /* PHY operations */ + int (*mdio_read)(adapter_t 
*adapter, int phy_addr, int mmd_addr, + int reg_addr, unsigned int *val); + int (*mdio_write)(adapter_t *adapter, int phy_addr, int mmd_addr, + int reg_addr, unsigned int val); + struct cphy_instance *instance; +}; + +/* Convenience MDIO read/write wrappers */ +static inline int mdio_read(struct cphy *cphy, int mmd, int reg, + unsigned int *valp) +{ + return cphy->mdio_read(cphy->adapter, cphy->addr, mmd, reg, valp); +} + +static inline int mdio_write(struct cphy *cphy, int mmd, int reg, + unsigned int val) +{ + return cphy->mdio_write(cphy->adapter, cphy->addr, mmd, reg, val); +} + +static inline int simple_mdio_read(struct cphy *cphy, int reg, + unsigned int *valp) +{ + return mdio_read(cphy, 0, reg, valp); +} + +static inline int simple_mdio_write(struct cphy *cphy, int reg, + unsigned int val) +{ + return mdio_write(cphy, 0, reg, val); +} + +/* Convenience initializer */ +static inline void cphy_init(struct cphy *phy, adapter_t *adapter, + int phy_addr, struct cphy_ops *phy_ops, + struct mdio_ops *mdio_ops) +{ + phy->adapter = adapter; + phy->addr = phy_addr; + phy->ops = phy_ops; + if (mdio_ops) { + phy->mdio_read = mdio_ops->read; + phy->mdio_write = mdio_ops->write; + } +} + +/* Operations of the PHY-instance factory */ +struct gphy { + /* Construct a PHY instance with the given PHY address */ + struct cphy *(*create)(adapter_t *adapter, int phy_addr, + struct mdio_ops *mdio_ops); + + /* + * Reset the PHY chip. This resets the whole PHY chip, not individual + * ports. 
+ */ + int (*reset)(adapter_t *adapter); +}; + +extern struct gphy t1_my3126_ops; +extern struct gphy t1_mv88e1xxx_ops; +extern struct gphy t1_xpak_ops; +extern struct gphy t1_mv88x201x_ops; +extern struct gphy t1_dummy_phy_ops; +#endif diff --git a/drivers/net/chelsio/cpl5_cmd.h b/drivers/net/chelsio/cpl5_cmd.h new file mode 100644 index 000000000000..45e9248979f1 --- /dev/null +++ b/drivers/net/chelsio/cpl5_cmd.h @@ -0,0 +1,145 @@ +/***************************************************************************** + * * + * File: cpl5_cmd.h * + * $Revision: 1.4 $ * + * $Date: 2005/03/23 07:15:58 $ * + * Description: * + * part of the Chelsio 10Gb Ethernet Driver. * + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of the GNU General Public License, version 2, as * + * published by the Free Software Foundation. * + * * + * You should have received a copy of the GNU General Public License along * + * with this program; if not, write to the Free Software Foundation, Inc., * + * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. * + * * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED * + * WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF * + * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. * + * * + * http://www.chelsio.com * + * * + * Copyright (c) 2003 - 2005 Chelsio Communications, Inc. * + * All rights reserved. 
* + * * + * Maintainers: maintainers@chelsio.com * + * * + * Authors: Dimitrios Michailidis * + * Tina Yang * + * Felix Marti * + * Scott Bardone * + * Kurt Ottaway * + * Frank DiMambro * + * * + * History: * + * * + ****************************************************************************/ + +#ifndef _CPL5_CMD_H +#define _CPL5_CMD_H + +#include + +#if !defined(__LITTLE_ENDIAN_BITFIELD) && !defined(__BIG_ENDIAN_BITFIELD) +#error "Adjust your defines" +#endif + +enum CPL_opcode { + CPL_RX_PKT = 0xAD, + CPL_TX_PKT = 0xB2, + CPL_TX_PKT_LSO = 0xB6, +}; + +enum { /* TX_PKT_LSO ethernet types */ + CPL_ETH_II, + CPL_ETH_II_VLAN, + CPL_ETH_802_3, + CPL_ETH_802_3_VLAN +}; + +struct cpl_rx_data { + __u32 rsvd0; + __u32 len; + __u32 seq; + __u16 urg; + __u8 rsvd1; + __u8 status; +}; + +/* + * We want this header's alignment to be no more stringent than 2-byte aligned. + * All fields are u8 or u16 except for the length. However that field is not + * used so we break it into 2 16-bit parts to easily meet our alignment needs. 
+ */ +struct cpl_tx_pkt { + __u8 opcode; +#if defined(__LITTLE_ENDIAN_BITFIELD) + __u8 iff:4; + __u8 ip_csum_dis:1; + __u8 l4_csum_dis:1; + __u8 vlan_valid:1; + __u8 rsvd:1; +#else + __u8 rsvd:1; + __u8 vlan_valid:1; + __u8 l4_csum_dis:1; + __u8 ip_csum_dis:1; + __u8 iff:4; +#endif + __u16 vlan; + __u16 len_hi; + __u16 len_lo; +}; + +struct cpl_tx_pkt_lso { + __u8 opcode; +#if defined(__LITTLE_ENDIAN_BITFIELD) + __u8 iff:4; + __u8 ip_csum_dis:1; + __u8 l4_csum_dis:1; + __u8 vlan_valid:1; + __u8 rsvd:1; +#else + __u8 rsvd:1; + __u8 vlan_valid:1; + __u8 l4_csum_dis:1; + __u8 ip_csum_dis:1; + __u8 iff:4; +#endif + __u16 vlan; + __u32 len; + + __u32 rsvd2; + __u8 rsvd3; +#if defined(__LITTLE_ENDIAN_BITFIELD) + __u8 tcp_hdr_words:4; + __u8 ip_hdr_words:4; +#else + __u8 ip_hdr_words:4; + __u8 tcp_hdr_words:4; +#endif + __u16 eth_type_mss; +}; + +struct cpl_rx_pkt { + __u8 opcode; +#if defined(__LITTLE_ENDIAN_BITFIELD) + __u8 iff:4; + __u8 csum_valid:1; + __u8 bad_pkt:1; + __u8 vlan_valid:1; + __u8 rsvd:1; +#else + __u8 rsvd:1; + __u8 vlan_valid:1; + __u8 bad_pkt:1; + __u8 csum_valid:1; + __u8 iff:4; +#endif + __u16 csum; + __u16 vlan; + __u16 len; +}; + +#endif diff --git a/drivers/net/chelsio/cxgb2.c b/drivers/net/chelsio/cxgb2.c new file mode 100644 index 000000000000..48c4d5acfcd1 --- /dev/null +++ b/drivers/net/chelsio/cxgb2.c @@ -0,0 +1,1231 @@ +/***************************************************************************** + * * + * File: cxgb2.c * + * $Revision: 1.11 $ * + * $Date: 2005/03/23 07:41:27 $ * + * Description: * + * Chelsio 10Gb Ethernet Driver. * + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of the GNU General Public License, version 2, as * + * published by the Free Software Foundation. 
* + * * + * You should have received a copy of the GNU General Public License along * + * with this program; if not, write to the Free Software Foundation, Inc., * + * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. * + * * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED * + * WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF * + * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. * + * * + * http://www.chelsio.com * + * * + * Copyright (c) 2003 - 2005 Chelsio Communications, Inc. * + * All rights reserved. * + * * + * Maintainers: maintainers@chelsio.com * + * * + * Authors: Dimitrios Michailidis * + * Tina Yang * + * Felix Marti * + * Scott Bardone * + * Kurt Ottaway * + * Frank DiMambro * + * * + * History: * + * * + ****************************************************************************/ + +#include "common.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "ch_ethtool.h" +#include "cpl5_cmd.h" +#include "regs.h" +#include "gmac.h" +#include "cphy.h" +#include "sge.h" +#include "tp.h" +#include "espi.h" + +static inline void schedule_mac_stats_update(struct adapter *ap, int secs) +{ + schedule_delayed_work(&ap->stats_update_task, secs * HZ); +} + +static inline void cancel_mac_stats_update(struct adapter *ap) +{ + cancel_delayed_work(&ap->stats_update_task); +} + +#if BITS_PER_LONG == 64 && !defined(CONFIG_X86_64) +# define FMT64 "l" +#else +# define FMT64 "ll" +#endif + +# define DRV_TYPE "" +# define MODULE_DESC "Chelsio Network Driver" + +static char driver_name[] = DRV_NAME; +static char driver_string[] = "Chelsio " DRV_TYPE "Network Driver"; +static char driver_version[] = "2.1.0"; + +#define PCI_DMA_64BIT ~0ULL +#define PCI_DMA_32BIT 0xffffffffULL + +#define MAX_CMDQ_ENTRIES 16384 +#define MAX_CMDQ1_ENTRIES 1024 +#define MAX_RX_BUFFERS 16384 +#define MAX_RX_JUMBO_BUFFERS 16384 +#define 
MAX_TX_BUFFERS_HIGH 16384U +#define MAX_TX_BUFFERS_LOW 1536U +#define MIN_FL_ENTRIES 32 + +#define PORT_MASK ((1 << MAX_NPORTS) - 1) + +#define DFLT_MSG_ENABLE (NETIF_MSG_DRV | NETIF_MSG_PROBE | NETIF_MSG_LINK | \ + NETIF_MSG_TIMER | NETIF_MSG_IFDOWN | NETIF_MSG_IFUP |\ + NETIF_MSG_RX_ERR | NETIF_MSG_TX_ERR) + +/* + * The EEPROM is actually bigger but only the first few bytes are used so we + * only report those. + */ +#define EEPROM_SIZE 32 + +MODULE_DESCRIPTION(MODULE_DESC); +MODULE_AUTHOR("Chelsio Communications"); +MODULE_LICENSE("GPL"); +MODULE_DEVICE_TABLE(pci, t1_pci_tbl); + +static int dflt_msg_enable = DFLT_MSG_ENABLE; + +MODULE_PARM(dflt_msg_enable, "i"); +MODULE_PARM_DESC(dflt_msg_enable, "Chelsio T1 message enable bitmap"); + + +static const char pci_speed[][4] = { + "33", "66", "100", "133" +}; + +/* + * Setup MAC to receive the types of packets we want. + */ +static void t1_set_rxmode(struct net_device *dev) +{ + struct adapter *adapter = dev->priv; + struct cmac *mac = adapter->port[dev->if_port].mac; + struct t1_rx_mode rm; + + rm.dev = dev; + rm.idx = 0; + rm.list = dev->mc_list; + mac->ops->set_rx_mode(mac, &rm); +} + +static void link_report(struct port_info *p) +{ + if (!netif_carrier_ok(p->dev)) + printk(KERN_INFO "%s: link is down\n", p->dev->name); + else { + const char *s = "10 Mbps"; + + switch (p->link_config.speed) { + case SPEED_10000: s = "10 Gbps"; break; + case SPEED_1000: s = "1000 Mbps"; break; + case SPEED_100: s = "100 Mbps"; break; + } + + printk(KERN_INFO "%s: link is up at %s, %s duplex\n", + p->dev->name, s, + p->link_config.duplex == DUPLEX_FULL ? 
"full" : "half"); + } +} + +void t1_link_changed(struct adapter *adapter, int port_id, int link_stat, + int speed, int duplex, int pause) +{ + struct port_info *p = &adapter->port[port_id]; + + if (link_stat != netif_carrier_ok(p->dev)) { + if (link_stat) + netif_carrier_on(p->dev); + else + netif_carrier_off(p->dev); + link_report(p); + + } +} + +static void link_start(struct port_info *p) +{ + struct cmac *mac = p->mac; + + mac->ops->reset(mac); + if (mac->ops->macaddress_set) + mac->ops->macaddress_set(mac, p->dev->dev_addr); + t1_set_rxmode(p->dev); + t1_link_start(p->phy, mac, &p->link_config); + mac->ops->enable(mac, MAC_DIRECTION_RX | MAC_DIRECTION_TX); +} + +static void enable_hw_csum(struct adapter *adapter) +{ + if (adapter->flags & TSO_CAPABLE) + t1_tp_set_ip_checksum_offload(adapter->tp, 1); /* for TSO only */ + if (adapter->flags & UDP_CSUM_CAPABLE) + t1_tp_set_udp_checksum_offload(adapter->tp, 1); + t1_tp_set_tcp_checksum_offload(adapter->tp, 1); +} + +/* + * Things to do upon first use of a card. + * This must run with the rtnl lock held. + */ +static int cxgb_up(struct adapter *adapter) +{ + int err = 0; + + if (!(adapter->flags & FULL_INIT_DONE)) { + err = t1_init_hw_modules(adapter); + if (err) + goto out_err; + + enable_hw_csum(adapter); + adapter->flags |= FULL_INIT_DONE; + } + + t1_interrupts_clear(adapter); + + if ((err = request_irq(adapter->pdev->irq, &t1_interrupt, SA_SHIRQ, + adapter->name, adapter))) + goto out_err; + + t1_sge_start(adapter->sge); + t1_interrupts_enable(adapter); + + err = 0; + out_err: + return err; +} + +/* + * Release resources when all the ports have been stopped. 
+ */ +static void cxgb_down(struct adapter *adapter) +{ + t1_sge_stop(adapter->sge); + t1_interrupts_disable(adapter); + free_irq(adapter->pdev->irq, adapter); +} + +static int cxgb_open(struct net_device *dev) +{ + int err; + struct adapter *adapter = dev->priv; + int other_ports = adapter->open_device_map & PORT_MASK; + + if (!adapter->open_device_map && (err = cxgb_up(adapter)) < 0) + return err; + + __set_bit(dev->if_port, &adapter->open_device_map); + link_start(&adapter->port[dev->if_port]); + netif_start_queue(dev); + if (!other_ports && adapter->params.stats_update_period) + schedule_mac_stats_update(adapter, + adapter->params.stats_update_period); + return 0; +} + +static int cxgb_close(struct net_device *dev) +{ + struct adapter *adapter = dev->priv; + struct port_info *p = &adapter->port[dev->if_port]; + struct cmac *mac = p->mac; + + netif_stop_queue(dev); + mac->ops->disable(mac, MAC_DIRECTION_TX | MAC_DIRECTION_RX); + netif_carrier_off(dev); + + clear_bit(dev->if_port, &adapter->open_device_map); + if (adapter->params.stats_update_period && + !(adapter->open_device_map & PORT_MASK)) { + /* Stop statistics accumulation. 
*/ + smp_mb__after_clear_bit(); + spin_lock(&adapter->work_lock); /* sync with update task */ + spin_unlock(&adapter->work_lock); + cancel_mac_stats_update(adapter); + } + + if (!adapter->open_device_map) + cxgb_down(adapter); + return 0; +} + +static struct net_device_stats *t1_get_stats(struct net_device *dev) +{ + struct adapter *adapter = dev->priv; + struct port_info *p = &adapter->port[dev->if_port]; + struct net_device_stats *ns = &p->netstats; + const struct cmac_statistics *pstats; + + /* Do a full update of the MAC stats */ + pstats = p->mac->ops->statistics_update(p->mac, + MAC_STATS_UPDATE_FULL); + + ns->tx_packets = pstats->TxUnicastFramesOK + + pstats->TxMulticastFramesOK + pstats->TxBroadcastFramesOK; + + ns->rx_packets = pstats->RxUnicastFramesOK + + pstats->RxMulticastFramesOK + pstats->RxBroadcastFramesOK; + + ns->tx_bytes = pstats->TxOctetsOK; + ns->rx_bytes = pstats->RxOctetsOK; + + ns->tx_errors = pstats->TxLateCollisions + pstats->TxLengthErrors + + pstats->TxUnderrun + pstats->TxFramesAbortedDueToXSCollisions; + ns->rx_errors = pstats->RxDataErrors + pstats->RxJabberErrors + + pstats->RxFCSErrors + pstats->RxAlignErrors + + pstats->RxSequenceErrors + pstats->RxFrameTooLongErrors + + pstats->RxSymbolErrors + pstats->RxRuntErrors; + + ns->multicast = pstats->RxMulticastFramesOK; + ns->collisions = pstats->TxTotalCollisions; + + /* detailed rx_errors */ + ns->rx_length_errors = pstats->RxFrameTooLongErrors + + pstats->RxJabberErrors; + ns->rx_over_errors = 0; + ns->rx_crc_errors = pstats->RxFCSErrors; + ns->rx_frame_errors = pstats->RxAlignErrors; + ns->rx_fifo_errors = 0; + ns->rx_missed_errors = 0; + + /* detailed tx_errors */ + ns->tx_aborted_errors = pstats->TxFramesAbortedDueToXSCollisions; + ns->tx_carrier_errors = 0; + ns->tx_fifo_errors = pstats->TxUnderrun; + ns->tx_heartbeat_errors = 0; + ns->tx_window_errors = pstats->TxLateCollisions; + return ns; +} + +static u32 get_msglevel(struct net_device *dev) +{ + struct adapter *adapter = 
dev->priv; + + return adapter->msg_enable; +} + +static void set_msglevel(struct net_device *dev, u32 val) +{ + struct adapter *adapter = dev->priv; + + adapter->msg_enable = val; +} + +static char stats_strings[][ETH_GSTRING_LEN] = { + "TxOctetsOK", + "TxOctetsBad", + "TxUnicastFramesOK", + "TxMulticastFramesOK", + "TxBroadcastFramesOK", + "TxPauseFrames", + "TxFramesWithDeferredXmissions", + "TxLateCollisions", + "TxTotalCollisions", + "TxFramesAbortedDueToXSCollisions", + "TxUnderrun", + "TxLengthErrors", + "TxInternalMACXmitError", + "TxFramesWithExcessiveDeferral", + "TxFCSErrors", + + "RxOctetsOK", + "RxOctetsBad", + "RxUnicastFramesOK", + "RxMulticastFramesOK", + "RxBroadcastFramesOK", + "RxPauseFrames", + "RxFCSErrors", + "RxAlignErrors", + "RxSymbolErrors", + "RxDataErrors", + "RxSequenceErrors", + "RxRuntErrors", + "RxJabberErrors", + "RxInternalMACRcvError", + "RxInRangeLengthErrors", + "RxOutOfRangeLengthField", + "RxFrameTooLongErrors" +}; + +static void get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *info) +{ + struct adapter *adapter = dev->priv; + + strcpy(info->driver, driver_name); + strcpy(info->version, driver_version); + strcpy(info->fw_version, "N/A"); + strcpy(info->bus_info, pci_name(adapter->pdev)); +} + +static int get_stats_count(struct net_device *dev) +{ + return ARRAY_SIZE(stats_strings); +} + +static void get_strings(struct net_device *dev, u32 stringset, u8 *data) +{ + if (stringset == ETH_SS_STATS) + memcpy(data, stats_strings, sizeof(stats_strings)); +} + +static void get_stats(struct net_device *dev, struct ethtool_stats *stats, + u64 *data) +{ + struct adapter *adapter = dev->priv; + struct cmac *mac = adapter->port[dev->if_port].mac; + const struct cmac_statistics *s; + + s = mac->ops->statistics_update(mac, MAC_STATS_UPDATE_FULL); + + *data++ = s->TxOctetsOK; + *data++ = s->TxOctetsBad; + *data++ = s->TxUnicastFramesOK; + *data++ = s->TxMulticastFramesOK; + *data++ = s->TxBroadcastFramesOK; + *data++ = 
s->TxPauseFrames; + *data++ = s->TxFramesWithDeferredXmissions; + *data++ = s->TxLateCollisions; + *data++ = s->TxTotalCollisions; + *data++ = s->TxFramesAbortedDueToXSCollisions; + *data++ = s->TxUnderrun; + *data++ = s->TxLengthErrors; + *data++ = s->TxInternalMACXmitError; + *data++ = s->TxFramesWithExcessiveDeferral; + *data++ = s->TxFCSErrors; + + *data++ = s->RxOctetsOK; + *data++ = s->RxOctetsBad; + *data++ = s->RxUnicastFramesOK; + *data++ = s->RxMulticastFramesOK; + *data++ = s->RxBroadcastFramesOK; + *data++ = s->RxPauseFrames; + *data++ = s->RxFCSErrors; + *data++ = s->RxAlignErrors; + *data++ = s->RxSymbolErrors; + *data++ = s->RxDataErrors; + *data++ = s->RxSequenceErrors; + *data++ = s->RxRuntErrors; + *data++ = s->RxJabberErrors; + *data++ = s->RxInternalMACRcvError; + *data++ = s->RxInRangeLengthErrors; + *data++ = s->RxOutOfRangeLengthField; + *data++ = s->RxFrameTooLongErrors; +} + +static int get_settings(struct net_device *dev, struct ethtool_cmd *cmd) +{ + struct adapter *adapter = dev->priv; + struct port_info *p = &adapter->port[dev->if_port]; + + cmd->supported = p->link_config.supported; + cmd->advertising = p->link_config.advertising; + + if (netif_carrier_ok(dev)) { + cmd->speed = p->link_config.speed; + cmd->duplex = p->link_config.duplex; + } else { + cmd->speed = -1; + cmd->duplex = -1; + } + + cmd->port = (cmd->supported & SUPPORTED_TP) ? 
PORT_TP : PORT_FIBRE; + cmd->phy_address = p->phy->addr; + cmd->transceiver = XCVR_EXTERNAL; + cmd->autoneg = p->link_config.autoneg; + cmd->maxtxpkt = 0; + cmd->maxrxpkt = 0; + return 0; +} + +static int speed_duplex_to_caps(int speed, int duplex) +{ + int cap = 0; + + switch (speed) { + case SPEED_10: + if (duplex == DUPLEX_FULL) + cap = SUPPORTED_10baseT_Full; + else + cap = SUPPORTED_10baseT_Half; + break; + case SPEED_100: + if (duplex == DUPLEX_FULL) + cap = SUPPORTED_100baseT_Full; + else + cap = SUPPORTED_100baseT_Half; + break; + case SPEED_1000: + if (duplex == DUPLEX_FULL) + cap = SUPPORTED_1000baseT_Full; + else + cap = SUPPORTED_1000baseT_Half; + break; + case SPEED_10000: + if (duplex == DUPLEX_FULL) + cap = SUPPORTED_10000baseT_Full; + } + return cap; +} + +#define ADVERTISED_MASK (ADVERTISED_10baseT_Half | ADVERTISED_10baseT_Full | \ + ADVERTISED_100baseT_Half | ADVERTISED_100baseT_Full | \ + ADVERTISED_1000baseT_Half | ADVERTISED_1000baseT_Full | \ + ADVERTISED_10000baseT_Full) + +static int set_settings(struct net_device *dev, struct ethtool_cmd *cmd) +{ + struct adapter *adapter = dev->priv; + struct port_info *p = &adapter->port[dev->if_port]; + struct link_config *lc = &p->link_config; + + if (!(lc->supported & SUPPORTED_Autoneg)) + return -EOPNOTSUPP; /* can't change speed/duplex */ + + if (cmd->autoneg == AUTONEG_DISABLE) { + int cap = speed_duplex_to_caps(cmd->speed, cmd->duplex); + + if (!(lc->supported & cap) || cmd->speed == SPEED_1000) + return -EINVAL; + lc->requested_speed = cmd->speed; + lc->requested_duplex = cmd->duplex; + lc->advertising = 0; + } else { + cmd->advertising &= ADVERTISED_MASK; + if (cmd->advertising & (cmd->advertising - 1)) + cmd->advertising = lc->supported; + cmd->advertising &= lc->supported; + if (!cmd->advertising) + return -EINVAL; + lc->requested_speed = SPEED_INVALID; + lc->requested_duplex = DUPLEX_INVALID; + lc->advertising = cmd->advertising | ADVERTISED_Autoneg; + } + lc->autoneg = cmd->autoneg; + if 
(netif_running(dev)) + t1_link_start(p->phy, p->mac, lc); + return 0; +} + +static void get_pauseparam(struct net_device *dev, + struct ethtool_pauseparam *epause) +{ + struct adapter *adapter = dev->priv; + struct port_info *p = &adapter->port[dev->if_port]; + + epause->autoneg = (p->link_config.requested_fc & PAUSE_AUTONEG) != 0; + epause->rx_pause = (p->link_config.fc & PAUSE_RX) != 0; + epause->tx_pause = (p->link_config.fc & PAUSE_TX) != 0; +} + +static int set_pauseparam(struct net_device *dev, + struct ethtool_pauseparam *epause) +{ + struct adapter *adapter = dev->priv; + struct port_info *p = &adapter->port[dev->if_port]; + struct link_config *lc = &p->link_config; + + if (epause->autoneg == AUTONEG_DISABLE) + lc->requested_fc = 0; + else if (lc->supported & SUPPORTED_Autoneg) + lc->requested_fc = PAUSE_AUTONEG; + else + return -EINVAL; + + if (epause->rx_pause) + lc->requested_fc |= PAUSE_RX; + if (epause->tx_pause) + lc->requested_fc |= PAUSE_TX; + if (lc->autoneg == AUTONEG_ENABLE) { + if (netif_running(dev)) + t1_link_start(p->phy, p->mac, lc); + } else { + lc->fc = lc->requested_fc & (PAUSE_RX | PAUSE_TX); + if (netif_running(dev)) + p->mac->ops->set_speed_duplex_fc(p->mac, -1, -1, + lc->fc); + } + return 0; +} + +static u32 get_rx_csum(struct net_device *dev) +{ + struct adapter *adapter = dev->priv; + + return (adapter->flags & RX_CSUM_ENABLED) != 0; +} + +static int set_rx_csum(struct net_device *dev, u32 data) +{ + struct adapter *adapter = dev->priv; + + if (data) + adapter->flags |= RX_CSUM_ENABLED; + else + adapter->flags &= ~RX_CSUM_ENABLED; + return 0; +} + +static int set_tso(struct net_device *dev, u32 value) +{ + struct adapter *adapter = dev->priv; + + if (!(adapter->flags & TSO_CAPABLE)) + return value ? -EOPNOTSUPP : 0; + return ethtool_op_set_tso(dev, value); +} + +static void get_sge_param(struct net_device *dev, struct ethtool_ringparam *e) +{ + struct adapter *adapter = dev->priv; + int jumbo_fl = t1_is_T1B(adapter) ? 
1 : 0; + + e->rx_max_pending = MAX_RX_BUFFERS; + e->rx_mini_max_pending = 0; + e->rx_jumbo_max_pending = MAX_RX_JUMBO_BUFFERS; + e->tx_max_pending = MAX_CMDQ_ENTRIES; + + e->rx_pending = adapter->params.sge.freelQ_size[!jumbo_fl]; + e->rx_mini_pending = 0; + e->rx_jumbo_pending = adapter->params.sge.freelQ_size[jumbo_fl]; + e->tx_pending = adapter->params.sge.cmdQ_size[0]; +} + +static int set_sge_param(struct net_device *dev, struct ethtool_ringparam *e) +{ + struct adapter *adapter = dev->priv; + int jumbo_fl = t1_is_T1B(adapter) ? 1 : 0; + + if (e->rx_pending > MAX_RX_BUFFERS || e->rx_mini_pending || + e->rx_jumbo_pending > MAX_RX_JUMBO_BUFFERS || + e->tx_pending > MAX_CMDQ_ENTRIES || + e->rx_pending < MIN_FL_ENTRIES || + e->rx_jumbo_pending < MIN_FL_ENTRIES || + e->tx_pending < (adapter->params.nports + 1) * (MAX_SKB_FRAGS + 1)) + return -EINVAL; + + if (adapter->flags & FULL_INIT_DONE) + return -EBUSY; + + adapter->params.sge.freelQ_size[!jumbo_fl] = e->rx_pending; + adapter->params.sge.freelQ_size[jumbo_fl] = e->rx_jumbo_pending; + adapter->params.sge.cmdQ_size[0] = e->tx_pending; + adapter->params.sge.cmdQ_size[1] = e->tx_pending > MAX_CMDQ1_ENTRIES ? 
+ MAX_CMDQ1_ENTRIES : e->tx_pending; + return 0; +} + +static int set_coalesce(struct net_device *dev, struct ethtool_coalesce *c) +{ + struct adapter *adapter = dev->priv; + + unsigned int sge_coalesce_usecs = 0; + + sge_coalesce_usecs = adapter->params.sge.last_rx_coalesce_raw; + sge_coalesce_usecs /= board_info(adapter)->clock_core / 1000000; + if ( (adapter->params.sge.coalesce_enable && !c->use_adaptive_rx_coalesce) && + (c->rx_coalesce_usecs == sge_coalesce_usecs) ) { + adapter->params.sge.rx_coalesce_usecs = + adapter->params.sge.default_rx_coalesce_usecs; + } else { + adapter->params.sge.rx_coalesce_usecs = c->rx_coalesce_usecs; + } + + adapter->params.sge.last_rx_coalesce_raw = adapter->params.sge.rx_coalesce_usecs; + adapter->params.sge.last_rx_coalesce_raw *= (board_info(adapter)->clock_core / 1000000); + adapter->params.sge.sample_interval_usecs = c->rate_sample_interval; + adapter->params.sge.coalesce_enable = c->use_adaptive_rx_coalesce; + t1_sge_set_coalesce_params(adapter->sge, &adapter->params.sge); + return 0; +} + +static int get_coalesce(struct net_device *dev, struct ethtool_coalesce *c) +{ + struct adapter *adapter = dev->priv; + + if (adapter->params.sge.coalesce_enable) { /* Adaptive algorithm on */ + c->rx_coalesce_usecs = adapter->params.sge.last_rx_coalesce_raw; + c->rx_coalesce_usecs /= board_info(adapter)->clock_core / 1000000; + } else { + c->rx_coalesce_usecs = adapter->params.sge.rx_coalesce_usecs; + } + c->rate_sample_interval = adapter->params.sge.sample_interval_usecs; + c->use_adaptive_rx_coalesce = adapter->params.sge.coalesce_enable; + return 0; +} + +static int get_eeprom_len(struct net_device *dev) +{ + struct adapter *adapter = dev->priv; + + return t1_is_asic(adapter) ? 
EEPROM_SIZE : 0; +} + +#define EEPROM_MAGIC(ap) \ + (PCI_VENDOR_ID_CHELSIO | ((ap)->params.chip_version << 16)) + +static int get_eeprom(struct net_device *dev, struct ethtool_eeprom *e, + u8 *data) +{ + int i; + u8 buf[EEPROM_SIZE] __attribute__((aligned(4))); + struct adapter *adapter = dev->priv; + + e->magic = EEPROM_MAGIC(adapter); + for (i = e->offset & ~3; i < e->offset + e->len; i += sizeof(u32)) + t1_seeprom_read(adapter, i, (u32 *)&buf[i]); + memcpy(data, buf + e->offset, e->len); + return 0; +} + +static struct ethtool_ops t1_ethtool_ops = { + .get_settings = get_settings, + .set_settings = set_settings, + .get_drvinfo = get_drvinfo, + .get_msglevel = get_msglevel, + .set_msglevel = set_msglevel, + .get_ringparam = get_sge_param, + .set_ringparam = set_sge_param, + .get_coalesce = get_coalesce, + .set_coalesce = set_coalesce, + .get_eeprom_len = get_eeprom_len, + .get_eeprom = get_eeprom, + .get_pauseparam = get_pauseparam, + .set_pauseparam = set_pauseparam, + .get_rx_csum = get_rx_csum, + .set_rx_csum = set_rx_csum, + .get_tx_csum = ethtool_op_get_tx_csum, + .set_tx_csum = ethtool_op_set_tx_csum, + .get_sg = ethtool_op_get_sg, + .set_sg = ethtool_op_set_sg, + .get_link = ethtool_op_get_link, + .get_strings = get_strings, + .get_stats_count = get_stats_count, + .get_ethtool_stats = get_stats, + .get_tso = ethtool_op_get_tso, + .set_tso = set_tso, +}; + +static int ethtool_ioctl(struct net_device *dev, void *useraddr) +{ + u32 cmd; + struct adapter *adapter = dev->priv; + + if (copy_from_user(&cmd, useraddr, sizeof(cmd))) + return -EFAULT; + + switch (cmd) { + case ETHTOOL_SETREG: { + struct ethtool_reg edata; + + if (!capable(CAP_NET_ADMIN)) + return -EPERM; + if (copy_from_user(&edata, useraddr, sizeof(edata))) + return -EFAULT; + if ((edata.addr & 3) != 0 || edata.addr >= adapter->mmio_len) + return -EINVAL; + if (edata.addr == A_ESPI_MISC_CONTROL) + t1_espi_set_misc_ctrl(adapter, edata.val); + else { + if (edata.addr == 0x950) + 
t1_sge_set_ptimeout(adapter, edata.val); + else + writel(edata.val, adapter->regs + edata.addr); + } + break; + } + case ETHTOOL_GETREG: { + struct ethtool_reg edata; + + if (copy_from_user(&edata, useraddr, sizeof(edata))) + return -EFAULT; + if ((edata.addr & 3) != 0 || edata.addr >= adapter->mmio_len) + return -EINVAL; + if (edata.addr >= 0x900 && edata.addr <= 0x93c) + edata.val = t1_espi_get_mon(adapter, edata.addr, 1); + else { + if (edata.addr == 0x950) + edata.val = t1_sge_get_ptimeout(adapter); + else + edata.val = readl(adapter->regs + edata.addr); + } + if (copy_to_user(useraddr, &edata, sizeof(edata))) + return -EFAULT; + break; + } + case ETHTOOL_SETTPI: { + struct ethtool_reg edata; + + if (!capable(CAP_NET_ADMIN)) + return -EPERM; + if (copy_from_user(&edata, useraddr, sizeof(edata))) + return -EFAULT; + if ((edata.addr & 3) != 0) + return -EINVAL; + t1_tpi_write(adapter, edata.addr, edata.val); + break; + } + case ETHTOOL_GETTPI: { + struct ethtool_reg edata; + + if (copy_from_user(&edata, useraddr, sizeof(edata))) + return -EFAULT; + if ((edata.addr & 3) != 0) + return -EINVAL; + t1_tpi_read(adapter, edata.addr, &edata.val); + if (copy_to_user(useraddr, &edata, sizeof(edata))) + return -EFAULT; + break; + } + default: + return -EOPNOTSUPP; + } + return 0; +} + +static int t1_ioctl(struct net_device *dev, struct ifreq *req, int cmd) +{ + struct adapter *adapter = dev->priv; + struct mii_ioctl_data *data = (struct mii_ioctl_data *)&req->ifr_data; + + switch (cmd) { + case SIOCGMIIPHY: + data->phy_id = adapter->port[dev->if_port].phy->addr; + /* FALLTHRU */ + case SIOCGMIIREG: { + struct cphy *phy = adapter->port[dev->if_port].phy; + u32 val; + + if (!phy->mdio_read) return -EOPNOTSUPP; + phy->mdio_read(adapter, data->phy_id, 0, data->reg_num & 0x1f, + &val); + data->val_out = val; + break; + } + case SIOCSMIIREG: { + struct cphy *phy = adapter->port[dev->if_port].phy; + + if (!capable(CAP_NET_ADMIN)) return -EPERM; + if (!phy->mdio_write) return 
-EOPNOTSUPP; + phy->mdio_write(adapter, data->phy_id, 0, data->reg_num & 0x1f, + data->val_in); + break; + } + + case SIOCCHETHTOOL: + return ethtool_ioctl(dev, (void *)req->ifr_data); + default: + return -EOPNOTSUPP; + } + return 0; +} + +static int t1_change_mtu(struct net_device *dev, int new_mtu) +{ + int ret; + struct adapter *adapter = dev->priv; + struct cmac *mac = adapter->port[dev->if_port].mac; + + if (!mac->ops->set_mtu) + return -EOPNOTSUPP; + if (new_mtu < 68) + return -EINVAL; + if ((ret = mac->ops->set_mtu(mac, new_mtu))) + return ret; + dev->mtu = new_mtu; + return 0; +} + +static int t1_set_mac_addr(struct net_device *dev, void *p) +{ + struct adapter *adapter = dev->priv; + struct cmac *mac = adapter->port[dev->if_port].mac; + struct sockaddr *addr = p; + + if (!mac->ops->macaddress_set) + return -EOPNOTSUPP; + + memcpy(dev->dev_addr, addr->sa_data, dev->addr_len); + mac->ops->macaddress_set(mac, dev->dev_addr); + return 0; +} + +#if defined(CONFIG_VLAN_8021Q) || defined(CONFIG_VLAN_8021Q_MODULE) +static void vlan_rx_register(struct net_device *dev, + struct vlan_group *grp) +{ + struct adapter *adapter = dev->priv; + + spin_lock_irq(&adapter->async_lock); + adapter->vlan_grp = grp; + t1_set_vlan_accel(adapter, grp != NULL); + spin_unlock_irq(&adapter->async_lock); +} + +static void vlan_rx_kill_vid(struct net_device *dev, unsigned short vid) +{ + struct adapter *adapter = dev->priv; + + spin_lock_irq(&adapter->async_lock); + if (adapter->vlan_grp) + adapter->vlan_grp->vlan_devices[vid] = NULL; + spin_unlock_irq(&adapter->async_lock); +} +#endif + +#ifdef CONFIG_NET_POLL_CONTROLLER +static void t1_netpoll(struct net_device *dev) +{ + struct adapter *adapter = dev->priv; + + t1_interrupt(adapter->pdev->irq, adapter, NULL); +} +#endif + +/* + * Periodic accumulation of MAC statistics. This is used only if the MAC + * does not have any other way to prevent stats counter overflow. 
+ */ +static void mac_stats_task(void *data) +{ + int i; + struct adapter *adapter = data; + + for_each_port(adapter, i) { + struct port_info *p = &adapter->port[i]; + + if (netif_running(p->dev)) + p->mac->ops->statistics_update(p->mac, + MAC_STATS_UPDATE_FAST); + } + + /* Schedule the next statistics update if any port is active. */ + spin_lock(&adapter->work_lock); + if (adapter->open_device_map & PORT_MASK) + schedule_mac_stats_update(adapter, + adapter->params.stats_update_period); + spin_unlock(&adapter->work_lock); +} + +/* + * Processes elmer0 external interrupts in process context. + */ +static void ext_intr_task(void *data) +{ + u32 enable; + struct adapter *adapter = data; + + elmer0_ext_intr_handler(adapter); + + /* Now reenable external interrupts */ + t1_write_reg_4(adapter, A_PL_CAUSE, F_PL_INTR_EXT); + enable = t1_read_reg_4(adapter, A_PL_ENABLE); + t1_write_reg_4(adapter, A_PL_ENABLE, enable | F_PL_INTR_EXT); + adapter->slow_intr_mask |= F_PL_INTR_EXT; +} + +/* + * Interrupt-context handler for elmer0 external interrupts. + */ +void t1_elmer0_ext_intr(struct adapter *adapter) +{ + u32 enable = t1_read_reg_4(adapter, A_PL_ENABLE); + + /* + * Schedule a task to handle external interrupts as we require + * a process context. We disable EXT interrupts in the interim + * and let the task reenable them when it's done. 
+ */ + adapter->slow_intr_mask &= ~F_PL_INTR_EXT; + t1_write_reg_4(adapter, A_PL_ENABLE, enable & ~F_PL_INTR_EXT); + schedule_work(&adapter->ext_intr_handler_task); +} + +void t1_fatal_err(struct adapter *adapter) +{ + if (adapter->flags & FULL_INIT_DONE) { + t1_sge_stop(adapter->sge); + t1_interrupts_disable(adapter); + } + CH_ALERT("%s: encountered fatal error, operation suspended\n", + adapter->name); +} + + +static int __devinit init_one(struct pci_dev *pdev, + const struct pci_device_id *ent) +{ + static int version_printed; + + int i, err, pci_using_dac = 0; + unsigned long mmio_start, mmio_len; + const struct board_info *bi; + struct adapter *adapter = NULL; + struct port_info *pi; + + if (!version_printed) { + printk(KERN_INFO "%s - version %s\n", driver_string, + driver_version); + ++version_printed; + } + + err = pci_enable_device(pdev); + if (err) + return err; + + if (!(pci_resource_flags(pdev, 0) & IORESOURCE_MEM)) { + CH_ERR("%s: cannot find PCI device memory base address\n", + pci_name(pdev)); + err = -ENODEV; + goto out_disable_pdev; + } + + if (!pci_set_dma_mask(pdev, PCI_DMA_64BIT)) { + pci_using_dac = 1; + if (pci_set_consistent_dma_mask(pdev, PCI_DMA_64BIT)) { + CH_ERR("%s: unable to obtain 64-bit DMA for" + "consistent allocations\n", pci_name(pdev)); + err = -ENODEV; + goto out_disable_pdev; + } + } else if ((err = pci_set_dma_mask(pdev, PCI_DMA_32BIT)) != 0) { + CH_ERR("%s: no usable DMA configuration\n", pci_name(pdev)); + goto out_disable_pdev; + } + + err = pci_request_regions(pdev, driver_name); + if (err) { + CH_ERR("%s: cannot obtain PCI resources\n", pci_name(pdev)); + goto out_disable_pdev; + } + + pci_set_master(pdev); + + mmio_start = pci_resource_start(pdev, 0); + mmio_len = pci_resource_len(pdev, 0); + bi = t1_get_board_info(ent->driver_data); + + for (i = 0; i < bi->port_number; ++i) { + struct net_device *netdev; + + netdev = alloc_etherdev(adapter ? 
0 : sizeof(*adapter)); + if (!netdev) { + err = -ENOMEM; + goto out_free_dev; + } + + SET_MODULE_OWNER(netdev); + SET_NETDEV_DEV(netdev, &pdev->dev); + + if (!adapter) { + adapter = netdev->priv; + adapter->pdev = pdev; + adapter->port[0].dev = netdev; /* so we don't leak it */ + + adapter->regs = ioremap(mmio_start, mmio_len); + if (!adapter->regs) { + CH_ERR("%s: cannot map device registers\n", + pci_name(pdev)); + err = -ENOMEM; + goto out_free_dev; + } + + if (t1_get_board_rev(adapter, bi, &adapter->params)) { + err = -ENODEV; /* Can't handle this chip rev */ + goto out_free_dev; + } + + adapter->name = pci_name(pdev); + adapter->msg_enable = dflt_msg_enable; + adapter->mmio_len = mmio_len; + + init_MUTEX(&adapter->mib_mutex); + spin_lock_init(&adapter->tpi_lock); + spin_lock_init(&adapter->work_lock); + spin_lock_init(&adapter->async_lock); + + INIT_WORK(&adapter->ext_intr_handler_task, + ext_intr_task, adapter); + INIT_WORK(&adapter->stats_update_task, mac_stats_task, + adapter); + + pci_set_drvdata(pdev, netdev); + + } + + pi = &adapter->port[i]; + pi->dev = netdev; + netif_carrier_off(netdev); + netdev->irq = pdev->irq; + netdev->if_port = i; + netdev->mem_start = mmio_start; + netdev->mem_end = mmio_start + mmio_len - 1; + netdev->priv = adapter; + netdev->features |= NETIF_F_SG | NETIF_F_IP_CSUM; + adapter->flags |= RX_CSUM_ENABLED | TCP_CSUM_CAPABLE; + if (pci_using_dac) + netdev->features |= NETIF_F_HIGHDMA; + if (vlan_tso_capable(adapter)) { + adapter->flags |= UDP_CSUM_CAPABLE; +#if defined(CONFIG_VLAN_8021Q) || defined(CONFIG_VLAN_8021Q_MODULE) + adapter->flags |= VLAN_ACCEL_CAPABLE; + netdev->features |= + NETIF_F_HW_VLAN_TX | NETIF_F_HW_VLAN_RX; + netdev->vlan_rx_register = vlan_rx_register; + netdev->vlan_rx_kill_vid = vlan_rx_kill_vid; +#endif + adapter->flags |= TSO_CAPABLE; + netdev->features |= NETIF_F_TSO; + } + + netdev->open = cxgb_open; + netdev->stop = cxgb_close; + netdev->hard_start_xmit = t1_start_xmit; + netdev->hard_header_len += 
(adapter->flags & TSO_CAPABLE) ? + sizeof(struct cpl_tx_pkt_lso) : + sizeof(struct cpl_tx_pkt); + netdev->get_stats = t1_get_stats; + netdev->set_multicast_list = t1_set_rxmode; + netdev->do_ioctl = t1_ioctl; + netdev->change_mtu = t1_change_mtu; + netdev->set_mac_address = t1_set_mac_addr; +#ifdef CONFIG_NET_POLL_CONTROLLER + netdev->poll_controller = t1_netpoll; +#endif + netdev->weight = 64; + + SET_ETHTOOL_OPS(netdev, &t1_ethtool_ops); + } + + if (t1_init_sw_modules(adapter, bi) < 0) { + err = -ENODEV; + goto out_free_dev; + } + + /* + * The card is now ready to go. If any errors occur during device + * registration we do not fail the whole card but rather proceed only + * with the ports we manage to register successfully. However we must + * register at least one net device. + */ + for (i = 0; i < bi->port_number; ++i) { + err = register_netdev(adapter->port[i].dev); + if (err) + CH_WARN("%s: cannot register net device %s, skipping\n", + pci_name(pdev), adapter->port[i].dev->name); + else { + /* + * Change the name we use for messages to the name of + * the first successfully registered interface. + */ + if (!adapter->registered_device_map) + adapter->name = adapter->port[i].dev->name; + + __set_bit(i, &adapter->registered_device_map); + } + } + if (!adapter->registered_device_map) { + CH_ERR("%s: could not register any net devices\n", + pci_name(pdev)); + goto out_release_adapter_res; + } + + printk(KERN_INFO "%s: %s (rev %d), %s %dMHz/%d-bit\n", adapter->name, + bi->desc, adapter->params.chip_revision, + adapter->params.pci.is_pcix ? 
"PCIX" : "PCI", + adapter->params.pci.speed, adapter->params.pci.width); + return 0; + + out_release_adapter_res: + t1_free_sw_modules(adapter); + out_free_dev: + if (adapter) { + if (adapter->regs) + iounmap(adapter->regs); + for (i = bi->port_number - 1; i >= 0; --i) + if (adapter->port[i].dev) + free_netdev(adapter->port[i].dev); + } + pci_release_regions(pdev); + out_disable_pdev: + pci_disable_device(pdev); + pci_set_drvdata(pdev, NULL); + return err; +} + +static inline void t1_sw_reset(struct pci_dev *pdev) +{ + pci_write_config_dword(pdev, A_PCICFG_PM_CSR, 3); + pci_write_config_dword(pdev, A_PCICFG_PM_CSR, 0); +} + +static void __devexit remove_one(struct pci_dev *pdev) +{ + struct net_device *dev = pci_get_drvdata(pdev); + + if (dev) { + int i; + struct adapter *adapter = dev->priv; + + for_each_port(adapter, i) + if (test_bit(i, &adapter->registered_device_map)) + unregister_netdev(adapter->port[i].dev); + + t1_free_sw_modules(adapter); + iounmap(adapter->regs); + while (--i >= 0) + if (adapter->port[i].dev) + free_netdev(adapter->port[i].dev); + pci_release_regions(pdev); + pci_disable_device(pdev); + pci_set_drvdata(pdev, NULL); + t1_sw_reset(pdev); + } +} + +static struct pci_driver driver = { + .name = driver_name, + .id_table = t1_pci_tbl, + .probe = init_one, + .remove = __devexit_p(remove_one), +}; + +static int __init t1_init_module(void) +{ + return pci_module_init(&driver); +} + +static void __exit t1_cleanup_module(void) +{ + pci_unregister_driver(&driver); +} + +module_init(t1_init_module); +module_exit(t1_cleanup_module); + diff --git a/drivers/net/chelsio/cxgb2.h b/drivers/net/chelsio/cxgb2.h new file mode 100644 index 000000000000..6ac326afcf01 --- /dev/null +++ b/drivers/net/chelsio/cxgb2.h @@ -0,0 +1,122 @@ +/***************************************************************************** + * * + * File: cxgb2.h * + * $Revision: 1.8 $ * + * $Date: 2005/03/23 07:41:27 $ * + * Description: * + * part of the Chelsio 10Gb Ethernet Driver. 
* + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of the GNU General Public License, version 2, as * + * published by the Free Software Foundation. * + * * + * You should have received a copy of the GNU General Public License along * + * with this program; if not, write to the Free Software Foundation, Inc., * + * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. * + * * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED * + * WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF * + * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. * + * * + * http://www.chelsio.com * + * * + * Copyright (c) 2003 - 2005 Chelsio Communications, Inc. * + * All rights reserved. * + * * + * Maintainers: maintainers@chelsio.com * + * * + * Authors: Dimitrios Michailidis * + * Tina Yang * + * Felix Marti * + * Scott Bardone * + * Kurt Ottaway * + * Frank DiMambro * + * * + * History: * + * * + ****************************************************************************/
+
+#ifndef __CXGB_LINUX_H__
+#define __CXGB_LINUX_H__
+
+#include	/* NOTE(review): the header names of these five includes are missing in this copy of the patch */
+#include
+#include
+#include
+#include
+
+/* This belongs in if_ether.h */
+#define ETH_P_CPL5 0xf
+
+struct cmac;
+struct cphy;
+/* Per-port state: the port's net device plus its MAC and PHY objects. */
+struct port_info {
+	struct net_device *dev;
+	struct cmac *mac;
+	struct cphy *phy;
+	struct link_config link_config;
+	struct net_device_stats netstats;
+};
+
+struct cxgbdev;
+struct t1_sge;
+struct pemc3;
+struct pemc4;
+struct pemc5;
+struct peulp;
+struct petp;
+struct pecspi;
+struct peespi;
+struct work_struct;
+struct vlan_group;
+
+enum {                                           /* adapter flags */
+	FULL_INIT_DONE     = 0x1,	/* tested by t1_fatal_err() before stopping the SGE */
+	USING_MSI          = 0x2,
+	TSO_CAPABLE        = 0x4,	/* set in init_one() if vlan_tso_capable() */
+	TCP_CSUM_CAPABLE   = 0x8,	/* set for every port in init_one() */
+	UDP_CSUM_CAPABLE   = 0x10,	/* set in init_one() if vlan_tso_capable() */
+	VLAN_ACCEL_CAPABLE = 0x20,	/* as above, only with 802.1Q support configured */
+	RX_CSUM_ENABLED    = 0x40,	/* set for every port in init_one() */
+};
+/*
+ * Per-adapter state.  init_one() allocates it as the private area of
+ * the first port's net device.
+ */
+struct adapter {
+	u8 *regs;				/* ioremap()ed PCI resource 0 */
+	struct pci_dev *pdev;
+	unsigned long registered_device_map;	/* bit i set once port i is registered */
+	unsigned long open_device_map;		/* tested against PORT_MASK by mac_stats_task() */
+	unsigned int flags;			/* adapter flags from the enum above */
+
+	const char *name;			/* name used in log messages */
+	int msg_enable;
+	u32 mmio_len;				/* length of the mapping behind regs */
+
+	struct work_struct ext_intr_handler_task;	/* runs ext_intr_task() */
+	struct adapter_params params;
+
+	struct vlan_group *vlan_grp;
+
+	/* Terminator modules. */
+	struct sge    *sge;
+	struct pemc3  *mc3;
+	struct pemc4  *mc4;
+	struct pemc5  *mc5;
+	struct petp   *tp;
+	struct pecspi *cspi;
+	struct peespi *espi;
+	struct peulp  *ulp;
+
+	struct port_info port[MAX_NPORTS];
+	struct work_struct stats_update_task;	/* runs mac_stats_task() */
+	struct timer_list stats_update_timer;
+
+	struct semaphore mib_mutex;
+	spinlock_t tpi_lock;
+	spinlock_t work_lock;			/* held while rescheduling the stats update */
+
+	spinlock_t async_lock ____cacheline_aligned; /* guards async operations */
+	u32 slow_intr_mask;			/* F_PL_INTR_EXT is cleared/set here around ext_intr_task() */
+};
+
+#endif
diff --git a/drivers/net/chelsio/elmer0.h b/drivers/net/chelsio/elmer0.h
new file mode 100644
index 000000000000..08f148643e7f
--- /dev/null
+++ b/drivers/net/chelsio/elmer0.h
@@ -0,0 +1,157 @@
+/***************************************************************************** + * * + * File: elmer0.h * + * $Revision: 1.3 $ * + * $Date: 2005/03/23 07:15:58 $ * + * Description: * + * part of the Chelsio 10Gb Ethernet Driver. * + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of the GNU General Public License, version 2, as * + * published by the Free Software Foundation. * + * * + * You should have received a copy of the GNU General Public License along * + * with this program; if not, write to the Free Software Foundation, Inc., * + * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. * + * * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED * + * WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF * + * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. * + * * + * http://www.chelsio.com * + * * + * Copyright (c) 2003 - 2005 Chelsio Communications, Inc. * + * All rights reserved.
* + * * + * Maintainers: maintainers@chelsio.com * + * * + * Authors: Dimitrios Michailidis * + * Tina Yang * + * Felix Marti * + * Scott Bardone * + * Kurt Ottaway * + * Frank DiMambro * + * * + * History: * + * * + ****************************************************************************/
+
+#ifndef CHELSIO_ELMER0_H
+#define CHELSIO_ELMER0_H
+
+/* ELMER0 flavors */
+enum {
+	ELMER0_XC2S300E_6FT256_C,
+	ELMER0_XC2S100E_6TQ144_C
+};
+
+/*
+ * ELMER0 registers
+ *
+ * Naming used below: A_* is a register address, S_* the bit position of
+ * a field, M_* its unshifted mask, V_*(x) shifts a value into the field,
+ * G_*(x) extracts the field from a register value and F_* is the
+ * single-bit flag V_*(1U).
+ */
+#define A_ELMER0_VERSION 0x100000
+#define A_ELMER0_PHY_CFG 0x100004
+#define A_ELMER0_INT_ENABLE 0x100008
+#define A_ELMER0_INT_CAUSE 0x10000c
+#define A_ELMER0_GPI_CFG 0x100010
+#define A_ELMER0_GPI_STAT 0x100014
+#define A_ELMER0_GPO 0x100018
+#define A_ELMER0_PORT0_MI1_CFG 0x400000
+/* Fields of the per-port MI1_CFG register. */
+#define S_MI1_MDI_ENABLE 0
+#define V_MI1_MDI_ENABLE(x) ((x) << S_MI1_MDI_ENABLE)
+#define F_MI1_MDI_ENABLE V_MI1_MDI_ENABLE(1U)
+
+#define S_MI1_MDI_INVERT 1
+#define V_MI1_MDI_INVERT(x) ((x) << S_MI1_MDI_INVERT)
+#define F_MI1_MDI_INVERT V_MI1_MDI_INVERT(1U)
+
+#define S_MI1_PREAMBLE_ENABLE 2
+#define V_MI1_PREAMBLE_ENABLE(x) ((x) << S_MI1_PREAMBLE_ENABLE)
+#define F_MI1_PREAMBLE_ENABLE V_MI1_PREAMBLE_ENABLE(1U)
+
+#define S_MI1_SOF 3
+#define M_MI1_SOF 0x3
+#define V_MI1_SOF(x) ((x) << S_MI1_SOF)
+#define G_MI1_SOF(x) (((x) >> S_MI1_SOF) & M_MI1_SOF)
+
+#define S_MI1_CLK_DIV 5
+#define M_MI1_CLK_DIV 0xff
+#define V_MI1_CLK_DIV(x) ((x) << S_MI1_CLK_DIV)
+#define G_MI1_CLK_DIV(x) (((x) >> S_MI1_CLK_DIV) & M_MI1_CLK_DIV)
+
+#define A_ELMER0_PORT0_MI1_ADDR 0x400004
+/* Fields of the per-port MI1_ADDR register. */
+#define S_MI1_REG_ADDR 0
+#define M_MI1_REG_ADDR 0x1f
+#define V_MI1_REG_ADDR(x) ((x) << S_MI1_REG_ADDR)
+#define G_MI1_REG_ADDR(x) (((x) >> S_MI1_REG_ADDR) & M_MI1_REG_ADDR)
+
+#define S_MI1_PHY_ADDR 5
+#define M_MI1_PHY_ADDR 0x1f
+#define V_MI1_PHY_ADDR(x) ((x) << S_MI1_PHY_ADDR)
+#define G_MI1_PHY_ADDR(x) (((x) >> S_MI1_PHY_ADDR) & M_MI1_PHY_ADDR)
+
+#define A_ELMER0_PORT0_MI1_DATA 0x400008
+/* Field of the per-port MI1_DATA register. */
+#define S_MI1_DATA 0
+#define M_MI1_DATA 0xffff
+#define V_MI1_DATA(x) ((x) << S_MI1_DATA)
+#define G_MI1_DATA(x) (((x) >> S_MI1_DATA) & M_MI1_DATA)
+
+#define A_ELMER0_PORT0_MI1_OP 0x40000c
+/* Fields of the per-port MI1_OP register; see the MI1_OP_* codes below. */
+#define S_MI1_OP 0
+#define M_MI1_OP 0x3
+#define V_MI1_OP(x) ((x) << S_MI1_OP)
+#define G_MI1_OP(x) (((x) >> S_MI1_OP) & M_MI1_OP)
+
+#define S_MI1_ADDR_AUTOINC 2
+#define V_MI1_ADDR_AUTOINC(x) ((x) << S_MI1_ADDR_AUTOINC)
+#define F_MI1_ADDR_AUTOINC V_MI1_ADDR_AUTOINC(1U)
+
+#define S_MI1_OP_BUSY 31
+#define V_MI1_OP_BUSY(x) ((x) << S_MI1_OP_BUSY)
+#define F_MI1_OP_BUSY V_MI1_OP_BUSY(1U)
+/* The MI1 register blocks of ports 1-3 repeat port 0's layout at a 0x100000 stride. */
+#define A_ELMER0_PORT1_MI1_CFG 0x500000
+#define A_ELMER0_PORT1_MI1_ADDR 0x500004
+#define A_ELMER0_PORT1_MI1_DATA 0x500008
+#define A_ELMER0_PORT1_MI1_OP 0x50000c
+#define A_ELMER0_PORT2_MI1_CFG 0x600000
+#define A_ELMER0_PORT2_MI1_ADDR 0x600004
+#define A_ELMER0_PORT2_MI1_DATA 0x600008
+#define A_ELMER0_PORT2_MI1_OP 0x60000c
+#define A_ELMER0_PORT3_MI1_CFG 0x700000
+#define A_ELMER0_PORT3_MI1_ADDR 0x700004
+#define A_ELMER0_PORT3_MI1_DATA 0x700008
+#define A_ELMER0_PORT3_MI1_OP 0x70000c
+
+/* Simple bit definition for GPI and GPO registers. */
+#define ELMER0_GP_BIT0 0x0001
+#define ELMER0_GP_BIT1 0x0002
+#define ELMER0_GP_BIT2 0x0004
+#define ELMER0_GP_BIT3 0x0008
+#define ELMER0_GP_BIT4 0x0010
+#define ELMER0_GP_BIT5 0x0020
+#define ELMER0_GP_BIT6 0x0040
+#define ELMER0_GP_BIT7 0x0080
+#define ELMER0_GP_BIT8 0x0100
+#define ELMER0_GP_BIT9 0x0200
+#define ELMER0_GP_BIT10 0x0400
+#define ELMER0_GP_BIT11 0x0800
+#define ELMER0_GP_BIT12 0x1000
+#define ELMER0_GP_BIT13 0x2000
+#define ELMER0_GP_BIT14 0x4000
+#define ELMER0_GP_BIT15 0x8000
+#define ELMER0_GP_BIT16 0x10000
+#define ELMER0_GP_BIT17 0x20000
+#define ELMER0_GP_BIT18 0x40000
+#define ELMER0_GP_BIT19 0x80000
+/* Operation codes for the MI1_OP field, direct access. */
+#define MI1_OP_DIRECT_WRITE 1
+#define MI1_OP_DIRECT_READ 2
+/* Operation codes for the MI1_OP field, indirect access. */
+#define MI1_OP_INDIRECT_ADDRESS 0
+#define MI1_OP_INDIRECT_WRITE 1
+#define MI1_OP_INDIRECT_READ_INC 2
+#define MI1_OP_INDIRECT_READ 3
+
+#endif
diff --git a/drivers/net/chelsio/espi.c b/drivers/net/chelsio/espi.c
new file mode 100644
index 000000000000..7ec2dc7bafac
--- /dev/null
+++ b/drivers/net/chelsio/espi.c
@@ -0,0 +1,386 @@
+/***************************************************************************** + * * + * File: espi.c * + * $Revision: 1.9 $ * + * $Date: 2005/03/23 07:41:27 $ * + * Description: * + * Ethernet SPI functionality. * + * part of the Chelsio 10Gb Ethernet Driver. * + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of the GNU General Public License, version 2, as * + * published by the Free Software Foundation. * + * * + * You should have received a copy of the GNU General Public License along * + * with this program; if not, write to the Free Software Foundation, Inc., * + * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. * + * * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED * + * WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF * + * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
* + * * + * http://www.chelsio.com * + * * + * Copyright (c) 2003 - 2005 Chelsio Communications, Inc. * + * All rights reserved. * + * * + * Maintainers: maintainers@chelsio.com * + * * + * Authors: Dimitrios Michailidis * + * Tina Yang * + * Felix Marti * + * Scott Bardone * + * Kurt Ottaway * + * Frank DiMambro * + * * + * History: * + * * + ****************************************************************************/
+
+#include "common.h"
+#include "regs.h"
+#include "espi.h"
+/* ESPI module state; allocated by t1_espi_create(). */
+struct peespi {
+	adapter_t *adapter;			/* owning adapter */
+	struct espi_intr_counts intr_cnt;	/* bumped by t1_espi_intr_handler() */
+	u32 misc_ctrl;				/* last value written to A_ESPI_MISC_CONTROL */
+	spinlock_t lock;			/* taken around misc_ctrl updates and monitor reads */
+};
+/* ESPI interrupt causes the driver enables, and the monitor-select bits of MISC_CONTROL. */
+#define ESPI_INTR_MASK (F_DIP4ERR | F_RXDROP | F_TXDROP | F_RXOVERFLOW | \
+			F_RAMPARITYERR | F_DIP2PARITYERR)
+#define MON_MASK  (V_MONITORED_PORT_NUM(3) | F_MONITORED_DIRECTION \
+		   | F_MONITORED_INTERFACE)
+/* TriCN register offset, command codes and the number of busy polls per command. */
+#define TRICN_CNFG 14
+#define TRICN_CMD_READ  0x11
+#define TRICN_CMD_WRITE 0x21
+#define TRICN_CMD_ATTEMPTS 10
+/*
+ * Issues one TriCN register write through A_ESPI_CMD_ADDR and polls
+ * A_ESPI_GOSTAT up to TRICN_CMD_ATTEMPTS times, back to back, for
+ * F_ESPI_CMD_BUSY to clear.
+ *
+ * Returns 0 on success, or the still-set busy bit (non-zero) after
+ * logging an error if the command did not complete.
+ */
+static int tricn_write(adapter_t *adapter, int bundle_addr, int module_addr,
+		       int ch_addr, int reg_offset, u32 wr_data)
+{
+	int busy, attempts = TRICN_CMD_ATTEMPTS;
+
+	t1_write_reg_4(adapter, A_ESPI_CMD_ADDR, V_WRITE_DATA(wr_data) |
+		       V_REGISTER_OFFSET(reg_offset) |
+		       V_CHANNEL_ADDR(ch_addr) | V_MODULE_ADDR(module_addr) |
+		       V_BUNDLE_ADDR(bundle_addr) |
+		       V_SPI4_COMMAND(TRICN_CMD_WRITE));
+	t1_write_reg_4(adapter, A_ESPI_GOSTAT, 0);
+
+	do {
+		busy = t1_read_reg_4(adapter, A_ESPI_GOSTAT) & F_ESPI_CMD_BUSY;
+	} while (busy && --attempts);
+
+	if (busy)
+		CH_ERR("%s: TRICN write timed out\n", adapter->name);
+
+	return busy;
+}
+
+/* 1. Deassert rx_reset_core. */
+/* 2. Program TRICN_CNFG registers. */
+/* 3.
Deassert rx_reset_link */ +static int tricn_init(adapter_t *adapter) +{ + int i = 0; + int sme = 1; + int stat = 0; + int timeout = 0; + int is_ready = 0; + int dynamic_deskew = 0; + + if (dynamic_deskew) + sme = 0; + + + /* 1 */ + timeout=1000; + do { + stat = t1_read_reg_4(adapter, A_ESPI_RX_RESET); + is_ready = (stat & 0x4); + timeout--; + udelay(5); + } while (!is_ready || (timeout==0)); + t1_write_reg_4(adapter, A_ESPI_RX_RESET, 0x2); + if (timeout==0) + { + CH_ERR("ESPI : ERROR : Timeout tricn_init() \n"); + t1_fatal_err(adapter); + } + + /* 2 */ + if (sme) { + tricn_write(adapter, 0, 0, 0, TRICN_CNFG, 0x81); + tricn_write(adapter, 0, 1, 0, TRICN_CNFG, 0x81); + tricn_write(adapter, 0, 2, 0, TRICN_CNFG, 0x81); + } + for (i=1; i<= 8; i++) tricn_write(adapter, 0, 0, i, TRICN_CNFG, 0xf1); + for (i=1; i<= 2; i++) tricn_write(adapter, 0, 1, i, TRICN_CNFG, 0xf1); + for (i=1; i<= 3; i++) tricn_write(adapter, 0, 2, i, TRICN_CNFG, 0xe1); + for (i=4; i<= 4; i++) tricn_write(adapter, 0, 2, i, TRICN_CNFG, 0xf1); + for (i=5; i<= 5; i++) tricn_write(adapter, 0, 2, i, TRICN_CNFG, 0xe1); + for (i=6; i<= 6; i++) tricn_write(adapter, 0, 2, i, TRICN_CNFG, 0xf1); + for (i=7; i<= 7; i++) tricn_write(adapter, 0, 2, i, TRICN_CNFG, 0x80); + for (i=8; i<= 8; i++) tricn_write(adapter, 0, 2, i, TRICN_CNFG, 0xf1); + + /* 3 */ + t1_write_reg_4(adapter, A_ESPI_RX_RESET, 0x3); + + return 0; +} + +void t1_espi_intr_enable(struct peespi *espi) +{ + u32 enable, pl_intr = t1_read_reg_4(espi->adapter, A_PL_ENABLE); + + /* + * Cannot enable ESPI interrupts on T1B because HW asserts the + * interrupt incorrectly, namely the driver gets ESPI interrupts + * but no data is actually dropped (can verify this reading the ESPI + * drop registers). Also, once the ESPI interrupt is asserted it + * cannot be cleared (HW bug). + */ + enable = t1_is_T1B(espi->adapter) ? 
0 : ESPI_INTR_MASK; + t1_write_reg_4(espi->adapter, A_ESPI_INTR_ENABLE, enable); + t1_write_reg_4(espi->adapter, A_PL_ENABLE, pl_intr | F_PL_INTR_ESPI); +} + +void t1_espi_intr_clear(struct peespi *espi) +{ + t1_write_reg_4(espi->adapter, A_ESPI_INTR_STATUS, 0xffffffff); + t1_write_reg_4(espi->adapter, A_PL_CAUSE, F_PL_INTR_ESPI); +} + +void t1_espi_intr_disable(struct peespi *espi) +{ + u32 pl_intr = t1_read_reg_4(espi->adapter, A_PL_ENABLE); + + t1_write_reg_4(espi->adapter, A_ESPI_INTR_ENABLE, 0); + t1_write_reg_4(espi->adapter, A_PL_ENABLE, pl_intr & ~F_PL_INTR_ESPI); +} + +int t1_espi_intr_handler(struct peespi *espi) +{ + u32 cnt; + u32 status = t1_read_reg_4(espi->adapter, A_ESPI_INTR_STATUS); + + if (status & F_DIP4ERR) + espi->intr_cnt.DIP4_err++; + if (status & F_RXDROP) + espi->intr_cnt.rx_drops++; + if (status & F_TXDROP) + espi->intr_cnt.tx_drops++; + if (status & F_RXOVERFLOW) + espi->intr_cnt.rx_ovflw++; + if (status & F_RAMPARITYERR) + espi->intr_cnt.parity_err++; + if (status & F_DIP2PARITYERR) { + espi->intr_cnt.DIP2_parity_err++; + + /* + * Must read the error count to clear the interrupt + * that it causes. + */ + cnt = t1_read_reg_4(espi->adapter, A_ESPI_DIP2_ERR_COUNT); + } + + /* + * For T1B we need to write 1 to clear ESPI interrupts. For T2+ we + * write the status as is. + */ + if (status && t1_is_T1B(espi->adapter)) + status = 1; + t1_write_reg_4(espi->adapter, A_ESPI_INTR_STATUS, status); + return 0; +} + +static void espi_setup_for_pm3393(adapter_t *adapter) +{ + u32 wmark = t1_is_T1B(adapter) ? 
0x4000 : 0x3200; + + t1_write_reg_4(adapter, A_ESPI_SCH_TOKEN0, 0x1f4); + t1_write_reg_4(adapter, A_ESPI_SCH_TOKEN1, 0x1f4); + t1_write_reg_4(adapter, A_ESPI_SCH_TOKEN2, 0x1f4); + t1_write_reg_4(adapter, A_ESPI_SCH_TOKEN3, 0x1f4); + t1_write_reg_4(adapter, A_ESPI_RX_FIFO_ALMOST_EMPTY_WATERMARK, 0x100); + t1_write_reg_4(adapter, A_ESPI_RX_FIFO_ALMOST_FULL_WATERMARK, wmark); + t1_write_reg_4(adapter, A_ESPI_CALENDAR_LENGTH, 3); + t1_write_reg_4(adapter, A_ESPI_TRAIN, 0x08000008); + t1_write_reg_4(adapter, A_PORT_CONFIG, + V_RX_NPORTS(1) | V_TX_NPORTS(1)); +} + +static void espi_setup_for_vsc7321(adapter_t *adapter) +{ + u32 wmark = t1_is_T1B(adapter) ? 0x4000 : 0x3200; + + t1_write_reg_4(adapter, A_ESPI_SCH_TOKEN0, 0x1f4); + t1_write_reg_4(adapter, A_ESPI_SCH_TOKEN1, 0x1f4); + t1_write_reg_4(adapter, A_ESPI_SCH_TOKEN2, 0x1f4); + t1_write_reg_4(adapter, A_ESPI_SCH_TOKEN3, 0x1f4); + t1_write_reg_4(adapter, A_ESPI_RX_FIFO_ALMOST_EMPTY_WATERMARK, 0x100); + t1_write_reg_4(adapter, A_ESPI_RX_FIFO_ALMOST_FULL_WATERMARK, wmark); + t1_write_reg_4(adapter, A_ESPI_CALENDAR_LENGTH, 3); + t1_write_reg_4(adapter, A_ESPI_TRAIN, 0x08000008); + t1_write_reg_4(adapter, A_PORT_CONFIG, + V_RX_NPORTS(1) | V_TX_NPORTS(1)); +} + +/* + * Note that T1B requires at least 2 ports for IXF1010 due to a HW bug. 
+ */ +static void espi_setup_for_ixf1010(adapter_t *adapter, int nports) +{ + t1_write_reg_4(adapter, A_ESPI_CALENDAR_LENGTH, 1); + if (nports == 4) { + if (is_T2(adapter)) { + t1_write_reg_4(adapter, A_ESPI_RX_FIFO_ALMOST_FULL_WATERMARK, + 0xf00); + t1_write_reg_4(adapter, A_ESPI_RX_FIFO_ALMOST_EMPTY_WATERMARK, + 0x3c0); + } else { + t1_write_reg_4(adapter, A_ESPI_RX_FIFO_ALMOST_FULL_WATERMARK, + 0x7ff); + t1_write_reg_4(adapter, A_ESPI_RX_FIFO_ALMOST_EMPTY_WATERMARK, + 0x1ff); + } + } else { + t1_write_reg_4(adapter, A_ESPI_RX_FIFO_ALMOST_FULL_WATERMARK, + 0x1fff); + t1_write_reg_4(adapter, A_ESPI_RX_FIFO_ALMOST_EMPTY_WATERMARK, + 0x7ff); + } + t1_write_reg_4(adapter, A_PORT_CONFIG, + V_RX_NPORTS(nports) | V_TX_NPORTS(nports)); +} + +/* T2 Init part -- */ +/* 1. Set T_ESPI_MISCCTRL_ADDR */ +/* 2. Init ESPI registers. */ +/* 3. Init TriCN Hard Macro */ +int t1_espi_init(struct peespi *espi, int mac_type, int nports) +{ + u32 status_enable_extra = 0; + adapter_t *adapter = espi->adapter; + u32 cnt; + u32 status, burstval = 0x800100; + + /* Disable ESPI training. MACs that can handle it enable it below. */ + t1_write_reg_4(adapter, A_ESPI_TRAIN, 0); + + if (is_T2(adapter)) { + t1_write_reg_4(adapter, A_ESPI_MISC_CONTROL, + V_OUT_OF_SYNC_COUNT(4) | + V_DIP2_PARITY_ERR_THRES(3) | V_DIP4_THRES(1)); + if (nports == 4) { + /* T204: maxburst1 = 0x40, maxburst2 = 0x20 */ + burstval = 0x200040; + } + } + t1_write_reg_4(adapter, A_ESPI_MAXBURST1_MAXBURST2, burstval); + + if (mac_type == CHBT_MAC_PM3393) + espi_setup_for_pm3393(adapter); + else if (mac_type == CHBT_MAC_VSC7321) + espi_setup_for_vsc7321(adapter); + else if (mac_type == CHBT_MAC_IXF1010) { + status_enable_extra = F_INTEL1010MODE; + espi_setup_for_ixf1010(adapter, nports); + } else + return -1; + + /* + * Make sure any pending interrupts from the SPI are + * Cleared before enabling the interrupt. 
+ */ + t1_write_reg_4(espi->adapter, A_ESPI_INTR_ENABLE, ESPI_INTR_MASK); + status = t1_read_reg_4(espi->adapter, A_ESPI_INTR_STATUS); + if (status & F_DIP2PARITYERR) { + cnt = t1_read_reg_4(espi->adapter, A_ESPI_DIP2_ERR_COUNT); + } + + /* + * For T1B we need to write 1 to clear ESPI interrupts. For T2+ we + * write the status as is. + */ + if (status && t1_is_T1B(espi->adapter)) + status = 1; + t1_write_reg_4(espi->adapter, A_ESPI_INTR_STATUS, status); + + t1_write_reg_4(adapter, A_ESPI_FIFO_STATUS_ENABLE, + status_enable_extra | F_RXSTATUSENABLE); + + if (is_T2(adapter)) { + tricn_init(adapter); + /* + * Always position the control at the 1st port egress IN + * (sop,eop) counter to reduce PIOs for T/N210 workaround. + */ + espi->misc_ctrl = (t1_read_reg_4(adapter, A_ESPI_MISC_CONTROL) + & ~MON_MASK) | (F_MONITORED_DIRECTION + | F_MONITORED_INTERFACE); + t1_write_reg_4(adapter, A_ESPI_MISC_CONTROL, espi->misc_ctrl); + spin_lock_init(&espi->lock); + } + + return 0; +} + +void t1_espi_destroy(struct peespi *espi) +{ + kfree(espi); +} + +struct peespi *t1_espi_create(adapter_t *adapter) +{ + struct peespi *espi = kmalloc(sizeof(*espi), GFP_KERNEL); + + memset(espi, 0, sizeof(*espi)); + + if (espi) + espi->adapter = adapter; + return espi; +} + +void t1_espi_set_misc_ctrl(adapter_t *adapter, u32 val) +{ + struct peespi *espi = adapter->espi; + + if (!is_T2(adapter)) + return; + spin_lock(&espi->lock); + espi->misc_ctrl = (val & ~MON_MASK) | + (espi->misc_ctrl & MON_MASK); + t1_write_reg_4(adapter, A_ESPI_MISC_CONTROL, espi->misc_ctrl); + spin_unlock(&espi->lock); +} + +u32 t1_espi_get_mon(adapter_t *adapter, u32 addr, u8 wait) +{ + struct peespi *espi = adapter->espi; + u32 sel; + + if (!is_T2(adapter)) + return 0; + sel = V_MONITORED_PORT_NUM((addr & 0x3c) >> 2); + if (!wait) { + if (!spin_trylock(&espi->lock)) + return 0; + } + else + spin_lock(&espi->lock); + if ((sel != (espi->misc_ctrl & MON_MASK))) { + t1_write_reg_4(adapter, A_ESPI_MISC_CONTROL, + 
((espi->misc_ctrl & ~MON_MASK) | sel)); + sel = t1_read_reg_4(adapter, A_ESPI_SCH_TOKEN3); + t1_write_reg_4(adapter, A_ESPI_MISC_CONTROL, + espi->misc_ctrl); + } + else + sel = t1_read_reg_4(adapter, A_ESPI_SCH_TOKEN3); + spin_unlock(&espi->lock); + return sel; +} diff --git a/drivers/net/chelsio/espi.h b/drivers/net/chelsio/espi.h new file mode 100644 index 000000000000..0f84e8b6399f --- /dev/null +++ b/drivers/net/chelsio/espi.h @@ -0,0 +1,67 @@ +/***************************************************************************** + * * + * File: espi.h * + * $Revision: 1.4 $ * + * $Date: 2005/03/23 07:15:58 $ * + * Description: * + * part of the Chelsio 10Gb Ethernet Driver. * + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of the GNU General Public License, version 2, as * + * published by the Free Software Foundation. * + * * + * You should have received a copy of the GNU General Public License along * + * with this program; if not, write to the Free Software Foundation, Inc., * + * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. * + * * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED * + * WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF * + * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. * + * * + * http://www.chelsio.com * + * * + * Copyright (c) 2003 - 2005 Chelsio Communications, Inc. * + * All rights reserved. 
* + * * + * Maintainers: maintainers@chelsio.com * + * * + * Authors: Dimitrios Michailidis * + * Tina Yang * + * Felix Marti * + * Scott Bardone * + * Kurt Ottaway * + * Frank DiMambro * + * * + * History: * + * * + ****************************************************************************/
+
+#ifndef CHELSIO_ESPI_H
+#define CHELSIO_ESPI_H
+
+#include "common.h"
+/*
+ * Per-cause ESPI interrupt counters.  t1_espi_intr_handler() bumps one
+ * counter for each cause bit it finds set in the interrupt status.
+ */
+struct espi_intr_counts {
+	unsigned int DIP4_err;		/* F_DIP4ERR */
+	unsigned int rx_drops;		/* F_RXDROP */
+	unsigned int tx_drops;		/* F_TXDROP */
+	unsigned int rx_ovflw;		/* F_RXOVERFLOW */
+	unsigned int parity_err;	/* F_RAMPARITYERR */
+	unsigned int DIP2_parity_err;	/* F_DIP2PARITYERR */
+};
+
+struct peespi;
+/* Object lifetime and one-time initialization for a given MAC type and port count. */
+struct peespi *t1_espi_create(adapter_t *adapter);
+void t1_espi_destroy(struct peespi *espi);
+int t1_espi_init(struct peespi *espi, int mac_type, int nports);
+/* ESPI interrupt control and servicing. */
+void t1_espi_intr_enable(struct peespi *);
+void t1_espi_intr_clear(struct peespi *);
+void t1_espi_intr_disable(struct peespi *);
+int t1_espi_intr_handler(struct peespi *);
+/* A_ESPI_MISC_CONTROL access; both return without touching HW unless is_T2(). */
+void t1_espi_set_misc_ctrl(adapter_t *adapter, u32 val);
+u32 t1_espi_get_mon(adapter_t *adapter, u32 addr, u8 wait);
+
+#endif
diff --git a/drivers/net/chelsio/gmac.h b/drivers/net/chelsio/gmac.h
new file mode 100644
index 000000000000..24501e2232cc
--- /dev/null
+++ b/drivers/net/chelsio/gmac.h
@@ -0,0 +1,133 @@
+/***************************************************************************** + * * + * File: gmac.h * + * $Revision: 1.3 $ * + * $Date: 2005/03/23 07:15:58 $ * + * Description: * + * Generic MAC functionality. * + * part of the Chelsio 10Gb Ethernet Driver. * + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of the GNU General Public License, version 2, as * + * published by the Free Software Foundation. * + * * + * You should have received a copy of the GNU General Public License along * + * with this program; if not, write to the Free Software Foundation, Inc., * + * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
* + * * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED * + * WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF * + * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. * + * * + * http://www.chelsio.com * + * * + * Copyright (c) 2003 - 2005 Chelsio Communications, Inc. * + * All rights reserved. * + * * + * Maintainers: maintainers@chelsio.com * + * * + * Authors: Dimitrios Michailidis * + * Tina Yang * + * Felix Marti * + * Scott Bardone * + * Kurt Ottaway * + * Frank DiMambro * + * * + * History: * + * * + ****************************************************************************/
+
+#ifndef CHELSIO_GMAC_H
+#define CHELSIO_GMAC_H
+
+#include "common.h"
+/* Second argument of cmac_ops.statistics_update(), and direction bits (presumably for enable/disable -- TODO confirm). */
+enum { MAC_STATS_UPDATE_FAST, MAC_STATS_UPDATE_FULL };
+enum { MAC_DIRECTION_RX = 1, MAC_DIRECTION_TX = 2 };
+/* MAC counters, kept as 64-bit values; embedded in struct cmac. */
+struct cmac_statistics {
+	/* Transmit */
+	u64 TxOctetsOK;
+	u64 TxOctetsBad;
+	u64 TxUnicastFramesOK;
+	u64 TxMulticastFramesOK;
+	u64 TxBroadcastFramesOK;
+	u64 TxPauseFrames;
+	u64 TxFramesWithDeferredXmissions;
+	u64 TxLateCollisions;
+	u64 TxTotalCollisions;
+	u64 TxFramesAbortedDueToXSCollisions;
+	u64 TxUnderrun;
+	u64 TxLengthErrors;
+	u64 TxInternalMACXmitError;
+	u64 TxFramesWithExcessiveDeferral;
+	u64 TxFCSErrors;
+
+	/* Receive */
+	u64 RxOctetsOK;
+	u64 RxOctetsBad;
+	u64 RxUnicastFramesOK;
+	u64 RxMulticastFramesOK;
+	u64 RxBroadcastFramesOK;
+	u64 RxPauseFrames;
+	u64 RxFCSErrors;
+	u64 RxAlignErrors;
+	u64 RxSymbolErrors;
+	u64 RxDataErrors;
+	u64 RxSequenceErrors;
+	u64 RxRuntErrors;
+	u64 RxJabberErrors;
+	u64 RxInternalMACRcvError;
+	u64 RxInRangeLengthErrors;
+	u64 RxOutOfRangeLengthField;
+	u64 RxFrameTooLongErrors;
+};
+/*
+ * Operations a MAC implementation provides; reached through
+ * cmac->ops, with the MAC instance as first argument.
+ */
+struct cmac_ops {
+	void (*destroy)(struct cmac *);
+	int (*reset)(struct cmac *);
+	int (*interrupt_enable)(struct cmac *);
+	int (*interrupt_disable)(struct cmac *);
+	int (*interrupt_clear)(struct cmac *);
+	int (*interrupt_handler)(struct cmac *);
+
+	int (*enable)(struct cmac *, int);	/* int: presumably MAC_DIRECTION_* bits -- TODO confirm */
+	int (*disable)(struct cmac *, int);
+
+	int (*loopback_enable)(struct cmac *);
+	int (*loopback_disable)(struct cmac *);
+
+	int (*set_mtu)(struct cmac *, int mtu);
+	int (*set_rx_mode)(struct cmac *, struct t1_rx_mode *rm);
+
+	int (*set_speed_duplex_fc)(struct cmac *, int speed, int duplex, int fc);
+	int (*get_speed_duplex_fc)(struct cmac *, int *speed, int *duplex,
+				   int *fc);
+	/* int argument is MAC_STATS_UPDATE_FAST or MAC_STATS_UPDATE_FULL. */
+	const struct cmac_statistics *(*statistics_update)(struct cmac *, int);
+
+	int (*macaddress_get)(struct cmac *, u8 mac_addr[6]);
+	int (*macaddress_set)(struct cmac *, u8 mac_addr[6]);
+};
+
+typedef struct _cmac_instance cmac_instance;
+/* One MAC: its counters, owning adapter, operations table and private instance data. */
+struct cmac {
+	struct cmac_statistics stats;
+	adapter_t *adapter;
+	struct cmac_ops *ops;
+	cmac_instance *instance;
+};
+/* Per-MAC-type entry points: creates the cmac for port index and resets the MAC hardware. */
+struct gmac {
+	unsigned int stats_update_period;
+	struct cmac *(*create)(adapter_t *adapter, int index);
+	int (*reset)(adapter_t *);
+};
+
+extern struct gmac t1_pm3393_ops;
+extern struct gmac t1_chelsio_mac_ops;
+extern struct gmac t1_vsc7321_ops;
+extern struct gmac t1_ixf1010_ops;
+extern struct gmac t1_dummy_mac_ops;
+#endif
diff --git a/drivers/net/chelsio/mv88x201x.c b/drivers/net/chelsio/mv88x201x.c
new file mode 100644
index 000000000000..f54133af1bce
--- /dev/null
+++ b/drivers/net/chelsio/mv88x201x.c
@@ -0,0 +1,258 @@
+/***************************************************************************** + * * + * File: mv88x201x.c * + * $Revision: 1.7 $ * + * $Date: 2005/03/23 07:15:59 $ * + * Description: * + * Marvell PHY (mv88x201x) functionality. * + * part of the Chelsio 10Gb Ethernet Driver. * + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of the GNU General Public License, version 2, as * + * published by the Free Software Foundation. * + * * + * You should have received a copy of the GNU General Public License along * + * with this program; if not, write to the Free Software Foundation, Inc., * + * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
* + * * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED * + * WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF * + * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. * + * * + * http://www.chelsio.com * + * * + * Copyright (c) 2003 - 2005 Chelsio Communications, Inc. * + * All rights reserved. * + * * + * Maintainers: maintainers@chelsio.com * + * * + * Authors: Dimitrios Michailidis * + * Tina Yang * + * Felix Marti * + * Scott Bardone * + * Kurt Ottaway * + * Frank DiMambro * + * * + * History: * + * * + ****************************************************************************/ + +#include "cphy.h" +#include "elmer0.h" + +/* + * The 88x2010 Rev C. requires some link status registers * to be read + * twice in order to get the right values. Future * revisions will fix + * this problem and then this macro * can disappear. + */ +#define MV88x2010_LINK_STATUS_BUGS 1 + +static int led_init(struct cphy *cphy) +{ + /* Setup the LED registers so we can turn on/off. + * Writing these bits maps control to another + * register. mmd(0x1) addr(0x7) + */ + mdio_write(cphy, 0x3, 0x8304, 0xdddd); + return 0; +} + +static int led_link(struct cphy *cphy, u32 do_enable) +{ + u32 led = 0; +#define LINK_ENABLE_BIT 0x1 + + mdio_read(cphy, 0x1, 0x7, &led); + + if (do_enable & LINK_ENABLE_BIT) { + led |= LINK_ENABLE_BIT; + mdio_write(cphy, 0x1, 0x7, led); + } else { + led &= ~LINK_ENABLE_BIT; + mdio_write(cphy, 0x1, 0x7, led); + } + return 0; +} + +/* Port Reset */ +static int mv88x201x_reset(struct cphy *cphy, int wait) +{ + /* This can be done through registers. It is not required since + * a full chip reset is used. + */ + return 0; +} + +static int mv88x201x_interrupt_enable(struct cphy *cphy) +{ + /* Enable PHY LASI interrupts. */ + mdio_write(cphy, 0x1, 0x9002, 0x1); + + /* Enable Marvell interrupts through Elmer0. 
/*
 * Acknowledge pending PHY interrupts.
 *
 * The PHY status registers are cleared by reading them; the value read is
 * discarded.  When MV88x2010_LINK_STATUS_BUGS is defined, extra reads are
 * issued around the normal sequence to work around the PHY's link status
 * bug.  On ASIC adapters the matching cause bit (ELMER0_GP_BIT6) is then
 * written back to the Elmer0 interrupt cause register.  Always returns 0.
 */
static int mv88x201x_interrupt_clear(struct cphy *cphy)
{
	u32 elmer;
	u32 val;

#ifdef MV88x2010_LINK_STATUS_BUGS
	/* Required to read twice before clear takes effect. */
	mdio_read(cphy, 0x1, 0x9003, &val);
	mdio_read(cphy, 0x1, 0x9004, &val);
	mdio_read(cphy, 0x1, 0x9005, &val);

	/* Read this register after the others above it else
	 * the register doesn't clear correctly.
	 */
	mdio_read(cphy, 0x1, 0x1, &val);
#endif

	/* Clear link status. */
	mdio_read(cphy, 0x1, 0x1, &val);
	/* Clear PHY LASI interrupts. */
	mdio_read(cphy, 0x1, 0x9005, &val);

#ifdef MV88x2010_LINK_STATUS_BUGS
	/* Do it again. */
	mdio_read(cphy, 0x1, 0x9003, &val);
	mdio_read(cphy, 0x1, 0x9004, &val);
#endif

	/* Clear Marvell interrupts through Elmer0. */
	if (t1_is_asic(cphy->adapter)) {
		t1_tpi_read(cphy->adapter, A_ELMER0_INT_CAUSE, &elmer);
		elmer |= ELMER0_GP_BIT6;
		t1_tpi_write(cphy->adapter, A_ELMER0_INT_CAUSE, elmer);
	}
	return 0;
}
+ */ + return cphy_cause_link_change; +} + +static int mv88x201x_set_loopback(struct cphy *cphy, int on) +{ + return 0; +} + +static int mv88x201x_get_link_status(struct cphy *cphy, int *link_ok, + int *speed, int *duplex, int *fc) +{ + u32 val = 0; +#define LINK_STATUS_BIT 0x4 + + if (link_ok) { + /* Read link status. */ + mdio_read(cphy, 0x1, 0x1, &val); + val &= LINK_STATUS_BIT; + *link_ok = (val == LINK_STATUS_BIT); + /* Turn on/off Link LED */ + led_link(cphy, *link_ok); + } + if (speed) + *speed = SPEED_10000; + if (duplex) + *duplex = DUPLEX_FULL; + if (fc) + *fc = PAUSE_RX | PAUSE_TX; + return 0; +} + +static void mv88x201x_destroy(struct cphy *cphy) +{ + kfree(cphy); +} + +static struct cphy_ops mv88x201x_ops = { + .destroy = mv88x201x_destroy, + .reset = mv88x201x_reset, + .interrupt_enable = mv88x201x_interrupt_enable, + .interrupt_disable = mv88x201x_interrupt_disable, + .interrupt_clear = mv88x201x_interrupt_clear, + .interrupt_handler = mv88x201x_interrupt_handler, + .get_link_status = mv88x201x_get_link_status, + .set_loopback = mv88x201x_set_loopback, +}; + +static struct cphy *mv88x201x_phy_create(adapter_t *adapter, int phy_addr, + struct mdio_ops *mdio_ops) +{ + u32 val; + struct cphy *cphy = kmalloc(sizeof(*cphy), GFP_KERNEL); + + if (!cphy) + return NULL; + memset(cphy, 0, sizeof(*cphy)); + cphy_init(cphy, adapter, phy_addr, &mv88x201x_ops, mdio_ops); + + /* Commands the PHY to enable XFP's clock. */ + mdio_read(cphy, 0x3, 0x8300, &val); + mdio_write(cphy, 0x3, 0x8300, val | 1); + + /* Clear link status. Required because of a bug in the PHY. 
*/ + mdio_read(cphy, 0x1, 0x8, &val); + mdio_read(cphy, 0x3, 0x8, &val); + + /* Allows for Link,Ack LED turn on/off */ + led_init(cphy); + return cphy; +} + +/* Chip Reset */ +static int mv88x201x_phy_reset(adapter_t *adapter) +{ + u32 val; + + t1_tpi_read(adapter, A_ELMER0_GPO, &val); + val &= ~4; + t1_tpi_write(adapter, A_ELMER0_GPO, val); + msleep(100); + + t1_tpi_write(adapter, A_ELMER0_GPO, val | 4); + msleep(1000); + + /* Now lets enable the Laser. Delay 100us */ + t1_tpi_read(adapter, A_ELMER0_GPO, &val); + val |= 0x8000; + t1_tpi_write(adapter, A_ELMER0_GPO, val); + udelay(100); + return 0; +} + +struct gphy t1_mv88x201x_ops = { + mv88x201x_phy_create, + mv88x201x_phy_reset +}; diff --git a/drivers/net/chelsio/osdep.h b/drivers/net/chelsio/osdep.h new file mode 100644 index 000000000000..095cb474434f --- /dev/null +++ b/drivers/net/chelsio/osdep.h @@ -0,0 +1,169 @@ +/***************************************************************************** + * * + * File: osdep.h * + * $Revision: 1.9 $ * + * $Date: 2005/03/23 07:41:27 $ * + * Description: * + * part of the Chelsio 10Gb Ethernet Driver. * + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of the GNU General Public License, version 2, as * + * published by the Free Software Foundation. * + * * + * You should have received a copy of the GNU General Public License along * + * with this program; if not, write to the Free Software Foundation, Inc., * + * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. * + * * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED * + * WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF * + * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. * + * * + * http://www.chelsio.com * + * * + * Copyright (c) 2003 - 2005 Chelsio Communications, Inc. * + * All rights reserved. 
/*
 * CH_DBG(adapter, category, fmt, ...) - debug-level variant of CH_MSG.
 *
 * With DEBUG defined the message is forwarded to CH_MSG at KERN_DEBUG
 * level, subject to the adapter's msg_enable mask.  Without DEBUG it
 * compiles to an empty statement and none of its arguments are evaluated.
 *
 * Both variants take the same parameter list, and the disabled one is a
 * complete statement, so a call behaves identically in either build when
 * it is the unbraced body of an if/else.
 */
#ifdef DEBUG
# define CH_DBG(adapter, category, fmt, ...) \
	CH_MSG(adapter, DEBUG, category, fmt, ## __VA_ARGS__)
#else
# define CH_DBG(adapter, category, fmt, ...) do { } while (0)
#endif
+ */ +typedef struct adapter adapter_t; + +#define TPI_LOCK(adapter) spin_lock(&(adapter)->tpi_lock) +#define TPI_UNLOCK(adapter) spin_unlock(&(adapter)->tpi_lock) + +void t1_elmer0_ext_intr(adapter_t *adapter); +void t1_link_changed(adapter_t *adapter, int port_id, int link_status, + int speed, int duplex, int fc); + +static inline u16 t1_read_reg_2(adapter_t *adapter, u32 reg_addr) +{ + u16 val = readw(adapter->regs + reg_addr); + + CH_DBG(adapter, MMIO, "read register 0x%x value 0x%x\n", reg_addr, + val); + return val; +} + +static inline void t1_write_reg_2(adapter_t *adapter, u32 reg_addr, u16 val) +{ + CH_DBG(adapter, MMIO, "setting register 0x%x to 0x%x\n", reg_addr, + val); + writew(val, adapter->regs + reg_addr); +} + +static inline u32 t1_read_reg_4(adapter_t *adapter, u32 reg_addr) +{ + u32 val = readl(adapter->regs + reg_addr); + + CH_DBG(adapter, MMIO, "read register 0x%x value 0x%x\n", reg_addr, + val); + return val; +} + +static inline void t1_write_reg_4(adapter_t *adapter, u32 reg_addr, u32 val) +{ + CH_DBG(adapter, MMIO, "setting register 0x%x to 0x%x\n", reg_addr, + val); + writel(val, adapter->regs + reg_addr); +} + +static inline const char *port_name(adapter_t *adapter, int port_idx) +{ + return adapter->port[port_idx].dev->name; +} + +static inline void t1_set_hw_addr(adapter_t *adapter, int port_idx, + u8 hw_addr[]) +{ + memcpy(adapter->port[port_idx].dev->dev_addr, hw_addr, ETH_ALEN); +} + +struct t1_rx_mode { + struct net_device *dev; + u32 idx; + struct dev_mc_list *list; +}; + +#define t1_rx_mode_promisc(rm) (rm->dev->flags & IFF_PROMISC) +#define t1_rx_mode_allmulti(rm) (rm->dev->flags & IFF_ALLMULTI) +#define t1_rx_mode_mc_cnt(rm) (rm->dev->mc_count) + +static inline u8 *t1_get_next_mcaddr(struct t1_rx_mode *rm) +{ + u8 *addr = 0; + + if (rm->idx++ < rm->dev->mc_count) { + addr = rm->list->dmi_addr; + rm->list = rm->list->next; + } + return addr; +} + +#endif diff --git a/drivers/net/chelsio/pm3393.c b/drivers/net/chelsio/pm3393.c new 
/*
 * Convert a PM3393 register number into the address passed to
 * t1_tpi_read()/t1_tpi_write() (register number times four).
 *
 * The argument is parenthesized so that expressions using operators that
 * bind more loosely than <<, e.g. OFFSET(base | idx), are shifted as a
 * whole.
 */
#define OFFSET(REG_ADDR)	((REG_ADDR) << 2)
*/ +#define MAX_FRAME_SIZE 9600 + +#define IPG 12 +#define TXXG_CONF1_VAL ((IPG << SUNI1x10GEXP_BITOFF_TXXG_IPGT) | \ + SUNI1x10GEXP_BITMSK_TXXG_32BIT_ALIGN | SUNI1x10GEXP_BITMSK_TXXG_CRCEN | \ + SUNI1x10GEXP_BITMSK_TXXG_PADEN) +#define RXXG_CONF1_VAL (SUNI1x10GEXP_BITMSK_RXXG_PUREP | 0x14 | \ + SUNI1x10GEXP_BITMSK_RXXG_FLCHK | SUNI1x10GEXP_BITMSK_RXXG_CRC_STRIP) + +/* Update statistics every 15 minutes */ +#define STATS_TICK_SECS (15 * 60) + +enum { /* RMON registers */ + RxOctetsReceivedOK = SUNI1x10GEXP_REG_MSTAT_COUNTER_1_LOW, + RxUnicastFramesReceivedOK = SUNI1x10GEXP_REG_MSTAT_COUNTER_4_LOW, + RxMulticastFramesReceivedOK = SUNI1x10GEXP_REG_MSTAT_COUNTER_5_LOW, + RxBroadcastFramesReceivedOK = SUNI1x10GEXP_REG_MSTAT_COUNTER_6_LOW, + RxPAUSEMACCtrlFramesReceived = SUNI1x10GEXP_REG_MSTAT_COUNTER_8_LOW, + RxFrameCheckSequenceErrors = SUNI1x10GEXP_REG_MSTAT_COUNTER_10_LOW, + RxFramesLostDueToInternalMACErrors = SUNI1x10GEXP_REG_MSTAT_COUNTER_11_LOW, + RxSymbolErrors = SUNI1x10GEXP_REG_MSTAT_COUNTER_12_LOW, + RxInRangeLengthErrors = SUNI1x10GEXP_REG_MSTAT_COUNTER_13_LOW, + RxFramesTooLongErrors = SUNI1x10GEXP_REG_MSTAT_COUNTER_15_LOW, + RxJabbers = SUNI1x10GEXP_REG_MSTAT_COUNTER_16_LOW, + RxFragments = SUNI1x10GEXP_REG_MSTAT_COUNTER_17_LOW, + RxUndersizedFrames = SUNI1x10GEXP_REG_MSTAT_COUNTER_18_LOW, + + TxOctetsTransmittedOK = SUNI1x10GEXP_REG_MSTAT_COUNTER_33_LOW, + TxFramesLostDueToInternalMACTransmissionError = SUNI1x10GEXP_REG_MSTAT_COUNTER_35_LOW, + TxTransmitSystemError = SUNI1x10GEXP_REG_MSTAT_COUNTER_36_LOW, + TxUnicastFramesTransmittedOK = SUNI1x10GEXP_REG_MSTAT_COUNTER_38_LOW, + TxMulticastFramesTransmittedOK = SUNI1x10GEXP_REG_MSTAT_COUNTER_40_LOW, + TxBroadcastFramesTransmittedOK = SUNI1x10GEXP_REG_MSTAT_COUNTER_42_LOW, + TxPAUSEMACCtrlFramesTransmitted = SUNI1x10GEXP_REG_MSTAT_COUNTER_43_LOW +}; + +struct _cmac_instance { + u8 enabled; + u8 fc; + u8 mac_addr[6]; +}; + +static int pmread(struct cmac *cmac, u32 reg, u32 * data32) +{ + 
t1_tpi_read(cmac->adapter, OFFSET(reg), data32); + return 0; +} + +static int pmwrite(struct cmac *cmac, u32 reg, u32 data32) +{ + t1_tpi_write(cmac->adapter, OFFSET(reg), data32); + return 0; +} + +/* Port reset. */ +static int pm3393_reset(struct cmac *cmac) +{ + return 0; +} + +/* + * Enable interrupts for the PM3393 + + 1. Enable PM3393 BLOCK interrupts. + 2. Enable PM3393 Master Interrupt bit(INTE) + 3. Enable ELMER's PM3393 bit. + 4. Enable Terminator external interrupt. +*/ +static int pm3393_interrupt_enable(struct cmac *cmac) +{ + u32 pl_intr; + + /* PM3393 - Enabling all hardware block interrupts. + */ + pmwrite(cmac, SUNI1x10GEXP_REG_SERDES_3125_INTERRUPT_ENABLE, 0xffff); + pmwrite(cmac, SUNI1x10GEXP_REG_XRF_INTERRUPT_ENABLE, 0xffff); + pmwrite(cmac, SUNI1x10GEXP_REG_XRF_DIAG_INTERRUPT_ENABLE, 0xffff); + pmwrite(cmac, SUNI1x10GEXP_REG_RXOAM_INTERRUPT_ENABLE, 0xffff); + + /* Don't interrupt on statistics overflow, we are polling */ + pmwrite(cmac, SUNI1x10GEXP_REG_MSTAT_INTERRUPT_MASK_0, 0); + pmwrite(cmac, SUNI1x10GEXP_REG_MSTAT_INTERRUPT_MASK_1, 0); + pmwrite(cmac, SUNI1x10GEXP_REG_MSTAT_INTERRUPT_MASK_2, 0); + pmwrite(cmac, SUNI1x10GEXP_REG_MSTAT_INTERRUPT_MASK_3, 0); + + pmwrite(cmac, SUNI1x10GEXP_REG_IFLX_FIFO_OVERFLOW_ENABLE, 0xffff); + pmwrite(cmac, SUNI1x10GEXP_REG_PL4ODP_INTERRUPT_MASK, 0xffff); + pmwrite(cmac, SUNI1x10GEXP_REG_XTEF_INTERRUPT_ENABLE, 0xffff); + pmwrite(cmac, SUNI1x10GEXP_REG_TXOAM_INTERRUPT_ENABLE, 0xffff); + pmwrite(cmac, SUNI1x10GEXP_REG_RXXG_CONFIG_3, 0xffff); + pmwrite(cmac, SUNI1x10GEXP_REG_PL4IO_LOCK_DETECT_MASK, 0xffff); + pmwrite(cmac, SUNI1x10GEXP_REG_TXXG_CONFIG_3, 0xffff); + pmwrite(cmac, SUNI1x10GEXP_REG_PL4IDU_INTERRUPT_MASK, 0xffff); + pmwrite(cmac, SUNI1x10GEXP_REG_EFLX_FIFO_OVERFLOW_ERROR_ENABLE, 0xffff); + + /* PM3393 - Global interrupt enable + */ + /* TBD XXX Disable for now until we figure out why error interrupts keep asserting. 
*/ + pmwrite(cmac, SUNI1x10GEXP_REG_GLOBAL_INTERRUPT_ENABLE, + 0 /*SUNI1x10GEXP_BITMSK_TOP_INTE */ ); + + /* TERMINATOR - PL_INTERUPTS_EXT */ + pl_intr = t1_read_reg_4(cmac->adapter, A_PL_ENABLE); + pl_intr |= F_PL_INTR_EXT; + t1_write_reg_4(cmac->adapter, A_PL_ENABLE, pl_intr); + return 0; +} + +static int pm3393_interrupt_disable(struct cmac *cmac) +{ + u32 elmer; + + /* PM3393 - Enabling HW interrupt blocks. */ + pmwrite(cmac, SUNI1x10GEXP_REG_SERDES_3125_INTERRUPT_ENABLE, 0); + pmwrite(cmac, SUNI1x10GEXP_REG_XRF_INTERRUPT_ENABLE, 0); + pmwrite(cmac, SUNI1x10GEXP_REG_XRF_DIAG_INTERRUPT_ENABLE, 0); + pmwrite(cmac, SUNI1x10GEXP_REG_RXOAM_INTERRUPT_ENABLE, 0); + pmwrite(cmac, SUNI1x10GEXP_REG_MSTAT_INTERRUPT_MASK_0, 0); + pmwrite(cmac, SUNI1x10GEXP_REG_MSTAT_INTERRUPT_MASK_1, 0); + pmwrite(cmac, SUNI1x10GEXP_REG_MSTAT_INTERRUPT_MASK_2, 0); + pmwrite(cmac, SUNI1x10GEXP_REG_MSTAT_INTERRUPT_MASK_3, 0); + pmwrite(cmac, SUNI1x10GEXP_REG_IFLX_FIFO_OVERFLOW_ENABLE, 0); + pmwrite(cmac, SUNI1x10GEXP_REG_PL4ODP_INTERRUPT_MASK, 0); + pmwrite(cmac, SUNI1x10GEXP_REG_XTEF_INTERRUPT_ENABLE, 0); + pmwrite(cmac, SUNI1x10GEXP_REG_TXOAM_INTERRUPT_ENABLE, 0); + pmwrite(cmac, SUNI1x10GEXP_REG_RXXG_CONFIG_3, 0); + pmwrite(cmac, SUNI1x10GEXP_REG_PL4IO_LOCK_DETECT_MASK, 0); + pmwrite(cmac, SUNI1x10GEXP_REG_TXXG_CONFIG_3, 0); + pmwrite(cmac, SUNI1x10GEXP_REG_PL4IDU_INTERRUPT_MASK, 0); + pmwrite(cmac, SUNI1x10GEXP_REG_EFLX_FIFO_OVERFLOW_ERROR_ENABLE, 0); + + /* PM3393 - Global interrupt enable */ + pmwrite(cmac, SUNI1x10GEXP_REG_GLOBAL_INTERRUPT_ENABLE, 0); + + /* ELMER - External chip interrupts. */ + t1_tpi_read(cmac->adapter, A_ELMER0_INT_ENABLE, &elmer); + elmer &= ~ELMER0_GP_BIT1; + t1_tpi_write(cmac->adapter, A_ELMER0_INT_ENABLE, elmer); + + /* TERMINATOR - PL_INTERUPTS_EXT */ + /* DO NOT DISABLE TERMINATOR's EXTERNAL INTERRUPTS. ANOTHER CHIP + * COULD WANT THEM ENABLED. We disable PM3393 at the ELMER level. 
+ */ + + return 0; +} + +static int pm3393_interrupt_clear(struct cmac *cmac) +{ + u32 elmer; + u32 pl_intr; + u32 val32; + + /* PM3393 - Clearing HW interrupt blocks. Note, this assumes + * bit WCIMODE=0 for a clear-on-read. + */ + pmread(cmac, SUNI1x10GEXP_REG_SERDES_3125_INTERRUPT_STATUS, &val32); + pmread(cmac, SUNI1x10GEXP_REG_XRF_INTERRUPT_STATUS, &val32); + pmread(cmac, SUNI1x10GEXP_REG_XRF_DIAG_INTERRUPT_STATUS, &val32); + pmread(cmac, SUNI1x10GEXP_REG_RXOAM_INTERRUPT_STATUS, &val32); + pmread(cmac, SUNI1x10GEXP_REG_PL4ODP_INTERRUPT, &val32); + pmread(cmac, SUNI1x10GEXP_REG_XTEF_INTERRUPT_STATUS, &val32); + pmread(cmac, SUNI1x10GEXP_REG_IFLX_FIFO_OVERFLOW_INTERRUPT, &val32); + pmread(cmac, SUNI1x10GEXP_REG_TXOAM_INTERRUPT_STATUS, &val32); + pmread(cmac, SUNI1x10GEXP_REG_RXXG_INTERRUPT, &val32); + pmread(cmac, SUNI1x10GEXP_REG_TXXG_INTERRUPT, &val32); + pmread(cmac, SUNI1x10GEXP_REG_PL4IDU_INTERRUPT, &val32); + pmread(cmac, SUNI1x10GEXP_REG_EFLX_FIFO_OVERFLOW_ERROR_INDICATION, + &val32); + pmread(cmac, SUNI1x10GEXP_REG_PL4IO_LOCK_DETECT_STATUS, &val32); + pmread(cmac, SUNI1x10GEXP_REG_PL4IO_LOCK_DETECT_CHANGE, &val32); + + /* PM3393 - Global interrupt status + */ + pmread(cmac, SUNI1x10GEXP_REG_MASTER_INTERRUPT_STATUS, &val32); + + /* ELMER - External chip interrupts. + */ + t1_tpi_read(cmac->adapter, A_ELMER0_INT_CAUSE, &elmer); + elmer |= ELMER0_GP_BIT1; + t1_tpi_write(cmac->adapter, A_ELMER0_INT_CAUSE, elmer); + + /* TERMINATOR - PL_INTERUPTS_EXT + */ + pl_intr = t1_read_reg_4(cmac->adapter, A_PL_CAUSE); + pl_intr |= F_PL_INTR_EXT; + t1_write_reg_4(cmac->adapter, A_PL_CAUSE, pl_intr); + + return 0; +} + +/* Interrupt handler */ +static int pm3393_interrupt_handler(struct cmac *cmac) +{ + u32 master_intr_status; +/* + 1. Read master interrupt register. + 2. Read BLOCK's interrupt status registers. + 3. Handle BLOCK interrupts. +*/ + /* Read the master interrupt status register. 
*/ + pmread(cmac, SUNI1x10GEXP_REG_MASTER_INTERRUPT_STATUS, + &master_intr_status); + CH_DBG(cmac->adapter, INTR, "PM3393 intr cause 0x%x\n", + master_intr_status); + + /* TBD XXX Lets just clear everything for now */ + pm3393_interrupt_clear(cmac); + + return 0; +} + +static int pm3393_enable(struct cmac *cmac, int which) +{ + if (which & MAC_DIRECTION_RX) + pmwrite(cmac, SUNI1x10GEXP_REG_RXXG_CONFIG_1, + (RXXG_CONF1_VAL | SUNI1x10GEXP_BITMSK_RXXG_RXEN)); + + if (which & MAC_DIRECTION_TX) { + u32 val = TXXG_CONF1_VAL | SUNI1x10GEXP_BITMSK_TXXG_TXEN0; + + if (cmac->instance->fc & PAUSE_RX) + val |= SUNI1x10GEXP_BITMSK_TXXG_FCRX; + if (cmac->instance->fc & PAUSE_TX) + val |= SUNI1x10GEXP_BITMSK_TXXG_FCTX; + pmwrite(cmac, SUNI1x10GEXP_REG_TXXG_CONFIG_1, val); + } + + cmac->instance->enabled |= which; + return 0; +} + +static int pm3393_enable_port(struct cmac *cmac, int which) +{ + /* Clear port statistics */ + pmwrite(cmac, SUNI1x10GEXP_REG_MSTAT_CONTROL, + SUNI1x10GEXP_BITMSK_MSTAT_CLEAR); + udelay(2); + memset(&cmac->stats, 0, sizeof(struct cmac_statistics)); + + pm3393_enable(cmac, which); + + /* + * XXX This should be done by the PHY and preferrably not at all. + * The PHY doesn't give us link status indication on its own so have + * the link management code query it instead. + */ + { + extern void link_changed(adapter_t *adapter, int port_id); + + link_changed(cmac->adapter, 0); + } + return 0; +} + +static int pm3393_disable(struct cmac *cmac, int which) +{ + if (which & MAC_DIRECTION_RX) + pmwrite(cmac, SUNI1x10GEXP_REG_RXXG_CONFIG_1, RXXG_CONF1_VAL); + if (which & MAC_DIRECTION_TX) + pmwrite(cmac, SUNI1x10GEXP_REG_TXXG_CONFIG_1, TXXG_CONF1_VAL); + + /* + * The disable is graceful. Give the PM3393 time. Can't wait very + * long here, we may be holding locks. 
/*
 * CRC used to hash multicast addresses.
 *
 * Runs the reflected CRC-32 polynomial (0xedb88320) over 'len' bytes at 'b',
 * starting from all ones and with no final inversion, then returns the
 * 32-bit result with its bit order reversed.
 */
static u32 calc_crc(u8 *b, int len)
{
	u32 crc = (u32)~0;
	u32 mirrored = 0;
	int i;

	/* One input byte per outer pass, one bit per inner pass. */
	for (; len; len--) {
		crc ^= *b++;
		for (i = 8; i; i--)
			crc = (crc >> 1) ^ ((crc & 0x1) ? 0xedb88320 : 0);
	}

	/* Mirror all 32 bits: bit 0 becomes bit 31, bit 1 becomes bit 30, ... */
	for (i = 0; i < 32; i++) {
		mirrored = (mirrored << 1) | (crc & 0x1);
		crc >>= 1;
	}

	return mirrored;
}
*/ + rx_mode |= SUNI1x10GEXP_BITMSK_RXXG_PMODE; + } + if (t1_rx_mode_allmulti(rm)) { + /* Accept all multicast. */ + pmwrite(cmac, SUNI1x10GEXP_REG_RXXG_MULTICAST_HASH_LOW, 0xffff); + pmwrite(cmac, SUNI1x10GEXP_REG_RXXG_MULTICAST_HASH_MIDLOW, 0xffff); + pmwrite(cmac, SUNI1x10GEXP_REG_RXXG_MULTICAST_HASH_MIDHIGH, 0xffff); + pmwrite(cmac, SUNI1x10GEXP_REG_RXXG_MULTICAST_HASH_HIGH, 0xffff); + rx_mode |= SUNI1x10GEXP_BITMSK_RXXG_MHASH_EN; + } else if (t1_rx_mode_mc_cnt(rm)) { + /* Accept one or more multicast(s). */ + u8 *addr; + int bit; + u16 mc_filter[4] = { 0, }; + + while ((addr = t1_get_next_mcaddr(rm))) { + bit = (calc_crc(addr, ETH_ALEN) >> 23) & 0x3f; /* bit[23:28] */ + mc_filter[bit >> 4] |= 1 << (bit & 0xf); + } + pmwrite(cmac, SUNI1x10GEXP_REG_RXXG_MULTICAST_HASH_LOW, mc_filter[0]); + pmwrite(cmac, SUNI1x10GEXP_REG_RXXG_MULTICAST_HASH_MIDLOW, mc_filter[1]); + pmwrite(cmac, SUNI1x10GEXP_REG_RXXG_MULTICAST_HASH_MIDHIGH, mc_filter[2]); + pmwrite(cmac, SUNI1x10GEXP_REG_RXXG_MULTICAST_HASH_HIGH, mc_filter[3]); + rx_mode |= SUNI1x10GEXP_BITMSK_RXXG_MHASH_EN; + } + + pmwrite(cmac, SUNI1x10GEXP_REG_RXXG_ADDRESS_FILTER_CONTROL_2, (u16)rx_mode); + + if (enabled) + pm3393_enable(cmac, MAC_DIRECTION_RX); + + return 0; +} + +static int pm3393_get_speed_duplex_fc(struct cmac *cmac, int *speed, + int *duplex, int *fc) +{ + if (speed) + *speed = SPEED_10000; + if (duplex) + *duplex = DUPLEX_FULL; + if (fc) + *fc = cmac->instance->fc; + return 0; +} + +static int pm3393_set_speed_duplex_fc(struct cmac *cmac, int speed, int duplex, + int fc) +{ + if (speed >= 0 && speed != SPEED_10000) + return -1; + if (duplex >= 0 && duplex != DUPLEX_FULL) + return -1; + if (fc & ~(PAUSE_TX | PAUSE_RX)) + return -1; + + if (fc != cmac->instance->fc) { + cmac->instance->fc = (u8) fc; + if (cmac->instance->enabled & MAC_DIRECTION_TX) + pm3393_enable(cmac, MAC_DIRECTION_TX); + } + return 0; +} + +#define RMON_UPDATE(mac, name, stat_name) \ + { \ + t1_tpi_read((mac)->adapter, OFFSET(name), 
&val0); \ + t1_tpi_read((mac)->adapter, OFFSET(((name)+1)), &val1); \ + t1_tpi_read((mac)->adapter, OFFSET(((name)+2)), &val2); \ + (mac)->stats.stat_name = ((u64)val0 & 0xffff) | \ + (((u64)val1 & 0xffff) << 16) | \ + (((u64)val2 & 0xff) << 32) | \ + ((mac)->stats.stat_name & \ + (~(u64)0 << 40)); \ + if (ro & \ + ((name - SUNI1x10GEXP_REG_MSTAT_COUNTER_0_LOW) >> 2)) \ + (mac)->stats.stat_name += ((u64)1 << 40); \ + } + +static const struct cmac_statistics *pm3393_update_statistics(struct cmac *mac, + int flag) +{ + u64 ro; + u32 val0, val1, val2, val3; + + /* Snap the counters */ + pmwrite(mac, SUNI1x10GEXP_REG_MSTAT_CONTROL, + SUNI1x10GEXP_BITMSK_MSTAT_SNAP); + + /* Counter rollover, clear on read */ + pmread(mac, SUNI1x10GEXP_REG_MSTAT_COUNTER_ROLLOVER_0, &val0); + pmread(mac, SUNI1x10GEXP_REG_MSTAT_COUNTER_ROLLOVER_1, &val1); + pmread(mac, SUNI1x10GEXP_REG_MSTAT_COUNTER_ROLLOVER_2, &val2); + pmread(mac, SUNI1x10GEXP_REG_MSTAT_COUNTER_ROLLOVER_3, &val3); + ro = ((u64)val0 & 0xffff) | (((u64)val1 & 0xffff) << 16) | + (((u64)val2 & 0xffff) << 32) | (((u64)val3 & 0xffff) << 48); + + /* Rx stats */ + RMON_UPDATE(mac, RxOctetsReceivedOK, RxOctetsOK); + RMON_UPDATE(mac, RxUnicastFramesReceivedOK, RxUnicastFramesOK); + RMON_UPDATE(mac, RxMulticastFramesReceivedOK, RxMulticastFramesOK); + RMON_UPDATE(mac, RxBroadcastFramesReceivedOK, RxBroadcastFramesOK); + RMON_UPDATE(mac, RxPAUSEMACCtrlFramesReceived, RxPauseFrames); + RMON_UPDATE(mac, RxFrameCheckSequenceErrors, RxFCSErrors); + RMON_UPDATE(mac, RxFramesLostDueToInternalMACErrors, + RxInternalMACRcvError); + RMON_UPDATE(mac, RxSymbolErrors, RxSymbolErrors); + RMON_UPDATE(mac, RxInRangeLengthErrors, RxInRangeLengthErrors); + RMON_UPDATE(mac, RxFramesTooLongErrors , RxFrameTooLongErrors); + RMON_UPDATE(mac, RxJabbers, RxJabberErrors); + RMON_UPDATE(mac, RxFragments, RxRuntErrors); + RMON_UPDATE(mac, RxUndersizedFrames, RxRuntErrors); + + /* Tx stats */ + RMON_UPDATE(mac, TxOctetsTransmittedOK, TxOctetsOK); + 
RMON_UPDATE(mac, TxFramesLostDueToInternalMACTransmissionError, + TxInternalMACXmitError); + RMON_UPDATE(mac, TxTransmitSystemError, TxFCSErrors); + RMON_UPDATE(mac, TxUnicastFramesTransmittedOK, TxUnicastFramesOK); + RMON_UPDATE(mac, TxMulticastFramesTransmittedOK, TxMulticastFramesOK); + RMON_UPDATE(mac, TxBroadcastFramesTransmittedOK, TxBroadcastFramesOK); + RMON_UPDATE(mac, TxPAUSEMACCtrlFramesTransmitted, TxPauseFrames); + + return &mac->stats; +} + +static int pm3393_macaddress_get(struct cmac *cmac, u8 mac_addr[6]) +{ + memcpy(mac_addr, cmac->instance->mac_addr, 6); + return 0; +} + +static int pm3393_macaddress_set(struct cmac *cmac, u8 ma[6]) +{ + u32 val, lo, mid, hi, enabled = cmac->instance->enabled; + + /* + * MAC addr: 00:07:43:00:13:09 + * + * ma[5] = 0x09 + * ma[4] = 0x13 + * ma[3] = 0x00 + * ma[2] = 0x43 + * ma[1] = 0x07 + * ma[0] = 0x00 + * + * The PM3393 requires byte swapping and reverse order entry + * when programming MAC addresses: + * + * low_bits[15:0] = ma[1]:ma[0] + * mid_bits[31:16] = ma[3]:ma[2] + * high_bits[47:32] = ma[5]:ma[4] + */ + + /* Store local copy */ + memcpy(cmac->instance->mac_addr, ma, 6); + + lo = ((u32) ma[1] << 8) | (u32) ma[0]; + mid = ((u32) ma[3] << 8) | (u32) ma[2]; + hi = ((u32) ma[5] << 8) | (u32) ma[4]; + + /* Disable Rx/Tx MAC before configuring it. */ + if (enabled) + pm3393_disable(cmac, MAC_DIRECTION_RX | MAC_DIRECTION_TX); + + /* Set RXXG Station Address */ + pmwrite(cmac, SUNI1x10GEXP_REG_RXXG_SA_15_0, lo); + pmwrite(cmac, SUNI1x10GEXP_REG_RXXG_SA_31_16, mid); + pmwrite(cmac, SUNI1x10GEXP_REG_RXXG_SA_47_32, hi); + + /* Set TXXG Station Address */ + pmwrite(cmac, SUNI1x10GEXP_REG_TXXG_SA_15_0, lo); + pmwrite(cmac, SUNI1x10GEXP_REG_TXXG_SA_31_16, mid); + pmwrite(cmac, SUNI1x10GEXP_REG_TXXG_SA_47_32, hi); + + /* Setup Exact Match Filter 1 with our MAC address + * + * Must disable exact match filter before configuring it. 
/*
 * cmac_ops implementation for the PM3393 MAC.
 */
static struct cmac_ops pm3393_ops = {
	.destroy = pm3393_destroy,
	.reset = pm3393_reset,
	.interrupt_enable = pm3393_interrupt_enable,
	.interrupt_disable = pm3393_interrupt_disable,
	.interrupt_clear = pm3393_interrupt_clear,
	.interrupt_handler = pm3393_interrupt_handler,
	/*
	 * External enable goes through pm3393_enable_port, which clears the
	 * port statistics and triggers a link query in addition to calling
	 * pm3393_enable.
	 */
	.enable = pm3393_enable_port,
	.disable = pm3393_disable,
	.loopback_enable = pm3393_loopback_enable,
	.loopback_disable = pm3393_loopback_disable,
	.set_mtu = pm3393_set_mtu,
	.set_rx_mode = pm3393_set_rx_mode,
	.get_speed_duplex_fc = pm3393_get_speed_duplex_fc,
	.set_speed_duplex_fc = pm3393_set_speed_duplex_fc,
	.statistics_update = pm3393_update_statistics,
	.macaddress_get = pm3393_macaddress_get,
	.macaddress_set = pm3393_macaddress_set
};
0x00008800); + t1_tpi_write(adapter, OFFSET(0x2321), 0x00008800); + t1_tpi_write(adapter, OFFSET(0x2322), 0x00008800); + t1_tpi_write(adapter, OFFSET(0x2323), 0x00008800); + t1_tpi_write(adapter, OFFSET(0x2324), 0x00008800); + t1_tpi_write(adapter, OFFSET(0x2325), 0x00008800); + t1_tpi_write(adapter, OFFSET(0x2326), 0x00008800); + t1_tpi_write(adapter, OFFSET(0x2327), 0x00008800); + t1_tpi_write(adapter, OFFSET(0x2328), 0x00008800); + t1_tpi_write(adapter, OFFSET(0x2329), 0x00008800); + t1_tpi_write(adapter, OFFSET(0x232a), 0x00008800); + t1_tpi_write(adapter, OFFSET(0x232b), 0x00008800); + t1_tpi_write(adapter, OFFSET(0x232c), 0x00008800); + t1_tpi_write(adapter, OFFSET(0x232d), 0x00008800); + t1_tpi_write(adapter, OFFSET(0x232e), 0x00008800); + t1_tpi_write(adapter, OFFSET(0x232f), 0x00008800); + t1_tpi_write(adapter, OFFSET(0x230d), 0x00009c00); + t1_tpi_write(adapter, OFFSET(0x2304), 0x00000202); /* PL4IO Calendar Repetitions */ + + t1_tpi_write(adapter, OFFSET(0x3200), 0x00008080); /* EFLX Enable */ + t1_tpi_write(adapter, OFFSET(0x3210), 0x00000000); /* EFLX Channel Deprovision */ + t1_tpi_write(adapter, OFFSET(0x3203), 0x00000000); /* EFLX Low Limit */ + t1_tpi_write(adapter, OFFSET(0x3204), 0x00000040); /* EFLX High Limit */ + t1_tpi_write(adapter, OFFSET(0x3205), 0x000002cc); /* EFLX Almost Full */ + t1_tpi_write(adapter, OFFSET(0x3206), 0x00000199); /* EFLX Almost Empty */ + t1_tpi_write(adapter, OFFSET(0x3207), 0x00000240); /* EFLX Cut Through Threshold */ + t1_tpi_write(adapter, OFFSET(0x3202), 0x00000000); /* EFLX Indirect Register Update */ + t1_tpi_write(adapter, OFFSET(0x3210), 0x00000001); /* EFLX Channel Provision */ + t1_tpi_write(adapter, OFFSET(0x3208), 0x0000ffff); /* EFLX Undocumented */ + t1_tpi_write(adapter, OFFSET(0x320a), 0x0000ffff); /* EFLX Undocumented */ + t1_tpi_write(adapter, OFFSET(0x320c), 0x0000ffff); /* EFLX enable overflow interrupt The other bit are undocumented */ + t1_tpi_write(adapter, OFFSET(0x320e), 0x0000ffff); /* EFLX 
Undocumented */ + + t1_tpi_write(adapter, OFFSET(0x2200), 0x0000c000); /* IFLX Configuration - enable */ + t1_tpi_write(adapter, OFFSET(0x2201), 0x00000000); /* IFLX Channel Deprovision */ + t1_tpi_write(adapter, OFFSET(0x220e), 0x00000000); /* IFLX Low Limit */ + t1_tpi_write(adapter, OFFSET(0x220f), 0x00000100); /* IFLX High Limit */ + t1_tpi_write(adapter, OFFSET(0x2210), 0x00000c00); /* IFLX Almost Full Limit */ + t1_tpi_write(adapter, OFFSET(0x2211), 0x00000599); /* IFLX Almost Empty Limit */ + t1_tpi_write(adapter, OFFSET(0x220d), 0x00000000); /* IFLX Indirect Register Update */ + t1_tpi_write(adapter, OFFSET(0x2201), 0x00000001); /* IFLX Channel Provision */ + t1_tpi_write(adapter, OFFSET(0x2203), 0x0000ffff); /* IFLX Undocumented */ + t1_tpi_write(adapter, OFFSET(0x2205), 0x0000ffff); /* IFLX Undocumented */ + t1_tpi_write(adapter, OFFSET(0x2209), 0x0000ffff); /* IFLX Enable overflow interrupt. The other bit are undocumented */ + + t1_tpi_write(adapter, OFFSET(0x2241), 0xfffffffe); /* PL4MOS Undocumented */ + t1_tpi_write(adapter, OFFSET(0x2242), 0x0000ffff); /* PL4MOS Undocumented */ + t1_tpi_write(adapter, OFFSET(0x2243), 0x00000008); /* PL4MOS Starving Burst Size */ + t1_tpi_write(adapter, OFFSET(0x2244), 0x00000008); /* PL4MOS Hungry Burst Size */ + t1_tpi_write(adapter, OFFSET(0x2245), 0x00000008); /* PL4MOS Transfer Size */ + t1_tpi_write(adapter, OFFSET(0x2240), 0x00000005); /* PL4MOS Disable */ + + t1_tpi_write(adapter, OFFSET(0x2280), 0x00002103); /* PL4ODP Training Repeat and SOP rule */ + t1_tpi_write(adapter, OFFSET(0x2284), 0x00000000); /* PL4ODP MAX_T setting */ + + t1_tpi_write(adapter, OFFSET(0x3280), 0x00000087); /* PL4IDU Enable data forward, port state machine. Set ALLOW_NON_ZERO_OLB */ + t1_tpi_write(adapter, OFFSET(0x3282), 0x0000001f); /* PL4IDU Enable Dip4 check error interrupts */ + + t1_tpi_write(adapter, OFFSET(0x3040), 0x0c32); /* # TXXG Config */ + /* For T1 use timer based Mac flow control. 
*/ + if (t1_is_T1B(adapter)) + t1_tpi_write(adapter, OFFSET(0x304d), 0x8000); + t1_tpi_write(adapter, OFFSET(0x2040), 0x059c); /* # RXXG Config */ + t1_tpi_write(adapter, OFFSET(0x2049), 0x0000); /* # RXXG Cut Through */ + t1_tpi_write(adapter, OFFSET(0x2070), 0x0000); /* # Disable promiscuous mode */ + + /* Setup Exact Match Filter 0 to allow broadcast packets. + */ + t1_tpi_write(adapter, OFFSET(0x206e), 0x0000); /* # Disable Match Enable bit */ + t1_tpi_write(adapter, OFFSET(0x204a), 0xffff); /* # low addr */ + t1_tpi_write(adapter, OFFSET(0x204b), 0xffff); /* # mid addr */ + t1_tpi_write(adapter, OFFSET(0x204c), 0xffff); /* # high addr */ + t1_tpi_write(adapter, OFFSET(0x206e), 0x0009); /* # Enable Match Enable bit */ + + t1_tpi_write(adapter, OFFSET(0x0003), 0x0000); /* # NO SOP/ PAD_EN setup */ + t1_tpi_write(adapter, OFFSET(0x0100), 0x0ff0); /* # RXEQB disabled */ + t1_tpi_write(adapter, OFFSET(0x0101), 0x0f0f); /* # No Preemphasis */ + + return cmac; +} + +static int pm3393_mac_reset(adapter_t * adapter) +{ + u32 val; + u32 x; + u32 is_pl4_reset_finished; + u32 is_pl4_outof_lock; + u32 is_xaui_mabc_pll_locked; + u32 successful_reset; + int i; + + /* The following steps are required to properly reset + * the PM3393. This information is provided in the + * PM3393 datasheet (Issue 2: November 2002) + * section 13.1 -- Device Reset. + * + * The PM3393 has three types of components that are + * individually reset: + * + * DRESETB - Digital circuitry + * PL4_ARESETB - PL4 analog circuitry + * XAUI_ARESETB - XAUI bus analog circuitry + * + * Steps to reset PM3393 using RSTB pin: + * + * 1. Assert RSTB pin low ( write 0 ) + * 2. Wait at least 1ms to initiate a complete initialization of device. + * 3. Wait until all external clocks and REFSEL are stable. + * 4. Wait minimum of 1ms. (after external clocks and REFEL are stable) + * 5. De-assert RSTB ( write 1 ) + * 6. Wait until internal timers to expires after ~14ms. 
+ * - Allows analog clock synthesizer(PL4CSU) to stabilize to + * selected reference frequency before allowing the digital + * portion of the device to operate. + * 7. Wait at least 200us for XAUI interface to stabilize. + * 8. Verify the PM3393 came out of reset successfully. + * Set successful reset flag if everything worked else try again + * a few more times. + */ + + successful_reset = 0; + for (i = 0; i < 3 && !successful_reset; i++) { + /* 1 */ + t1_tpi_read(adapter, A_ELMER0_GPO, &val); + val &= ~1; + t1_tpi_write(adapter, A_ELMER0_GPO, val); + + /* 2 */ + msleep(1); + + /* 3 */ + msleep(1); + + /* 4 */ + msleep(2 /*1 extra ms for safety */ ); + + /* 5 */ + val |= 1; + t1_tpi_write(adapter, A_ELMER0_GPO, val); + + /* 6 */ + msleep(15 /*1 extra ms for safety */ ); + + /* 7 */ + msleep(1); + + /* 8 */ + + /* Has PL4 analog block come out of reset correctly? */ + t1_tpi_read(adapter, OFFSET(SUNI1x10GEXP_REG_DEVICE_STATUS), &val); + is_pl4_reset_finished = (val & SUNI1x10GEXP_BITMSK_TOP_EXPIRED); + + /* TBD XXX SUNI1x10GEXP_BITMSK_TOP_PL4_IS_DOOL gets locked later in the init sequence + * figure out why? */ + + /* Have all PL4 block clocks locked? */ + x = (SUNI1x10GEXP_BITMSK_TOP_PL4_ID_DOOL + /*| SUNI1x10GEXP_BITMSK_TOP_PL4_IS_DOOL */ | + SUNI1x10GEXP_BITMSK_TOP_PL4_ID_ROOL | + SUNI1x10GEXP_BITMSK_TOP_PL4_IS_ROOL | + SUNI1x10GEXP_BITMSK_TOP_PL4_OUT_ROOL); + is_pl4_outof_lock = (val & x); + + /* ??? If this fails, might be able to software reset the XAUI part + * and try to recover... thus saving us from doing another HW reset */ + /* Has the XAUI MABC PLL circuitry stablized? 
*/ + is_xaui_mabc_pll_locked = + (val & SUNI1x10GEXP_BITMSK_TOP_SXRA_EXPIRED); + + successful_reset = (is_pl4_reset_finished && !is_pl4_outof_lock + && is_xaui_mabc_pll_locked); + + CH_DBG(adapter, HW, + "PM3393 HW reset %d: pl4_reset 0x%x, val 0x%x, " + "is_pl4_outof_lock 0x%x, xaui_locked 0x%x\n", + i, is_pl4_reset_finished, val, is_pl4_outof_lock, + is_xaui_mabc_pll_locked); + } + return successful_reset ? 0 : 1; +} + +struct gmac t1_pm3393_ops = { + STATS_TICK_SECS, + pm3393_mac_create, + pm3393_mac_reset +}; diff --git a/drivers/net/chelsio/regs.h b/drivers/net/chelsio/regs.h new file mode 100644 index 000000000000..5a70803eb1b6 --- /dev/null +++ b/drivers/net/chelsio/regs.h @@ -0,0 +1,453 @@ +/***************************************************************************** + * * + * File: regs.h * + * $Revision: 1.4 $ * + * $Date: 2005/03/23 07:15:59 $ * + * Description: * + * part of the Chelsio 10Gb Ethernet Driver. * + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of the GNU General Public License, version 2, as * + * published by the Free Software Foundation. * + * * + * You should have received a copy of the GNU General Public License along * + * with this program; if not, write to the Free Software Foundation, Inc., * + * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. * + * * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED * + * WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF * + * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. * + * * + * http://www.chelsio.com * + * * + * Copyright (c) 2003 - 2005 Chelsio Communications, Inc. * + * All rights reserved. 
* + * * + * Maintainers: maintainers@chelsio.com * + * * + * Authors: Dimitrios Michailidis * + * Tina Yang * + * Felix Marti * + * Scott Bardone * + * Kurt Ottaway * + * Frank DiMambro * + * * + * History: * + * * + ****************************************************************************/ + +/* Do not edit this file */ + +/* SGE registers */ +#define A_SG_CONTROL 0x0 + +#define S_CMDQ0_ENABLE 0 +#define V_CMDQ0_ENABLE(x) ((x) << S_CMDQ0_ENABLE) +#define F_CMDQ0_ENABLE V_CMDQ0_ENABLE(1U) + +#define S_CMDQ1_ENABLE 1 +#define V_CMDQ1_ENABLE(x) ((x) << S_CMDQ1_ENABLE) +#define F_CMDQ1_ENABLE V_CMDQ1_ENABLE(1U) + +#define S_FL0_ENABLE 2 +#define V_FL0_ENABLE(x) ((x) << S_FL0_ENABLE) +#define F_FL0_ENABLE V_FL0_ENABLE(1U) + +#define S_FL1_ENABLE 3 +#define V_FL1_ENABLE(x) ((x) << S_FL1_ENABLE) +#define F_FL1_ENABLE V_FL1_ENABLE(1U) + +#define S_CPL_ENABLE 4 +#define V_CPL_ENABLE(x) ((x) << S_CPL_ENABLE) +#define F_CPL_ENABLE V_CPL_ENABLE(1U) + +#define S_RESPONSE_QUEUE_ENABLE 5 +#define V_RESPONSE_QUEUE_ENABLE(x) ((x) << S_RESPONSE_QUEUE_ENABLE) +#define F_RESPONSE_QUEUE_ENABLE V_RESPONSE_QUEUE_ENABLE(1U) + +#define S_CMDQ_PRIORITY 6 +#define M_CMDQ_PRIORITY 0x3 +#define V_CMDQ_PRIORITY(x) ((x) << S_CMDQ_PRIORITY) +#define G_CMDQ_PRIORITY(x) (((x) >> S_CMDQ_PRIORITY) & M_CMDQ_PRIORITY) + +#define S_DISABLE_CMDQ1_GTS 9 +#define V_DISABLE_CMDQ1_GTS(x) ((x) << S_DISABLE_CMDQ1_GTS) +#define F_DISABLE_CMDQ1_GTS V_DISABLE_CMDQ1_GTS(1U) + +#define S_ENABLE_BIG_ENDIAN 12 +#define V_ENABLE_BIG_ENDIAN(x) ((x) << S_ENABLE_BIG_ENDIAN) +#define F_ENABLE_BIG_ENDIAN V_ENABLE_BIG_ENDIAN(1U) + +#define S_ISCSI_COALESCE 14 +#define V_ISCSI_COALESCE(x) ((x) << S_ISCSI_COALESCE) +#define F_ISCSI_COALESCE V_ISCSI_COALESCE(1U) + +#define S_RX_PKT_OFFSET 15 +#define V_RX_PKT_OFFSET(x) ((x) << S_RX_PKT_OFFSET) + +#define S_VLAN_XTRACT 18 +#define V_VLAN_XTRACT(x) ((x) << S_VLAN_XTRACT) +#define F_VLAN_XTRACT V_VLAN_XTRACT(1U) + +#define A_SG_DOORBELL 0x4 +#define A_SG_CMD0BASELWR 0x8 
+#define A_SG_CMD0BASEUPR 0xc +#define A_SG_CMD1BASELWR 0x10 +#define A_SG_CMD1BASEUPR 0x14 +#define A_SG_FL0BASELWR 0x18 +#define A_SG_FL0BASEUPR 0x1c +#define A_SG_FL1BASELWR 0x20 +#define A_SG_FL1BASEUPR 0x24 +#define A_SG_CMD0SIZE 0x28 +#define A_SG_FL0SIZE 0x2c +#define A_SG_RSPSIZE 0x30 +#define A_SG_RSPBASELWR 0x34 +#define A_SG_RSPBASEUPR 0x38 +#define A_SG_FLTHRESHOLD 0x3c +#define A_SG_RSPQUEUECREDIT 0x40 +#define A_SG_SLEEPING 0x48 +#define A_SG_INTRTIMER 0x4c +#define A_SG_CMD1SIZE 0xb0 +#define A_SG_FL1SIZE 0xb4 +#define A_SG_INT_ENABLE 0xb8 + +#define S_RESPQ_EXHAUSTED 0 +#define V_RESPQ_EXHAUSTED(x) ((x) << S_RESPQ_EXHAUSTED) +#define F_RESPQ_EXHAUSTED V_RESPQ_EXHAUSTED(1U) + +#define S_RESPQ_OVERFLOW 1 +#define V_RESPQ_OVERFLOW(x) ((x) << S_RESPQ_OVERFLOW) +#define F_RESPQ_OVERFLOW V_RESPQ_OVERFLOW(1U) + +#define S_FL_EXHAUSTED 2 +#define V_FL_EXHAUSTED(x) ((x) << S_FL_EXHAUSTED) +#define F_FL_EXHAUSTED V_FL_EXHAUSTED(1U) + +#define S_PACKET_TOO_BIG 3 +#define V_PACKET_TOO_BIG(x) ((x) << S_PACKET_TOO_BIG) +#define F_PACKET_TOO_BIG V_PACKET_TOO_BIG(1U) + +#define S_PACKET_MISMATCH 4 +#define V_PACKET_MISMATCH(x) ((x) << S_PACKET_MISMATCH) +#define F_PACKET_MISMATCH V_PACKET_MISMATCH(1U) + +#define A_SG_INT_CAUSE 0xbc + +/* MC3 registers */ + +#define S_READY 1 +#define V_READY(x) ((x) << S_READY) +#define F_READY V_READY(1U) + +/* MC4 registers */ + +#define A_MC4_CFG 0x180 +#define S_MC4_SLOW 25 +#define V_MC4_SLOW(x) ((x) << S_MC4_SLOW) +#define F_MC4_SLOW V_MC4_SLOW(1U) + +/* TPI registers */ + +#define A_TPI_ADDR 0x280 +#define A_TPI_WR_DATA 0x284 +#define A_TPI_RD_DATA 0x288 +#define A_TPI_CSR 0x28c + +#define S_TPIWR 0 +#define V_TPIWR(x) ((x) << S_TPIWR) +#define F_TPIWR V_TPIWR(1U) + +#define S_TPIRDY 1 +#define V_TPIRDY(x) ((x) << S_TPIRDY) +#define F_TPIRDY V_TPIRDY(1U) + +#define A_TPI_PAR 0x29c + +#define S_TPIPAR 0 +#define M_TPIPAR 0x7f +#define V_TPIPAR(x) ((x) << S_TPIPAR) +#define G_TPIPAR(x) (((x) >> S_TPIPAR) & M_TPIPAR) + +/* TP 
registers */ + +#define A_TP_IN_CONFIG 0x300 + +#define S_TP_IN_CSPI_CPL 3 +#define V_TP_IN_CSPI_CPL(x) ((x) << S_TP_IN_CSPI_CPL) +#define F_TP_IN_CSPI_CPL V_TP_IN_CSPI_CPL(1U) + +#define S_TP_IN_CSPI_CHECK_IP_CSUM 5 +#define V_TP_IN_CSPI_CHECK_IP_CSUM(x) ((x) << S_TP_IN_CSPI_CHECK_IP_CSUM) +#define F_TP_IN_CSPI_CHECK_IP_CSUM V_TP_IN_CSPI_CHECK_IP_CSUM(1U) + +#define S_TP_IN_CSPI_CHECK_TCP_CSUM 6 +#define V_TP_IN_CSPI_CHECK_TCP_CSUM(x) ((x) << S_TP_IN_CSPI_CHECK_TCP_CSUM) +#define F_TP_IN_CSPI_CHECK_TCP_CSUM V_TP_IN_CSPI_CHECK_TCP_CSUM(1U) + +#define S_TP_IN_ESPI_ETHERNET 8 +#define V_TP_IN_ESPI_ETHERNET(x) ((x) << S_TP_IN_ESPI_ETHERNET) +#define F_TP_IN_ESPI_ETHERNET V_TP_IN_ESPI_ETHERNET(1U) + +#define S_TP_IN_ESPI_CHECK_IP_CSUM 12 +#define V_TP_IN_ESPI_CHECK_IP_CSUM(x) ((x) << S_TP_IN_ESPI_CHECK_IP_CSUM) +#define F_TP_IN_ESPI_CHECK_IP_CSUM V_TP_IN_ESPI_CHECK_IP_CSUM(1U) + +#define S_TP_IN_ESPI_CHECK_TCP_CSUM 13 +#define V_TP_IN_ESPI_CHECK_TCP_CSUM(x) ((x) << S_TP_IN_ESPI_CHECK_TCP_CSUM) +#define F_TP_IN_ESPI_CHECK_TCP_CSUM V_TP_IN_ESPI_CHECK_TCP_CSUM(1U) + +#define S_OFFLOAD_DISABLE 14 +#define V_OFFLOAD_DISABLE(x) ((x) << S_OFFLOAD_DISABLE) +#define F_OFFLOAD_DISABLE V_OFFLOAD_DISABLE(1U) + +#define A_TP_OUT_CONFIG 0x304 + +#define S_TP_OUT_CSPI_CPL 2 +#define V_TP_OUT_CSPI_CPL(x) ((x) << S_TP_OUT_CSPI_CPL) +#define F_TP_OUT_CSPI_CPL V_TP_OUT_CSPI_CPL(1U) + +#define S_TP_OUT_ESPI_ETHERNET 6 +#define V_TP_OUT_ESPI_ETHERNET(x) ((x) << S_TP_OUT_ESPI_ETHERNET) +#define F_TP_OUT_ESPI_ETHERNET V_TP_OUT_ESPI_ETHERNET(1U) + +#define S_TP_OUT_ESPI_GENERATE_IP_CSUM 10 +#define V_TP_OUT_ESPI_GENERATE_IP_CSUM(x) ((x) << S_TP_OUT_ESPI_GENERATE_IP_CSUM) +#define F_TP_OUT_ESPI_GENERATE_IP_CSUM V_TP_OUT_ESPI_GENERATE_IP_CSUM(1U) + +#define S_TP_OUT_ESPI_GENERATE_TCP_CSUM 11 +#define V_TP_OUT_ESPI_GENERATE_TCP_CSUM(x) ((x) << S_TP_OUT_ESPI_GENERATE_TCP_CSUM) +#define F_TP_OUT_ESPI_GENERATE_TCP_CSUM V_TP_OUT_ESPI_GENERATE_TCP_CSUM(1U) + +#define A_TP_GLOBAL_CONFIG 0x308 + 
+#define S_IP_TTL 0 +#define M_IP_TTL 0xff +#define V_IP_TTL(x) ((x) << S_IP_TTL) + +#define S_TCP_CSUM 11 +#define V_TCP_CSUM(x) ((x) << S_TCP_CSUM) +#define F_TCP_CSUM V_TCP_CSUM(1U) + +#define S_UDP_CSUM 12 +#define V_UDP_CSUM(x) ((x) << S_UDP_CSUM) +#define F_UDP_CSUM V_UDP_CSUM(1U) + +#define S_IP_CSUM 13 +#define V_IP_CSUM(x) ((x) << S_IP_CSUM) +#define F_IP_CSUM V_IP_CSUM(1U) + +#define S_PATH_MTU 15 +#define V_PATH_MTU(x) ((x) << S_PATH_MTU) +#define F_PATH_MTU V_PATH_MTU(1U) + +#define S_5TUPLE_LOOKUP 17 +#define V_5TUPLE_LOOKUP(x) ((x) << S_5TUPLE_LOOKUP) + +#define S_SYN_COOKIE_PARAMETER 26 +#define V_SYN_COOKIE_PARAMETER(x) ((x) << S_SYN_COOKIE_PARAMETER) + +#define A_TP_PC_CONFIG 0x348 +#define S_TP_PC_REV 30 +#define M_TP_PC_REV 0x3 +#define G_TP_PC_REV(x) (((x) >> S_TP_PC_REV) & M_TP_PC_REV) +#define A_TP_RESET 0x44c +#define S_TP_RESET 0 +#define V_TP_RESET(x) ((x) << S_TP_RESET) +#define F_TP_RESET V_TP_RESET(1U) + +#define A_TP_INT_ENABLE 0x470 +#define A_TP_INT_CAUSE 0x474 +#define A_TP_TX_DROP_CONFIG 0x4b8 + +#define S_ENABLE_TX_DROP 31 +#define V_ENABLE_TX_DROP(x) ((x) << S_ENABLE_TX_DROP) +#define F_ENABLE_TX_DROP V_ENABLE_TX_DROP(1U) + +#define S_ENABLE_TX_ERROR 30 +#define V_ENABLE_TX_ERROR(x) ((x) << S_ENABLE_TX_ERROR) +#define F_ENABLE_TX_ERROR V_ENABLE_TX_ERROR(1U) + +#define S_DROP_TICKS_CNT 4 +#define V_DROP_TICKS_CNT(x) ((x) << S_DROP_TICKS_CNT) + +#define S_NUM_PKTS_DROPPED 0 +#define V_NUM_PKTS_DROPPED(x) ((x) << S_NUM_PKTS_DROPPED) + +/* CSPI registers */ + +#define S_DIP4ERR 0 +#define V_DIP4ERR(x) ((x) << S_DIP4ERR) +#define F_DIP4ERR V_DIP4ERR(1U) + +#define S_RXDROP 1 +#define V_RXDROP(x) ((x) << S_RXDROP) +#define F_RXDROP V_RXDROP(1U) + +#define S_TXDROP 2 +#define V_TXDROP(x) ((x) << S_TXDROP) +#define F_TXDROP V_TXDROP(1U) + +#define S_RXOVERFLOW 3 +#define V_RXOVERFLOW(x) ((x) << S_RXOVERFLOW) +#define F_RXOVERFLOW V_RXOVERFLOW(1U) + +#define S_RAMPARITYERR 4 +#define V_RAMPARITYERR(x) ((x) << S_RAMPARITYERR) +#define 
F_RAMPARITYERR V_RAMPARITYERR(1U) + +/* ESPI registers */ + +#define A_ESPI_SCH_TOKEN0 0x880 +#define A_ESPI_SCH_TOKEN1 0x884 +#define A_ESPI_SCH_TOKEN2 0x888 +#define A_ESPI_SCH_TOKEN3 0x88c +#define A_ESPI_RX_FIFO_ALMOST_EMPTY_WATERMARK 0x890 +#define A_ESPI_RX_FIFO_ALMOST_FULL_WATERMARK 0x894 +#define A_ESPI_CALENDAR_LENGTH 0x898 +#define A_PORT_CONFIG 0x89c + +#define S_RX_NPORTS 0 +#define V_RX_NPORTS(x) ((x) << S_RX_NPORTS) + +#define S_TX_NPORTS 8 +#define V_TX_NPORTS(x) ((x) << S_TX_NPORTS) + +#define A_ESPI_FIFO_STATUS_ENABLE 0x8a0 + +#define S_RXSTATUSENABLE 0 +#define V_RXSTATUSENABLE(x) ((x) << S_RXSTATUSENABLE) +#define F_RXSTATUSENABLE V_RXSTATUSENABLE(1U) + +#define S_INTEL1010MODE 4 +#define V_INTEL1010MODE(x) ((x) << S_INTEL1010MODE) +#define F_INTEL1010MODE V_INTEL1010MODE(1U) + +#define A_ESPI_MAXBURST1_MAXBURST2 0x8a8 +#define A_ESPI_TRAIN 0x8ac +#define A_ESPI_INTR_STATUS 0x8c8 + +#define S_DIP2PARITYERR 5 +#define V_DIP2PARITYERR(x) ((x) << S_DIP2PARITYERR) +#define F_DIP2PARITYERR V_DIP2PARITYERR(1U) + +#define A_ESPI_INTR_ENABLE 0x8cc +#define A_RX_DROP_THRESHOLD 0x8d0 +#define A_ESPI_RX_RESET 0x8ec +#define A_ESPI_MISC_CONTROL 0x8f0 + +#define S_OUT_OF_SYNC_COUNT 0 +#define V_OUT_OF_SYNC_COUNT(x) ((x) << S_OUT_OF_SYNC_COUNT) + +#define S_DIP2_PARITY_ERR_THRES 5 +#define V_DIP2_PARITY_ERR_THRES(x) ((x) << S_DIP2_PARITY_ERR_THRES) + +#define S_DIP4_THRES 9 +#define V_DIP4_THRES(x) ((x) << S_DIP4_THRES) + +#define S_MONITORED_PORT_NUM 25 +#define V_MONITORED_PORT_NUM(x) ((x) << S_MONITORED_PORT_NUM) + +#define S_MONITORED_DIRECTION 27 +#define V_MONITORED_DIRECTION(x) ((x) << S_MONITORED_DIRECTION) +#define F_MONITORED_DIRECTION V_MONITORED_DIRECTION(1U) + +#define S_MONITORED_INTERFACE 28 +#define V_MONITORED_INTERFACE(x) ((x) << S_MONITORED_INTERFACE) +#define F_MONITORED_INTERFACE V_MONITORED_INTERFACE(1U) + +#define A_ESPI_DIP2_ERR_COUNT 0x8f4 +#define A_ESPI_CMD_ADDR 0x8f8 + +#define S_WRITE_DATA 0 +#define V_WRITE_DATA(x) ((x) << 
S_WRITE_DATA) + +#define S_REGISTER_OFFSET 8 +#define V_REGISTER_OFFSET(x) ((x) << S_REGISTER_OFFSET) + +#define S_CHANNEL_ADDR 12 +#define V_CHANNEL_ADDR(x) ((x) << S_CHANNEL_ADDR) + +#define S_MODULE_ADDR 16 +#define V_MODULE_ADDR(x) ((x) << S_MODULE_ADDR) + +#define S_BUNDLE_ADDR 20 +#define V_BUNDLE_ADDR(x) ((x) << S_BUNDLE_ADDR) + +#define S_SPI4_COMMAND 24 +#define V_SPI4_COMMAND(x) ((x) << S_SPI4_COMMAND) + +#define A_ESPI_GOSTAT 0x8fc +#define S_ESPI_CMD_BUSY 8 +#define V_ESPI_CMD_BUSY(x) ((x) << S_ESPI_CMD_BUSY) +#define F_ESPI_CMD_BUSY V_ESPI_CMD_BUSY(1U) + +/* PL registers */ + +#define A_PL_ENABLE 0xa00 + +#define S_PL_INTR_SGE_ERR 0 +#define V_PL_INTR_SGE_ERR(x) ((x) << S_PL_INTR_SGE_ERR) +#define F_PL_INTR_SGE_ERR V_PL_INTR_SGE_ERR(1U) + +#define S_PL_INTR_SGE_DATA 1 +#define V_PL_INTR_SGE_DATA(x) ((x) << S_PL_INTR_SGE_DATA) +#define F_PL_INTR_SGE_DATA V_PL_INTR_SGE_DATA(1U) + +#define S_PL_INTR_TP 6 +#define V_PL_INTR_TP(x) ((x) << S_PL_INTR_TP) +#define F_PL_INTR_TP V_PL_INTR_TP(1U) + +#define S_PL_INTR_ESPI 8 +#define V_PL_INTR_ESPI(x) ((x) << S_PL_INTR_ESPI) +#define F_PL_INTR_ESPI V_PL_INTR_ESPI(1U) + +#define S_PL_INTR_PCIX 10 +#define V_PL_INTR_PCIX(x) ((x) << S_PL_INTR_PCIX) +#define F_PL_INTR_PCIX V_PL_INTR_PCIX(1U) + +#define S_PL_INTR_EXT 11 +#define V_PL_INTR_EXT(x) ((x) << S_PL_INTR_EXT) +#define F_PL_INTR_EXT V_PL_INTR_EXT(1U) + +#define A_PL_CAUSE 0xa04 + +/* MC5 registers */ + +#define A_MC5_CONFIG 0xc04 + +#define S_TCAM_RESET 1 +#define V_TCAM_RESET(x) ((x) << S_TCAM_RESET) +#define F_TCAM_RESET V_TCAM_RESET(1U) + +#define S_M_BUS_ENABLE 5 +#define V_M_BUS_ENABLE(x) ((x) << S_M_BUS_ENABLE) +#define F_M_BUS_ENABLE V_M_BUS_ENABLE(1U) + +/* PCICFG registers */ + +#define A_PCICFG_PM_CSR 0x44 +#define A_PCICFG_VPD_ADDR 0x4a + +#define S_VPD_OP_FLAG 15 +#define V_VPD_OP_FLAG(x) ((x) << S_VPD_OP_FLAG) +#define F_VPD_OP_FLAG V_VPD_OP_FLAG(1U) + +#define A_PCICFG_VPD_DATA 0x4c + +#define A_PCICFG_INTR_ENABLE 0xf4 +#define A_PCICFG_INTR_CAUSE 
0xf8 + +#define A_PCICFG_MODE 0xfc + +#define S_PCI_MODE_64BIT 0 +#define V_PCI_MODE_64BIT(x) ((x) << S_PCI_MODE_64BIT) +#define F_PCI_MODE_64BIT V_PCI_MODE_64BIT(1U) + +#define S_PCI_MODE_PCIX 5 +#define V_PCI_MODE_PCIX(x) ((x) << S_PCI_MODE_PCIX) +#define F_PCI_MODE_PCIX V_PCI_MODE_PCIX(1U) + +#define S_PCI_MODE_CLK 6 +#define M_PCI_MODE_CLK 0x3 +#define G_PCI_MODE_CLK(x) (((x) >> S_PCI_MODE_CLK) & M_PCI_MODE_CLK) + diff --git a/drivers/net/chelsio/sge.c b/drivers/net/chelsio/sge.c new file mode 100644 index 000000000000..bcf8b1e939b0 --- /dev/null +++ b/drivers/net/chelsio/sge.c @@ -0,0 +1,1451 @@ +/***************************************************************************** + * * + * File: sge.c * + * $Revision: 1.13 $ * + * $Date: 2005/03/23 07:41:27 $ * + * Description: * + * DMA engine. * + * part of the Chelsio 10Gb Ethernet Driver. * + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of the GNU General Public License, version 2, as * + * published by the Free Software Foundation. * + * * + * You should have received a copy of the GNU General Public License along * + * with this program; if not, write to the Free Software Foundation, Inc., * + * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. * + * * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED * + * WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF * + * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. * + * * + * http://www.chelsio.com * + * * + * Copyright (c) 2003 - 2005 Chelsio Communications, Inc. * + * All rights reserved. 
* + * * + * Maintainers: maintainers@chelsio.com * + * * + * Authors: Dimitrios Michailidis * + * Tina Yang * + * Felix Marti * + * Scott Bardone * + * Kurt Ottaway * + * Frank DiMambro * + * * + * History: * + * * + ****************************************************************************/ + +#include "common.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "cpl5_cmd.h" +#include "sge.h" +#include "regs.h" +#include "espi.h" + +#include + +#define SGE_CMDQ_N 2 +#define SGE_FREELQ_N 2 +#define SGE_CMDQ0_E_N 512 +#define SGE_CMDQ1_E_N 128 +#define SGE_FREEL_SIZE 4096 +#define SGE_JUMBO_FREEL_SIZE 512 +#define SGE_FREEL_REFILL_THRESH 16 +#define SGE_RESPQ_E_N 1024 +#define SGE_INTR_BUCKETSIZE 100 +#define SGE_INTR_LATBUCKETS 5 +#define SGE_INTR_MAXBUCKETS 11 +#define SGE_INTRTIMER0 1 +#define SGE_INTRTIMER1 50 +#define SGE_INTRTIMER_NRES 10000 +#define SGE_RX_COPY_THRESHOLD 256 +#define SGE_RX_SM_BUF_SIZE 1536 + +#define SGE_RESPQ_REPLENISH_THRES ((3 * SGE_RESPQ_E_N) / 4) + +#define SGE_RX_OFFSET 2 +#ifndef NET_IP_ALIGN +# define NET_IP_ALIGN SGE_RX_OFFSET +#endif + +/* + * Memory Mapped HW Command, Freelist and Response Queue Descriptors + */ +#if defined(__BIG_ENDIAN_BITFIELD) +struct cmdQ_e { + u32 AddrLow; + u32 GenerationBit : 1; + u32 BufferLength : 31; + u32 RespQueueSelector : 4; + u32 ResponseTokens : 12; + u32 CmdId : 8; + u32 Reserved : 3; + u32 TokenValid : 1; + u32 Eop : 1; + u32 Sop : 1; + u32 DataValid : 1; + u32 GenerationBit2 : 1; + u32 AddrHigh; +}; + +struct freelQ_e { + u32 AddrLow; + u32 GenerationBit : 1; + u32 BufferLength : 31; + u32 Reserved : 31; + u32 GenerationBit2 : 1; + u32 AddrHigh; +}; + +struct respQ_e { + u32 Qsleeping : 4; + u32 Cmdq1CreditReturn : 5; + u32 Cmdq1DmaComplete : 5; + u32 Cmdq0CreditReturn : 5; + u32 Cmdq0DmaComplete : 5; + u32 FreelistQid : 2; + u32 CreditValid : 1; + u32 DataValid : 1; + u32 Offload : 1; + u32 Eop : 1; 
+ u32 Sop : 1; + u32 GenerationBit : 1; + u32 BufferLength; +}; + +#elif defined(__LITTLE_ENDIAN_BITFIELD) +struct cmdQ_e { + u32 BufferLength : 31; + u32 GenerationBit : 1; + u32 AddrLow; + u32 AddrHigh; + u32 GenerationBit2 : 1; + u32 DataValid : 1; + u32 Sop : 1; + u32 Eop : 1; + u32 TokenValid : 1; + u32 Reserved : 3; + u32 CmdId : 8; + u32 ResponseTokens : 12; + u32 RespQueueSelector : 4; +}; + +struct freelQ_e { + u32 BufferLength : 31; + u32 GenerationBit : 1; + u32 AddrLow; + u32 AddrHigh; + u32 GenerationBit2 : 1; + u32 Reserved : 31; +}; + +struct respQ_e { + u32 BufferLength; + u32 GenerationBit : 1; + u32 Sop : 1; + u32 Eop : 1; + u32 Offload : 1; + u32 DataValid : 1; + u32 CreditValid : 1; + u32 FreelistQid : 2; + u32 Cmdq0DmaComplete : 5; + u32 Cmdq0CreditReturn : 5; + u32 Cmdq1DmaComplete : 5; + u32 Cmdq1CreditReturn : 5; + u32 Qsleeping : 4; +} ; +#endif + +/* + * SW Context Command and Freelist Queue Descriptors + */ +struct cmdQ_ce { + struct sk_buff *skb; + DECLARE_PCI_UNMAP_ADDR(dma_addr); + DECLARE_PCI_UNMAP_LEN(dma_len); + unsigned int single; +}; + +struct freelQ_ce { + struct sk_buff *skb; + DECLARE_PCI_UNMAP_ADDR(dma_addr); + DECLARE_PCI_UNMAP_LEN(dma_len); +}; + +/* + * SW Command, Freelist and Response Queue + */ +struct cmdQ { + atomic_t asleep; /* HW DMA Fetch status */ + atomic_t credits; /* # available descriptors for TX */ + atomic_t pio_pidx; /* Variable updated on Doorbell */ + u16 entries_n; /* # descriptors for TX */ + u16 pidx; /* producer index (SW) */ + u16 cidx; /* consumer index (HW) */ + u8 genbit; /* current generation (=valid) bit */ + struct cmdQ_e *entries; /* HW command descriptor Q */ + struct cmdQ_ce *centries; /* SW command context descriptor Q */ + spinlock_t Qlock; /* Lock to protect cmdQ enqueuing */ + dma_addr_t dma_addr; /* DMA addr HW command descriptor Q */ +}; + +struct freelQ { + unsigned int credits; /* # of available RX buffers */ + unsigned int entries_n; /* free list capacity */ + u16 pidx; /* producer 
index (SW) */ + u16 cidx; /* consumer index (HW) */ + u16 rx_buffer_size; /* Buffer size on this free list */ + u16 dma_offset; /* DMA offset to align IP headers */ + u8 genbit; /* current generation (=valid) bit */ + struct freelQ_e *entries; /* HW freelist descriptor Q */ + struct freelQ_ce *centries; /* SW freelist conext descriptor Q */ + dma_addr_t dma_addr; /* DMA addr HW freelist descriptor Q */ +}; + +struct respQ { + u16 credits; /* # of available respQ descriptors */ + u16 credits_pend; /* # of not yet returned descriptors */ + u16 entries_n; /* # of response Q descriptors */ + u16 pidx; /* producer index (HW) */ + u16 cidx; /* consumer index (SW) */ + u8 genbit; /* current generation(=valid) bit */ + struct respQ_e *entries; /* HW response descriptor Q */ + dma_addr_t dma_addr; /* DMA addr HW response descriptor Q */ +}; + +/* + * Main SGE data structure + * + * Interrupts are handled by a single CPU and it is likely that on a MP system + * the application is migrated to another CPU. In that scenario, we try to + * seperate the RX(in irq context) and TX state in order to decrease memory + * contention. + */ +struct sge { + struct adapter *adapter; /* adapter backpointer */ + struct freelQ freelQ[SGE_FREELQ_N]; /* freelist Q(s) */ + struct respQ respQ; /* response Q instatiation */ + unsigned int rx_pkt_pad; /* RX padding for L2 packets */ + unsigned int jumbo_fl; /* jumbo freelist Q index */ + u32 intrtimer[SGE_INTR_MAXBUCKETS]; /* ! 
*/ + u32 currIndex; /* current index into intrtimer[] */ + u32 intrtimer_nres; /* no resource interrupt timer value */ + u32 sge_control; /* shadow content of sge control reg */ + struct sge_intr_counts intr_cnt; + struct timer_list ptimer; + struct sk_buff *pskb; + u32 ptimeout; + struct cmdQ cmdQ[SGE_CMDQ_N] ____cacheline_aligned; /* command Q(s)*/ +}; + +static unsigned int t1_sge_tx(struct sk_buff *skb, struct adapter *adapter, + unsigned int qid); + +/* + * PIO to indicate that memory mapped Q contains valid descriptor(s). + */ +static inline void doorbell_pio(struct sge *sge, u32 val) +{ + wmb(); + t1_write_reg_4(sge->adapter, A_SG_DOORBELL, val); +} + +/* + * Disables the DMA engine. + */ +void t1_sge_stop(struct sge *sge) +{ + t1_write_reg_4(sge->adapter, A_SG_CONTROL, 0); + t1_read_reg_4(sge->adapter, A_SG_CONTROL); /* flush write */ + if (is_T2(sge->adapter)) + del_timer_sync(&sge->ptimer); +} + +static u8 ch_mac_addr[ETH_ALEN] = {0x0, 0x7, 0x43, 0x0, 0x0, 0x0}; +static void t1_espi_workaround(void *data) +{ + struct adapter *adapter = (struct adapter *)data; + struct sge *sge = adapter->sge; + + if (netif_running(adapter->port[0].dev) && + atomic_read(&sge->cmdQ[0].asleep)) { + + u32 seop = t1_espi_get_mon(adapter, 0x930, 0); + + if ((seop & 0xfff0fff) == 0xfff && sge->pskb) { + struct sk_buff *skb = sge->pskb; + if (!skb->cb[0]) { + memcpy(skb->data+sizeof(struct cpl_tx_pkt), ch_mac_addr, ETH_ALEN); + memcpy(skb->data+skb->len-10, ch_mac_addr, ETH_ALEN); + + skb->cb[0] = 0xff; + } + t1_sge_tx(skb, adapter,0); + } + } + mod_timer(&adapter->sge->ptimer, jiffies + sge->ptimeout); +} + +/* + * Enables the DMA engine. 
+ */ +void t1_sge_start(struct sge *sge) +{ + t1_write_reg_4(sge->adapter, A_SG_CONTROL, sge->sge_control); + t1_read_reg_4(sge->adapter, A_SG_CONTROL); /* flush write */ + if (is_T2(sge->adapter)) { + init_timer(&sge->ptimer); + sge->ptimer.function = (void *)&t1_espi_workaround; + sge->ptimer.data = (unsigned long)sge->adapter; + sge->ptimer.expires = jiffies + sge->ptimeout; + add_timer(&sge->ptimer); + } +} + +/* + * Creates a t1_sge structure and returns suggested resource parameters. + */ +struct sge * __devinit t1_sge_create(struct adapter *adapter, + struct sge_params *p) +{ + struct sge *sge = kmalloc(sizeof(*sge), GFP_KERNEL); + + if (!sge) + return NULL; + memset(sge, 0, sizeof(*sge)); + + if (is_T2(adapter)) + sge->ptimeout = 1; /* finest allowed */ + + sge->adapter = adapter; + sge->rx_pkt_pad = t1_is_T1B(adapter) ? 0 : SGE_RX_OFFSET; + sge->jumbo_fl = t1_is_T1B(adapter) ? 1 : 0; + + p->cmdQ_size[0] = SGE_CMDQ0_E_N; + p->cmdQ_size[1] = SGE_CMDQ1_E_N; + p->freelQ_size[!sge->jumbo_fl] = SGE_FREEL_SIZE; + p->freelQ_size[sge->jumbo_fl] = SGE_JUMBO_FREEL_SIZE; + p->rx_coalesce_usecs = SGE_INTRTIMER1; + p->last_rx_coalesce_raw = SGE_INTRTIMER1 * + (board_info(sge->adapter)->clock_core / 1000000); + p->default_rx_coalesce_usecs = SGE_INTRTIMER1; + p->coalesce_enable = 0; /* Turn off adaptive algorithm by default */ + p->sample_interval_usecs = 0; + return sge; +} + +/* + * Frees all RX buffers on the freelist Q. The caller must make sure that + * the SGE is turned off before calling this function. + */ +static void free_freelQ_buffers(struct pci_dev *pdev, struct freelQ *Q) +{ + unsigned int cidx = Q->cidx, credits = Q->credits; + + while (credits--) { + struct freelQ_ce *ce = &Q->centries[cidx]; + + pci_unmap_single(pdev, pci_unmap_addr(ce, dma_addr), + pci_unmap_len(ce, dma_len), + PCI_DMA_FROMDEVICE); + dev_kfree_skb(ce->skb); + ce->skb = NULL; + if (++cidx == Q->entries_n) + cidx = 0; + } +} + +/* + * Free RX free list and response queue resources. 
+ */ +static void free_rx_resources(struct sge *sge) +{ + struct pci_dev *pdev = sge->adapter->pdev; + unsigned int size, i; + + if (sge->respQ.entries) { + size = sizeof(struct respQ_e) * sge->respQ.entries_n; + pci_free_consistent(pdev, size, sge->respQ.entries, + sge->respQ.dma_addr); + } + + for (i = 0; i < SGE_FREELQ_N; i++) { + struct freelQ *Q = &sge->freelQ[i]; + + if (Q->centries) { + free_freelQ_buffers(pdev, Q); + kfree(Q->centries); + } + if (Q->entries) { + size = sizeof(struct freelQ_e) * Q->entries_n; + pci_free_consistent(pdev, size, Q->entries, + Q->dma_addr); + } + } +} + +/* + * Allocates basic RX resources, consisting of memory mapped freelist Qs and a + * response Q. + */ +static int alloc_rx_resources(struct sge *sge, struct sge_params *p) +{ + struct pci_dev *pdev = sge->adapter->pdev; + unsigned int size, i; + + for (i = 0; i < SGE_FREELQ_N; i++) { + struct freelQ *Q = &sge->freelQ[i]; + + Q->genbit = 1; + Q->entries_n = p->freelQ_size[i]; + Q->dma_offset = SGE_RX_OFFSET - sge->rx_pkt_pad; + size = sizeof(struct freelQ_e) * Q->entries_n; + Q->entries = (struct freelQ_e *) + pci_alloc_consistent(pdev, size, &Q->dma_addr); + if (!Q->entries) + goto err_no_mem; + memset(Q->entries, 0, size); + Q->centries = kcalloc(Q->entries_n, sizeof(struct freelQ_ce), + GFP_KERNEL); + if (!Q->centries) + goto err_no_mem; + } + + /* + * Calculate the buffer sizes for the two free lists. FL0 accommodates + * regular sized Ethernet frames, FL1 is sized not to exceed 16K, + * including all the sk_buff overhead. + * + * Note: For T2 FL0 and FL1 are reversed. 
+ */ + sge->freelQ[!sge->jumbo_fl].rx_buffer_size = SGE_RX_SM_BUF_SIZE + + sizeof(struct cpl_rx_data) + + sge->freelQ[!sge->jumbo_fl].dma_offset; + sge->freelQ[sge->jumbo_fl].rx_buffer_size = (16 * 1024) - + SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); + + sge->respQ.genbit = 1; + sge->respQ.entries_n = SGE_RESPQ_E_N; + sge->respQ.credits = SGE_RESPQ_E_N; + size = sizeof(struct respQ_e) * sge->respQ.entries_n; + sge->respQ.entries = (struct respQ_e *) + pci_alloc_consistent(pdev, size, &sge->respQ.dma_addr); + if (!sge->respQ.entries) + goto err_no_mem; + memset(sge->respQ.entries, 0, size); + return 0; + +err_no_mem: + free_rx_resources(sge); + return -ENOMEM; +} + +/* + * Frees 'credits_pend' TX buffers and returns the credits to Q->credits. + * + * The adaptive algorithm receives the total size of the buffers freed + * accumulated in @*totpayload. No initialization of this argument here. + * + */ +static void free_cmdQ_buffers(struct sge *sge, struct cmdQ *Q, + unsigned int credits_pend, unsigned int *totpayload) +{ + struct pci_dev *pdev = sge->adapter->pdev; + struct sk_buff *skb; + struct cmdQ_ce *ce, *cq = Q->centries; + unsigned int entries_n = Q->entries_n, cidx = Q->cidx, + i = credits_pend; + + + ce = &cq[cidx]; + while (i--) { + if (ce->single) + pci_unmap_single(pdev, pci_unmap_addr(ce, dma_addr), + pci_unmap_len(ce, dma_len), + PCI_DMA_TODEVICE); + else + pci_unmap_page(pdev, pci_unmap_addr(ce, dma_addr), + pci_unmap_len(ce, dma_len), + PCI_DMA_TODEVICE); + if (totpayload) + *totpayload += pci_unmap_len(ce, dma_len); + + skb = ce->skb; + if (skb) + dev_kfree_skb_irq(skb); + + ce++; + if (++cidx == entries_n) { + cidx = 0; + ce = cq; + } + } + + Q->cidx = cidx; + atomic_add(credits_pend, &Q->credits); +} + +/* + * Free TX resources. + * + * Assumes that SGE is stopped and all interrupts are disabled. 
+ */ +static void free_tx_resources(struct sge *sge) +{ + struct pci_dev *pdev = sge->adapter->pdev; + unsigned int size, i; + + for (i = 0; i < SGE_CMDQ_N; i++) { + struct cmdQ *Q = &sge->cmdQ[i]; + + if (Q->centries) { + unsigned int pending = Q->entries_n - + atomic_read(&Q->credits); + + if (pending) + free_cmdQ_buffers(sge, Q, pending, NULL); + kfree(Q->centries); + } + if (Q->entries) { + size = sizeof(struct cmdQ_e) * Q->entries_n; + pci_free_consistent(pdev, size, Q->entries, + Q->dma_addr); + } + } +} + +/* + * Allocates basic TX resources, consisting of memory mapped command Qs. + */ +static int alloc_tx_resources(struct sge *sge, struct sge_params *p) +{ + struct pci_dev *pdev = sge->adapter->pdev; + unsigned int size, i; + + for (i = 0; i < SGE_CMDQ_N; i++) { + struct cmdQ *Q = &sge->cmdQ[i]; + + Q->genbit = 1; + Q->entries_n = p->cmdQ_size[i]; + atomic_set(&Q->credits, Q->entries_n); + atomic_set(&Q->asleep, 1); + spin_lock_init(&Q->Qlock); + size = sizeof(struct cmdQ_e) * Q->entries_n; + Q->entries = (struct cmdQ_e *) + pci_alloc_consistent(pdev, size, &Q->dma_addr); + if (!Q->entries) + goto err_no_mem; + memset(Q->entries, 0, size); + Q->centries = kcalloc(Q->entries_n, sizeof(struct cmdQ_ce), + GFP_KERNEL); + if (!Q->centries) + goto err_no_mem; + } + + return 0; + +err_no_mem: + free_tx_resources(sge); + return -ENOMEM; +} + +static inline void setup_ring_params(struct adapter *adapter, u64 addr, + u32 size, int base_reg_lo, + int base_reg_hi, int size_reg) +{ + t1_write_reg_4(adapter, base_reg_lo, (u32)addr); + t1_write_reg_4(adapter, base_reg_hi, addr >> 32); + t1_write_reg_4(adapter, size_reg, size); +} + +/* + * Enable/disable VLAN acceleration. 
+ */ +void t1_set_vlan_accel(struct adapter *adapter, int on_off) +{ + struct sge *sge = adapter->sge; + + sge->sge_control &= ~F_VLAN_XTRACT; + if (on_off) + sge->sge_control |= F_VLAN_XTRACT; + if (adapter->open_device_map) { + t1_write_reg_4(adapter, A_SG_CONTROL, sge->sge_control); + t1_read_reg_4(adapter, A_SG_CONTROL); /* flush */ + } +} + +/* + * Sets the interrupt latency timer when the adaptive Rx coalescing + * is turned off. Do nothing when it is turned on again. + * + * This routine relies on the fact that the caller has already set + * the adaptive policy in adapter->sge_params before calling it. +*/ +int t1_sge_set_coalesce_params(struct sge *sge, struct sge_params *p) +{ + if (!p->coalesce_enable) { + u32 newTimer = p->rx_coalesce_usecs * + (board_info(sge->adapter)->clock_core / 1000000); + + t1_write_reg_4(sge->adapter, A_SG_INTRTIMER, newTimer); + } + return 0; +} + +/* + * Programs the various SGE registers. However, the engine is not yet enabled, + * but sge->sge_control is setup and ready to go. + */ +static void configure_sge(struct sge *sge, struct sge_params *p) +{ + struct adapter *ap = sge->adapter; + int i; + + t1_write_reg_4(ap, A_SG_CONTROL, 0); + setup_ring_params(ap, sge->cmdQ[0].dma_addr, sge->cmdQ[0].entries_n, + A_SG_CMD0BASELWR, A_SG_CMD0BASEUPR, A_SG_CMD0SIZE); + setup_ring_params(ap, sge->cmdQ[1].dma_addr, sge->cmdQ[1].entries_n, + A_SG_CMD1BASELWR, A_SG_CMD1BASEUPR, A_SG_CMD1SIZE); + setup_ring_params(ap, sge->freelQ[0].dma_addr, + sge->freelQ[0].entries_n, A_SG_FL0BASELWR, + A_SG_FL0BASEUPR, A_SG_FL0SIZE); + setup_ring_params(ap, sge->freelQ[1].dma_addr, + sge->freelQ[1].entries_n, A_SG_FL1BASELWR, + A_SG_FL1BASEUPR, A_SG_FL1SIZE); + + /* The threshold comparison uses <. 
*/ + t1_write_reg_4(ap, A_SG_FLTHRESHOLD, SGE_RX_SM_BUF_SIZE + 1); + + setup_ring_params(ap, sge->respQ.dma_addr, sge->respQ.entries_n, + A_SG_RSPBASELWR, A_SG_RSPBASEUPR, A_SG_RSPSIZE); + t1_write_reg_4(ap, A_SG_RSPQUEUECREDIT, (u32)sge->respQ.entries_n); + + sge->sge_control = F_CMDQ0_ENABLE | F_CMDQ1_ENABLE | F_FL0_ENABLE | + F_FL1_ENABLE | F_CPL_ENABLE | F_RESPONSE_QUEUE_ENABLE | + V_CMDQ_PRIORITY(2) | F_DISABLE_CMDQ1_GTS | F_ISCSI_COALESCE | + V_RX_PKT_OFFSET(sge->rx_pkt_pad); + +#if defined(__BIG_ENDIAN_BITFIELD) + sge->sge_control |= F_ENABLE_BIG_ENDIAN; +#endif + + /* + * Initialize the SGE Interrupt Timer arrray: + * intrtimer[0] = (SGE_INTRTIMER0) usec + * intrtimer[0intrtimer[0] = board_info(sge->adapter)->clock_core / 1000000; + for (i = 1; i < SGE_INTR_LATBUCKETS; ++i) { + sge->intrtimer[i] = SGE_INTRTIMER0 + (2 * i); + sge->intrtimer[i] *= sge->intrtimer[0]; + } + for (i = SGE_INTR_LATBUCKETS; i < SGE_INTR_MAXBUCKETS - 1; ++i) { + sge->intrtimer[i] = (i - 3) * 6; + sge->intrtimer[i] *= sge->intrtimer[0]; + } + sge->intrtimer[SGE_INTR_MAXBUCKETS - 1] = + sge->intrtimer[0] * SGE_INTRTIMER1; + /* Initialize resource timer */ + sge->intrtimer_nres = sge->intrtimer[0] * SGE_INTRTIMER_NRES; + /* Finally finish initialization of intrtimer[0] */ + sge->intrtimer[0] *= SGE_INTRTIMER0; + /* Initialize for a throughput oriented workload */ + sge->currIndex = SGE_INTR_MAXBUCKETS - 1; + + if (p->coalesce_enable) + t1_write_reg_4(ap, A_SG_INTRTIMER, + sge->intrtimer[sge->currIndex]); + else + t1_sge_set_coalesce_params(sge, p); +} + +/* + * Return the payload capacity of the jumbo free-list buffers. + */ +static inline unsigned int jumbo_payload_capacity(const struct sge *sge) +{ + return sge->freelQ[sge->jumbo_fl].rx_buffer_size - + sizeof(struct cpl_rx_data) - SGE_RX_OFFSET + sge->rx_pkt_pad; +} + +/* + * Allocates both RX and TX resources and configures the SGE. However, + * the hardware is not enabled yet. 
+ */ +int t1_sge_configure(struct sge *sge, struct sge_params *p) +{ + if (alloc_rx_resources(sge, p)) + return -ENOMEM; + if (alloc_tx_resources(sge, p)) { + free_rx_resources(sge); + return -ENOMEM; + } + configure_sge(sge, p); + + /* + * Now that we have sized the free lists calculate the payload + * capacity of the large buffers. Other parts of the driver use + * this to set the max offload coalescing size so that RX packets + * do not overflow our large buffers. + */ + p->large_buf_capacity = jumbo_payload_capacity(sge); + return 0; +} + +/* + * Frees all SGE related resources and the sge structure itself + */ +void t1_sge_destroy(struct sge *sge) +{ + if (sge->pskb) + dev_kfree_skb(sge->pskb); + free_tx_resources(sge); + free_rx_resources(sge); + kfree(sge); +} + +/* + * Allocates new RX buffers on the freelist Q (and tracks them on the freelist + * context Q) until the Q is full or alloc_skb fails. + * + * It is possible that the generation bits already match, indicating that the + * buffer is already valid and nothing needs to be done. This happens when we + * copied a received buffer into a new sk_buff during the interrupt processing. + * + * If the SGE doesn't automatically align packets properly (!sge->rx_pkt_pad), + * we specify a RX_OFFSET in order to make sure that the IP header is 4B + * aligned. 
+ */ +static void refill_free_list(struct sge *sge, struct freelQ *Q) +{ + struct pci_dev *pdev = sge->adapter->pdev; + struct freelQ_ce *ce = &Q->centries[Q->pidx]; + struct freelQ_e *e = &Q->entries[Q->pidx]; + unsigned int dma_len = Q->rx_buffer_size - Q->dma_offset; + + + while (Q->credits < Q->entries_n) { + if (e->GenerationBit != Q->genbit) { + struct sk_buff *skb; + dma_addr_t mapping; + + skb = alloc_skb(Q->rx_buffer_size, GFP_ATOMIC); + if (!skb) + break; + if (Q->dma_offset) + skb_reserve(skb, Q->dma_offset); + mapping = pci_map_single(pdev, skb->data, dma_len, + PCI_DMA_FROMDEVICE); + ce->skb = skb; + pci_unmap_addr_set(ce, dma_addr, mapping); + pci_unmap_len_set(ce, dma_len, dma_len); + e->AddrLow = (u32)mapping; + e->AddrHigh = (u64)mapping >> 32; + e->BufferLength = dma_len; + e->GenerationBit = e->GenerationBit2 = Q->genbit; + } + + e++; + ce++; + if (++Q->pidx == Q->entries_n) { + Q->pidx = 0; + Q->genbit ^= 1; + ce = Q->centries; + e = Q->entries; + } + Q->credits++; + } + +} + +/* + * Calls refill_free_list for both freelist Qs. If we cannot + * fill at least 1/4 of both Qs, we go into 'few interrupt mode' in order + * to give the system time to free up resources. 
+ */ +static void freelQs_empty(struct sge *sge) +{ + u32 irq_reg = t1_read_reg_4(sge->adapter, A_SG_INT_ENABLE); + u32 irqholdoff_reg; + + refill_free_list(sge, &sge->freelQ[0]); + refill_free_list(sge, &sge->freelQ[1]); + + if (sge->freelQ[0].credits > (sge->freelQ[0].entries_n >> 2) && + sge->freelQ[1].credits > (sge->freelQ[1].entries_n >> 2)) { + irq_reg |= F_FL_EXHAUSTED; + irqholdoff_reg = sge->intrtimer[sge->currIndex]; + } else { + /* Clear the F_FL_EXHAUSTED interrupts for now */ + irq_reg &= ~F_FL_EXHAUSTED; + irqholdoff_reg = sge->intrtimer_nres; + } + t1_write_reg_4(sge->adapter, A_SG_INTRTIMER, irqholdoff_reg); + t1_write_reg_4(sge->adapter, A_SG_INT_ENABLE, irq_reg); + + /* We reenable the Qs to force a freelist GTS interrupt later */ + doorbell_pio(sge, F_FL0_ENABLE | F_FL1_ENABLE); +} + +#define SGE_PL_INTR_MASK (F_PL_INTR_SGE_ERR | F_PL_INTR_SGE_DATA) +#define SGE_INT_FATAL (F_RESPQ_OVERFLOW | F_PACKET_TOO_BIG | F_PACKET_MISMATCH) +#define SGE_INT_ENABLE (F_RESPQ_EXHAUSTED | F_RESPQ_OVERFLOW | \ + F_FL_EXHAUSTED | F_PACKET_TOO_BIG | F_PACKET_MISMATCH) + +/* + * Disable SGE Interrupts + */ +void t1_sge_intr_disable(struct sge *sge) +{ + u32 val = t1_read_reg_4(sge->adapter, A_PL_ENABLE); + + t1_write_reg_4(sge->adapter, A_PL_ENABLE, val & ~SGE_PL_INTR_MASK); + t1_write_reg_4(sge->adapter, A_SG_INT_ENABLE, 0); +} + +/* + * Enable SGE interrupts. + */ +void t1_sge_intr_enable(struct sge *sge) +{ + u32 en = SGE_INT_ENABLE; + u32 val = t1_read_reg_4(sge->adapter, A_PL_ENABLE); + + if (sge->adapter->flags & TSO_CAPABLE) + en &= ~F_PACKET_TOO_BIG; + t1_write_reg_4(sge->adapter, A_SG_INT_ENABLE, en); + t1_write_reg_4(sge->adapter, A_PL_ENABLE, val | SGE_PL_INTR_MASK); +} + +/* + * Clear SGE interrupts. 
+ */ +void t1_sge_intr_clear(struct sge *sge) +{ + t1_write_reg_4(sge->adapter, A_PL_CAUSE, SGE_PL_INTR_MASK); + t1_write_reg_4(sge->adapter, A_SG_INT_CAUSE, 0xffffffff); +} + +/* + * SGE 'Error' interrupt handler + */ +int t1_sge_intr_error_handler(struct sge *sge) +{ + struct adapter *adapter = sge->adapter; + u32 cause = t1_read_reg_4(adapter, A_SG_INT_CAUSE); + + if (adapter->flags & TSO_CAPABLE) + cause &= ~F_PACKET_TOO_BIG; + if (cause & F_RESPQ_EXHAUSTED) + sge->intr_cnt.respQ_empty++; + if (cause & F_RESPQ_OVERFLOW) { + sge->intr_cnt.respQ_overflow++; + CH_ALERT("%s: SGE response queue overflow\n", + adapter->name); + } + if (cause & F_FL_EXHAUSTED) { + sge->intr_cnt.freelistQ_empty++; + freelQs_empty(sge); + } + if (cause & F_PACKET_TOO_BIG) { + sge->intr_cnt.pkt_too_big++; + CH_ALERT("%s: SGE max packet size exceeded\n", + adapter->name); + } + if (cause & F_PACKET_MISMATCH) { + sge->intr_cnt.pkt_mismatch++; + CH_ALERT("%s: SGE packet mismatch\n", adapter->name); + } + if (cause & SGE_INT_FATAL) + t1_fatal_err(adapter); + + t1_write_reg_4(adapter, A_SG_INT_CAUSE, cause); + return 0; +} + +/* + * The following code is copied from 2.6, where the skb_pull is doing the + * right thing and only pulls ETH_HLEN. + * + * Determine the packet's protocol ID. The rule here is that we + * assume 802.3 if the type field is short enough to be a length. + * This is normal practice and works for any 'now in use' protocol. + */ +static unsigned short sge_eth_type_trans(struct sk_buff *skb, + struct net_device *dev) +{ + struct ethhdr *eth; + unsigned char *rawp; + + skb->mac.raw = skb->data; + skb_pull(skb, ETH_HLEN); + eth = (struct ethhdr *)skb->mac.raw; + + if (*eth->h_dest&1) { + if(memcmp(eth->h_dest, dev->broadcast, ETH_ALEN) == 0) + skb->pkt_type = PACKET_BROADCAST; + else + skb->pkt_type = PACKET_MULTICAST; + } + + /* + * This ALLMULTI check should be redundant by 1.4 + * so don't forget to remove it. + * + * Seems, you forgot to remove it. 
All silly devices + * seems to set IFF_PROMISC. + */ + + else if (1 /*dev->flags&IFF_PROMISC*/) + { + if(memcmp(eth->h_dest,dev->dev_addr, ETH_ALEN)) + skb->pkt_type=PACKET_OTHERHOST; + } + + if (ntohs(eth->h_proto) >= 1536) + return eth->h_proto; + + rawp = skb->data; + + /* + * This is a magic hack to spot IPX packets. Older Novell breaks + * the protocol design and runs IPX over 802.3 without an 802.2 LLC + * layer. We look for FFFF which isn't a used 802.2 SSAP/DSAP. This + * won't work for fault tolerant netware but does for the rest. + */ + if (*(unsigned short *)rawp == 0xFFFF) + return htons(ETH_P_802_3); + + /* + * Real 802.2 LLC + */ + return htons(ETH_P_802_2); +} + +/* + * Prepare the received buffer and pass it up the stack. If it is small enough + * and allocation doesn't fail, we use a new sk_buff and copy the content. + */ +static unsigned int t1_sge_rx(struct sge *sge, struct freelQ *Q, + unsigned int len, unsigned int offload) +{ + struct sk_buff *skb; + struct adapter *adapter = sge->adapter; + struct freelQ_ce *ce = &Q->centries[Q->cidx]; + + if (len <= SGE_RX_COPY_THRESHOLD && + (skb = alloc_skb(len + NET_IP_ALIGN, GFP_ATOMIC))) { + struct freelQ_e *e; + char *src = ce->skb->data; + + pci_dma_sync_single_for_cpu(adapter->pdev, + pci_unmap_addr(ce, dma_addr), + pci_unmap_len(ce, dma_len), + PCI_DMA_FROMDEVICE); + if (!offload) { + skb_reserve(skb, NET_IP_ALIGN); + src += sge->rx_pkt_pad; + } + memcpy(skb->data, src, len); + + /* Reuse the entry. 
*/ + e = &Q->entries[Q->cidx]; + e->GenerationBit ^= 1; + e->GenerationBit2 ^= 1; + } else { + pci_unmap_single(adapter->pdev, pci_unmap_addr(ce, dma_addr), + pci_unmap_len(ce, dma_len), + PCI_DMA_FROMDEVICE); + skb = ce->skb; + if (!offload && sge->rx_pkt_pad) + __skb_pull(skb, sge->rx_pkt_pad); + } + + skb_put(skb, len); + + + if (unlikely(offload)) { + { + printk(KERN_ERR + "%s: unexpected offloaded packet, cmd %u\n", + adapter->name, *skb->data); + dev_kfree_skb_any(skb); + } + } else { + struct cpl_rx_pkt *p = (struct cpl_rx_pkt *)skb->data; + + skb_pull(skb, sizeof(*p)); + skb->dev = adapter->port[p->iff].dev; + skb->dev->last_rx = jiffies; + skb->protocol = sge_eth_type_trans(skb, skb->dev); + if ((adapter->flags & RX_CSUM_ENABLED) && p->csum == 0xffff && + skb->protocol == htons(ETH_P_IP) && + (skb->data[9] == IPPROTO_TCP || + skb->data[9] == IPPROTO_UDP)) + skb->ip_summed = CHECKSUM_UNNECESSARY; + else + skb->ip_summed = CHECKSUM_NONE; + if (adapter->vlan_grp && p->vlan_valid) + vlan_hwaccel_rx(skb, adapter->vlan_grp, + ntohs(p->vlan)); + else + netif_rx(skb); + } + + if (++Q->cidx == Q->entries_n) + Q->cidx = 0; + + if (unlikely(--Q->credits < Q->entries_n - SGE_FREEL_REFILL_THRESH)) + refill_free_list(sge, Q); + return 1; +} + + +/* + * Adaptive interrupt timer logic to keep the CPU utilization to + * manageable levels. Basically, as the Average Packet Size (APS) + * gets higher, the interrupt latency setting gets longer. Every + * SGE_INTR_BUCKETSIZE (of 100B) causes a bump of 2usec to the + * base value of SGE_INTRTIMER0. At large values of payload the + * latency hits the ceiling value of SGE_INTRTIMER1 stored at + * index SGE_INTR_MAXBUCKETS-1 in sge->intrtimer[]. + * + * sge->currIndex caches the last index to save unneeded PIOs. 
+ */ +static inline void update_intr_timer(struct sge *sge, unsigned int avg_payload) +{ + unsigned int newIndex; + + newIndex = avg_payload / SGE_INTR_BUCKETSIZE; + if (newIndex > SGE_INTR_MAXBUCKETS - 1) { + newIndex = SGE_INTR_MAXBUCKETS - 1; + } + /* Save a PIO with this check....maybe */ + if (newIndex != sge->currIndex) { + t1_write_reg_4(sge->adapter, A_SG_INTRTIMER, + sge->intrtimer[newIndex]); + sge->currIndex = newIndex; + sge->adapter->params.sge.last_rx_coalesce_raw = + sge->intrtimer[newIndex]; + } +} + +/* + * Returns true if command queue q_num has enough available descriptors that + * we can resume Tx operation after temporarily disabling its packet queue. + */ +static inline int enough_free_Tx_descs(struct sge *sge, int q_num) +{ + return atomic_read(&sge->cmdQ[q_num].credits) > + (sge->cmdQ[q_num].entries_n >> 2); +} + +/* + * Main interrupt handler, optimized assuming that we took a 'DATA' + * interrupt. + * + * 1. Clear the interrupt + * 2. Loop while we find valid descriptors and process them; accumulate + * information that can be processed after the loop + * 3. Tell the SGE at which index we stopped processing descriptors + * 4. Bookkeeping; free TX buffers, ring doorbell if there are any + * outstanding TX buffers waiting, replenish RX buffers, potentially + * reenable upper layers if they were turned off due to lack of TX + * resources which are available again. + * 5. If we took an interrupt, but no valid respQ descriptors was found we + * let the slow_intr_handler run and do error handling. 
+ */ +irqreturn_t t1_interrupt(int irq, void *cookie, struct pt_regs *regs) +{ + struct net_device *netdev; + struct adapter *adapter = cookie; + struct sge *sge = adapter->sge; + struct respQ *Q = &sge->respQ; + unsigned int credits = Q->credits, flags = 0, ret = 0; + unsigned int tot_rxpayload = 0, tot_txpayload = 0, n_rx = 0, n_tx = 0; + unsigned int credits_pend[SGE_CMDQ_N] = { 0, 0 }; + + struct respQ_e *e = &Q->entries[Q->cidx]; + prefetch(e); + + t1_write_reg_4(adapter, A_PL_CAUSE, F_PL_INTR_SGE_DATA); + + + while (e->GenerationBit == Q->genbit) { + if (--credits < SGE_RESPQ_REPLENISH_THRES) { + u32 n = Q->entries_n - credits - 1; + + t1_write_reg_4(adapter, A_SG_RSPQUEUECREDIT, n); + credits += n; + } + if (likely(e->DataValid)) { + if (!e->Sop || !e->Eop) + BUG(); + t1_sge_rx(sge, &sge->freelQ[e->FreelistQid], + e->BufferLength, e->Offload); + tot_rxpayload += e->BufferLength; + ++n_rx; + } + flags |= e->Qsleeping; + credits_pend[0] += e->Cmdq0CreditReturn; + credits_pend[1] += e->Cmdq1CreditReturn; + +#ifdef CONFIG_SMP + /* + * If enough cmdQ0 buffers have finished DMAing free them so + * anyone that may be waiting for their release can continue. + * We do this only on MP systems to allow other CPUs to proceed + * promptly. UP systems can wait for the free_cmdQ_buffers() + * calls after this loop as the sole CPU is currently busy in + * this loop. 
+ */ + if (unlikely(credits_pend[0] > SGE_FREEL_REFILL_THRESH)) { + free_cmdQ_buffers(sge, &sge->cmdQ[0], credits_pend[0], + &tot_txpayload); + n_tx += credits_pend[0]; + credits_pend[0] = 0; + } +#endif + ret++; + e++; + if (unlikely(++Q->cidx == Q->entries_n)) { + Q->cidx = 0; + Q->genbit ^= 1; + e = Q->entries; + } + } + + Q->credits = credits; + t1_write_reg_4(adapter, A_SG_SLEEPING, Q->cidx); + + if (credits_pend[0]) + free_cmdQ_buffers(sge, &sge->cmdQ[0], credits_pend[0], &tot_txpayload); + if (credits_pend[1]) + free_cmdQ_buffers(sge, &sge->cmdQ[1], credits_pend[1], &tot_txpayload); + + /* Do any coalescing and interrupt latency timer adjustments */ + if (adapter->params.sge.coalesce_enable) { + unsigned int avg_txpayload = 0, avg_rxpayload = 0; + + n_tx += credits_pend[0] + credits_pend[1]; + + /* + * Choose larger avg. payload size to increase + * throughput and reduce [CPU util., intr/s.] + * + * Throughput behavior favored in mixed-mode. + */ + if (n_tx) + avg_txpayload = tot_txpayload/n_tx; + if (n_rx) + avg_rxpayload = tot_rxpayload/n_rx; + + if (n_tx && avg_txpayload > avg_rxpayload){ + update_intr_timer(sge, avg_txpayload); + } else if (n_rx) { + update_intr_timer(sge, avg_rxpayload); + } + } + + if (flags & F_CMDQ0_ENABLE) { + struct cmdQ *cmdQ = &sge->cmdQ[0]; + + atomic_set(&cmdQ->asleep, 1); + if (atomic_read(&cmdQ->pio_pidx) != cmdQ->pidx) { + doorbell_pio(sge, F_CMDQ0_ENABLE); + atomic_set(&cmdQ->pio_pidx, cmdQ->pidx); + } + } + if (unlikely(flags & (F_FL0_ENABLE | F_FL1_ENABLE))) + freelQs_empty(sge); + + netdev = adapter->port[0].dev; + if (unlikely(netif_queue_stopped(netdev) && netif_carrier_ok(netdev) && + enough_free_Tx_descs(sge, 0) && + enough_free_Tx_descs(sge, 1))) { + netif_wake_queue(netdev); + } + if (unlikely(!ret)) + ret = t1_slow_intr_handler(adapter); + + return IRQ_RETVAL(ret != 0); +} + +/* + * Enqueues the sk_buff onto the cmdQ[qid] and has hardware fetch it. 
+ * + * The code figures out how many entries the sk_buff will require in the + * cmdQ and updates the cmdQ data structure with the state once the enqueue + * has complete. Then, it doesn't access the global structure anymore, but + * uses the corresponding fields on the stack. In conjuction with a spinlock + * around that code, we can make the function reentrant without holding the + * lock when we actually enqueue (which might be expensive, especially on + * architectures with IO MMUs). + */ +static unsigned int t1_sge_tx(struct sk_buff *skb, struct adapter *adapter, + unsigned int qid) +{ + struct sge *sge = adapter->sge; + struct cmdQ *Q = &sge->cmdQ[qid]; + struct cmdQ_e *e; + struct cmdQ_ce *ce; + dma_addr_t mapping; + unsigned int credits, pidx, genbit; + + unsigned int count = 1 + skb_shinfo(skb)->nr_frags; + + /* + * Coming from the timer + */ + if ((skb == sge->pskb)) { + /* + * Quit if any cmdQ activities + */ + if (!spin_trylock(&Q->Qlock)) + return 0; + if (atomic_read(&Q->credits) != Q->entries_n) { + spin_unlock(&Q->Qlock); + return 0; + } + } + else + spin_lock(&Q->Qlock); + + genbit = Q->genbit; + pidx = Q->pidx; + credits = atomic_read(&Q->credits); + + credits -= count; + atomic_sub(count, &Q->credits); + Q->pidx += count; + if (Q->pidx >= Q->entries_n) { + Q->pidx -= Q->entries_n; + Q->genbit ^= 1; + } + + if (unlikely(credits < (MAX_SKB_FRAGS + 1))) { + sge->intr_cnt.cmdQ_full[qid]++; + netif_stop_queue(adapter->port[0].dev); + } + spin_unlock(&Q->Qlock); + + mapping = pci_map_single(adapter->pdev, skb->data, + skb->len - skb->data_len, PCI_DMA_TODEVICE); + ce = &Q->centries[pidx]; + ce->skb = NULL; + pci_unmap_addr_set(ce, dma_addr, mapping); + pci_unmap_len_set(ce, dma_len, skb->len - skb->data_len); + ce->single = 1; + + e = &Q->entries[pidx]; + e->Sop = 1; + e->DataValid = 1; + e->BufferLength = skb->len - skb->data_len; + e->AddrHigh = (u64)mapping >> 32; + e->AddrLow = (u32)mapping; + + if (--count > 0) { + unsigned int i; + + e->Eop = 0; 
+ wmb(); + e->GenerationBit = e->GenerationBit2 = genbit; + + for (i = 0; i < count; i++) { + skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; + + ce++; e++; + if (++pidx == Q->entries_n) { + pidx = 0; + genbit ^= 1; + ce = Q->centries; + e = Q->entries; + } + + mapping = pci_map_page(adapter->pdev, frag->page, + frag->page_offset, + frag->size, + PCI_DMA_TODEVICE); + ce->skb = NULL; + pci_unmap_addr_set(ce, dma_addr, mapping); + pci_unmap_len_set(ce, dma_len, frag->size); + ce->single = 0; + + e->Sop = 0; + e->DataValid = 1; + e->BufferLength = frag->size; + e->AddrHigh = (u64)mapping >> 32; + e->AddrLow = (u32)mapping; + + if (i < count - 1) { + e->Eop = 0; + wmb(); + e->GenerationBit = e->GenerationBit2 = genbit; + } + } + } + + if (skb != sge->pskb) + ce->skb = skb; + e->Eop = 1; + wmb(); + e->GenerationBit = e->GenerationBit2 = genbit; + + /* + * We always ring the doorbell for cmdQ1. For cmdQ0, we only ring + * the doorbell if the Q is asleep. There is a natural race, where + * the hardware is going to sleep just after we checked, however, + * then the interrupt handler will detect the outstanding TX packet + * and ring the doorbell for us. + */ + if (qid) { + doorbell_pio(sge, F_CMDQ1_ENABLE); + } else if (atomic_read(&Q->asleep)) { + atomic_set(&Q->asleep, 0); + doorbell_pio(sge, F_CMDQ0_ENABLE); + atomic_set(&Q->pio_pidx, Q->pidx); + } + return 0; +} + +#define MK_ETH_TYPE_MSS(type, mss) (((mss) & 0x3FFF) | ((type) << 14)) + +/* + * Adds the CPL header to the sk_buff and passes it to t1_sge_tx. + */ +int t1_start_xmit(struct sk_buff *skb, struct net_device *dev) +{ + struct adapter *adapter = dev->priv; + struct cpl_tx_pkt *cpl; + struct ethhdr *eth; + size_t max_len; + + /* + * We are using a non-standard hard_header_len and some kernel + * components, such as pktgen, do not handle it right. Complain + * when this happens but try to fix things up. 
+ */ + if (unlikely(skb_headroom(skb) < dev->hard_header_len - ETH_HLEN)) { + struct sk_buff *orig_skb = skb; + + if (net_ratelimit()) + printk(KERN_ERR + "%s: Tx packet has inadequate headroom\n", + dev->name); + skb = skb_realloc_headroom(skb, sizeof(struct cpl_tx_pkt_lso)); + dev_kfree_skb_any(orig_skb); + if (!skb) + return -ENOMEM; + } + + if (skb_shinfo(skb)->tso_size) { + int eth_type; + struct cpl_tx_pkt_lso *hdr; + + eth_type = skb->nh.raw - skb->data == ETH_HLEN ? + CPL_ETH_II : CPL_ETH_II_VLAN; + + hdr = (struct cpl_tx_pkt_lso *)skb_push(skb, sizeof(*hdr)); + hdr->opcode = CPL_TX_PKT_LSO; + hdr->ip_csum_dis = hdr->l4_csum_dis = 0; + hdr->ip_hdr_words = skb->nh.iph->ihl; + hdr->tcp_hdr_words = skb->h.th->doff; + hdr->eth_type_mss = htons(MK_ETH_TYPE_MSS(eth_type, + skb_shinfo(skb)->tso_size)); + hdr->len = htonl(skb->len - sizeof(*hdr)); + cpl = (struct cpl_tx_pkt *)hdr; + } else + { + /* + * An Ethernet packet must have at least space for + * the DIX Ethernet header and be no greater than + * the device set MTU. Otherwise trash the packet. + */ + if (skb->len < ETH_HLEN) + goto t1_start_xmit_fail2; + eth = (struct ethhdr *)skb->data; + if (eth->h_proto == htons(ETH_P_8021Q)) + max_len = dev->mtu + VLAN_ETH_HLEN; + else + max_len = dev->mtu + ETH_HLEN; + if (skb->len > max_len) + goto t1_start_xmit_fail2; + + if (!(adapter->flags & UDP_CSUM_CAPABLE) && + skb->ip_summed == CHECKSUM_HW && + skb->nh.iph->protocol == IPPROTO_UDP && + skb_checksum_help(skb, 0)) + goto t1_start_xmit_fail3; + + + if (!adapter->sge->pskb) { + if (skb->protocol == htons(ETH_P_ARP) && + skb->nh.arph->ar_op == htons(ARPOP_REQUEST)) + adapter->sge->pskb = skb; + } + cpl = (struct cpl_tx_pkt *)skb_push(skb, sizeof(*cpl)); + cpl->opcode = CPL_TX_PKT; + cpl->ip_csum_dis = 1; /* SW calculates IP csum */ + cpl->l4_csum_dis = skb->ip_summed == CHECKSUM_HW ? 
0 : 1; + /* the length field isn't used so don't bother setting it */ + } + cpl->iff = dev->if_port; + +#if defined(CONFIG_VLAN_8021Q) || defined(CONFIG_VLAN_8021Q_MODULE) + if (adapter->vlan_grp && vlan_tx_tag_present(skb)) { + cpl->vlan_valid = 1; + cpl->vlan = htons(vlan_tx_tag_get(skb)); + } else +#endif + cpl->vlan_valid = 0; + + dev->trans_start = jiffies; + return t1_sge_tx(skb, adapter, 0); + +t1_start_xmit_fail3: + printk(KERN_INFO "%s: Unable to complete checksum\n", dev->name); + goto t1_start_xmit_fail1; + +t1_start_xmit_fail2: + printk(KERN_INFO "%s: Invalid packet length %d, dropping\n", + dev->name, skb->len); + +t1_start_xmit_fail1: + dev_kfree_skb_any(skb); + return 0; +} + +void t1_sge_set_ptimeout(adapter_t *adapter, u32 val) +{ + struct sge *sge = adapter->sge; + + if (is_T2(adapter)) + sge->ptimeout = max((u32)((HZ * val) / 1000), (u32)1); +} + +u32 t1_sge_get_ptimeout(adapter_t *adapter) +{ + struct sge *sge = adapter->sge; + + return (is_T2(adapter) ? ((sge->ptimeout * 1000) / HZ) : 0); +} + diff --git a/drivers/net/chelsio/sge.h b/drivers/net/chelsio/sge.h new file mode 100644 index 000000000000..140f896def60 --- /dev/null +++ b/drivers/net/chelsio/sge.h @@ -0,0 +1,79 @@ +/***************************************************************************** + * * + * File: sge.h * + * $Revision: 1.7 $ * + * $Date: 2005/03/23 07:15:59 $ * + * Description: * + * part of the Chelsio 10Gb Ethernet Driver. * + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of the GNU General Public License, version 2, as * + * published by the Free Software Foundation. * + * * + * You should have received a copy of the GNU General Public License along * + * with this program; if not, write to the Free Software Foundation, Inc., * + * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
* + * * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED * + * WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF * + * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. * + * * + * http://www.chelsio.com * + * * + * Copyright (c) 2003 - 2005 Chelsio Communications, Inc. * + * All rights reserved. * + * * + * Maintainers: maintainers@chelsio.com * + * * + * Authors: Dimitrios Michailidis * + * Tina Yang * + * Felix Marti * + * Scott Bardone * + * Kurt Ottaway * + * Frank DiMambro * + * * + * History: * + * * + ****************************************************************************/ + +#ifndef _CHELSIO_LINUX_SGE_H_ +#define _CHELSIO_LINUX_SGE_H_ + +#include +#include +#include + +struct sge_intr_counts { + unsigned int respQ_empty; /* # times respQ empty */ + unsigned int respQ_overflow; /* # respQ overflow (fatal) */ + unsigned int freelistQ_empty; /* # times freelist empty */ + unsigned int pkt_too_big; /* packet too large (fatal) */ + unsigned int pkt_mismatch; /* # SGE packet mismatch interrupts (fatal) */ + unsigned int cmdQ_full[2]; /* not HW interrupt, host cmdQ[] full */ +}; + +struct sk_buff; +struct net_device; +struct cxgbdev; +struct adapter; +struct sge_params; +struct sge; + +struct sge *t1_sge_create(struct adapter *, struct sge_params *); +int t1_sge_configure(struct sge *, struct sge_params *); +int t1_sge_set_coalesce_params(struct sge *, struct sge_params *); +void t1_sge_destroy(struct sge *); +irqreturn_t t1_interrupt(int, void *, struct pt_regs *); +int t1_start_xmit(struct sk_buff *skb, struct net_device *dev); +void t1_set_vlan_accel(struct adapter *adapter, int on_off); +void t1_sge_start(struct sge *); +void t1_sge_stop(struct sge *); +int t1_sge_intr_error_handler(struct sge *); +void t1_sge_intr_enable(struct sge *); +void t1_sge_intr_disable(struct sge *); +void t1_sge_intr_clear(struct sge *); + +void t1_sge_set_ptimeout(adapter_t *adapter, u32 val); +u32 t1_sge_get_ptimeout(adapter_t *adapter); + +#endif /* _CHELSIO_LINUX_SGE_H_ 
*/ diff --git a/drivers/net/chelsio/subr.c b/drivers/net/chelsio/subr.c new file mode 100644 index 000000000000..a90a3f95fcac --- /dev/null +++ b/drivers/net/chelsio/subr.c @@ -0,0 +1,831 @@ +/***************************************************************************** + * * + * File: subr.c * + * $Revision: 1.12 $ * + * $Date: 2005/03/23 07:41:27 $ * + * Description: * + * Various subroutines (intr,pio,etc.) used by Chelsio 10G Ethernet driver. * + * part of the Chelsio 10Gb Ethernet Driver. * + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of the GNU General Public License, version 2, as * + * published by the Free Software Foundation. * + * * + * You should have received a copy of the GNU General Public License along * + * with this program; if not, write to the Free Software Foundation, Inc., * + * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. * + * * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED * + * WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF * + * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. * + * * + * http://www.chelsio.com * + * * + * Copyright (c) 2003 - 2005 Chelsio Communications, Inc. * + * All rights reserved. 
* + * * + * Maintainers: maintainers@chelsio.com * + * * + * Authors: Dimitrios Michailidis * + * Tina Yang * + * Felix Marti * + * Scott Bardone * + * Kurt Ottaway * + * Frank DiMambro * + * * + * History: * + * * + ****************************************************************************/ + +#include "common.h" +#include "elmer0.h" +#include "regs.h" + +#include "gmac.h" +#include "cphy.h" +#include "sge.h" +#include "tp.h" +#include "espi.h" + +/** + * t1_wait_op_done - wait until an operation is completed + * @adapter: the adapter performing the operation + * @reg: the register to check for completion + * @mask: a single-bit field within @reg that indicates completion + * @polarity: the value of the field when the operation is completed + * @attempts: number of check iterations + * @delay: delay in usecs between iterations + * + * Wait until an operation is completed by checking a bit in a register + * up to @attempts times. Returns %0 if the operation completes and %1 + * otherwise. + */ +static int t1_wait_op_done(adapter_t *adapter, int reg, u32 mask, int polarity, + int attempts, int delay) +{ + while (1) { + u32 val = t1_read_reg_4(adapter, reg) & mask; + + if (!!val == polarity) + return 0; + if (--attempts == 0) + return 1; + if (delay) + udelay(delay); + } +} + +#define TPI_ATTEMPTS 50 + +/* + * Write a register over the TPI interface (unlocked and locked versions). 
+ */ +static int __t1_tpi_write(adapter_t *adapter, u32 addr, u32 value) +{ + int tpi_busy; + + t1_write_reg_4(adapter, A_TPI_ADDR, addr); + t1_write_reg_4(adapter, A_TPI_WR_DATA, value); + t1_write_reg_4(adapter, A_TPI_CSR, F_TPIWR); + + tpi_busy = t1_wait_op_done(adapter, A_TPI_CSR, F_TPIRDY, 1, + TPI_ATTEMPTS, 3); + if (tpi_busy) + CH_ALERT("%s: TPI write to 0x%x failed\n", + adapter->name, addr); + return tpi_busy; +} + +int t1_tpi_write(adapter_t *adapter, u32 addr, u32 value) +{ + int ret; + + TPI_LOCK(adapter); + ret = __t1_tpi_write(adapter, addr, value); + TPI_UNLOCK(adapter); + return ret; +} + +/* + * Read a register over the TPI interface (unlocked and locked versions). + */ +static int __t1_tpi_read(adapter_t *adapter, u32 addr, u32 *valp) +{ + int tpi_busy; + + t1_write_reg_4(adapter, A_TPI_ADDR, addr); + t1_write_reg_4(adapter, A_TPI_CSR, 0); + + tpi_busy = t1_wait_op_done(adapter, A_TPI_CSR, F_TPIRDY, 1, + TPI_ATTEMPTS, 3); + if (tpi_busy) + CH_ALERT("%s: TPI read from 0x%x failed\n", + adapter->name, addr); + else + *valp = t1_read_reg_4(adapter, A_TPI_RD_DATA); + return tpi_busy; +} + +int t1_tpi_read(adapter_t *adapter, u32 addr, u32 *valp) +{ + int ret; + + TPI_LOCK(adapter); + ret = __t1_tpi_read(adapter, addr, valp); + TPI_UNLOCK(adapter); + return ret; +} + +/* + * Set a TPI parameter. + */ +static void t1_tpi_par(adapter_t *adapter, u32 value) +{ + t1_write_reg_4(adapter, A_TPI_PAR, V_TPIPAR(value)); +} + +/* + * Called when a port's link settings change to propagate the new values to the + * associated PHY and MAC. After performing the common tasks it invokes an + * OS-specific handler. + */ +/* static */ void link_changed(adapter_t *adapter, int port_id) +{ + int link_ok, speed, duplex, fc; + struct cphy *phy = adapter->port[port_id].phy; + struct link_config *lc = &adapter->port[port_id].link_config; + + phy->ops->get_link_status(phy, &link_ok, &speed, &duplex, &fc); + + lc->speed = speed < 0 ? 
SPEED_INVALID : speed; + lc->duplex = duplex < 0 ? DUPLEX_INVALID : duplex; + if (!(lc->requested_fc & PAUSE_AUTONEG)) + fc = lc->requested_fc & (PAUSE_RX | PAUSE_TX); + + if (link_ok && speed >= 0 && lc->autoneg == AUTONEG_ENABLE) { + /* Set MAC speed, duplex, and flow control to match PHY. */ + struct cmac *mac = adapter->port[port_id].mac; + + mac->ops->set_speed_duplex_fc(mac, speed, duplex, fc); + lc->fc = (unsigned char)fc; + } + t1_link_changed(adapter, port_id, link_ok, speed, duplex, fc); +} + +static int t1_pci_intr_handler(adapter_t *adapter) +{ + u32 pcix_cause; + + pci_read_config_dword(adapter->pdev, A_PCICFG_INTR_CAUSE, &pcix_cause); + + if (pcix_cause) { + pci_write_config_dword(adapter->pdev, A_PCICFG_INTR_CAUSE, + pcix_cause); + t1_fatal_err(adapter); /* PCI errors are fatal */ + } + return 0; +} + + +/* + * Wait until Elmer's MI1 interface is ready for new operations. + */ +static int mi1_wait_until_ready(adapter_t *adapter, int mi1_reg) +{ + int attempts = 100, busy; + + do { + u32 val; + + __t1_tpi_read(adapter, mi1_reg, &val); + busy = val & F_MI1_OP_BUSY; + if (busy) + udelay(10); + } while (busy && --attempts); + if (busy) + CH_ALERT("%s: MDIO operation timed out\n", + adapter->name); + return busy; +} + +/* + * MI1 MDIO initialization. + */ +static void mi1_mdio_init(adapter_t *adapter, const struct board_info *bi) +{ + u32 clkdiv = bi->clock_elmer0 / (2 * bi->mdio_mdc) - 1; + u32 val = F_MI1_PREAMBLE_ENABLE | V_MI1_MDI_INVERT(bi->mdio_mdiinv) | + V_MI1_MDI_ENABLE(bi->mdio_mdien) | V_MI1_CLK_DIV(clkdiv); + + if (!(bi->caps & SUPPORTED_10000baseT_Full)) + val |= V_MI1_SOF(1); + t1_tpi_write(adapter, A_ELMER0_PORT0_MI1_CFG, val); +} + +static int mi1_mdio_ext_read(adapter_t *adapter, int phy_addr, int mmd_addr, + int reg_addr, unsigned int *valp) +{ + u32 addr = V_MI1_REG_ADDR(mmd_addr) | V_MI1_PHY_ADDR(phy_addr); + + TPI_LOCK(adapter); + + /* Write the address we want. 
*/ + __t1_tpi_write(adapter, A_ELMER0_PORT0_MI1_ADDR, addr); + __t1_tpi_write(adapter, A_ELMER0_PORT0_MI1_DATA, reg_addr); + __t1_tpi_write(adapter, A_ELMER0_PORT0_MI1_OP, + MI1_OP_INDIRECT_ADDRESS); + mi1_wait_until_ready(adapter, A_ELMER0_PORT0_MI1_OP); + + /* Write the operation we want. */ + __t1_tpi_write(adapter, A_ELMER0_PORT0_MI1_OP, MI1_OP_INDIRECT_READ); + mi1_wait_until_ready(adapter, A_ELMER0_PORT0_MI1_OP); + + /* Read the data. */ + __t1_tpi_read(adapter, A_ELMER0_PORT0_MI1_DATA, valp); + TPI_UNLOCK(adapter); + return 0; +} + +static int mi1_mdio_ext_write(adapter_t *adapter, int phy_addr, int mmd_addr, + int reg_addr, unsigned int val) +{ + u32 addr = V_MI1_REG_ADDR(mmd_addr) | V_MI1_PHY_ADDR(phy_addr); + + TPI_LOCK(adapter); + + /* Write the address we want. */ + __t1_tpi_write(adapter, A_ELMER0_PORT0_MI1_ADDR, addr); + __t1_tpi_write(adapter, A_ELMER0_PORT0_MI1_DATA, reg_addr); + __t1_tpi_write(adapter, A_ELMER0_PORT0_MI1_OP, + MI1_OP_INDIRECT_ADDRESS); + mi1_wait_until_ready(adapter, A_ELMER0_PORT0_MI1_OP); + + /* Write the data. 
*/ + __t1_tpi_write(adapter, A_ELMER0_PORT0_MI1_DATA, val); + __t1_tpi_write(adapter, A_ELMER0_PORT0_MI1_OP, MI1_OP_INDIRECT_WRITE); + mi1_wait_until_ready(adapter, A_ELMER0_PORT0_MI1_OP); + TPI_UNLOCK(adapter); + return 0; +} + +static struct mdio_ops mi1_mdio_ext_ops = { + mi1_mdio_init, + mi1_mdio_ext_read, + mi1_mdio_ext_write +}; + +enum { + CH_BRD_N110_1F, + CH_BRD_N210_1F, + CH_BRD_T210_1F, +}; + +static struct board_info t1_board[] = { + +{ CHBT_BOARD_N110, 1/*ports#*/, + SUPPORTED_10000baseT_Full | SUPPORTED_FIBRE /*caps*/, CHBT_TERM_T1, + CHBT_MAC_PM3393, CHBT_PHY_88X2010, + 125000000/*clk-core*/, 0/*clk-mc3*/, 0/*clk-mc4*/, + 1/*espi-ports*/, 0/*clk-cspi*/, 44/*clk-elmer0*/, 0/*mdien*/, + 0/*mdiinv*/, 1/*mdc*/, 0/*phybaseaddr*/, &t1_pm3393_ops, + &t1_mv88x201x_ops, &mi1_mdio_ext_ops, + "Chelsio N110 1x10GBaseX NIC" }, + +{ CHBT_BOARD_N210, 1/*ports#*/, + SUPPORTED_10000baseT_Full | SUPPORTED_FIBRE /*caps*/, CHBT_TERM_T2, + CHBT_MAC_PM3393, CHBT_PHY_88X2010, + 125000000/*clk-core*/, 0/*clk-mc3*/, 0/*clk-mc4*/, + 1/*espi-ports*/, 0/*clk-cspi*/, 44/*clk-elmer0*/, 0/*mdien*/, + 0/*mdiinv*/, 1/*mdc*/, 0/*phybaseaddr*/, &t1_pm3393_ops, + &t1_mv88x201x_ops, &mi1_mdio_ext_ops, + "Chelsio N210 1x10GBaseX NIC" }, + +}; + +struct pci_device_id t1_pci_tbl[] = { + CH_DEVICE(7, 0, CH_BRD_N110_1F), + CH_DEVICE(10, 1, CH_BRD_N210_1F), + { 0, } +}; + +/* + * Return the board_info structure with a given index. Out-of-range indices + * return NULL. + */ +const struct board_info *t1_get_board_info(unsigned int board_id) +{ + return board_id < DIMOF(t1_board) ? &t1_board[board_id] : NULL; +} + +struct chelsio_vpd_t { + u32 format_version; + u8 serial_number[16]; + u8 mac_base_address[6]; + u8 pad[2]; /* make multiple-of-4 size requirement explicit */ +}; + +#define EEPROMSIZE (8 * 1024) +#define EEPROM_MAX_POLL 4 + +/* + * Read SEEPROM. A zero is written to the flag register when the address is + * written to the Control register.
The hardware device will set the flag to a + * one when 4B have been transferred to the Data register. + */ +int t1_seeprom_read(adapter_t *adapter, u32 addr, u32 *data) +{ + int i = EEPROM_MAX_POLL; + u16 val; + + if (addr >= EEPROMSIZE || (addr & 3)) + return -EINVAL; + + pci_write_config_word(adapter->pdev, A_PCICFG_VPD_ADDR, (u16)addr); + do { + udelay(50); + pci_read_config_word(adapter->pdev, A_PCICFG_VPD_ADDR, &val); + } while (!(val & F_VPD_OP_FLAG) && --i); + + if (!(val & F_VPD_OP_FLAG)) { + CH_ERR("%s: reading EEPROM address 0x%x failed\n", + adapter->name, addr); + return -EIO; + } + pci_read_config_dword(adapter->pdev, A_PCICFG_VPD_DATA, data); + *data = le32_to_cpu(*data); + return 0; +} + +static int t1_eeprom_vpd_get(adapter_t *adapter, struct chelsio_vpd_t *vpd) +{ + int addr, ret = 0; + + for (addr = 0; !ret && addr < sizeof(*vpd); addr += sizeof(u32)) + ret = t1_seeprom_read(adapter, addr, + (u32 *)((u8 *)vpd + addr)); + + return ret; +} + +/* + * Read a port's MAC address from the VPD ROM. + */ +static int vpd_macaddress_get(adapter_t *adapter, int index, u8 mac_addr[]) +{ + struct chelsio_vpd_t vpd; + + if (t1_eeprom_vpd_get(adapter, &vpd)) + return 1; + memcpy(mac_addr, vpd.mac_base_address, 5); + mac_addr[5] = vpd.mac_base_address[5] + index; + return 0; +} + +/* + * Set up the MAC/PHY according to the requested link settings. + * + * If the PHY can auto-negotiate first decide what to advertise, then + * enable/disable auto-negotiation as desired and reset. + * + * If the PHY does not auto-negotiate we just reset it. + * + * If auto-negotiation is off set the MAC to the proper speed/duplex/FC, + * otherwise do it later based on the outcome of auto-negotiation. 
+ */ +int t1_link_start(struct cphy *phy, struct cmac *mac, struct link_config *lc) +{ + unsigned int fc = lc->requested_fc & (PAUSE_RX | PAUSE_TX); + + if (lc->supported & SUPPORTED_Autoneg) { + lc->advertising &= ~(ADVERTISED_ASYM_PAUSE | ADVERTISED_PAUSE); + if (fc) { + lc->advertising |= ADVERTISED_ASYM_PAUSE; + if (fc == (PAUSE_RX | PAUSE_TX)) + lc->advertising |= ADVERTISED_PAUSE; + } + phy->ops->advertise(phy, lc->advertising); + + if (lc->autoneg == AUTONEG_DISABLE) { + lc->speed = lc->requested_speed; + lc->duplex = lc->requested_duplex; + lc->fc = (unsigned char)fc; + mac->ops->set_speed_duplex_fc(mac, lc->speed, + lc->duplex, fc); + /* Also disables autoneg */ + phy->ops->set_speed_duplex(phy, lc->speed, lc->duplex); + phy->ops->reset(phy, 0); + } else + phy->ops->autoneg_enable(phy); /* also resets PHY */ + } else { + mac->ops->set_speed_duplex_fc(mac, -1, -1, fc); + lc->fc = (unsigned char)fc; + phy->ops->reset(phy, 0); + } + return 0; +} + +/* + * External interrupt handler for boards using elmer0. + */ +int elmer0_ext_intr_handler(adapter_t *adapter) +{ + struct cphy *phy; + int phy_cause; + u32 cause; + + t1_tpi_read(adapter, A_ELMER0_INT_CAUSE, &cause); + + switch (board_info(adapter)->board) { + case CHBT_BOARD_CHT210: + case CHBT_BOARD_N210: + case CHBT_BOARD_N110: + if (cause & ELMER0_GP_BIT6) { /* Marvell 88x2010 interrupt */ + phy = adapter->port[0].phy; + phy_cause = phy->ops->interrupt_handler(phy); + if (phy_cause & cphy_cause_link_change) + link_changed(adapter, 0); + } + break; + case CHBT_BOARD_8000: + case CHBT_BOARD_CHT110: + CH_DBG(adapter, INTR, "External interrupt cause 0x%x\n", + cause); + if (cause & ELMER0_GP_BIT1) { /* PMC3393 INTB */ + struct cmac *mac = adapter->port[0].mac; + + mac->ops->interrupt_handler(mac); + } + if (cause & ELMER0_GP_BIT5) { /* XPAK MOD_DETECT */ + u32 mod_detect; + + t1_tpi_read(adapter, A_ELMER0_GPI_STAT, &mod_detect); + CH_MSG(adapter, INFO, LINK, "XPAK %s\n", + mod_detect ? 
"removed" : "inserted"); + } + break; + } + t1_tpi_write(adapter, A_ELMER0_INT_CAUSE, cause); + return 0; +} + +/* Enables all interrupts. */ +void t1_interrupts_enable(adapter_t *adapter) +{ + unsigned int i; + + adapter->slow_intr_mask = F_PL_INTR_SGE_ERR | F_PL_INTR_TP; + + t1_sge_intr_enable(adapter->sge); + t1_tp_intr_enable(adapter->tp); + if (adapter->espi) { + adapter->slow_intr_mask |= F_PL_INTR_ESPI; + t1_espi_intr_enable(adapter->espi); + } + + /* Enable MAC/PHY interrupts for each port. */ + for_each_port(adapter, i) { + adapter->port[i].mac->ops->interrupt_enable(adapter->port[i].mac); + adapter->port[i].phy->ops->interrupt_enable(adapter->port[i].phy); + } + + /* Enable PCIX & external chip interrupts on ASIC boards. */ + if (t1_is_asic(adapter)) { + u32 pl_intr = t1_read_reg_4(adapter, A_PL_ENABLE); + + /* PCI-X interrupts */ + pci_write_config_dword(adapter->pdev, A_PCICFG_INTR_ENABLE, + 0xffffffff); + + adapter->slow_intr_mask |= F_PL_INTR_EXT | F_PL_INTR_PCIX; + pl_intr |= F_PL_INTR_EXT | F_PL_INTR_PCIX; + t1_write_reg_4(adapter, A_PL_ENABLE, pl_intr); + } +} + +/* Disables all interrupts. */ +void t1_interrupts_disable(adapter_t* adapter) +{ + unsigned int i; + + t1_sge_intr_disable(adapter->sge); + t1_tp_intr_disable(adapter->tp); + if (adapter->espi) + t1_espi_intr_disable(adapter->espi); + + /* Disable MAC/PHY interrupts for each port. */ + for_each_port(adapter, i) { + adapter->port[i].mac->ops->interrupt_disable(adapter->port[i].mac); + adapter->port[i].phy->ops->interrupt_disable(adapter->port[i].phy); + } + + /* Disable PCIX & external chip interrupts. 
*/ + if (t1_is_asic(adapter)) + t1_write_reg_4(adapter, A_PL_ENABLE, 0); + + /* PCI-X interrupts */ + pci_write_config_dword(adapter->pdev, A_PCICFG_INTR_ENABLE, 0); + + adapter->slow_intr_mask = 0; +} + +/* Clears all interrupts */ +void t1_interrupts_clear(adapter_t* adapter) +{ + unsigned int i; + + t1_sge_intr_clear(adapter->sge); + t1_tp_intr_clear(adapter->tp); + if (adapter->espi) + t1_espi_intr_clear(adapter->espi); + + /* Clear MAC/PHY interrupts for each port. */ + for_each_port(adapter, i) { + adapter->port[i].mac->ops->interrupt_clear(adapter->port[i].mac); + adapter->port[i].phy->ops->interrupt_clear(adapter->port[i].phy); + } + + /* Clear interrupts for external devices. */ + if (t1_is_asic(adapter)) { + u32 pl_intr = t1_read_reg_4(adapter, A_PL_CAUSE); + + t1_write_reg_4(adapter, A_PL_CAUSE, + pl_intr | F_PL_INTR_EXT | F_PL_INTR_PCIX); + } + + /* PCI-X interrupts */ + pci_write_config_dword(adapter->pdev, A_PCICFG_INTR_CAUSE, 0xffffffff); +} + +/* + * Slow path interrupt handler for ASICs. + */ +static int asic_slow_intr(adapter_t *adapter) +{ + u32 cause = t1_read_reg_4(adapter, A_PL_CAUSE); + + cause &= adapter->slow_intr_mask; + if (!cause) + return 0; + if (cause & F_PL_INTR_SGE_ERR) + t1_sge_intr_error_handler(adapter->sge); + if (cause & F_PL_INTR_TP) + t1_tp_intr_handler(adapter->tp); + if (cause & F_PL_INTR_ESPI) + t1_espi_intr_handler(adapter->espi); + if (cause & F_PL_INTR_PCIX) + t1_pci_intr_handler(adapter); + if (cause & F_PL_INTR_EXT) + t1_elmer0_ext_intr(adapter); + + /* Clear the interrupts just processed. */ + t1_write_reg_4(adapter, A_PL_CAUSE, cause); + (void)t1_read_reg_4(adapter, A_PL_CAUSE); /* flush writes */ + return 1; +} + +int t1_slow_intr_handler(adapter_t *adapter) +{ + return asic_slow_intr(adapter); +} + +/* Power sequencing is a work-around for Intel's XPAKs.
*/ +static void power_sequence_xpak(adapter_t* adapter) +{ + u32 mod_detect; + u32 gpo; + + /* Check for XPAK */ + t1_tpi_read(adapter, A_ELMER0_GPI_STAT, &mod_detect); + if (!(ELMER0_GP_BIT5 & mod_detect)) { + /* XPAK is present */ + t1_tpi_read(adapter, A_ELMER0_GPO, &gpo); + gpo |= ELMER0_GP_BIT18; + t1_tpi_write(adapter, A_ELMER0_GPO, gpo); + } +} + +int __devinit t1_get_board_rev(adapter_t *adapter, const struct board_info *bi, + struct adapter_params *p) +{ + p->chip_version = bi->chip_term; + p->is_asic = (p->chip_version != CHBT_TERM_FPGA); + if (p->chip_version == CHBT_TERM_T1 || + p->chip_version == CHBT_TERM_T2 || + p->chip_version == CHBT_TERM_FPGA) { + u32 val = t1_read_reg_4(adapter, A_TP_PC_CONFIG); + + val = G_TP_PC_REV(val); + if (val == 2) + p->chip_revision = TERM_T1B; + else if (val == 3) + p->chip_revision = TERM_T2; + else + return -1; + } else + return -1; + return 0; +} + +/* + * Enable board components other than the Chelsio chip, such as external MAC + * and PHY. + */ +static int board_init(adapter_t *adapter, const struct board_info *bi) +{ + switch (bi->board) { + case CHBT_BOARD_8000: + case CHBT_BOARD_N110: + case CHBT_BOARD_N210: + case CHBT_BOARD_CHT210: + case CHBT_BOARD_COUGAR: + t1_tpi_par(adapter, 0xf); + t1_tpi_write(adapter, A_ELMER0_GPO, 0x800); + break; + case CHBT_BOARD_CHT110: + t1_tpi_par(adapter, 0xf); + t1_tpi_write(adapter, A_ELMER0_GPO, 0x1800); + + /* TBD XXX Might not need. This fixes a problem + * described in the Intel SR XPAK errata. + */ + power_sequence_xpak(adapter); + break; + } + return 0; +} + +/* + * Initialize and configure the Terminator HW modules. Note that external + * MAC and PHYs are initialized separately. 
+ */ +int t1_init_hw_modules(adapter_t *adapter) +{ + int err = -EIO; + const struct board_info *bi = board_info(adapter); + + if (!adapter->mc4) { + u32 val = t1_read_reg_4(adapter, A_MC4_CFG); + + t1_write_reg_4(adapter, A_MC4_CFG, val | F_READY | F_MC4_SLOW); + t1_write_reg_4(adapter, A_MC5_CONFIG, + F_M_BUS_ENABLE | F_TCAM_RESET); + } + + if (adapter->espi && t1_espi_init(adapter->espi, bi->chip_mac, + bi->espi_nports)) + goto out_err; + + if (t1_tp_reset(adapter->tp, &adapter->params.tp, bi->clock_core)) + goto out_err; + + err = t1_sge_configure(adapter->sge, &adapter->params.sge); + if (err) + goto out_err; + + err = 0; + out_err: + return err; +} + +/* + * Determine a card's PCI mode. + */ +static void __devinit get_pci_mode(adapter_t *adapter, struct pci_params *p) +{ + static unsigned short speed_map[] = { 33, 66, 100, 133 }; + u32 pci_mode; + + pci_read_config_dword(adapter->pdev, A_PCICFG_MODE, &pci_mode); + p->speed = speed_map[G_PCI_MODE_CLK(pci_mode)]; + p->width = (pci_mode & F_PCI_MODE_64BIT) ? 64 : 32; + p->is_pcix = (pci_mode & F_PCI_MODE_PCIX) != 0; +} + +/* + * Release the structures holding the SW per-Terminator-HW-module state. 
+ */ +void t1_free_sw_modules(adapter_t *adapter) +{ + unsigned int i; + + for_each_port(adapter, i) { + struct cmac *mac = adapter->port[i].mac; + struct cphy *phy = adapter->port[i].phy; + + if (mac) + mac->ops->destroy(mac); + if (phy) + phy->ops->destroy(phy); + } + + if (adapter->sge) + t1_sge_destroy(adapter->sge); + if (adapter->tp) + t1_tp_destroy(adapter->tp); + if (adapter->espi) + t1_espi_destroy(adapter->espi); +} + +static void __devinit init_link_config(struct link_config *lc, + const struct board_info *bi) +{ + lc->supported = bi->caps; + lc->requested_speed = lc->speed = SPEED_INVALID; + lc->requested_duplex = lc->duplex = DUPLEX_INVALID; + lc->requested_fc = lc->fc = PAUSE_RX | PAUSE_TX; + if (lc->supported & SUPPORTED_Autoneg) { + lc->advertising = lc->supported; + lc->autoneg = AUTONEG_ENABLE; + lc->requested_fc |= PAUSE_AUTONEG; + } else { + lc->advertising = 0; + lc->autoneg = AUTONEG_DISABLE; + } +} + + +/* + * Allocate and initialize the data structures that hold the SW state of + * the Terminator HW modules. 
+ */ +int __devinit t1_init_sw_modules(adapter_t *adapter, + const struct board_info *bi) +{ + unsigned int i; + + adapter->params.brd_info = bi; + adapter->params.nports = bi->port_number; + adapter->params.stats_update_period = bi->gmac->stats_update_period; + + adapter->sge = t1_sge_create(adapter, &adapter->params.sge); + if (!adapter->sge) { + CH_ERR("%s: SGE initialization failed\n", + adapter->name); + goto error; + } + + + + if (bi->espi_nports && !(adapter->espi = t1_espi_create(adapter))) { + CH_ERR("%s: ESPI initialization failed\n", + adapter->name); + goto error; + } + + adapter->tp = t1_tp_create(adapter, &adapter->params.tp); + if (!adapter->tp) { + CH_ERR("%s: TP initialization failed\n", + adapter->name); + goto error; + } + + board_init(adapter, bi); + bi->mdio_ops->init(adapter, bi); + if (bi->gphy->reset) + bi->gphy->reset(adapter); + if (bi->gmac->reset) + bi->gmac->reset(adapter); + + for_each_port(adapter, i) { + u8 hw_addr[6]; + struct cmac *mac; + int phy_addr = bi->mdio_phybaseaddr + i; + + adapter->port[i].phy = bi->gphy->create(adapter, phy_addr, + bi->mdio_ops); + if (!adapter->port[i].phy) { + CH_ERR("%s: PHY %d initialization failed\n", + adapter->name, i); + goto error; + } + + adapter->port[i].mac = mac = bi->gmac->create(adapter, i); + if (!mac) { + CH_ERR("%s: MAC %d initialization failed\n", + adapter->name, i); + goto error; + } + + /* + * Get the port's MAC addresses either from the EEPROM if one + * exists or the one hardcoded in the MAC. 
+ */ + if (!t1_is_asic(adapter) || bi->chip_mac == CHBT_MAC_DUMMY) + mac->ops->macaddress_get(mac, hw_addr); + else if (vpd_macaddress_get(adapter, i, hw_addr)) { + CH_ERR("%s: could not read MAC address from VPD ROM\n", + port_name(adapter, i)); + goto error; + } + t1_set_hw_addr(adapter, i, hw_addr); + init_link_config(&adapter->port[i].link_config, bi); + } + + get_pci_mode(adapter, &adapter->params.pci); + t1_interrupts_clear(adapter); + return 0; + + error: + t1_free_sw_modules(adapter); + return -1; +} diff --git a/drivers/net/chelsio/suni1x10gexp_regs.h b/drivers/net/chelsio/suni1x10gexp_regs.h new file mode 100644 index 000000000000..98352bdda89b --- /dev/null +++ b/drivers/net/chelsio/suni1x10gexp_regs.h @@ -0,0 +1,221 @@ +/***************************************************************************** + * * + * File: suni1x10gexp_regs.h * + * $Revision: 1.4 $ * + * $Date: 2005/03/23 07:15:59 $ * + * Description: * + * PMC/SIERRA (pm3393) MAC-PHY functionality. * + * part of the Chelsio 10Gb Ethernet Driver. * + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of the GNU General Public License, version 2, as * + * published by the Free Software Foundation. * + * * + * You should have received a copy of the GNU General Public License along * + * with this program; if not, write to the Free Software Foundation, Inc., * + * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. * + * * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED * + * WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF * + * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. * + * * + * http://www.chelsio.com * + * * + * Copyright (c) 2003 - 2005 Chelsio Communications, Inc. * + * All rights reserved. 
* + * * + * Maintainers: maintainers@chelsio.com * + * * + * Authors: Dimitrios Michailidis * + * Tina Yang * + * Felix Marti * + * Scott Bardone * + * Kurt Ottaway * + * Frank DiMambro * + * * + * History: * + * * + ****************************************************************************/ + +#ifndef _SUNI1x10GEXP_REGS_H +#define _SUNI1x10GEXP_REGS_H + +/******************************************************************************/ +/** S/UNI-1x10GE-XP REGISTER ADDRESS MAP **/ +/******************************************************************************/ +/* Refer to the Register Bit Masks below for the naming of each register and */ +/* to the S/UNI-1x10GE-XP Data Sheet for the meaning of each bit */ +/******************************************************************************/ + +#define SUNI1x10GEXP_REG_DEVICE_STATUS 0x0004 +#define SUNI1x10GEXP_REG_MASTER_INTERRUPT_STATUS 0x000D +#define SUNI1x10GEXP_REG_GLOBAL_INTERRUPT_ENABLE 0x000E +#define SUNI1x10GEXP_REG_SERDES_3125_INTERRUPT_ENABLE 0x0102 +#define SUNI1x10GEXP_REG_SERDES_3125_INTERRUPT_STATUS 0x0104 +#define SUNI1x10GEXP_REG_RXXG_CONFIG_1 0x2040 +#define SUNI1x10GEXP_REG_RXXG_CONFIG_3 0x2042 +#define SUNI1x10GEXP_REG_RXXG_INTERRUPT 0x2043 +#define SUNI1x10GEXP_REG_RXXG_MAX_FRAME_LENGTH 0x2045 +#define SUNI1x10GEXP_REG_RXXG_SA_15_0 0x2046 +#define SUNI1x10GEXP_REG_RXXG_SA_31_16 0x2047 +#define SUNI1x10GEXP_REG_RXXG_SA_47_32 0x2048 +#define SUNI1x10GEXP_REG_RXXG_EXACT_MATCH_ADDR_1_LOW 0x204D +#define SUNI1x10GEXP_REG_RXXG_EXACT_MATCH_ADDR_1_MID 0x204E +#define SUNI1x10GEXP_REG_RXXG_EXACT_MATCH_ADDR_1_HIGH 0x204F +#define SUNI1x10GEXP_REG_RXXG_MULTICAST_HASH_LOW 0x206A +#define SUNI1x10GEXP_REG_RXXG_MULTICAST_HASH_MIDLOW 0x206B +#define SUNI1x10GEXP_REG_RXXG_MULTICAST_HASH_MIDHIGH 0x206C +#define SUNI1x10GEXP_REG_RXXG_MULTICAST_HASH_HIGH 0x206D +#define SUNI1x10GEXP_REG_RXXG_ADDRESS_FILTER_CONTROL_0 0x206E +#define SUNI1x10GEXP_REG_RXXG_ADDRESS_FILTER_CONTROL_2 0x2070 +#define
SUNI1x10GEXP_REG_XRF_INTERRUPT_ENABLE 0x2088 +#define SUNI1x10GEXP_REG_XRF_INTERRUPT_STATUS 0x2089 +#define SUNI1x10GEXP_REG_XRF_DIAG_INTERRUPT_ENABLE 0x208B +#define SUNI1x10GEXP_REG_XRF_DIAG_INTERRUPT_STATUS 0x208C +#define SUNI1x10GEXP_REG_RXOAM_INTERRUPT_ENABLE 0x20C7 +#define SUNI1x10GEXP_REG_RXOAM_INTERRUPT_STATUS 0x20C8 +#define SUNI1x10GEXP_REG_MSTAT_CONTROL 0x2100 +#define SUNI1x10GEXP_REG_MSTAT_COUNTER_ROLLOVER_0 0x2101 +#define SUNI1x10GEXP_REG_MSTAT_COUNTER_ROLLOVER_1 0x2102 +#define SUNI1x10GEXP_REG_MSTAT_COUNTER_ROLLOVER_2 0x2103 +#define SUNI1x10GEXP_REG_MSTAT_COUNTER_ROLLOVER_3 0x2104 +#define SUNI1x10GEXP_REG_MSTAT_INTERRUPT_MASK_0 0x2105 +#define SUNI1x10GEXP_REG_MSTAT_INTERRUPT_MASK_1 0x2106 +#define SUNI1x10GEXP_REG_MSTAT_INTERRUPT_MASK_2 0x2107 +#define SUNI1x10GEXP_REG_MSTAT_INTERRUPT_MASK_3 0x2108 +#define SUNI1x10GEXP_REG_MSTAT_COUNTER_0_LOW 0x2110 +#define SUNI1x10GEXP_REG_MSTAT_COUNTER_1_LOW 0x2114 +#define SUNI1x10GEXP_REG_MSTAT_COUNTER_4_LOW 0x2120 +#define SUNI1x10GEXP_REG_MSTAT_COUNTER_5_LOW 0x2124 +#define SUNI1x10GEXP_REG_MSTAT_COUNTER_6_LOW 0x2128 +#define SUNI1x10GEXP_REG_MSTAT_COUNTER_8_LOW 0x2130 +#define SUNI1x10GEXP_REG_MSTAT_COUNTER_10_LOW 0x2138 +#define SUNI1x10GEXP_REG_MSTAT_COUNTER_11_LOW 0x213C +#define SUNI1x10GEXP_REG_MSTAT_COUNTER_12_LOW 0x2140 +#define SUNI1x10GEXP_REG_MSTAT_COUNTER_13_LOW 0x2144 +#define SUNI1x10GEXP_REG_MSTAT_COUNTER_15_LOW 0x214C +#define SUNI1x10GEXP_REG_MSTAT_COUNTER_16_LOW 0x2150 +#define SUNI1x10GEXP_REG_MSTAT_COUNTER_17_LOW 0x2154 +#define SUNI1x10GEXP_REG_MSTAT_COUNTER_18_LOW 0x2158 +#define SUNI1x10GEXP_REG_MSTAT_COUNTER_33_LOW 0x2194 +#define SUNI1x10GEXP_REG_MSTAT_COUNTER_35_LOW 0x219C +#define SUNI1x10GEXP_REG_MSTAT_COUNTER_36_LOW 0x21A0 +#define SUNI1x10GEXP_REG_MSTAT_COUNTER_38_LOW 0x21A8 +#define SUNI1x10GEXP_REG_MSTAT_COUNTER_40_LOW 0x21B0 +#define SUNI1x10GEXP_REG_MSTAT_COUNTER_42_LOW 0x21B8 +#define SUNI1x10GEXP_REG_MSTAT_COUNTER_43_LOW 0x21BC +#define 
SUNI1x10GEXP_REG_IFLX_FIFO_OVERFLOW_ENABLE 0x2209 +#define SUNI1x10GEXP_REG_IFLX_FIFO_OVERFLOW_INTERRUPT 0x220A +#define SUNI1x10GEXP_REG_PL4ODP_INTERRUPT_MASK 0x2282 +#define SUNI1x10GEXP_REG_PL4ODP_INTERRUPT 0x2283 +#define SUNI1x10GEXP_REG_PL4IO_LOCK_DETECT_STATUS 0x2300 +#define SUNI1x10GEXP_REG_PL4IO_LOCK_DETECT_CHANGE 0x2301 +#define SUNI1x10GEXP_REG_PL4IO_LOCK_DETECT_MASK 0x2302 +#define SUNI1x10GEXP_REG_TXXG_CONFIG_1 0x3040 +#define SUNI1x10GEXP_REG_TXXG_CONFIG_3 0x3042 +#define SUNI1x10GEXP_REG_TXXG_INTERRUPT 0x3043 +#define SUNI1x10GEXP_REG_TXXG_MAX_FRAME_SIZE 0x3045 +#define SUNI1x10GEXP_REG_TXXG_SA_15_0 0x3047 +#define SUNI1x10GEXP_REG_TXXG_SA_31_16 0x3048 +#define SUNI1x10GEXP_REG_TXXG_SA_47_32 0x3049 +#define SUNI1x10GEXP_REG_XTEF_INTERRUPT_STATUS 0x3084 +#define SUNI1x10GEXP_REG_XTEF_INTERRUPT_ENABLE 0x3085 +#define SUNI1x10GEXP_REG_TXOAM_INTERRUPT_ENABLE 0x30C6 +#define SUNI1x10GEXP_REG_TXOAM_INTERRUPT_STATUS 0x30C7 +#define SUNI1x10GEXP_REG_EFLX_FIFO_OVERFLOW_ERROR_ENABLE 0x320C +#define SUNI1x10GEXP_REG_EFLX_FIFO_OVERFLOW_ERROR_INDICATION 0x320D +#define SUNI1x10GEXP_REG_PL4IDU_INTERRUPT_MASK 0x3282 +#define SUNI1x10GEXP_REG_PL4IDU_INTERRUPT 0x3283 + +/******************************************************************************/ +/* -- End register offset definitions -- */ +/******************************************************************************/ + +/******************************************************************************/ +/** SUNI-1x10GE-XP REGISTER BIT MASKS **/ +/******************************************************************************/ + +/*---------------------------------------------------------------------------- + * Register 0x0004: S/UNI-1x10GE-XP Device Status + * Bit 9 TOP_SXRA_EXPIRED + * Bit 8 TOP_MDIO_BUSY + * Bit 7 TOP_DTRB + * Bit 6 TOP_EXPIRED + * Bit 5 TOP_PAUSED + * Bit 4 TOP_PL4_ID_DOOL + * Bit 3 TOP_PL4_IS_DOOL + * Bit 2 TOP_PL4_ID_ROOL + * Bit 1 TOP_PL4_IS_ROOL + * Bit 0 TOP_PL4_OUT_ROOL + 
*----------------------------------------------------------------------------*/ +#define SUNI1x10GEXP_BITMSK_TOP_SXRA_EXPIRED 0x0200 +#define SUNI1x10GEXP_BITMSK_TOP_EXPIRED 0x0040 +#define SUNI1x10GEXP_BITMSK_TOP_PL4_ID_DOOL 0x0010 +#define SUNI1x10GEXP_BITMSK_TOP_PL4_IS_DOOL 0x0008 +#define SUNI1x10GEXP_BITMSK_TOP_PL4_ID_ROOL 0x0004 +#define SUNI1x10GEXP_BITMSK_TOP_PL4_IS_ROOL 0x0002 +#define SUNI1x10GEXP_BITMSK_TOP_PL4_OUT_ROOL 0x0001 + +/*---------------------------------------------------------------------------- + * Register 0x000E:PM3393 Global interrupt enable + * Bit 15 TOP_INTE + *----------------------------------------------------------------------------*/ +#define SUNI1x10GEXP_BITMSK_TOP_INTE 0x8000 + +/*---------------------------------------------------------------------------- + * Register 0x2040: RXXG Configuration 1 + * Bit 15 RXXG_RXEN + * Bit 14 RXXG_ROCF + * Bit 13 RXXG_PAD_STRIP + * Bit 10 RXXG_PUREP + * Bit 9 RXXG_LONGP + * Bit 8 RXXG_PARF + * Bit 7 RXXG_FLCHK + * Bit 5 RXXG_PASS_CTRL + * Bit 3 RXXG_CRC_STRIP + * Bit 2-0 RXXG_MIFG + *----------------------------------------------------------------------------*/ +#define SUNI1x10GEXP_BITMSK_RXXG_RXEN 0x8000 +#define SUNI1x10GEXP_BITMSK_RXXG_PUREP 0x0400 +#define SUNI1x10GEXP_BITMSK_RXXG_FLCHK 0x0080 +#define SUNI1x10GEXP_BITMSK_RXXG_CRC_STRIP 0x0008 + +/*---------------------------------------------------------------------------- + * Register 0x2070: RXXG Address Filter Control 2 + * Bit 1 RXXG_PMODE + * Bit 0 RXXG_MHASH_EN + *----------------------------------------------------------------------------*/ +#define SUNI1x10GEXP_BITMSK_RXXG_PMODE 0x0002 +#define SUNI1x10GEXP_BITMSK_RXXG_MHASH_EN 0x0001 + +/*---------------------------------------------------------------------------- + * Register 0x2100: MSTAT Control + * Bit 2 MSTAT_WRITE + * Bit 1 MSTAT_CLEAR + * Bit 0 MSTAT_SNAP + *----------------------------------------------------------------------------*/ +#define 
SUNI1x10GEXP_BITMSK_MSTAT_CLEAR 0x0002 +#define SUNI1x10GEXP_BITMSK_MSTAT_SNAP 0x0001 + +/*---------------------------------------------------------------------------- + * Register 0x3040: TXXG Configuration Register 1 + * Bit 15 TXXG_TXEN0 + * Bit 13 TXXG_HOSTPAUSE + * Bit 12-7 TXXG_IPGT + * Bit 5 TXXG_32BIT_ALIGN + * Bit 4 TXXG_CRCEN + * Bit 3 TXXG_FCTX + * Bit 2 TXXG_FCRX + * Bit 1 TXXG_PADEN + * Bit 0 TXXG_SPRE + *----------------------------------------------------------------------------*/ +#define SUNI1x10GEXP_BITMSK_TXXG_TXEN0 0x8000 +#define SUNI1x10GEXP_BITOFF_TXXG_IPGT 7 +#define SUNI1x10GEXP_BITMSK_TXXG_32BIT_ALIGN 0x0020 +#define SUNI1x10GEXP_BITMSK_TXXG_CRCEN 0x0010 +#define SUNI1x10GEXP_BITMSK_TXXG_FCTX 0x0008 +#define SUNI1x10GEXP_BITMSK_TXXG_FCRX 0x0004 +#define SUNI1x10GEXP_BITMSK_TXXG_PADEN 0x0002 + +#endif /* _SUNI1x10GEXP_REGS_H */ + diff --git a/drivers/net/chelsio/tp.c b/drivers/net/chelsio/tp.c new file mode 100644 index 000000000000..9ad5c539fd28 --- /dev/null +++ b/drivers/net/chelsio/tp.c @@ -0,0 +1,188 @@ +/***************************************************************************** + * * + * File: tp.c * + * $Revision: 1.6 $ * + * $Date: 2005/03/23 07:15:59 $ * + * Description: * + * Core ASIC Management. * + * part of the Chelsio 10Gb Ethernet Driver. * + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of the GNU General Public License, version 2, as * + * published by the Free Software Foundation. * + * * + * You should have received a copy of the GNU General Public License along * + * with this program; if not, write to the Free Software Foundation, Inc., * + * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. * + * * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED * + * WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF * + * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. 
* + * * + * http://www.chelsio.com * + * * + * Copyright (c) 2003 - 2005 Chelsio Communications, Inc. * + * All rights reserved. * + * * + * Maintainers: maintainers@chelsio.com * + * * + * Authors: Dimitrios Michailidis * + * Tina Yang * + * Felix Marti * + * Scott Bardone * + * Kurt Ottaway * + * Frank DiMambro * + * * + * History: * + * * + ****************************************************************************/ + +#include "common.h" +#include "regs.h" +#include "tp.h" + +struct petp { + adapter_t *adapter; +}; + +/* Pause deadlock avoidance parameters */ +#define DROP_MSEC 16 +#define DROP_PKTS_CNT 1 + + +static void tp_init(adapter_t *ap, const struct tp_params *p, + unsigned int tp_clk) +{ + if (t1_is_asic(ap)) { + u32 val; + + val = F_TP_IN_CSPI_CPL | F_TP_IN_CSPI_CHECK_IP_CSUM | + F_TP_IN_CSPI_CHECK_TCP_CSUM | F_TP_IN_ESPI_ETHERNET; + if (!p->pm_size) + val |= F_OFFLOAD_DISABLE; + else + val |= F_TP_IN_ESPI_CHECK_IP_CSUM | + F_TP_IN_ESPI_CHECK_TCP_CSUM; + t1_write_reg_4(ap, A_TP_IN_CONFIG, val); + t1_write_reg_4(ap, A_TP_OUT_CONFIG, F_TP_OUT_CSPI_CPL | + F_TP_OUT_ESPI_ETHERNET | + F_TP_OUT_ESPI_GENERATE_IP_CSUM | + F_TP_OUT_ESPI_GENERATE_TCP_CSUM); + t1_write_reg_4(ap, A_TP_GLOBAL_CONFIG, V_IP_TTL(64) | + F_PATH_MTU /* IP DF bit */ | + V_5TUPLE_LOOKUP(p->use_5tuple_mode) | + V_SYN_COOKIE_PARAMETER(29)); + + /* + * Enable pause frame deadlock prevention. 
+ */ + if (is_T2(ap)) { + u32 drop_ticks = DROP_MSEC * (tp_clk / 1000); + + t1_write_reg_4(ap, A_TP_TX_DROP_CONFIG, + F_ENABLE_TX_DROP | F_ENABLE_TX_ERROR | + V_DROP_TICKS_CNT(drop_ticks) | + V_NUM_PKTS_DROPPED(DROP_PKTS_CNT)); + } + + } +} + +void t1_tp_destroy(struct petp *tp) +{ + kfree(tp); +} + +struct petp * __devinit t1_tp_create(adapter_t *adapter, struct tp_params *p) +{ + struct petp *tp = kmalloc(sizeof(*tp), GFP_KERNEL); + if (!tp) + return NULL; + memset(tp, 0, sizeof(*tp)); + tp->adapter = adapter; + + return tp; +} + +void t1_tp_intr_enable(struct petp *tp) +{ + u32 tp_intr = t1_read_reg_4(tp->adapter, A_PL_ENABLE); + + { + /* We don't use any TP interrupts */ + t1_write_reg_4(tp->adapter, A_TP_INT_ENABLE, 0); + t1_write_reg_4(tp->adapter, A_PL_ENABLE, + tp_intr | F_PL_INTR_TP); + } +} + +void t1_tp_intr_disable(struct petp *tp) +{ + u32 tp_intr = t1_read_reg_4(tp->adapter, A_PL_ENABLE); + + { + t1_write_reg_4(tp->adapter, A_TP_INT_ENABLE, 0); + t1_write_reg_4(tp->adapter, A_PL_ENABLE, + tp_intr & ~F_PL_INTR_TP); + } +} + +void t1_tp_intr_clear(struct petp *tp) +{ + t1_write_reg_4(tp->adapter, A_TP_INT_CAUSE, 0xffffffff); + t1_write_reg_4(tp->adapter, A_PL_CAUSE, F_PL_INTR_TP); +} + +int t1_tp_intr_handler(struct petp *tp) +{ + u32 cause; + + + cause = t1_read_reg_4(tp->adapter, A_TP_INT_CAUSE); + t1_write_reg_4(tp->adapter, A_TP_INT_CAUSE, cause); + return 0; +} + +static void set_csum_offload(struct petp *tp, u32 csum_bit, int enable) +{ + u32 val = t1_read_reg_4(tp->adapter, A_TP_GLOBAL_CONFIG); + + if (enable) + val |= csum_bit; + else + val &= ~csum_bit; + t1_write_reg_4(tp->adapter, A_TP_GLOBAL_CONFIG, val); +} + +void t1_tp_set_ip_checksum_offload(struct petp *tp, int enable) +{ + set_csum_offload(tp, F_IP_CSUM, enable); +} + +void t1_tp_set_udp_checksum_offload(struct petp *tp, int enable) +{ + set_csum_offload(tp, F_UDP_CSUM, enable); +} + +void t1_tp_set_tcp_checksum_offload(struct petp *tp, int enable) +{ + set_csum_offload(tp, F_TCP_CSUM, 
enable); +} + +/* + * Initialize TP state. tp_params contains initial settings for some TP + * parameters, particularly the one-time PM and CM settings. + */ +int t1_tp_reset(struct petp *tp, struct tp_params *p, unsigned int tp_clk) +{ + int busy = 0; + adapter_t *adapter = tp->adapter; + + tp_init(adapter, p, tp_clk); + if (!busy) + t1_write_reg_4(adapter, A_TP_RESET, F_TP_RESET); + else + CH_ERR("%s: TP initialization timed out\n", + adapter->name); + return busy; +} diff --git a/drivers/net/chelsio/tp.h b/drivers/net/chelsio/tp.h new file mode 100644 index 000000000000..2ebc5c0d62e7 --- /dev/null +++ b/drivers/net/chelsio/tp.h @@ -0,0 +1,110 @@ +/***************************************************************************** + * * + * File: tp.h * + * $Revision: 1.3 $ * + * $Date: 2005/03/23 07:15:59 $ * + * Description: * + * part of the Chelsio 10Gb Ethernet Driver. * + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of the GNU General Public License, version 2, as * + * published by the Free Software Foundation. * + * * + * You should have received a copy of the GNU General Public License along * + * with this program; if not, write to the Free Software Foundation, Inc., * + * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. * + * * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED * + * WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF * + * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. * + * * + * http://www.chelsio.com * + * * + * Copyright (c) 2003 - 2005 Chelsio Communications, Inc. * + * All rights reserved. 
* + * * + * Maintainers: maintainers@chelsio.com * + * * + * Authors: Dimitrios Michailidis * + * Tina Yang * + * Felix Marti * + * Scott Bardone * + * Kurt Ottaway * + * Frank DiMambro * + * * + * History: * + * * + ****************************************************************************/ + +#ifndef CHELSIO_TP_H +#define CHELSIO_TP_H + +#include "common.h" + +#define TP_MAX_RX_COALESCING_SIZE 16224U + +struct tp_mib_statistics { + + /* IP */ + u32 ipInReceive_hi; + u32 ipInReceive_lo; + u32 ipInHdrErrors_hi; + u32 ipInHdrErrors_lo; + u32 ipInAddrErrors_hi; + u32 ipInAddrErrors_lo; + u32 ipInUnknownProtos_hi; + u32 ipInUnknownProtos_lo; + u32 ipInDiscards_hi; + u32 ipInDiscards_lo; + u32 ipInDelivers_hi; + u32 ipInDelivers_lo; + u32 ipOutRequests_hi; + u32 ipOutRequests_lo; + u32 ipOutDiscards_hi; + u32 ipOutDiscards_lo; + u32 ipOutNoRoutes_hi; + u32 ipOutNoRoutes_lo; + u32 ipReasmTimeout; + u32 ipReasmReqds; + u32 ipReasmOKs; + u32 ipReasmFails; + + u32 reserved[8]; + + /* TCP */ + u32 tcpActiveOpens; + u32 tcpPassiveOpens; + u32 tcpAttemptFails; + u32 tcpEstabResets; + u32 tcpOutRsts; + u32 tcpCurrEstab; + u32 tcpInSegs_hi; + u32 tcpInSegs_lo; + u32 tcpOutSegs_hi; + u32 tcpOutSegs_lo; + u32 tcpRetransSeg_hi; + u32 tcpRetransSeg_lo; + u32 tcpInErrs_hi; + u32 tcpInErrs_lo; + u32 tcpRtoMin; + u32 tcpRtoMax; +}; + +struct petp; +struct tp_params; + +struct petp *t1_tp_create(adapter_t *adapter, struct tp_params *p); +void t1_tp_destroy(struct petp *tp); + +void t1_tp_intr_disable(struct petp *tp); +void t1_tp_intr_enable(struct petp *tp); +void t1_tp_intr_clear(struct petp *tp); +int t1_tp_intr_handler(struct petp *tp); + +void t1_tp_get_mib_statistics(adapter_t *adap, struct tp_mib_statistics *tps); +void t1_tp_set_udp_checksum_offload(struct petp *tp, int enable); +void t1_tp_set_tcp_checksum_offload(struct petp *tp, int enable); +void t1_tp_set_ip_checksum_offload(struct petp *tp, int enable); +int t1_tp_set_coalescing_size(struct petp *tp, unsigned int size); 
+int t1_tp_reset(struct petp *tp, struct tp_params *p, unsigned int tp_clk); +#endif From 559fb51ba7e66fe298b8355fabde1275b7def35f Mon Sep 17 00:00:00 2001 From: Scott Bardone Date: Thu, 23 Jun 2005 01:40:19 -0400 Subject: [PATCH 002/584] Update Chelsio gige net driver. - Use extern prefix for functions required. - Removed a lot of wrappers, including t1_read/write_reg_4. - Removed various macros, using native kernel calls now. - Enumerated various #defines. - Removed a lot of shared code which is not currently used in "NIC only" mode. - Removed dead code. Documentation/networking/cxgb.txt: - Updated release notes for version 2.1.1 drivers/net/chelsio/ch_ethtool.h - removed file, no longer using ETHTOOL namespace. drivers/net/chelsio/common.h - moved code from osdep.h to common.h - added comment to #endif indicating which symbol it closes. drivers/net/chelsio/cphy.h - removed dead code. - added comment to #endif indicating which symbol it closes. drivers/net/chelsio/cxgb2.c - use DMA_{32,64}BIT_MASK in include/linux/dma-mapping.h. - removed unused code. - use printk message for link info resembling drivers/net/mii.c. - no longer using the MODULE_xxx namespace. - no longer using "pci_" namespace. - no longer using ETHTOOL namespace. drivers/net/chelsio/cxgb2.h - removed file, merged into common.h drivers/net/chelsio/elmer0.h - removed dead code. - added various enums. - added comment to #endif indicating which symbol it closes. drivers/net/chelsio/espi.c - removed various macros, using native kernel calls now. - removed a lot of wrappers, including t1_read/write_reg_4. drivers/net/chelsio/espi.h - added comment to #endif indicating which symbol it closes. drivers/net/chelsio/gmac.h - added comment to #endif indicating which symbol it closes. drivers/net/chelsio/mv88x201x.c - changes to sync with Chelsio TOT. drivers/net/chelsio/osdep.h - removed file, consolidation. osdep was used to translate wrapper functions since our code supports multiple OSs. removed wrappers. 
drivers/net/chelsio/pm3393.c - removed various macros, using native kernel calls now. - removed a lot of wrappers, including t1_read/write_reg_4. - removed unused code. drivers/net/chelsio/regs.h - added a few register entries for future and current feature support. - added comment to #endif indicating which symbol it closes. drivers/net/chelsio/sge.c - rewrote large portion of scatter-gather engine to stabilize performance. - using u8/u16/u32 kernel types instead of __u8/__u16/__u32 compiler types. drivers/net/chelsio/sge.h - rewrote large portion of scatter-gather engine to stabilize performance. - added comment to #endif indicating which symbol it closes. drivers/net/chelsio/subr.c - merged tp.c into subr.c - removed various macros, using native kernel calls now. - removed a lot of wrappers, including t1_read/write_reg_4. - removed unused code. drivers/net/chelsio/suni1x10gexp_regs.h - modified copyright and authorship of file. - added comment to #endif indicating which symbol it closes. drivers/net/chelsio/tp.c - removed file, merged into subr.c. drivers/net/chelsio/tp.h - removed file. include/linux/pci_ids.h - patched to include PCI_VENDOR_ID_CHELSIO 0x1425, removed define from our code. 
--- Documentation/networking/cxgb.txt | 72 +- drivers/net/chelsio/Makefile | 3 +- drivers/net/chelsio/ch_ethtool.h | 102 - drivers/net/chelsio/common.h | 259 +-- drivers/net/chelsio/cphy.h | 14 +- drivers/net/chelsio/cpl5_cmd.h | 118 +- drivers/net/chelsio/cxgb2.c | 535 +++--- drivers/net/chelsio/cxgb2.h | 122 -- drivers/net/chelsio/elmer0.h | 16 +- drivers/net/chelsio/espi.c | 172 +- drivers/net/chelsio/espi.h | 11 +- drivers/net/chelsio/gmac.h | 11 +- drivers/net/chelsio/mv88x201x.c | 36 +- drivers/net/chelsio/osdep.h | 169 -- drivers/net/chelsio/pm3393.c | 45 +- drivers/net/chelsio/regs.h | 21 +- drivers/net/chelsio/sge.c | 2301 +++++++++++++---------- drivers/net/chelsio/sge.h | 48 +- drivers/net/chelsio/subr.c | 235 ++- drivers/net/chelsio/suni1x10gexp_regs.h | 20 +- drivers/net/chelsio/tp.c | 188 -- drivers/net/chelsio/tp.h | 110 -- include/linux/pci_ids.h | 1 + 23 files changed, 2104 insertions(+), 2505 deletions(-) delete mode 100644 drivers/net/chelsio/ch_ethtool.h delete mode 100644 drivers/net/chelsio/cxgb2.h delete mode 100644 drivers/net/chelsio/osdep.h delete mode 100644 drivers/net/chelsio/tp.c delete mode 100644 drivers/net/chelsio/tp.h diff --git a/Documentation/networking/cxgb.txt b/Documentation/networking/cxgb.txt index 9f2eb646c6f5..76324638626b 100644 --- a/Documentation/networking/cxgb.txt +++ b/Documentation/networking/cxgb.txt @@ -2,9 +2,9 @@ Driver Release Notes for Linux - Version 2.1.0 + Version 2.1.1 - March 8, 2005 + June 20, 2005 CONTENTS ======== @@ -21,8 +21,7 @@ INTRODUCTION This document describes the Linux driver for Chelsio 10Gb Ethernet Network Controller. This driver supports the Chelsio N210 NIC and is backward - compatible with the Chelsio N110 model 10Gb NICs. This driver supports AMD64 - and EM64T, and x86 systems. + compatible with the Chelsio N110 model 10Gb NICs. 
FEATURES @@ -121,23 +120,17 @@ PERFORMANCE Disabling SACK: sysctl -w net.ipv4.tcp_sack=0 - Setting TCP read buffers (min/default/max): - sysctl -w net.ipv4.tcp_rmem="10000000 10000000 10000000" - - Setting TCP write buffers (min/pressure/max): - sysctl -w net.ipv4.tcp_wmem="10000000 10000000 10000000" - - Setting TCP buffer space (min/pressure/max): - sysctl -w net.ipv4.tcp_mem="10000000 10000000 10000000" - - Setting large number of incoming connection requests (2.6.x only): + Setting large number of incoming connection requests: sysctl -w net.ipv4.tcp_max_syn_backlog=3000 Setting maximum receive socket buffer size: - sysctl -w net.core.rmem_max=524287 + sysctl -w net.core.rmem_max=1024000 Setting maximum send socket buffer size: - sysctl -w net.core.wmem_max=524287 + sysctl -w net.core.wmem_max=1024000 + + Set smp_affinity (on a multiprocessor system) to a single CPU: + echo 1 > /proc/irq//smp_affinity Setting default receive socket buffer size: sysctl -w net.core.rmem_default=524287 @@ -151,8 +144,14 @@ PERFORMANCE Setting maximum backlog (# of unprocessed packets before kernel drops): sysctl -w net.core.netdev_max_backlog=300000 - Set smp_affinity (on a multiprocessor system) to a single CPU: - echo 00000001 > /proc/irq//smp_affinity + Setting TCP read buffers (min/default/max): + sysctl -w net.ipv4.tcp_rmem="10000000 10000000 10000000" + + Setting TCP write buffers (min/pressure/max): + sysctl -w net.ipv4.tcp_wmem="10000000 10000000 10000000" + + Setting TCP buffer space (min/pressure/max): + sysctl -w net.ipv4.tcp_mem="10000000 10000000 10000000" TCP window size for single connections: The receive buffer (RX_WINDOW) size must be at least as large as the @@ -186,7 +185,7 @@ DRIVER MESSAGES may be found in /var/log/messages. 
Driver up: - Chelsio Network Driver - version 2.1.0 + Chelsio Network Driver - version 2.1.1 NIC detected: eth#: Chelsio N210 1x10GBaseX NIC (rev #), PCIX 133MHz/64-bit @@ -282,13 +281,44 @@ KNOWN ISSUES the number of outstanding transactions, via BIOS configuration programming of the PCI-X card, to the following: - Data Length (bytes): 2k - Total allowed outstanding transactions: 1 + Data Length (bytes): 1k + Total allowed outstanding transactions: 2 Please refer to AMD 8131-HT/PCI-X Errata 26310 Rev 3.08 August 2004, section 56, "133-MHz Mode Split Completion Data Corruption" for more details with this bug and workarounds suggested by AMD. + It may be possible to work outside AMD's recommended PCI-X settings, try + increasing the Data Length to 2k bytes for increased performance. If you + have issues with these settings, please revert to the "safe" settings + and duplicate the problem before submitting a bug or asking for support. + + NOTE: The default setting on most systems is 8 outstanding transactions + and 2k bytes data length. + + 4. On multiprocessor systems, it has been noted that an application which + is handling 10Gb networking can switch between CPUs causing degraded + and/or unstable performance. + + If running on an SMP system and taking performance measurements, it + is suggested you either run the latest netperf-2.4.0+ or use a binding + tool such as Tim Hockin's procstate utilities (runon) + . + + Binding netserver and netperf (or other applications) to particular + CPUs will have a significant difference in performance measurements. + You may need to experiment which CPU to bind the application to in + order to achieve the best performance for your system. + + If you are developing an application designed for 10Gb networking, + please keep in mind you may want to look at kernel functions + sched_setaffinity & sched_getaffinity to bind your application. 
+ + If you are just running user-space applications such as ftp, telnet, + etc., you may want to try the runon tool provided by Tim Hockin's + procstate utility. You could also try binding the interface to a + particular CPU: runon 0 ifup eth0 + SUPPORT ======= diff --git a/drivers/net/chelsio/Makefile b/drivers/net/chelsio/Makefile index ff8c11b3a4e1..91e927827c43 100644 --- a/drivers/net/chelsio/Makefile +++ b/drivers/net/chelsio/Makefile @@ -7,6 +7,5 @@ obj-$(CONFIG_CHELSIO_T1) += cxgb.o EXTRA_CFLAGS += -I$(TOPDIR)/drivers/net/chelsio $(DEBUG_FLAGS) -cxgb-objs := cxgb2.o espi.o tp.o pm3393.o sge.o subr.o mv88x201x.o - +cxgb-objs := cxgb2.o espi.o pm3393.o sge.o subr.o mv88x201x.o diff --git a/drivers/net/chelsio/ch_ethtool.h b/drivers/net/chelsio/ch_ethtool.h deleted file mode 100644 index c523d24836b5..000000000000 --- a/drivers/net/chelsio/ch_ethtool.h +++ /dev/null @@ -1,102 +0,0 @@ -/***************************************************************************** - * * - * File: ch_ethtool.h * - * $Revision: 1.5 $ * - * $Date: 2005/03/23 07:15:58 $ * - * Description: * - * part of the Chelsio 10Gb Ethernet Driver. * - * * - * This program is free software; you can redistribute it and/or modify * - * it under the terms of the GNU General Public License, version 2, as * - * published by the Free Software Foundation. * - * * - * You should have received a copy of the GNU General Public License along * - * with this program; if not, write to the Free Software Foundation, Inc., * - * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. * - * * - * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED * - * WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF * - * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. * - * * - * http://www.chelsio.com * - * * - * Copyright (c) 2003 - 2005 Chelsio Communications, Inc. * - * All rights reserved. 
* - * * - * Maintainers: maintainers@chelsio.com * - * * - * Authors: Dimitrios Michailidis * - * Tina Yang * - * Felix Marti * - * Scott Bardone * - * Kurt Ottaway * - * Frank DiMambro * - * * - * History: * - * * - ****************************************************************************/ - -#ifndef __CHETHTOOL_LINUX_H__ -#define __CHETHTOOL_LINUX_H__ - -/* TCB size in 32-bit words */ -#define TCB_WORDS (TCB_SIZE / 4) - -enum { - ETHTOOL_SETREG, - ETHTOOL_GETREG, - ETHTOOL_SETTPI, - ETHTOOL_GETTPI, - ETHTOOL_DEVUP, - ETHTOOL_GETMTUTAB, - ETHTOOL_SETMTUTAB, - ETHTOOL_GETMTU, - ETHTOOL_SET_PM, - ETHTOOL_GET_PM, - ETHTOOL_GET_TCAM, - ETHTOOL_SET_TCAM, - ETHTOOL_GET_TCB, - ETHTOOL_READ_TCAM_WORD, -}; - -struct ethtool_reg { - uint32_t cmd; - uint32_t addr; - uint32_t val; -}; - -struct ethtool_mtus { - uint32_t cmd; - uint16_t mtus[NMTUS]; -}; - -struct ethtool_pm { - uint32_t cmd; - uint32_t tx_pg_sz; - uint32_t tx_num_pg; - uint32_t rx_pg_sz; - uint32_t rx_num_pg; - uint32_t pm_total; -}; - -struct ethtool_tcam { - uint32_t cmd; - uint32_t tcam_size; - uint32_t nservers; - uint32_t nroutes; -}; - -struct ethtool_tcb { - uint32_t cmd; - uint32_t tcb_index; - uint32_t tcb_data[TCB_WORDS]; -}; - -struct ethtool_tcam_word { - uint32_t cmd; - uint32_t addr; - uint32_t buf[3]; -}; - -#define SIOCCHETHTOOL SIOCDEVPRIVATE -#endif diff --git a/drivers/net/chelsio/common.h b/drivers/net/chelsio/common.h index 017684ff48dc..f09348802b46 100644 --- a/drivers/net/chelsio/common.h +++ b/drivers/net/chelsio/common.h @@ -1,8 +1,8 @@ /***************************************************************************** * * * File: common.h * - * $Revision: 1.5 $ * - * $Date: 2005/03/23 07:41:27 $ * + * $Revision: 1.21 $ * + * $Date: 2005/06/22 00:43:25 $ * * Description: * * part of the Chelsio 10Gb Ethernet Driver. 
* * * @@ -36,74 +36,101 @@ * * ****************************************************************************/ -#ifndef CHELSIO_COMMON_H -#define CHELSIO_COMMON_H +#ifndef _CXGB_COMMON_H_ +#define _CXGB_COMMON_H_ -#define DIMOF(x) (sizeof(x)/sizeof(x[0])) +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include -#define NMTUS 8 -#define MAX_NPORTS 4 -#define TCB_SIZE 128 +#define DRV_DESCRIPTION "Chelsio 10Gb Ethernet Driver" +#define DRV_NAME "cxgb" +#define DRV_VERSION "2.1.1" +#define PFX DRV_NAME ": " + +#define CH_ERR(fmt, ...) printk(KERN_ERR PFX fmt, ## __VA_ARGS__) +#define CH_WARN(fmt, ...) printk(KERN_WARNING PFX fmt, ## __VA_ARGS__) +#define CH_ALERT(fmt, ...) printk(KERN_ALERT PFX fmt, ## __VA_ARGS__) + +#define CH_DEVICE(devid, ssid, idx) \ + { PCI_VENDOR_ID_CHELSIO, devid, PCI_ANY_ID, ssid, 0, 0, idx } + +#define SUPPORTED_PAUSE (1 << 13) +#define SUPPORTED_LOOPBACK (1 << 15) + +#define ADVERTISED_PAUSE (1 << 13) +#define ADVERTISED_ASYM_PAUSE (1 << 14) + +typedef struct adapter adapter_t; + +void t1_elmer0_ext_intr(adapter_t *adapter); +void t1_link_changed(adapter_t *adapter, int port_id, int link_status, + int speed, int duplex, int fc); + +struct t1_rx_mode { + struct net_device *dev; + u32 idx; + struct dev_mc_list *list; +}; + +#define t1_rx_mode_promisc(rm) (rm->dev->flags & IFF_PROMISC) +#define t1_rx_mode_allmulti(rm) (rm->dev->flags & IFF_ALLMULTI) +#define t1_rx_mode_mc_cnt(rm) (rm->dev->mc_count) + +static inline u8 *t1_get_next_mcaddr(struct t1_rx_mode *rm) +{ + u8 *addr = 0; + + if (rm->idx++ < rm->dev->mc_count) { + addr = rm->list->dmi_addr; + rm->list = rm->list->next; + } + return addr; +} + +#define MAX_NPORTS 4 + +#define SPEED_INVALID 0xffff +#define DUPLEX_INVALID 0xff enum { - CHBT_BOARD_7500, - CHBT_BOARD_8000, - CHBT_BOARD_CHT101, - CHBT_BOARD_CHT110, - CHBT_BOARD_CHT210, - CHBT_BOARD_CHT204, CHBT_BOARD_N110, - CHBT_BOARD_N210, - CHBT_BOARD_COUGAR, - CHBT_BOARD_6800, 
- CHBT_BOARD_SIMUL + CHBT_BOARD_N210 }; enum { - CHBT_TERM_FPGA, CHBT_TERM_T1, - CHBT_TERM_T2, - CHBT_TERM_T3 + CHBT_TERM_T2 }; enum { - CHBT_MAC_CHELSIO_A, - CHBT_MAC_IXF1010, CHBT_MAC_PM3393, - CHBT_MAC_VSC7321, - CHBT_MAC_DUMMY }; enum { - CHBT_PHY_88E1041, - CHBT_PHY_88E1111, CHBT_PHY_88X2010, - CHBT_PHY_XPAK, - CHBT_PHY_MY3126, - CHBT_PHY_DUMMY }; enum { - PAUSE_RX = 1, - PAUSE_TX = 2, - PAUSE_AUTONEG = 4 + PAUSE_RX = 1 << 0, + PAUSE_TX = 1 << 1, + PAUSE_AUTONEG = 1 << 2 }; /* Revisions of T1 chip */ -#define TERM_T1A 0 -#define TERM_T1B 1 -#define TERM_T2 3 - -struct tp_params { - unsigned int pm_size; - unsigned int cm_size; - unsigned int pm_rx_base; - unsigned int pm_tx_base; - unsigned int pm_rx_pg_size; - unsigned int pm_tx_pg_size; - unsigned int pm_rx_num_pgs; - unsigned int pm_tx_num_pgs; - unsigned int use_5tuple_mode; +enum { + TERM_T1A = 0, + TERM_T1B = 1, + TERM_T2 = 3 }; struct sge_params { @@ -118,17 +145,7 @@ struct sge_params { unsigned int polling; }; -struct mc5_params { - unsigned int mode; /* selects MC5 width */ - unsigned int nservers; /* size of server region */ - unsigned int nroutes; /* size of routing region */ -}; - -/* Default MC5 region sizes */ -#define DEFAULT_SERVER_REGION_LEN 256 -#define DEFAULT_RT_REGION_LEN 1024 - -struct pci_params { +struct chelsio_pci_params { unsigned short speed; unsigned char width; unsigned char is_pcix; @@ -136,31 +153,14 @@ struct pci_params { struct adapter_params { struct sge_params sge; - struct mc5_params mc5; - struct tp_params tp; - struct pci_params pci; + struct chelsio_pci_params pci; const struct board_info *brd_info; - unsigned short mtus[NMTUS]; - unsigned int nports; /* # of ethernet ports */ + unsigned int nports; /* # of ethernet ports */ unsigned int stats_update_period; unsigned short chip_revision; unsigned char chip_version; - unsigned char is_asic; -}; - -struct pci_err_cnt { - unsigned int master_parity_err; - unsigned int sig_target_abort; - unsigned int rcv_target_abort; - 
unsigned int rcv_master_abort; - unsigned int sig_sys_err; - unsigned int det_parity_err; - unsigned int pio_parity_err; - unsigned int wf_parity_err; - unsigned int rf_parity_err; - unsigned int cf_parity_err; }; struct link_config { @@ -175,8 +175,60 @@ struct link_config { unsigned char autoneg; /* autonegotiating? */ }; -#define SPEED_INVALID 0xffff -#define DUPLEX_INVALID 0xff +struct cmac; +struct cphy; + +struct port_info { + struct net_device *dev; + struct cmac *mac; + struct cphy *phy; + struct link_config link_config; + struct net_device_stats netstats; +}; + +struct sge; +struct peespi; + +struct adapter { + u8 *regs; + struct pci_dev *pdev; + unsigned long registered_device_map; + unsigned long open_device_map; + unsigned long flags; + + const char *name; + int msg_enable; + u32 mmio_len; + + struct work_struct ext_intr_handler_task; + struct adapter_params params; + + struct vlan_group *vlan_grp; + + /* Terminator modules. */ + struct sge *sge; + struct peespi *espi; + + struct port_info port[MAX_NPORTS]; + struct work_struct stats_update_task; + struct timer_list stats_update_timer; + + struct semaphore mib_mutex; + spinlock_t tpi_lock; + spinlock_t work_lock; + /* guards async operations */ + spinlock_t async_lock ____cacheline_aligned; + u32 slow_intr_mask; +}; + +enum { /* adapter flags */ + FULL_INIT_DONE = 1 << 0, + TSO_CAPABLE = 1 << 2, + TCP_CSUM_CAPABLE = 1 << 3, + UDP_CSUM_CAPABLE = 1 << 4, + VLAN_ACCEL_CAPABLE = 1 << 5, + RX_CSUM_ENABLED = 1 << 6, +}; struct mdio_ops; struct gmac; @@ -205,19 +257,8 @@ struct board_info { const char *desc; }; -#include "osdep.h" - -#ifndef PCI_VENDOR_ID_CHELSIO -#define PCI_VENDOR_ID_CHELSIO 0x1425 -#endif - extern struct pci_device_id t1_pci_tbl[]; -static inline int t1_is_asic(const adapter_t *adapter) -{ - return adapter->params.is_asic; -} - static inline int adapter_matches_type(const adapter_t *adapter, int version, int revision) { @@ -245,25 +286,29 @@ static inline unsigned int 
core_ticks_per_usec(const adapter_t *adap) return board_info(adap)->clock_core / 1000000; } -int t1_tpi_write(adapter_t *adapter, u32 addr, u32 value); -int t1_tpi_read(adapter_t *adapter, u32 addr, u32 *value); +extern int t1_tpi_write(adapter_t *adapter, u32 addr, u32 value); +extern int t1_tpi_read(adapter_t *adapter, u32 addr, u32 *value); -void t1_interrupts_enable(adapter_t *adapter); -void t1_interrupts_disable(adapter_t *adapter); -void t1_interrupts_clear(adapter_t *adapter); -int elmer0_ext_intr_handler(adapter_t *adapter); -int t1_slow_intr_handler(adapter_t *adapter); +extern void t1_interrupts_enable(adapter_t *adapter); +extern void t1_interrupts_disable(adapter_t *adapter); +extern void t1_interrupts_clear(adapter_t *adapter); +extern int elmer0_ext_intr_handler(adapter_t *adapter); +extern int t1_slow_intr_handler(adapter_t *adapter); -int t1_link_start(struct cphy *phy, struct cmac *mac, struct link_config *lc); -const struct board_info *t1_get_board_info(unsigned int board_id); -const struct board_info *t1_get_board_info_from_ids(unsigned int devid, +extern int t1_link_start(struct cphy *phy, struct cmac *mac, struct link_config *lc); +extern const struct board_info *t1_get_board_info(unsigned int board_id); +extern const struct board_info *t1_get_board_info_from_ids(unsigned int devid, unsigned short ssid); -int t1_seeprom_read(adapter_t *adapter, u32 addr, u32 *data); -int t1_get_board_rev(adapter_t *adapter, const struct board_info *bi, +extern int t1_seeprom_read(adapter_t *adapter, u32 addr, u32 *data); +extern int t1_get_board_rev(adapter_t *adapter, const struct board_info *bi, struct adapter_params *p); -int t1_init_hw_modules(adapter_t *adapter); -int t1_init_sw_modules(adapter_t *adapter, const struct board_info *bi); -void t1_free_sw_modules(adapter_t *adapter); -void t1_fatal_err(adapter_t *adapter); -#endif +extern int t1_init_hw_modules(adapter_t *adapter); +extern int t1_init_sw_modules(adapter_t *adapter, const struct board_info 
*bi); +extern void t1_free_sw_modules(adapter_t *adapter); +extern void t1_fatal_err(adapter_t *adapter); +extern void t1_tp_set_udp_checksum_offload(adapter_t *adapter, int enable); +extern void t1_tp_set_tcp_checksum_offload(adapter_t *adapter, int enable); +extern void t1_tp_set_ip_checksum_offload(adapter_t *adapter, int enable); + +#endif /* _CXGB_COMMON_H_ */ diff --git a/drivers/net/chelsio/cphy.h b/drivers/net/chelsio/cphy.h index 1bc2248264c0..3412342f7345 100644 --- a/drivers/net/chelsio/cphy.h +++ b/drivers/net/chelsio/cphy.h @@ -1,8 +1,8 @@ /***************************************************************************** * * * File: cphy.h * - * $Revision: 1.4 $ * - * $Date: 2005/03/23 07:41:27 $ * + * $Revision: 1.7 $ * + * $Date: 2005/06/21 18:29:47 $ * * Description: * * part of the Chelsio 10Gb Ethernet Driver. * * * @@ -36,8 +36,8 @@ * * ****************************************************************************/ -#ifndef CHELSIO_CPHY_H -#define CHELSIO_CPHY_H +#ifndef _CXGB_CPHY_H_ +#define _CXGB_CPHY_H_ #include "common.h" @@ -142,9 +142,7 @@ struct gphy { int (*reset)(adapter_t *adapter); }; -extern struct gphy t1_my3126_ops; -extern struct gphy t1_mv88e1xxx_ops; -extern struct gphy t1_xpak_ops; extern struct gphy t1_mv88x201x_ops; extern struct gphy t1_dummy_phy_ops; -#endif + +#endif /* _CXGB_CPHY_H_ */ diff --git a/drivers/net/chelsio/cpl5_cmd.h b/drivers/net/chelsio/cpl5_cmd.h index 45e9248979f1..27925e487bcf 100644 --- a/drivers/net/chelsio/cpl5_cmd.h +++ b/drivers/net/chelsio/cpl5_cmd.h @@ -1,8 +1,8 @@ /***************************************************************************** * * * File: cpl5_cmd.h * - * $Revision: 1.4 $ * - * $Date: 2005/03/23 07:15:58 $ * + * $Revision: 1.6 $ * + * $Date: 2005/06/21 18:29:47 $ * * Description: * * part of the Chelsio 10Gb Ethernet Driver. 
* * * @@ -36,8 +36,8 @@ * * ****************************************************************************/ -#ifndef _CPL5_CMD_H -#define _CPL5_CMD_H +#ifndef _CXGB_CPL5_CMD_H_ +#define _CXGB_CPL5_CMD_H_ #include @@ -59,12 +59,12 @@ enum { /* TX_PKT_LSO ethernet types */ }; struct cpl_rx_data { - __u32 rsvd0; - __u32 len; - __u32 seq; - __u16 urg; - __u8 rsvd1; - __u8 status; + u32 rsvd0; + u32 len; + u32 seq; + u16 urg; + u8 rsvd1; + u8 status; }; /* @@ -73,73 +73,73 @@ struct cpl_rx_data { * used so we break it into 2 16-bit parts to easily meet our alignment needs. */ struct cpl_tx_pkt { - __u8 opcode; + u8 opcode; #if defined(__LITTLE_ENDIAN_BITFIELD) - __u8 iff:4; - __u8 ip_csum_dis:1; - __u8 l4_csum_dis:1; - __u8 vlan_valid:1; - __u8 rsvd:1; + u8 iff:4; + u8 ip_csum_dis:1; + u8 l4_csum_dis:1; + u8 vlan_valid:1; + u8 rsvd:1; #else - __u8 rsvd:1; - __u8 vlan_valid:1; - __u8 l4_csum_dis:1; - __u8 ip_csum_dis:1; - __u8 iff:4; + u8 rsvd:1; + u8 vlan_valid:1; + u8 l4_csum_dis:1; + u8 ip_csum_dis:1; + u8 iff:4; #endif - __u16 vlan; - __u16 len_hi; - __u16 len_lo; + u16 vlan; + u16 len_hi; + u16 len_lo; }; struct cpl_tx_pkt_lso { - __u8 opcode; + u8 opcode; #if defined(__LITTLE_ENDIAN_BITFIELD) - __u8 iff:4; - __u8 ip_csum_dis:1; - __u8 l4_csum_dis:1; - __u8 vlan_valid:1; - __u8 rsvd:1; + u8 iff:4; + u8 ip_csum_dis:1; + u8 l4_csum_dis:1; + u8 vlan_valid:1; + u8 rsvd:1; #else - __u8 rsvd:1; - __u8 vlan_valid:1; - __u8 l4_csum_dis:1; - __u8 ip_csum_dis:1; - __u8 iff:4; + u8 rsvd:1; + u8 vlan_valid:1; + u8 l4_csum_dis:1; + u8 ip_csum_dis:1; + u8 iff:4; #endif - __u16 vlan; - __u32 len; + u16 vlan; + u32 len; - __u32 rsvd2; - __u8 rsvd3; + u32 rsvd2; + u8 rsvd3; #if defined(__LITTLE_ENDIAN_BITFIELD) - __u8 tcp_hdr_words:4; - __u8 ip_hdr_words:4; + u8 tcp_hdr_words:4; + u8 ip_hdr_words:4; #else - __u8 ip_hdr_words:4; - __u8 tcp_hdr_words:4; + u8 ip_hdr_words:4; + u8 tcp_hdr_words:4; #endif - __u16 eth_type_mss; + u16 eth_type_mss; }; struct cpl_rx_pkt { - __u8 opcode; + u8 
opcode; #if defined(__LITTLE_ENDIAN_BITFIELD) - __u8 iff:4; - __u8 csum_valid:1; - __u8 bad_pkt:1; - __u8 vlan_valid:1; - __u8 rsvd:1; + u8 iff:4; + u8 csum_valid:1; + u8 bad_pkt:1; + u8 vlan_valid:1; + u8 rsvd:1; #else - __u8 rsvd:1; - __u8 vlan_valid:1; - __u8 bad_pkt:1; - __u8 csum_valid:1; - __u8 iff:4; + u8 rsvd:1; + u8 vlan_valid:1; + u8 bad_pkt:1; + u8 csum_valid:1; + u8 iff:4; #endif - __u16 csum; - __u16 vlan; - __u16 len; + u16 csum; + u16 vlan; + u16 len; }; -#endif +#endif /* _CXGB_CPL5_CMD_H_ */ diff --git a/drivers/net/chelsio/cxgb2.c b/drivers/net/chelsio/cxgb2.c index 48c4d5acfcd1..28ae478b386d 100644 --- a/drivers/net/chelsio/cxgb2.c +++ b/drivers/net/chelsio/cxgb2.c @@ -1,8 +1,8 @@ /***************************************************************************** * * * File: cxgb2.c * - * $Revision: 1.11 $ * - * $Date: 2005/03/23 07:41:27 $ * + * $Revision: 1.25 $ * + * $Date: 2005/06/22 00:43:25 $ * * Description: * * Chelsio 10Gb Ethernet Driver. * * * @@ -37,7 +37,6 @@ ****************************************************************************/ #include "common.h" - #include #include #include @@ -48,19 +47,46 @@ #include #include #include -#include -#include +#include #include -#include "ch_ethtool.h" #include "cpl5_cmd.h" #include "regs.h" #include "gmac.h" #include "cphy.h" #include "sge.h" -#include "tp.h" #include "espi.h" +#ifdef work_struct +#include +#define INIT_WORK INIT_TQUEUE +#define schedule_work schedule_task +#define flush_scheduled_work flush_scheduled_tasks + +static inline void schedule_mac_stats_update(struct adapter *ap, int secs) +{ + mod_timer(&ap->stats_update_timer, jiffies + secs * HZ); +} + +static inline void cancel_mac_stats_update(struct adapter *ap) +{ + del_timer_sync(&ap->stats_update_timer); + flush_scheduled_tasks(); +} + +/* + * Stats update timer for 2.4. It schedules a task to do the actual update as + * we need to access MAC statistics in process context. 
+ */ +static void mac_stats_timer(unsigned long data) +{ + struct adapter *ap = (struct adapter *)data; + + schedule_task(&ap->stats_update_task); +} +#else +#include + static inline void schedule_mac_stats_update(struct adapter *ap, int secs) { schedule_delayed_work(&ap->stats_update_task, secs * HZ); @@ -70,23 +96,8 @@ static inline void cancel_mac_stats_update(struct adapter *ap) { cancel_delayed_work(&ap->stats_update_task); } - -#if BITS_PER_LONG == 64 && !defined(CONFIG_X86_64) -# define FMT64 "l" -#else -# define FMT64 "ll" #endif -# define DRV_TYPE "" -# define MODULE_DESC "Chelsio Network Driver" - -static char driver_name[] = DRV_NAME; -static char driver_string[] = "Chelsio " DRV_TYPE "Network Driver"; -static char driver_version[] = "2.1.0"; - -#define PCI_DMA_64BIT ~0ULL -#define PCI_DMA_32BIT 0xffffffffULL - #define MAX_CMDQ_ENTRIES 16384 #define MAX_CMDQ1_ENTRIES 1024 #define MAX_RX_BUFFERS 16384 @@ -107,10 +118,9 @@ static char driver_version[] = "2.1.0"; */ #define EEPROM_SIZE 32 -MODULE_DESCRIPTION(MODULE_DESC); +MODULE_DESCRIPTION(DRV_DESCRIPTION); MODULE_AUTHOR("Chelsio Communications"); MODULE_LICENSE("GPL"); -MODULE_DEVICE_TABLE(pci, t1_pci_tbl); static int dflt_msg_enable = DFLT_MSG_ENABLE; @@ -140,17 +150,17 @@ static void t1_set_rxmode(struct net_device *dev) static void link_report(struct port_info *p) { if (!netif_carrier_ok(p->dev)) - printk(KERN_INFO "%s: link is down\n", p->dev->name); + printk(KERN_INFO "%s: link down\n", p->dev->name); else { - const char *s = "10 Mbps"; + const char *s = "10Mbps"; switch (p->link_config.speed) { - case SPEED_10000: s = "10 Gbps"; break; - case SPEED_1000: s = "1000 Mbps"; break; - case SPEED_100: s = "100 Mbps"; break; + case SPEED_10000: s = "10Gbps"; break; + case SPEED_1000: s = "1000Mbps"; break; + case SPEED_100: s = "100Mbps"; break; } - printk(KERN_INFO "%s: link is up at %s, %s duplex\n", + printk(KERN_INFO "%s: link up, %s, %s-duplex\n", p->dev->name, s, p->link_config.duplex == DUPLEX_FULL 
? "full" : "half"); } @@ -186,10 +196,8 @@ static void link_start(struct port_info *p) static void enable_hw_csum(struct adapter *adapter) { if (adapter->flags & TSO_CAPABLE) - t1_tp_set_ip_checksum_offload(adapter->tp, 1); /* for TSO only */ - if (adapter->flags & UDP_CSUM_CAPABLE) - t1_tp_set_udp_checksum_offload(adapter->tp, 1); - t1_tp_set_tcp_checksum_offload(adapter->tp, 1); + t1_tp_set_ip_checksum_offload(adapter, 1); /* for TSO only */ + t1_tp_set_tcp_checksum_offload(adapter, 1); } /* @@ -210,15 +218,13 @@ static int cxgb_up(struct adapter *adapter) } t1_interrupts_clear(adapter); - - if ((err = request_irq(adapter->pdev->irq, &t1_interrupt, SA_SHIRQ, - adapter->name, adapter))) + if ((err = request_irq(adapter->pdev->irq, + t1_select_intr_handler(adapter), SA_SHIRQ, + adapter->name, adapter))) { goto out_err; - + } t1_sge_start(adapter->sge); t1_interrupts_enable(adapter); - - err = 0; out_err: return err; } @@ -339,47 +345,80 @@ static void set_msglevel(struct net_device *dev, u32 val) } static char stats_strings[][ETH_GSTRING_LEN] = { - "TxOctetsOK", - "TxOctetsBad", - "TxUnicastFramesOK", - "TxMulticastFramesOK", - "TxBroadcastFramesOK", - "TxPauseFrames", - "TxFramesWithDeferredXmissions", - "TxLateCollisions", - "TxTotalCollisions", - "TxFramesAbortedDueToXSCollisions", - "TxUnderrun", - "TxLengthErrors", - "TxInternalMACXmitError", - "TxFramesWithExcessiveDeferral", - "TxFCSErrors", + "TxOctetsOK", + "TxOctetsBad", + "TxUnicastFramesOK", + "TxMulticastFramesOK", + "TxBroadcastFramesOK", + "TxPauseFrames", + "TxFramesWithDeferredXmissions", + "TxLateCollisions", + "TxTotalCollisions", + "TxFramesAbortedDueToXSCollisions", + "TxUnderrun", + "TxLengthErrors", + "TxInternalMACXmitError", + "TxFramesWithExcessiveDeferral", + "TxFCSErrors", - "RxOctetsOK", - "RxOctetsBad", - "RxUnicastFramesOK", - "RxMulticastFramesOK", - "RxBroadcastFramesOK", - "RxPauseFrames", - "RxFCSErrors", - "RxAlignErrors", - "RxSymbolErrors", - "RxDataErrors", -
"RxSequenceErrors", - "RxRuntErrors", - "RxJabberErrors", - "RxInternalMACRcvError", - "RxInRangeLengthErrors", - "RxOutOfRangeLengthField", - "RxFrameTooLongErrors" + "RxOctetsOK", + "RxOctetsBad", + "RxUnicastFramesOK", + "RxMulticastFramesOK", + "RxBroadcastFramesOK", + "RxPauseFrames", + "RxFCSErrors", + "RxAlignErrors", + "RxSymbolErrors", + "RxDataErrors", + "RxSequenceErrors", + "RxRuntErrors", + "RxJabberErrors", + "RxInternalMACRcvError", + "RxInRangeLengthErrors", + "RxOutOfRangeLengthField", + "RxFrameTooLongErrors", + + "TSO", + "VLANextractions", + "VLANinsertions", + "RxCsumGood", + "TxCsumOffload", + "RxDrops", /* comma keeps this entry separate from respQ_empty */ + + "respQ_empty", + "respQ_overflow", + "freelistQ_empty", + "pkt_too_big", + "pkt_mismatch", + "cmdQ_full0", + "cmdQ_full1", + "tx_ipfrags", + "tx_reg_pkts", + "tx_lso_pkts", + "tx_do_cksum", + + "espi_DIP2ParityErr", + "espi_DIP4Err", + "espi_RxDrops", + "espi_TxDrops", + "espi_RxOvfl", + "espi_ParityErr" }; + +#define T2_REGMAP_SIZE (3 * 1024) + +static int get_regs_len(struct net_device *dev) +{ + return T2_REGMAP_SIZE; +} static void get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *info) { struct adapter *adapter = dev->priv; - strcpy(info->driver, driver_name); - strcpy(info->version, driver_version); + strcpy(info->driver, DRV_NAME); + strcpy(info->version, DRV_VERSION); strcpy(info->fw_version, "N/A"); strcpy(info->bus_info, pci_name(adapter->pdev)); } @@ -401,42 +440,88 @@ static void get_stats(struct net_device *dev, struct ethtool_stats *stats, struct adapter *adapter = dev->priv; struct cmac *mac = adapter->port[dev->if_port].mac; const struct cmac_statistics *s; + const struct sge_port_stats *ss; + const struct sge_intr_counts *t; s = mac->ops->statistics_update(mac, MAC_STATS_UPDATE_FULL); + ss = t1_sge_get_port_stats(adapter->sge, dev->if_port); + t = t1_sge_get_intr_counts(adapter->sge); - *data++ = s->TxOctetsOK; - *data++ = s->TxOctetsBad; - *data++ = s->TxUnicastFramesOK; - *data++ = s->TxMulticastFramesOK; -
*data++ = s->TxBroadcastFramesOK; - *data++ = s->TxPauseFrames; - *data++ = s->TxFramesWithDeferredXmissions; - *data++ = s->TxLateCollisions; - *data++ = s->TxTotalCollisions; - *data++ = s->TxFramesAbortedDueToXSCollisions; - *data++ = s->TxUnderrun; - *data++ = s->TxLengthErrors; - *data++ = s->TxInternalMACXmitError; - *data++ = s->TxFramesWithExcessiveDeferral; - *data++ = s->TxFCSErrors; + *data++ = s->TxOctetsOK; + *data++ = s->TxOctetsBad; + *data++ = s->TxUnicastFramesOK; + *data++ = s->TxMulticastFramesOK; + *data++ = s->TxBroadcastFramesOK; + *data++ = s->TxPauseFrames; + *data++ = s->TxFramesWithDeferredXmissions; + *data++ = s->TxLateCollisions; + *data++ = s->TxTotalCollisions; + *data++ = s->TxFramesAbortedDueToXSCollisions; + *data++ = s->TxUnderrun; + *data++ = s->TxLengthErrors; + *data++ = s->TxInternalMACXmitError; + *data++ = s->TxFramesWithExcessiveDeferral; + *data++ = s->TxFCSErrors; - *data++ = s->RxOctetsOK; - *data++ = s->RxOctetsBad; - *data++ = s->RxUnicastFramesOK; - *data++ = s->RxMulticastFramesOK; - *data++ = s->RxBroadcastFramesOK; - *data++ = s->RxPauseFrames; - *data++ = s->RxFCSErrors; - *data++ = s->RxAlignErrors; - *data++ = s->RxSymbolErrors; - *data++ = s->RxDataErrors; - *data++ = s->RxSequenceErrors; - *data++ = s->RxRuntErrors; - *data++ = s->RxJabberErrors; - *data++ = s->RxInternalMACRcvError; - *data++ = s->RxInRangeLengthErrors; - *data++ = s->RxOutOfRangeLengthField; - *data++ = s->RxFrameTooLongErrors; + *data++ = s->RxOctetsOK; + *data++ = s->RxOctetsBad; + *data++ = s->RxUnicastFramesOK; + *data++ = s->RxMulticastFramesOK; + *data++ = s->RxBroadcastFramesOK; + *data++ = s->RxPauseFrames; + *data++ = s->RxFCSErrors; + *data++ = s->RxAlignErrors; + *data++ = s->RxSymbolErrors; + *data++ = s->RxDataErrors; + *data++ = s->RxSequenceErrors; + *data++ = s->RxRuntErrors; + *data++ = s->RxJabberErrors; + *data++ = s->RxInternalMACRcvError; + *data++ = s->RxInRangeLengthErrors; + *data++ = s->RxOutOfRangeLengthField; + 
*data++ = s->RxFrameTooLongErrors; + + *data++ = ss->tso; + *data++ = ss->vlan_xtract; + *data++ = ss->vlan_insert; + *data++ = ss->rx_cso_good; + *data++ = ss->tx_cso; + *data++ = ss->rx_drops; + + *data++ = (u64)t->respQ_empty; + *data++ = (u64)t->respQ_overflow; + *data++ = (u64)t->freelistQ_empty; + *data++ = (u64)t->pkt_too_big; + *data++ = (u64)t->pkt_mismatch; + *data++ = (u64)t->cmdQ_full[0]; + *data++ = (u64)t->cmdQ_full[1]; + *data++ = (u64)t->tx_ipfrags; + *data++ = (u64)t->tx_reg_pkts; + *data++ = (u64)t->tx_lso_pkts; + *data++ = (u64)t->tx_do_cksum; +} + +static inline void reg_block_dump(struct adapter *ap, void *buf, + unsigned int start, unsigned int end) +{ + u32 *p = buf + start; + + for ( ; start <= end; start += sizeof(u32)) + *p++ = readl(ap->regs + start); +} + +static void get_regs(struct net_device *dev, struct ethtool_regs *regs, + void *buf) +{ + struct adapter *ap = dev->priv; + + /* + * Version scheme: bits 0..9: chip version, bits 10..15: chip revision + */ + regs->version = 2; + + memset(buf, 0, T2_REGMAP_SIZE); + reg_block_dump(ap, buf, 0, A_SG_RESPACCUTIMER); } static int get_settings(struct net_device *dev, struct ethtool_cmd *cmd) @@ -455,12 +540,12 @@ static int get_settings(struct net_device *dev, struct ethtool_cmd *cmd) cmd->duplex = -1; } - cmd->port = (cmd->supported & SUPPORTED_TP) ? PORT_TP : PORT_FIBRE; - cmd->phy_address = p->phy->addr; - cmd->transceiver = XCVR_EXTERNAL; - cmd->autoneg = p->link_config.autoneg; - cmd->maxtxpkt = 0; - cmd->maxrxpkt = 0; + cmd->port = (cmd->supported & SUPPORTED_TP) ? 
PORT_TP : PORT_FIBRE; + cmd->phy_address = p->phy->addr; + cmd->transceiver = XCVR_EXTERNAL; + cmd->autoneg = p->link_config.autoneg; + cmd->maxtxpkt = 0; + cmd->maxrxpkt = 0; return 0; } @@ -506,7 +591,7 @@ static int set_settings(struct net_device *dev, struct ethtool_cmd *cmd) struct link_config *lc = &p->link_config; if (!(lc->supported & SUPPORTED_Autoneg)) - return -EOPNOTSUPP; /* can't change speed/duplex */ + return -EOPNOTSUPP; /* can't change speed/duplex */ if (cmd->autoneg == AUTONEG_DISABLE) { int cap = speed_duplex_to_caps(cmd->speed, cmd->duplex); @@ -631,7 +716,7 @@ static int set_sge_param(struct net_device *dev, struct ethtool_ringparam *e) return -EINVAL; if (adapter->flags & FULL_INIT_DONE) - return -EBUSY; + return -EBUSY; adapter->params.sge.freelQ_size[!jumbo_fl] = e->rx_pending; adapter->params.sge.freelQ_size[jumbo_fl] = e->rx_jumbo_pending; @@ -645,22 +730,20 @@ static int set_coalesce(struct net_device *dev, struct ethtool_coalesce *c) { struct adapter *adapter = dev->priv; - unsigned int sge_coalesce_usecs = 0; + /* + * If RX coalescing is requested we use NAPI, otherwise interrupts. + * This choice can be made only when all ports and the TOE are off. 
+ */ + if (adapter->open_device_map == 0) + adapter->params.sge.polling = c->use_adaptive_rx_coalesce; - sge_coalesce_usecs = adapter->params.sge.last_rx_coalesce_raw; - sge_coalesce_usecs /= board_info(adapter)->clock_core / 1000000; - if ( (adapter->params.sge.coalesce_enable && !c->use_adaptive_rx_coalesce) && - (c->rx_coalesce_usecs == sge_coalesce_usecs) ) { - adapter->params.sge.rx_coalesce_usecs = - adapter->params.sge.default_rx_coalesce_usecs; + if (adapter->params.sge.polling) { + adapter->params.sge.rx_coalesce_usecs = 0; } else { adapter->params.sge.rx_coalesce_usecs = c->rx_coalesce_usecs; } - - adapter->params.sge.last_rx_coalesce_raw = adapter->params.sge.rx_coalesce_usecs; - adapter->params.sge.last_rx_coalesce_raw *= (board_info(adapter)->clock_core / 1000000); + adapter->params.sge.coalesce_enable = c->use_adaptive_rx_coalesce; adapter->params.sge.sample_interval_usecs = c->rate_sample_interval; - adapter->params.sge.coalesce_enable = c->use_adaptive_rx_coalesce; t1_sge_set_coalesce_params(adapter->sge, &adapter->params.sge); return 0; } @@ -669,12 +752,7 @@ static int get_coalesce(struct net_device *dev, struct ethtool_coalesce *c) { struct adapter *adapter = dev->priv; - if (adapter->params.sge.coalesce_enable) { /* Adaptive algorithm on */ - c->rx_coalesce_usecs = adapter->params.sge.last_rx_coalesce_raw; - c->rx_coalesce_usecs /= board_info(adapter)->clock_core / 1000000; - } else { - c->rx_coalesce_usecs = adapter->params.sge.rx_coalesce_usecs; - } + c->rx_coalesce_usecs = adapter->params.sge.rx_coalesce_usecs; c->rate_sample_interval = adapter->params.sge.sample_interval_usecs; c->use_adaptive_rx_coalesce = adapter->params.sge.coalesce_enable; return 0; @@ -682,9 +760,7 @@ static int get_coalesce(struct net_device *dev, struct ethtool_coalesce *c) static int get_eeprom_len(struct net_device *dev) { - struct adapter *adapter = dev->priv; - - return t1_is_asic(adapter) ? 
EEPROM_SIZE : 0; + return EEPROM_SIZE; } #define EEPROM_MAGIC(ap) \ @@ -728,118 +804,55 @@ static struct ethtool_ops t1_ethtool_ops = { .get_strings = get_strings, .get_stats_count = get_stats_count, .get_ethtool_stats = get_stats, + .get_regs_len = get_regs_len, + .get_regs = get_regs, .get_tso = ethtool_op_get_tso, .set_tso = set_tso, }; -static int ethtool_ioctl(struct net_device *dev, void *useraddr) +static void cxgb_proc_cleanup(struct adapter *adapter, + struct proc_dir_entry *dir) { - u32 cmd; - struct adapter *adapter = dev->priv; - - if (copy_from_user(&cmd, useraddr, sizeof(cmd))) - return -EFAULT; - - switch (cmd) { - case ETHTOOL_SETREG: { - struct ethtool_reg edata; - - if (!capable(CAP_NET_ADMIN)) - return -EPERM; - if (copy_from_user(&edata, useraddr, sizeof(edata))) - return -EFAULT; - if ((edata.addr & 3) != 0 || edata.addr >= adapter->mmio_len) - return -EINVAL; - if (edata.addr == A_ESPI_MISC_CONTROL) - t1_espi_set_misc_ctrl(adapter, edata.val); - else { - if (edata.addr == 0x950) - t1_sge_set_ptimeout(adapter, edata.val); - else - writel(edata.val, adapter->regs + edata.addr); - } - break; - } - case ETHTOOL_GETREG: { - struct ethtool_reg edata; - - if (copy_from_user(&edata, useraddr, sizeof(edata))) - return -EFAULT; - if ((edata.addr & 3) != 0 || edata.addr >= adapter->mmio_len) - return -EINVAL; - if (edata.addr >= 0x900 && edata.addr <= 0x93c) - edata.val = t1_espi_get_mon(adapter, edata.addr, 1); - else { - if (edata.addr == 0x950) - edata.val = t1_sge_get_ptimeout(adapter); - else - edata.val = readl(adapter->regs + edata.addr); - } - if (copy_to_user(useraddr, &edata, sizeof(edata))) - return -EFAULT; - break; - } - case ETHTOOL_SETTPI: { - struct ethtool_reg edata; - - if (!capable(CAP_NET_ADMIN)) - return -EPERM; - if (copy_from_user(&edata, useraddr, sizeof(edata))) - return -EFAULT; - if ((edata.addr & 3) != 0) - return -EINVAL; - t1_tpi_write(adapter, edata.addr, edata.val); - break; - } - case ETHTOOL_GETTPI: { - struct 
ethtool_reg edata; - - if (copy_from_user(&edata, useraddr, sizeof(edata))) - return -EFAULT; - if ((edata.addr & 3) != 0) - return -EINVAL; - t1_tpi_read(adapter, edata.addr, &edata.val); - if (copy_to_user(useraddr, &edata, sizeof(edata))) - return -EFAULT; - break; - } - default: - return -EOPNOTSUPP; - } - return 0; + const char *name; + name = adapter->name; + remove_proc_entry(name, dir); } +//#define chtoe_setup_toedev(adapter) NULL +#define update_mtu_tab(adapter) +#define write_smt_entry(adapter, idx) static int t1_ioctl(struct net_device *dev, struct ifreq *req, int cmd) { - struct adapter *adapter = dev->priv; - struct mii_ioctl_data *data = (struct mii_ioctl_data *)&req->ifr_data; + struct adapter *adapter = dev->priv; + struct mii_ioctl_data *data = (struct mii_ioctl_data *)&req->ifr_data; switch (cmd) { - case SIOCGMIIPHY: - data->phy_id = adapter->port[dev->if_port].phy->addr; - /* FALLTHRU */ - case SIOCGMIIREG: { + case SIOCGMIIPHY: + data->phy_id = adapter->port[dev->if_port].phy->addr; + /* FALLTHRU */ + case SIOCGMIIREG: { struct cphy *phy = adapter->port[dev->if_port].phy; u32 val; - if (!phy->mdio_read) return -EOPNOTSUPP; + if (!phy->mdio_read) + return -EOPNOTSUPP; phy->mdio_read(adapter, data->phy_id, 0, data->reg_num & 0x1f, &val); - data->val_out = val; - break; + data->val_out = val; + break; } - case SIOCSMIIREG: { + case SIOCSMIIREG: { struct cphy *phy = adapter->port[dev->if_port].phy; - if (!capable(CAP_NET_ADMIN)) return -EPERM; - if (!phy->mdio_write) return -EOPNOTSUPP; + if (!capable(CAP_NET_ADMIN)) + return -EPERM; + if (!phy->mdio_write) + return -EOPNOTSUPP; phy->mdio_write(adapter, data->phy_id, 0, data->reg_num & 0x1f, - data->val_in); - break; + data->val_in); + break; } - case SIOCCHETHTOOL: - return ethtool_ioctl(dev, (void *)req->ifr_data); default: return -EOPNOTSUPP; } @@ -853,9 +866,9 @@ static int t1_change_mtu(struct net_device *dev, int new_mtu) struct cmac *mac = adapter->port[dev->if_port].mac; if 
(!mac->ops->set_mtu) - return -EOPNOTSUPP; + return -EOPNOTSUPP; if (new_mtu < 68) - return -EINVAL; + return -EINVAL; if ((ret = mac->ops->set_mtu(mac, new_mtu))) return ret; dev->mtu = new_mtu; @@ -902,9 +915,12 @@ static void vlan_rx_kill_vid(struct net_device *dev, unsigned short vid) #ifdef CONFIG_NET_POLL_CONTROLLER static void t1_netpoll(struct net_device *dev) { + unsigned long flags; struct adapter *adapter = dev->priv; - t1_interrupt(adapter->pdev->irq, adapter, NULL); + local_irq_save(flags); + t1_select_intr_handler(adapter)(adapter->pdev->irq, adapter, NULL); + local_irq_restore(flags); } #endif @@ -938,16 +954,17 @@ static void mac_stats_task(void *data) */ static void ext_intr_task(void *data) { - u32 enable; struct adapter *adapter = data; elmer0_ext_intr_handler(adapter); /* Now reenable external interrupts */ - t1_write_reg_4(adapter, A_PL_CAUSE, F_PL_INTR_EXT); - enable = t1_read_reg_4(adapter, A_PL_ENABLE); - t1_write_reg_4(adapter, A_PL_ENABLE, enable | F_PL_INTR_EXT); + spin_lock_irq(&adapter->async_lock); adapter->slow_intr_mask |= F_PL_INTR_EXT; + writel(F_PL_INTR_EXT, adapter->regs + A_PL_CAUSE); + writel(adapter->slow_intr_mask | F_PL_INTR_SGE_DATA, + adapter->regs + A_PL_ENABLE); + spin_unlock_irq(&adapter->async_lock); } /* @@ -955,15 +972,14 @@ static void ext_intr_task(void *data) */ void t1_elmer0_ext_intr(struct adapter *adapter) { - u32 enable = t1_read_reg_4(adapter, A_PL_ENABLE); - /* * Schedule a task to handle external interrupts as we require * a process context. We disable EXT interrupts in the interim * and let the task reenable them when it's done. 
*/ adapter->slow_intr_mask &= ~F_PL_INTR_EXT; - t1_write_reg_4(adapter, A_PL_ENABLE, enable & ~F_PL_INTR_EXT); + writel(adapter->slow_intr_mask | F_PL_INTR_SGE_DATA, + adapter->regs + A_PL_ENABLE); schedule_work(&adapter->ext_intr_handler_task); } @@ -977,7 +993,6 @@ void t1_fatal_err(struct adapter *adapter) adapter->name); } - static int __devinit init_one(struct pci_dev *pdev, const struct pci_device_id *ent) { @@ -990,14 +1005,14 @@ static int __devinit init_one(struct pci_dev *pdev, struct port_info *pi; if (!version_printed) { - printk(KERN_INFO "%s - version %s\n", driver_string, - driver_version); + printk(KERN_INFO "%s - version %s\n", DRV_DESCRIPTION, + DRV_VERSION); ++version_printed; } err = pci_enable_device(pdev); if (err) - return err; + return err; if (!(pci_resource_flags(pdev, 0) & IORESOURCE_MEM)) { CH_ERR("%s: cannot find PCI device memory base address\n", @@ -1006,20 +1021,22 @@ static int __devinit init_one(struct pci_dev *pdev, goto out_disable_pdev; } - if (!pci_set_dma_mask(pdev, PCI_DMA_64BIT)) { + if (!pci_set_dma_mask(pdev, DMA_64BIT_MASK)) { pci_using_dac = 1; - if (pci_set_consistent_dma_mask(pdev, PCI_DMA_64BIT)) { + + if (pci_set_consistent_dma_mask(pdev, DMA_64BIT_MASK)) { CH_ERR("%s: unable to obtain 64-bit DMA for" "consistent allocations\n", pci_name(pdev)); err = -ENODEV; goto out_disable_pdev; } - } else if ((err = pci_set_dma_mask(pdev, PCI_DMA_32BIT)) != 0) { + + } else if ((err = pci_set_dma_mask(pdev, DMA_32BIT_MASK)) != 0) { CH_ERR("%s: no usable DMA configuration\n", pci_name(pdev)); goto out_disable_pdev; } - err = pci_request_regions(pdev, driver_name); + err = pci_request_regions(pdev, DRV_NAME); if (err) { CH_ERR("%s: cannot obtain PCI resources\n", pci_name(pdev)); goto out_disable_pdev; @@ -1027,7 +1044,7 @@ static int __devinit init_one(struct pci_dev *pdev, pci_set_master(pdev); - mmio_start = pci_resource_start(pdev, 0); + mmio_start = pci_resource_start(pdev, 0); mmio_len = pci_resource_len(pdev, 0); bi = 
t1_get_board_info(ent->driver_data); @@ -1074,9 +1091,14 @@ static int __devinit init_one(struct pci_dev *pdev, ext_intr_task, adapter); INIT_WORK(&adapter->stats_update_task, mac_stats_task, adapter); +#ifdef work_struct + init_timer(&adapter->stats_update_timer); + adapter->stats_update_timer.function = mac_stats_timer; + adapter->stats_update_timer.data = + (unsigned long)adapter; +#endif pci_set_drvdata(pdev, netdev); - } pi = &adapter->port[i]; @@ -1088,11 +1110,12 @@ static int __devinit init_one(struct pci_dev *pdev, netdev->mem_end = mmio_start + mmio_len - 1; netdev->priv = adapter; netdev->features |= NETIF_F_SG | NETIF_F_IP_CSUM; + netdev->features |= NETIF_F_LLTX; + adapter->flags |= RX_CSUM_ENABLED | TCP_CSUM_CAPABLE; if (pci_using_dac) netdev->features |= NETIF_F_HIGHDMA; if (vlan_tso_capable(adapter)) { - adapter->flags |= UDP_CSUM_CAPABLE; #if defined(CONFIG_VLAN_8021Q) || defined(CONFIG_VLAN_8021Q_MODULE) adapter->flags |= VLAN_ACCEL_CAPABLE; netdev->features |= @@ -1120,7 +1143,7 @@ static int __devinit init_one(struct pci_dev *pdev, #endif netdev->weight = 64; - SET_ETHTOOL_OPS(netdev, &t1_ethtool_ops); + SET_ETHTOOL_OPS(netdev, &t1_ethtool_ops); } if (t1_init_sw_modules(adapter, bi) < 0) { @@ -1147,7 +1170,7 @@ static int __devinit init_one(struct pci_dev *pdev, if (!adapter->registered_device_map) adapter->name = adapter->port[i].dev->name; - __set_bit(i, &adapter->registered_device_map); + __set_bit(i, &adapter->registered_device_map); } } if (!adapter->registered_device_map) { @@ -1166,11 +1189,12 @@ static int __devinit init_one(struct pci_dev *pdev, t1_free_sw_modules(adapter); out_free_dev: if (adapter) { - if (adapter->regs) - iounmap(adapter->regs); + if (adapter->regs) iounmap(adapter->regs); for (i = bi->port_number - 1; i >= 0; --i) - if (adapter->port[i].dev) - free_netdev(adapter->port[i].dev); + if (adapter->port[i].dev) { + cxgb_proc_cleanup(adapter, proc_root_driver); + free_netdev(adapter->port[i].dev); /* not kfree(): net_devices must be released via free_netdev() */ + } }
pci_release_regions(pdev); out_disable_pdev: @@ -1200,8 +1224,10 @@ static void __devexit remove_one(struct pci_dev *pdev) t1_free_sw_modules(adapter); iounmap(adapter->regs); while (--i >= 0) - if (adapter->port[i].dev) - free_netdev(adapter->port[i].dev); + if (adapter->port[i].dev) { + cxgb_proc_cleanup(adapter, proc_root_driver); + free_netdev(adapter->port[i].dev); /* not kfree(): net_devices must be released via free_netdev() */ + } pci_release_regions(pdev); pci_disable_device(pdev); pci_set_drvdata(pdev, NULL); @@ -1210,7 +1236,7 @@ static void __devexit remove_one(struct pci_dev *pdev) } static struct pci_driver driver = { - .name = driver_name, + .name = DRV_NAME, .id_table = t1_pci_tbl, .probe = init_one, .remove = __devexit_p(remove_one), @@ -1228,4 +1254,3 @@ static void __exit t1_cleanup_module(void) module_init(t1_init_module); module_exit(t1_cleanup_module); - diff --git a/drivers/net/chelsio/cxgb2.h b/drivers/net/chelsio/cxgb2.h deleted file mode 100644 index 6ac326afcf01..000000000000 --- a/drivers/net/chelsio/cxgb2.h +++ /dev/null @@ -1,122 +0,0 @@ -/***************************************************************************** - * * - * File: cxgb2.h * - * $Revision: 1.8 $ * - * $Date: 2005/03/23 07:41:27 $ * - * Description: * - * part of the Chelsio 10Gb Ethernet Driver. * - * * - * This program is free software; you can redistribute it and/or modify * - * it under the terms of the GNU General Public License, version 2, as * - * published by the Free Software Foundation. * - * * - * You should have received a copy of the GNU General Public License along * - * with this program; if not, write to the Free Software Foundation, Inc., * - * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. * - * * - * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED * - * WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF * - * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. * - * * - * http://www.chelsio.com * - * * - * Copyright (c) 2003 - 2005 Chelsio Communications, Inc.
* - * All rights reserved. * - * * - * Maintainers: maintainers@chelsio.com * - * * - * Authors: Dimitrios Michailidis * - * Tina Yang * - * Felix Marti * - * Scott Bardone * - * Kurt Ottaway * - * Frank DiMambro * - * * - * History: * - * * - ****************************************************************************/ - -#ifndef __CXGB_LINUX_H__ -#define __CXGB_LINUX_H__ - -#include -#include -#include -#include -#include - -/* This belongs in if_ether.h */ -#define ETH_P_CPL5 0xf - -struct cmac; -struct cphy; - -struct port_info { - struct net_device *dev; - struct cmac *mac; - struct cphy *phy; - struct link_config link_config; - struct net_device_stats netstats; -}; - -struct cxgbdev; -struct t1_sge; -struct pemc3; -struct pemc4; -struct pemc5; -struct peulp; -struct petp; -struct pecspi; -struct peespi; -struct work_struct; -struct vlan_group; - -enum { /* adapter flags */ - FULL_INIT_DONE = 0x1, - USING_MSI = 0x2, - TSO_CAPABLE = 0x4, - TCP_CSUM_CAPABLE = 0x8, - UDP_CSUM_CAPABLE = 0x10, - VLAN_ACCEL_CAPABLE = 0x20, - RX_CSUM_ENABLED = 0x40, -}; - -struct adapter { - u8 *regs; - struct pci_dev *pdev; - unsigned long registered_device_map; - unsigned long open_device_map; - unsigned int flags; - - const char *name; - int msg_enable; - u32 mmio_len; - - struct work_struct ext_intr_handler_task; - struct adapter_params params; - - struct vlan_group *vlan_grp; - - /* Terminator modules. 
*/ - struct sge *sge; - struct pemc3 *mc3; - struct pemc4 *mc4; - struct pemc5 *mc5; - struct petp *tp; - struct pecspi *cspi; - struct peespi *espi; - struct peulp *ulp; - - struct port_info port[MAX_NPORTS]; - struct work_struct stats_update_task; - struct timer_list stats_update_timer; - - struct semaphore mib_mutex; - spinlock_t tpi_lock; - spinlock_t work_lock; - - spinlock_t async_lock ____cacheline_aligned; /* guards async operations */ - u32 slow_intr_mask; -}; - -#endif diff --git a/drivers/net/chelsio/elmer0.h b/drivers/net/chelsio/elmer0.h index 08f148643e7f..5590cb2dac19 100644 --- a/drivers/net/chelsio/elmer0.h +++ b/drivers/net/chelsio/elmer0.h @@ -1,8 +1,8 @@ /***************************************************************************** * * * File: elmer0.h * - * $Revision: 1.3 $ * - * $Date: 2005/03/23 07:15:58 $ * + * $Revision: 1.6 $ * + * $Date: 2005/06/21 22:49:43 $ * * Description: * * part of the Chelsio 10Gb Ethernet Driver. * * * @@ -36,14 +36,8 @@ * * ****************************************************************************/ -#ifndef CHELSIO_ELMER0_H -#define CHELSIO_ELMER0_H - -/* ELMER0 flavors */ -enum { - ELMER0_XC2S300E_6FT256_C, - ELMER0_XC2S100E_6TQ144_C -}; +#ifndef _CXGB_ELMER0_H_ +#define _CXGB_ELMER0_H_ /* ELMER0 registers */ #define A_ELMER0_VERSION 0x100000 @@ -154,4 +148,4 @@ enum { #define MI1_OP_INDIRECT_READ_INC 2 #define MI1_OP_INDIRECT_READ 3 -#endif +#endif /* _CXGB_ELMER0_H_ */ diff --git a/drivers/net/chelsio/espi.c b/drivers/net/chelsio/espi.c index 7ec2dc7bafac..230642571c92 100644 --- a/drivers/net/chelsio/espi.c +++ b/drivers/net/chelsio/espi.c @@ -1,8 +1,8 @@ /***************************************************************************** * * * File: espi.c * - * $Revision: 1.9 $ * - * $Date: 2005/03/23 07:41:27 $ * + * $Revision: 1.14 $ * + * $Date: 2005/05/14 00:59:32 $ * * Description: * * Ethernet SPI functionality. * * part of the Chelsio 10Gb Ethernet Driver. 
* @@ -63,15 +63,16 @@ static int tricn_write(adapter_t *adapter, int bundle_addr, int module_addr, { int busy, attempts = TRICN_CMD_ATTEMPTS; - t1_write_reg_4(adapter, A_ESPI_CMD_ADDR, V_WRITE_DATA(wr_data) | - V_REGISTER_OFFSET(reg_offset) | - V_CHANNEL_ADDR(ch_addr) | V_MODULE_ADDR(module_addr) | - V_BUNDLE_ADDR(bundle_addr) | - V_SPI4_COMMAND(TRICN_CMD_WRITE)); - t1_write_reg_4(adapter, A_ESPI_GOSTAT, 0); + writel(V_WRITE_DATA(wr_data) | + V_REGISTER_OFFSET(reg_offset) | + V_CHANNEL_ADDR(ch_addr) | V_MODULE_ADDR(module_addr) | + V_BUNDLE_ADDR(bundle_addr) | + V_SPI4_COMMAND(TRICN_CMD_WRITE), + adapter->regs + A_ESPI_CMD_ADDR); + writel(0, adapter->regs + A_ESPI_GOSTAT); do { - busy = t1_read_reg_4(adapter, A_ESPI_GOSTAT) & F_ESPI_CMD_BUSY; + busy = readl(adapter->regs + A_ESPI_GOSTAT) & F_ESPI_CMD_BUSY; } while (busy && --attempts); if (busy) @@ -99,12 +100,12 @@ static int tricn_init(adapter_t *adapter) /* 1 */ timeout=1000; do { - stat = t1_read_reg_4(adapter, A_ESPI_RX_RESET); + stat = readl(adapter->regs + A_ESPI_RX_RESET); is_ready = (stat & 0x4); timeout--; udelay(5); } while (!is_ready || (timeout==0)); - t1_write_reg_4(adapter, A_ESPI_RX_RESET, 0x2); + writel(0x2, adapter->regs + A_ESPI_RX_RESET); if (timeout==0) { CH_ERR("ESPI : ERROR : Timeout tricn_init() \n"); @@ -127,14 +128,14 @@ static int tricn_init(adapter_t *adapter) for (i=8; i<= 8; i++) tricn_write(adapter, 0, 2, i, TRICN_CNFG, 0xf1); /* 3 */ - t1_write_reg_4(adapter, A_ESPI_RX_RESET, 0x3); + writel(0x3, adapter->regs + A_ESPI_RX_RESET); return 0; } void t1_espi_intr_enable(struct peespi *espi) { - u32 enable, pl_intr = t1_read_reg_4(espi->adapter, A_PL_ENABLE); + u32 enable, pl_intr = readl(espi->adapter->regs + A_PL_ENABLE); /* * Cannot enable ESPI interrupts on T1B because HW asserts the @@ -144,28 +145,28 @@ void t1_espi_intr_enable(struct peespi *espi) * cannot be cleared (HW bug). */ enable = t1_is_T1B(espi->adapter) ? 
0 : ESPI_INTR_MASK; - t1_write_reg_4(espi->adapter, A_ESPI_INTR_ENABLE, enable); - t1_write_reg_4(espi->adapter, A_PL_ENABLE, pl_intr | F_PL_INTR_ESPI); + writel(enable, espi->adapter->regs + A_ESPI_INTR_ENABLE); + writel(pl_intr | F_PL_INTR_ESPI, espi->adapter->regs + A_PL_ENABLE); } void t1_espi_intr_clear(struct peespi *espi) { - t1_write_reg_4(espi->adapter, A_ESPI_INTR_STATUS, 0xffffffff); - t1_write_reg_4(espi->adapter, A_PL_CAUSE, F_PL_INTR_ESPI); + writel(0xffffffff, espi->adapter->regs + A_ESPI_INTR_STATUS); + writel(F_PL_INTR_ESPI, espi->adapter->regs + A_PL_CAUSE); } void t1_espi_intr_disable(struct peespi *espi) { - u32 pl_intr = t1_read_reg_4(espi->adapter, A_PL_ENABLE); + u32 pl_intr = readl(espi->adapter->regs + A_PL_ENABLE); - t1_write_reg_4(espi->adapter, A_ESPI_INTR_ENABLE, 0); - t1_write_reg_4(espi->adapter, A_PL_ENABLE, pl_intr & ~F_PL_INTR_ESPI); + writel(0, espi->adapter->regs + A_ESPI_INTR_ENABLE); + writel(pl_intr & ~F_PL_INTR_ESPI, espi->adapter->regs + A_PL_ENABLE); } int t1_espi_intr_handler(struct peespi *espi) { u32 cnt; - u32 status = t1_read_reg_4(espi->adapter, A_ESPI_INTR_STATUS); + u32 status = readl(espi->adapter->regs + A_ESPI_INTR_STATUS); if (status & F_DIP4ERR) espi->intr_cnt.DIP4_err++; @@ -184,7 +185,7 @@ int t1_espi_intr_handler(struct peespi *espi) * Must read the error count to clear the interrupt * that it causes. */ - cnt = t1_read_reg_4(espi->adapter, A_ESPI_DIP2_ERR_COUNT); + cnt = readl(espi->adapter->regs + A_ESPI_DIP2_ERR_COUNT); } /* @@ -193,68 +194,28 @@ int t1_espi_intr_handler(struct peespi *espi) */ if (status && t1_is_T1B(espi->adapter)) status = 1; - t1_write_reg_4(espi->adapter, A_ESPI_INTR_STATUS, status); + writel(status, espi->adapter->regs + A_ESPI_INTR_STATUS); return 0; } +const struct espi_intr_counts *t1_espi_get_intr_counts(struct peespi *espi) +{ + return &espi->intr_cnt; +} + static void espi_setup_for_pm3393(adapter_t *adapter) { u32 wmark = t1_is_T1B(adapter) ? 
0x4000 : 0x3200; - t1_write_reg_4(adapter, A_ESPI_SCH_TOKEN0, 0x1f4); - t1_write_reg_4(adapter, A_ESPI_SCH_TOKEN1, 0x1f4); - t1_write_reg_4(adapter, A_ESPI_SCH_TOKEN2, 0x1f4); - t1_write_reg_4(adapter, A_ESPI_SCH_TOKEN3, 0x1f4); - t1_write_reg_4(adapter, A_ESPI_RX_FIFO_ALMOST_EMPTY_WATERMARK, 0x100); - t1_write_reg_4(adapter, A_ESPI_RX_FIFO_ALMOST_FULL_WATERMARK, wmark); - t1_write_reg_4(adapter, A_ESPI_CALENDAR_LENGTH, 3); - t1_write_reg_4(adapter, A_ESPI_TRAIN, 0x08000008); - t1_write_reg_4(adapter, A_PORT_CONFIG, - V_RX_NPORTS(1) | V_TX_NPORTS(1)); -} - -static void espi_setup_for_vsc7321(adapter_t *adapter) -{ - u32 wmark = t1_is_T1B(adapter) ? 0x4000 : 0x3200; - - t1_write_reg_4(adapter, A_ESPI_SCH_TOKEN0, 0x1f4); - t1_write_reg_4(adapter, A_ESPI_SCH_TOKEN1, 0x1f4); - t1_write_reg_4(adapter, A_ESPI_SCH_TOKEN2, 0x1f4); - t1_write_reg_4(adapter, A_ESPI_SCH_TOKEN3, 0x1f4); - t1_write_reg_4(adapter, A_ESPI_RX_FIFO_ALMOST_EMPTY_WATERMARK, 0x100); - t1_write_reg_4(adapter, A_ESPI_RX_FIFO_ALMOST_FULL_WATERMARK, wmark); - t1_write_reg_4(adapter, A_ESPI_CALENDAR_LENGTH, 3); - t1_write_reg_4(adapter, A_ESPI_TRAIN, 0x08000008); - t1_write_reg_4(adapter, A_PORT_CONFIG, - V_RX_NPORTS(1) | V_TX_NPORTS(1)); -} - -/* - * Note that T1B requires at least 2 ports for IXF1010 due to a HW bug. 
- */ -static void espi_setup_for_ixf1010(adapter_t *adapter, int nports) -{ - t1_write_reg_4(adapter, A_ESPI_CALENDAR_LENGTH, 1); - if (nports == 4) { - if (is_T2(adapter)) { - t1_write_reg_4(adapter, A_ESPI_RX_FIFO_ALMOST_FULL_WATERMARK, - 0xf00); - t1_write_reg_4(adapter, A_ESPI_RX_FIFO_ALMOST_EMPTY_WATERMARK, - 0x3c0); - } else { - t1_write_reg_4(adapter, A_ESPI_RX_FIFO_ALMOST_FULL_WATERMARK, - 0x7ff); - t1_write_reg_4(adapter, A_ESPI_RX_FIFO_ALMOST_EMPTY_WATERMARK, - 0x1ff); - } - } else { - t1_write_reg_4(adapter, A_ESPI_RX_FIFO_ALMOST_FULL_WATERMARK, - 0x1fff); - t1_write_reg_4(adapter, A_ESPI_RX_FIFO_ALMOST_EMPTY_WATERMARK, - 0x7ff); - } - t1_write_reg_4(adapter, A_PORT_CONFIG, - V_RX_NPORTS(nports) | V_TX_NPORTS(nports)); + writel(0x1f4, adapter->regs + A_ESPI_SCH_TOKEN0); + writel(0x1f4, adapter->regs + A_ESPI_SCH_TOKEN1); + writel(0x1f4, adapter->regs + A_ESPI_SCH_TOKEN2); + writel(0x1f4, adapter->regs + A_ESPI_SCH_TOKEN3); + writel(0x100, adapter->regs + A_ESPI_RX_FIFO_ALMOST_EMPTY_WATERMARK); + writel(wmark, adapter->regs + A_ESPI_RX_FIFO_ALMOST_FULL_WATERMARK); + writel(3, adapter->regs + A_ESPI_CALENDAR_LENGTH); + writel(0x08000008, adapter->regs + A_ESPI_TRAIN); + writel(V_RX_NPORTS(1) | V_TX_NPORTS(1), adapter->regs + A_PORT_CONFIG); } /* T2 Init part -- */ @@ -263,43 +224,42 @@ static void espi_setup_for_ixf1010(adapter_t *adapter, int nports) /* 3. Init TriCN Hard Macro */ int t1_espi_init(struct peespi *espi, int mac_type, int nports) { + u32 cnt; + u32 status_enable_extra = 0; adapter_t *adapter = espi->adapter; - u32 cnt; u32 status, burstval = 0x800100; /* Disable ESPI training. MACs that can handle it enable it below. 
*/ - t1_write_reg_4(adapter, A_ESPI_TRAIN, 0); + writel(0, adapter->regs + A_ESPI_TRAIN); if (is_T2(adapter)) { - t1_write_reg_4(adapter, A_ESPI_MISC_CONTROL, - V_OUT_OF_SYNC_COUNT(4) | - V_DIP2_PARITY_ERR_THRES(3) | V_DIP4_THRES(1)); + writel(V_OUT_OF_SYNC_COUNT(4) | + V_DIP2_PARITY_ERR_THRES(3) | + V_DIP4_THRES(1), adapter->regs + A_ESPI_MISC_CONTROL); if (nports == 4) { /* T204: maxburst1 = 0x40, maxburst2 = 0x20 */ burstval = 0x200040; } } - t1_write_reg_4(adapter, A_ESPI_MAXBURST1_MAXBURST2, burstval); + writel(burstval, adapter->regs + A_ESPI_MAXBURST1_MAXBURST2); - if (mac_type == CHBT_MAC_PM3393) + switch (mac_type) { + case CHBT_MAC_PM3393: espi_setup_for_pm3393(adapter); - else if (mac_type == CHBT_MAC_VSC7321) - espi_setup_for_vsc7321(adapter); - else if (mac_type == CHBT_MAC_IXF1010) { - status_enable_extra = F_INTEL1010MODE; - espi_setup_for_ixf1010(adapter, nports); - } else + break; + default: return -1; + } /* * Make sure any pending interrupts from the SPI are * Cleared before enabling the interrupt. 
*/ - t1_write_reg_4(espi->adapter, A_ESPI_INTR_ENABLE, ESPI_INTR_MASK); - status = t1_read_reg_4(espi->adapter, A_ESPI_INTR_STATUS); + writel(ESPI_INTR_MASK, espi->adapter->regs + A_ESPI_INTR_ENABLE); + status = readl(espi->adapter->regs + A_ESPI_INTR_STATUS); if (status & F_DIP2PARITYERR) { - cnt = t1_read_reg_4(espi->adapter, A_ESPI_DIP2_ERR_COUNT); + cnt = readl(espi->adapter->regs + A_ESPI_DIP2_ERR_COUNT); } /* @@ -308,10 +268,10 @@ int t1_espi_init(struct peespi *espi, int mac_type, int nports) */ if (status && t1_is_T1B(espi->adapter)) status = 1; - t1_write_reg_4(espi->adapter, A_ESPI_INTR_STATUS, status); + writel(status, espi->adapter->regs + A_ESPI_INTR_STATUS); - t1_write_reg_4(adapter, A_ESPI_FIFO_STATUS_ENABLE, - status_enable_extra | F_RXSTATUSENABLE); + writel(status_enable_extra | F_RXSTATUSENABLE, + adapter->regs + A_ESPI_FIFO_STATUS_ENABLE); if (is_T2(adapter)) { tricn_init(adapter); @@ -319,10 +279,10 @@ int t1_espi_init(struct peespi *espi, int mac_type, int nports) * Always position the control at the 1st port egress IN * (sop,eop) counter to reduce PIOs for T/N210 workaround. 
*/ - espi->misc_ctrl = (t1_read_reg_4(adapter, A_ESPI_MISC_CONTROL) + espi->misc_ctrl = (readl(adapter->regs + A_ESPI_MISC_CONTROL) & ~MON_MASK) | (F_MONITORED_DIRECTION | F_MONITORED_INTERFACE); - t1_write_reg_4(adapter, A_ESPI_MISC_CONTROL, espi->misc_ctrl); + writel(espi->misc_ctrl, adapter->regs + A_ESPI_MISC_CONTROL); spin_lock_init(&espi->lock); } @@ -354,15 +314,16 @@ void t1_espi_set_misc_ctrl(adapter_t *adapter, u32 val) spin_lock(&espi->lock); espi->misc_ctrl = (val & ~MON_MASK) | (espi->misc_ctrl & MON_MASK); - t1_write_reg_4(adapter, A_ESPI_MISC_CONTROL, espi->misc_ctrl); + writel(espi->misc_ctrl, adapter->regs + A_ESPI_MISC_CONTROL); spin_unlock(&espi->lock); } u32 t1_espi_get_mon(adapter_t *adapter, u32 addr, u8 wait) { - struct peespi *espi = adapter->espi; u32 sel; + struct peespi *espi = adapter->espi; + if (!is_T2(adapter)) return 0; sel = V_MONITORED_PORT_NUM((addr & 0x3c) >> 2); @@ -373,14 +334,13 @@ u32 t1_espi_get_mon(adapter_t *adapter, u32 addr, u8 wait) else spin_lock(&espi->lock); if ((sel != (espi->misc_ctrl & MON_MASK))) { - t1_write_reg_4(adapter, A_ESPI_MISC_CONTROL, - ((espi->misc_ctrl & ~MON_MASK) | sel)); - sel = t1_read_reg_4(adapter, A_ESPI_SCH_TOKEN3); - t1_write_reg_4(adapter, A_ESPI_MISC_CONTROL, - espi->misc_ctrl); + writel(((espi->misc_ctrl & ~MON_MASK) | sel), + adapter->regs + A_ESPI_MISC_CONTROL); + sel = readl(adapter->regs + A_ESPI_SCH_TOKEN3); + writel(espi->misc_ctrl, adapter->regs + A_ESPI_MISC_CONTROL); } else - sel = t1_read_reg_4(adapter, A_ESPI_SCH_TOKEN3); + sel = readl(adapter->regs + A_ESPI_SCH_TOKEN3); spin_unlock(&espi->lock); return sel; } diff --git a/drivers/net/chelsio/espi.h b/drivers/net/chelsio/espi.h index 0f84e8b6399f..c90e37f8457c 100644 --- a/drivers/net/chelsio/espi.h +++ b/drivers/net/chelsio/espi.h @@ -1,8 +1,8 @@ /***************************************************************************** * * * File: espi.h * - * $Revision: 1.4 $ * - * $Date: 2005/03/23 07:15:58 $ * + * $Revision: 1.7 $ * + * 
$Date: 2005/06/21 18:29:47 $ * * Description: * * part of the Chelsio 10Gb Ethernet Driver. * * * @@ -36,8 +36,8 @@ * * ****************************************************************************/ -#ifndef CHELSIO_ESPI_H -#define CHELSIO_ESPI_H +#ifndef _CXGB_ESPI_H_ +#define _CXGB_ESPI_H_ #include "common.h" @@ -60,8 +60,9 @@ void t1_espi_intr_enable(struct peespi *); void t1_espi_intr_clear(struct peespi *); void t1_espi_intr_disable(struct peespi *); int t1_espi_intr_handler(struct peespi *); +const struct espi_intr_counts *t1_espi_get_intr_counts(struct peespi *espi); void t1_espi_set_misc_ctrl(adapter_t *adapter, u32 val); u32 t1_espi_get_mon(adapter_t *adapter, u32 addr, u8 wait); -#endif +#endif /* _CXGB_ESPI_H_ */ diff --git a/drivers/net/chelsio/gmac.h b/drivers/net/chelsio/gmac.h index 24501e2232cc..746b0eeea964 100644 --- a/drivers/net/chelsio/gmac.h +++ b/drivers/net/chelsio/gmac.h @@ -1,8 +1,8 @@ /***************************************************************************** * * * File: gmac.h * - * $Revision: 1.3 $ * - * $Date: 2005/03/23 07:15:58 $ * + * $Revision: 1.6 $ * + * $Date: 2005/06/21 18:29:47 $ * * Description: * * Generic MAC functionality. * * part of the Chelsio 10Gb Ethernet Driver. 
* @@ -37,8 +37,8 @@ * * ****************************************************************************/ -#ifndef CHELSIO_GMAC_H -#define CHELSIO_GMAC_H +#ifndef _CXGB_GMAC_H_ +#define _CXGB_GMAC_H_ #include "common.h" @@ -130,4 +130,5 @@ extern struct gmac t1_chelsio_mac_ops; extern struct gmac t1_vsc7321_ops; extern struct gmac t1_ixf1010_ops; extern struct gmac t1_dummy_mac_ops; -#endif + +#endif /* _CXGB_GMAC_H_ */ diff --git a/drivers/net/chelsio/mv88x201x.c b/drivers/net/chelsio/mv88x201x.c index f54133af1bce..db5034282782 100644 --- a/drivers/net/chelsio/mv88x201x.c +++ b/drivers/net/chelsio/mv88x201x.c @@ -1,8 +1,8 @@ /***************************************************************************** * * * File: mv88x201x.c * - * $Revision: 1.7 $ * - * $Date: 2005/03/23 07:15:59 $ * + * $Revision: 1.12 $ * + * $Date: 2005/04/15 19:27:14 $ * * Description: * * Marvell PHY (mv88x201x) functionality. * * part of the Chelsio 10Gb Ethernet Driver. * @@ -85,33 +85,29 @@ static int mv88x201x_reset(struct cphy *cphy, int wait) static int mv88x201x_interrupt_enable(struct cphy *cphy) { + u32 elmer; + /* Enable PHY LASI interrupts. */ mdio_write(cphy, 0x1, 0x9002, 0x1); /* Enable Marvell interrupts through Elmer0. */ - if (t1_is_asic(cphy->adapter)) { - u32 elmer; - - t1_tpi_read(cphy->adapter, A_ELMER0_INT_ENABLE, &elmer); - elmer |= ELMER0_GP_BIT6; - t1_tpi_write(cphy->adapter, A_ELMER0_INT_ENABLE, elmer); - } + t1_tpi_read(cphy->adapter, A_ELMER0_INT_ENABLE, &elmer); + elmer |= ELMER0_GP_BIT6; + t1_tpi_write(cphy->adapter, A_ELMER0_INT_ENABLE, elmer); return 0; } static int mv88x201x_interrupt_disable(struct cphy *cphy) { + u32 elmer; + /* Disable PHY LASI interrupts. */ mdio_write(cphy, 0x1, 0x9002, 0x0); /* Disable Marvell interrupts through Elmer0. 
*/ - if (t1_is_asic(cphy->adapter)) { - u32 elmer; - - t1_tpi_read(cphy->adapter, A_ELMER0_INT_ENABLE, &elmer); - elmer &= ~ELMER0_GP_BIT6; - t1_tpi_write(cphy->adapter, A_ELMER0_INT_ENABLE, elmer); - } + t1_tpi_read(cphy->adapter, A_ELMER0_INT_ENABLE, &elmer); + elmer &= ~ELMER0_GP_BIT6; + t1_tpi_write(cphy->adapter, A_ELMER0_INT_ENABLE, elmer); return 0; } @@ -144,11 +140,9 @@ static int mv88x201x_interrupt_clear(struct cphy *cphy) #endif /* Clear Marvell interrupts through Elmer0. */ - if (t1_is_asic(cphy->adapter)) { - t1_tpi_read(cphy->adapter, A_ELMER0_INT_CAUSE, &elmer); - elmer |= ELMER0_GP_BIT6; - t1_tpi_write(cphy->adapter, A_ELMER0_INT_CAUSE, elmer); - } + t1_tpi_read(cphy->adapter, A_ELMER0_INT_CAUSE, &elmer); + elmer |= ELMER0_GP_BIT6; + t1_tpi_write(cphy->adapter, A_ELMER0_INT_CAUSE, elmer); return 0; } diff --git a/drivers/net/chelsio/osdep.h b/drivers/net/chelsio/osdep.h deleted file mode 100644 index 095cb474434f..000000000000 --- a/drivers/net/chelsio/osdep.h +++ /dev/null @@ -1,169 +0,0 @@ -/***************************************************************************** - * * - * File: osdep.h * - * $Revision: 1.9 $ * - * $Date: 2005/03/23 07:41:27 $ * - * Description: * - * part of the Chelsio 10Gb Ethernet Driver. * - * * - * This program is free software; you can redistribute it and/or modify * - * it under the terms of the GNU General Public License, version 2, as * - * published by the Free Software Foundation. * - * * - * You should have received a copy of the GNU General Public License along * - * with this program; if not, write to the Free Software Foundation, Inc., * - * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. * - * * - * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED * - * WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF * - * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. 
* - * * - * http://www.chelsio.com * - * * - * Copyright (c) 2003 - 2005 Chelsio Communications, Inc. * - * All rights reserved. * - * * - * Maintainers: maintainers@chelsio.com * - * * - * Authors: Dimitrios Michailidis * - * Tina Yang * - * Felix Marti * - * Scott Bardone * - * Kurt Ottaway * - * Frank DiMambro * - * * - * History: * - * * - ****************************************************************************/ - -#ifndef __CHELSIO_OSDEP_H -#define __CHELSIO_OSDEP_H - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "cxgb2.h" - -#define DRV_NAME "cxgb" -#define PFX DRV_NAME ": " - -#define CH_ERR(fmt, ...) printk(KERN_ERR PFX fmt, ## __VA_ARGS__) -#define CH_WARN(fmt, ...) printk(KERN_WARNING PFX fmt, ## __VA_ARGS__) -#define CH_ALERT(fmt, ...) printk(KERN_ALERT PFX fmt, ## __VA_ARGS__) - -/* - * More powerful macro that selectively prints messages based on msg_enable. - * For info and debugging messages. - */ -#define CH_MSG(adapter, level, category, fmt, ...) do { \ - if ((adapter)->msg_enable & NETIF_MSG_##category) \ - printk(KERN_##level PFX "%s: " fmt, (adapter)->name, \ - ## __VA_ARGS__); \ -} while (0) - -#ifdef DEBUG -# define CH_DBG(adapter, category, fmt, ...) \ - CH_MSG(adapter, DEBUG, category, fmt, ## __VA_ARGS__) -#else -# define CH_DBG(fmt, ...) -#endif - -/* Additional NETIF_MSG_* categories */ -#define NETIF_MSG_MMIO 0x8000000 - -#define CH_DEVICE(devid, ssid, idx) \ - { PCI_VENDOR_ID_CHELSIO, devid, PCI_ANY_ID, ssid, 0, 0, idx } - -#define SUPPORTED_PAUSE (1 << 13) -#define SUPPORTED_LOOPBACK (1 << 15) - -#define ADVERTISED_PAUSE (1 << 13) -#define ADVERTISED_ASYM_PAUSE (1 << 14) - -/* - * Now that we have included the driver's main data structure, - * we typedef it to something the rest of the system understands. 
- */ -typedef struct adapter adapter_t; - -#define TPI_LOCK(adapter) spin_lock(&(adapter)->tpi_lock) -#define TPI_UNLOCK(adapter) spin_unlock(&(adapter)->tpi_lock) - -void t1_elmer0_ext_intr(adapter_t *adapter); -void t1_link_changed(adapter_t *adapter, int port_id, int link_status, - int speed, int duplex, int fc); - -static inline u16 t1_read_reg_2(adapter_t *adapter, u32 reg_addr) -{ - u16 val = readw(adapter->regs + reg_addr); - - CH_DBG(adapter, MMIO, "read register 0x%x value 0x%x\n", reg_addr, - val); - return val; -} - -static inline void t1_write_reg_2(adapter_t *adapter, u32 reg_addr, u16 val) -{ - CH_DBG(adapter, MMIO, "setting register 0x%x to 0x%x\n", reg_addr, - val); - writew(val, adapter->regs + reg_addr); -} - -static inline u32 t1_read_reg_4(adapter_t *adapter, u32 reg_addr) -{ - u32 val = readl(adapter->regs + reg_addr); - - CH_DBG(adapter, MMIO, "read register 0x%x value 0x%x\n", reg_addr, - val); - return val; -} - -static inline void t1_write_reg_4(adapter_t *adapter, u32 reg_addr, u32 val) -{ - CH_DBG(adapter, MMIO, "setting register 0x%x to 0x%x\n", reg_addr, - val); - writel(val, adapter->regs + reg_addr); -} - -static inline const char *port_name(adapter_t *adapter, int port_idx) -{ - return adapter->port[port_idx].dev->name; -} - -static inline void t1_set_hw_addr(adapter_t *adapter, int port_idx, - u8 hw_addr[]) -{ - memcpy(adapter->port[port_idx].dev->dev_addr, hw_addr, ETH_ALEN); -} - -struct t1_rx_mode { - struct net_device *dev; - u32 idx; - struct dev_mc_list *list; -}; - -#define t1_rx_mode_promisc(rm) (rm->dev->flags & IFF_PROMISC) -#define t1_rx_mode_allmulti(rm) (rm->dev->flags & IFF_ALLMULTI) -#define t1_rx_mode_mc_cnt(rm) (rm->dev->mc_count) - -static inline u8 *t1_get_next_mcaddr(struct t1_rx_mode *rm) -{ - u8 *addr = 0; - - if (rm->idx++ < rm->dev->mc_count) { - addr = rm->list->dmi_addr; - rm->list = rm->list->next; - } - return addr; -} - -#endif diff --git a/drivers/net/chelsio/pm3393.c b/drivers/net/chelsio/pm3393.c 
index 17bd20f60d99..04a1404fc65e 100644 --- a/drivers/net/chelsio/pm3393.c +++ b/drivers/net/chelsio/pm3393.c @@ -1,8 +1,8 @@ /***************************************************************************** * * * File: pm3393.c * - * $Revision: 1.9 $ * - * $Date: 2005/03/23 07:41:27 $ * + * $Revision: 1.16 $ * + * $Date: 2005/05/14 00:59:32 $ * * Description: * * PMC/SIERRA (pm3393) MAC-PHY functionality. * * part of the Chelsio 10Gb Ethernet Driver. * @@ -45,15 +45,19 @@ /* 802.3ae 10Gb/s MDIO Manageable Device(MMD) */ -#define MMD_RESERVED 0 -#define MMD_PMAPMD 1 -#define MMD_WIS 2 -#define MMD_PCS 3 -#define MMD_PHY_XGXS 4 /* XGMII Extender Sublayer */ -#define MMD_DTE_XGXS 5 +enum { + MMD_RESERVED, + MMD_PMAPMD, + MMD_WIS, + MMD_PCS, + MMD_PHY_XGXS, /* XGMII Extender Sublayer */ + MMD_DTE_XGXS, +}; -#define PHY_XGXS_CTRL_1 0 -#define PHY_XGXS_STATUS_1 1 +enum { + PHY_XGXS_CTRL_1, + PHY_XGXS_STATUS_1 +}; #define OFFSET(REG_ADDR) (REG_ADDR << 2) @@ -160,9 +164,9 @@ static int pm3393_interrupt_enable(struct cmac *cmac) 0 /*SUNI1x10GEXP_BITMSK_TOP_INTE */ ); /* TERMINATOR - PL_INTERUPTS_EXT */ - pl_intr = t1_read_reg_4(cmac->adapter, A_PL_ENABLE); + pl_intr = readl(cmac->adapter->regs + A_PL_ENABLE); pl_intr |= F_PL_INTR_EXT; - t1_write_reg_4(cmac->adapter, A_PL_ENABLE, pl_intr); + writel(pl_intr, cmac->adapter->regs + A_PL_ENABLE); return 0; } @@ -242,9 +246,9 @@ static int pm3393_interrupt_clear(struct cmac *cmac) /* TERMINATOR - PL_INTERUPTS_EXT */ - pl_intr = t1_read_reg_4(cmac->adapter, A_PL_CAUSE); + pl_intr = readl(cmac->adapter->regs + A_PL_CAUSE); pl_intr |= F_PL_INTR_EXT; - t1_write_reg_4(cmac->adapter, A_PL_CAUSE, pl_intr); + writel(pl_intr, cmac->adapter->regs + A_PL_CAUSE); return 0; } @@ -261,8 +265,6 @@ static int pm3393_interrupt_handler(struct cmac *cmac) /* Read the master interrupt status register. 
*/ pmread(cmac, SUNI1x10GEXP_REG_MASTER_INTERRUPT_STATUS, &master_intr_status); - CH_DBG(cmac->adapter, INTR, "PM3393 intr cause 0x%x\n", - master_intr_status); /* TBD XXX Lets just clear everything for now */ pm3393_interrupt_clear(cmac); @@ -703,10 +705,9 @@ static struct cmac *pm3393_mac_create(adapter_t *adapter, int index) t1_tpi_write(adapter, OFFSET(0x3040), 0x0c32); /* # TXXG Config */ /* For T1 use timer based Mac flow control. */ - if (t1_is_T1B(adapter)) - t1_tpi_write(adapter, OFFSET(0x304d), 0x8000); + t1_tpi_write(adapter, OFFSET(0x304d), 0x8000); t1_tpi_write(adapter, OFFSET(0x2040), 0x059c); /* # RXXG Config */ - t1_tpi_write(adapter, OFFSET(0x2049), 0x0000); /* # RXXG Cut Through */ + t1_tpi_write(adapter, OFFSET(0x2049), 0x0001); /* # RXXG Cut Through */ t1_tpi_write(adapter, OFFSET(0x2070), 0x0000); /* # Disable promiscuous mode */ /* Setup Exact Match Filter 0 to allow broadcast packets. @@ -814,12 +815,6 @@ static int pm3393_mac_reset(adapter_t * adapter) successful_reset = (is_pl4_reset_finished && !is_pl4_outof_lock && is_xaui_mabc_pll_locked); - - CH_DBG(adapter, HW, - "PM3393 HW reset %d: pl4_reset 0x%x, val 0x%x, " - "is_pl4_outof_lock 0x%x, xaui_locked 0x%x\n", - i, is_pl4_reset_finished, val, is_pl4_outof_lock, - is_xaui_mabc_pll_locked); } return successful_reset ? 0 : 1; } diff --git a/drivers/net/chelsio/regs.h b/drivers/net/chelsio/regs.h index 5a70803eb1b6..b90e11f40d1f 100644 --- a/drivers/net/chelsio/regs.h +++ b/drivers/net/chelsio/regs.h @@ -1,8 +1,8 @@ /***************************************************************************** * * * File: regs.h * - * $Revision: 1.4 $ * - * $Date: 2005/03/23 07:15:59 $ * + * $Revision: 1.8 $ * + * $Date: 2005/06/21 18:29:48 $ * * Description: * * part of the Chelsio 10Gb Ethernet Driver. 
* * * @@ -36,7 +36,8 @@ * * ****************************************************************************/ -/* Do not edit this file */ +#ifndef _CXGB_REGS_H_ +#define _CXGB_REGS_H_ /* SGE registers */ #define A_SG_CONTROL 0x0 @@ -74,6 +75,14 @@ #define V_DISABLE_CMDQ1_GTS(x) ((x) << S_DISABLE_CMDQ1_GTS) #define F_DISABLE_CMDQ1_GTS V_DISABLE_CMDQ1_GTS(1U) +#define S_DISABLE_FL0_GTS 10 +#define V_DISABLE_FL0_GTS(x) ((x) << S_DISABLE_FL0_GTS) +#define F_DISABLE_FL0_GTS V_DISABLE_FL0_GTS(1U) + +#define S_DISABLE_FL1_GTS 11 +#define V_DISABLE_FL1_GTS(x) ((x) << S_DISABLE_FL1_GTS) +#define F_DISABLE_FL1_GTS V_DISABLE_FL1_GTS(1U) + #define S_ENABLE_BIG_ENDIAN 12 #define V_ENABLE_BIG_ENDIAN(x) ((x) << S_ENABLE_BIG_ENDIAN) #define F_ENABLE_BIG_ENDIAN V_ENABLE_BIG_ENDIAN(1U) @@ -132,6 +141,7 @@ #define F_PACKET_MISMATCH V_PACKET_MISMATCH(1U) #define A_SG_INT_CAUSE 0xbc +#define A_SG_RESPACCUTIMER 0xc0 /* MC3 registers */ @@ -247,6 +257,10 @@ #define V_SYN_COOKIE_PARAMETER(x) ((x) << S_SYN_COOKIE_PARAMETER) #define A_TP_PC_CONFIG 0x348 +#define S_DIS_TX_FILL_WIN_PUSH 12 +#define V_DIS_TX_FILL_WIN_PUSH(x) ((x) << S_DIS_TX_FILL_WIN_PUSH) +#define F_DIS_TX_FILL_WIN_PUSH V_DIS_TX_FILL_WIN_PUSH(1U) + #define S_TP_PC_REV 30 #define M_TP_PC_REV 0x3 #define G_TP_PC_REV(x) (((x) >> S_TP_PC_REV) & M_TP_PC_REV) @@ -451,3 +465,4 @@ #define M_PCI_MODE_CLK 0x3 #define G_PCI_MODE_CLK(x) (((x) >> S_PCI_MODE_CLK) & M_PCI_MODE_CLK) +#endif /* _CXGB_REGS_H_ */ diff --git a/drivers/net/chelsio/sge.c b/drivers/net/chelsio/sge.c index bcf8b1e939b0..53b41d99b00b 100644 --- a/drivers/net/chelsio/sge.c +++ b/drivers/net/chelsio/sge.c @@ -1,8 +1,8 @@ /***************************************************************************** * * * File: sge.c * - * $Revision: 1.13 $ * - * $Date: 2005/03/23 07:41:27 $ * + * $Revision: 1.26 $ * + * $Date: 2005/06/21 18:29:48 $ * * Description: * * DMA engine. * * part of the Chelsio 10Gb Ethernet Driver. 
* @@ -58,59 +58,62 @@ #include "regs.h" #include "espi.h" + +#ifdef NETIF_F_TSO #include +#endif #define SGE_CMDQ_N 2 #define SGE_FREELQ_N 2 -#define SGE_CMDQ0_E_N 512 +#define SGE_CMDQ0_E_N 1024 #define SGE_CMDQ1_E_N 128 #define SGE_FREEL_SIZE 4096 #define SGE_JUMBO_FREEL_SIZE 512 #define SGE_FREEL_REFILL_THRESH 16 #define SGE_RESPQ_E_N 1024 -#define SGE_INTR_BUCKETSIZE 100 -#define SGE_INTR_LATBUCKETS 5 -#define SGE_INTR_MAXBUCKETS 11 -#define SGE_INTRTIMER0 1 -#define SGE_INTRTIMER1 50 -#define SGE_INTRTIMER_NRES 10000 -#define SGE_RX_COPY_THRESHOLD 256 +#define SGE_INTRTIMER_NRES 1000 +#define SGE_RX_COPY_THRES 256 #define SGE_RX_SM_BUF_SIZE 1536 -#define SGE_RESPQ_REPLENISH_THRES ((3 * SGE_RESPQ_E_N) / 4) +# define SGE_RX_DROP_THRES 2 -#define SGE_RX_OFFSET 2 -#ifndef NET_IP_ALIGN -# define NET_IP_ALIGN SGE_RX_OFFSET -#endif +#define SGE_RESPQ_REPLENISH_THRES (SGE_RESPQ_E_N / 4) /* - * Memory Mapped HW Command, Freelist and Response Queue Descriptors + * Period of the TX buffer reclaim timer. This timer does not need to run + * frequently as TX buffers are usually reclaimed by new TX packets. + */ +#define TX_RECLAIM_PERIOD (HZ / 4) + +#ifndef NET_IP_ALIGN +# define NET_IP_ALIGN 2 +#endif + +#define M_CMD_LEN 0x7fffffff +#define V_CMD_LEN(v) (v) +#define G_CMD_LEN(v) ((v) & M_CMD_LEN) +#define V_CMD_GEN1(v) ((v) << 31) +#define V_CMD_GEN2(v) (v) +#define F_CMD_DATAVALID (1 << 1) +#define F_CMD_SOP (1 << 2) +#define V_CMD_EOP(v) ((v) << 3) + +/* + * Command queue, receive buffer list, and response queue descriptors. 
*/ #if defined(__BIG_ENDIAN_BITFIELD) struct cmdQ_e { - u32 AddrLow; - u32 GenerationBit : 1; - u32 BufferLength : 31; - u32 RespQueueSelector : 4; - u32 ResponseTokens : 12; - u32 CmdId : 8; - u32 Reserved : 3; - u32 TokenValid : 1; - u32 Eop : 1; - u32 Sop : 1; - u32 DataValid : 1; - u32 GenerationBit2 : 1; - u32 AddrHigh; + u32 addr_lo; + u32 len_gen; + u32 flags; + u32 addr_hi; }; struct freelQ_e { - u32 AddrLow; - u32 GenerationBit : 1; - u32 BufferLength : 31; - u32 Reserved : 31; - u32 GenerationBit2 : 1; - u32 AddrHigh; + u32 addr_lo; + u32 len_gen; + u32 gen2; + u32 addr_hi; }; struct respQ_e { @@ -128,31 +131,19 @@ struct respQ_e { u32 GenerationBit : 1; u32 BufferLength; }; - #elif defined(__LITTLE_ENDIAN_BITFIELD) struct cmdQ_e { - u32 BufferLength : 31; - u32 GenerationBit : 1; - u32 AddrLow; - u32 AddrHigh; - u32 GenerationBit2 : 1; - u32 DataValid : 1; - u32 Sop : 1; - u32 Eop : 1; - u32 TokenValid : 1; - u32 Reserved : 3; - u32 CmdId : 8; - u32 ResponseTokens : 12; - u32 RespQueueSelector : 4; + u32 len_gen; + u32 addr_lo; + u32 addr_hi; + u32 flags; }; struct freelQ_e { - u32 BufferLength : 31; - u32 GenerationBit : 1; - u32 AddrLow; - u32 AddrHigh; - u32 GenerationBit2 : 1; - u32 Reserved : 31; + u32 len_gen; + u32 addr_lo; + u32 addr_hi; + u32 gen2; }; struct respQ_e { @@ -179,7 +170,6 @@ struct cmdQ_ce { struct sk_buff *skb; DECLARE_PCI_UNMAP_ADDR(dma_addr); DECLARE_PCI_UNMAP_LEN(dma_len); - unsigned int single; }; struct freelQ_ce { @@ -189,44 +179,52 @@ struct freelQ_ce { }; /* - * SW Command, Freelist and Response Queue + * SW command, freelist and response rings */ struct cmdQ { - atomic_t asleep; /* HW DMA Fetch status */ - atomic_t credits; /* # available descriptors for TX */ - atomic_t pio_pidx; /* Variable updated on Doorbell */ - u16 entries_n; /* # descriptors for TX */ - u16 pidx; /* producer index (SW) */ - u16 cidx; /* consumer index (HW) */ - u8 genbit; /* current generation (=valid) bit */ - struct cmdQ_e *entries; /* HW command 
descriptor Q */ - struct cmdQ_ce *centries; /* SW command context descriptor Q */ - spinlock_t Qlock; /* Lock to protect cmdQ enqueuing */ - dma_addr_t dma_addr; /* DMA addr HW command descriptor Q */ + unsigned long status; /* HW DMA fetch status */ + unsigned int in_use; /* # of in-use command descriptors */ + unsigned int size; /* # of descriptors */ + unsigned int processed; /* total # of descs HW has processed */ + unsigned int cleaned; /* total # of descs SW has reclaimed */ + unsigned int stop_thres; /* SW TX queue suspend threshold */ + u16 pidx; /* producer index (SW) */ + u16 cidx; /* consumer index (HW) */ + u8 genbit; /* current generation (=valid) bit */ + u8 sop; /* is next entry start of packet? */ + struct cmdQ_e *entries; /* HW command descriptor Q */ + struct cmdQ_ce *centries; /* SW command context descriptor Q */ + spinlock_t lock; /* Lock to protect cmdQ enqueuing */ + dma_addr_t dma_addr; /* DMA addr HW command descriptor Q */ }; struct freelQ { - unsigned int credits; /* # of available RX buffers */ - unsigned int entries_n; /* free list capacity */ - u16 pidx; /* producer index (SW) */ - u16 cidx; /* consumer index (HW) */ + unsigned int credits; /* # of available RX buffers */ + unsigned int size; /* free list capacity */ + u16 pidx; /* producer index (SW) */ + u16 cidx; /* consumer index (HW) */ u16 rx_buffer_size; /* Buffer size on this free list */ u16 dma_offset; /* DMA offset to align IP headers */ - u8 genbit; /* current generation (=valid) bit */ - struct freelQ_e *entries; /* HW freelist descriptor Q */ - struct freelQ_ce *centries; /* SW freelist conext descriptor Q */ - dma_addr_t dma_addr; /* DMA addr HW freelist descriptor Q */ + u16 recycleq_idx; /* skb recycle q to use */ + u8 genbit; /* current generation (=valid) bit */ + struct freelQ_e *entries; /* HW freelist descriptor Q */ + struct freelQ_ce *centries; /* SW freelist context descriptor Q */ + dma_addr_t dma_addr; /* DMA addr HW freelist descriptor Q */ }; struct respQ { 
- u16 credits; /* # of available respQ descriptors */ - u16 credits_pend; /* # of not yet returned descriptors */ - u16 entries_n; /* # of response Q descriptors */ - u16 pidx; /* producer index (HW) */ - u16 cidx; /* consumer index (SW) */ - u8 genbit; /* current generation(=valid) bit */ + unsigned int credits; /* credits to be returned to SGE */ + unsigned int size; /* # of response Q descriptors */ + u16 cidx; /* consumer index (SW) */ + u8 genbit; /* current generation(=valid) bit */ struct respQ_e *entries; /* HW response descriptor Q */ - dma_addr_t dma_addr; /* DMA addr HW response descriptor Q */ + dma_addr_t dma_addr; /* DMA addr HW response descriptor Q */ +}; + +/* Bit flags for cmdQ.status */ +enum { + CMDQ_STAT_RUNNING = 1, /* fetch engine is running */ + CMDQ_STAT_LAST_PKT_DB = 2 /* last packet rung the doorbell */ }; /* @@ -239,134 +237,50 @@ struct respQ { */ struct sge { struct adapter *adapter; /* adapter backpointer */ - struct freelQ freelQ[SGE_FREELQ_N]; /* freelist Q(s) */ - struct respQ respQ; /* response Q instatiation */ + struct net_device *netdev; /* netdevice backpointer */ + struct freelQ freelQ[SGE_FREELQ_N]; /* buffer free lists */ + struct respQ respQ; /* response Q */ + unsigned long stopped_tx_queues; /* bitmap of suspended Tx queues */ unsigned int rx_pkt_pad; /* RX padding for L2 packets */ unsigned int jumbo_fl; /* jumbo freelist Q index */ - u32 intrtimer[SGE_INTR_MAXBUCKETS]; /* ! 
*/ - u32 currIndex; /* current index into intrtimer[] */ - u32 intrtimer_nres; /* no resource interrupt timer value */ - u32 sge_control; /* shadow content of sge control reg */ - struct sge_intr_counts intr_cnt; - struct timer_list ptimer; - struct sk_buff *pskb; - u32 ptimeout; - struct cmdQ cmdQ[SGE_CMDQ_N] ____cacheline_aligned; /* command Q(s)*/ + unsigned int intrtimer_nres; /* no-resource interrupt timer */ + unsigned int fixed_intrtimer;/* non-adaptive interrupt timer */ + struct timer_list tx_reclaim_timer; /* reclaims TX buffers */ + struct timer_list espibug_timer; + unsigned int espibug_timeout; + struct sk_buff *espibug_skb; + u32 sge_control; /* shadow value of sge control reg */ + struct sge_intr_counts stats; + struct sge_port_stats port_stats[MAX_NPORTS]; + struct cmdQ cmdQ[SGE_CMDQ_N] ____cacheline_aligned_in_smp; }; -static unsigned int t1_sge_tx(struct sk_buff *skb, struct adapter *adapter, - unsigned int qid); - /* * PIO to indicate that memory mapped Q contains valid descriptor(s). */ -static inline void doorbell_pio(struct sge *sge, u32 val) +static inline void doorbell_pio(struct adapter *adapter, u32 val) { wmb(); - t1_write_reg_4(sge->adapter, A_SG_DOORBELL, val); -} - -/* - * Disables the DMA engine. 
- */ -void t1_sge_stop(struct sge *sge) -{ - t1_write_reg_4(sge->adapter, A_SG_CONTROL, 0); - t1_read_reg_4(sge->adapter, A_SG_CONTROL); /* flush write */ - if (is_T2(sge->adapter)) - del_timer_sync(&sge->ptimer); -} - -static u8 ch_mac_addr[ETH_ALEN] = {0x0, 0x7, 0x43, 0x0, 0x0, 0x0}; -static void t1_espi_workaround(void *data) -{ - struct adapter *adapter = (struct adapter *)data; - struct sge *sge = adapter->sge; - - if (netif_running(adapter->port[0].dev) && - atomic_read(&sge->cmdQ[0].asleep)) { - - u32 seop = t1_espi_get_mon(adapter, 0x930, 0); - - if ((seop & 0xfff0fff) == 0xfff && sge->pskb) { - struct sk_buff *skb = sge->pskb; - if (!skb->cb[0]) { - memcpy(skb->data+sizeof(struct cpl_tx_pkt), ch_mac_addr, ETH_ALEN); - memcpy(skb->data+skb->len-10, ch_mac_addr, ETH_ALEN); - - skb->cb[0] = 0xff; - } - t1_sge_tx(skb, adapter,0); - } - } - mod_timer(&adapter->sge->ptimer, jiffies + sge->ptimeout); -} - -/* - * Enables the DMA engine. - */ -void t1_sge_start(struct sge *sge) -{ - t1_write_reg_4(sge->adapter, A_SG_CONTROL, sge->sge_control); - t1_read_reg_4(sge->adapter, A_SG_CONTROL); /* flush write */ - if (is_T2(sge->adapter)) { - init_timer(&sge->ptimer); - sge->ptimer.function = (void *)&t1_espi_workaround; - sge->ptimer.data = (unsigned long)sge->adapter; - sge->ptimer.expires = jiffies + sge->ptimeout; - add_timer(&sge->ptimer); - } -} - -/* - * Creates a t1_sge structure and returns suggested resource parameters. - */ -struct sge * __devinit t1_sge_create(struct adapter *adapter, - struct sge_params *p) -{ - struct sge *sge = kmalloc(sizeof(*sge), GFP_KERNEL); - - if (!sge) - return NULL; - memset(sge, 0, sizeof(*sge)); - - if (is_T2(adapter)) - sge->ptimeout = 1; /* finest allowed */ - - sge->adapter = adapter; - sge->rx_pkt_pad = t1_is_T1B(adapter) ? 0 : SGE_RX_OFFSET; - sge->jumbo_fl = t1_is_T1B(adapter) ? 
1 : 0; - - p->cmdQ_size[0] = SGE_CMDQ0_E_N; - p->cmdQ_size[1] = SGE_CMDQ1_E_N; - p->freelQ_size[!sge->jumbo_fl] = SGE_FREEL_SIZE; - p->freelQ_size[sge->jumbo_fl] = SGE_JUMBO_FREEL_SIZE; - p->rx_coalesce_usecs = SGE_INTRTIMER1; - p->last_rx_coalesce_raw = SGE_INTRTIMER1 * - (board_info(sge->adapter)->clock_core / 1000000); - p->default_rx_coalesce_usecs = SGE_INTRTIMER1; - p->coalesce_enable = 0; /* Turn off adaptive algorithm by default */ - p->sample_interval_usecs = 0; - return sge; + writel(val, adapter->regs + A_SG_DOORBELL); } /* * Frees all RX buffers on the freelist Q. The caller must make sure that * the SGE is turned off before calling this function. */ -static void free_freelQ_buffers(struct pci_dev *pdev, struct freelQ *Q) +static void free_freelQ_buffers(struct pci_dev *pdev, struct freelQ *q) { - unsigned int cidx = Q->cidx, credits = Q->credits; + unsigned int cidx = q->cidx; - while (credits--) { - struct freelQ_ce *ce = &Q->centries[cidx]; + while (q->credits--) { + struct freelQ_ce *ce = &q->centries[cidx]; pci_unmap_single(pdev, pci_unmap_addr(ce, dma_addr), pci_unmap_len(ce, dma_len), PCI_DMA_FROMDEVICE); dev_kfree_skb(ce->skb); ce->skb = NULL; - if (++cidx == Q->entries_n) + if (++cidx == q->size) cidx = 0; } } @@ -380,29 +294,29 @@ static void free_rx_resources(struct sge *sge) unsigned int size, i; if (sge->respQ.entries) { - size = sizeof(struct respQ_e) * sge->respQ.entries_n; + size = sizeof(struct respQ_e) * sge->respQ.size; pci_free_consistent(pdev, size, sge->respQ.entries, sge->respQ.dma_addr); } for (i = 0; i < SGE_FREELQ_N; i++) { - struct freelQ *Q = &sge->freelQ[i]; + struct freelQ *q = &sge->freelQ[i]; - if (Q->centries) { - free_freelQ_buffers(pdev, Q); - kfree(Q->centries); + if (q->centries) { + free_freelQ_buffers(pdev, q); + kfree(q->centries); } - if (Q->entries) { - size = sizeof(struct freelQ_e) * Q->entries_n; - pci_free_consistent(pdev, size, Q->entries, - Q->dma_addr); + if (q->entries) { + size = sizeof(struct freelQ_e) 
* q->size; + pci_free_consistent(pdev, size, q->entries, + q->dma_addr); } } } /* * Allocates basic RX resources, consisting of memory mapped freelist Qs and a - * response Q. + * response queue. */ static int alloc_rx_resources(struct sge *sge, struct sge_params *p) { @@ -410,21 +324,22 @@ static int alloc_rx_resources(struct sge *sge, struct sge_params *p) unsigned int size, i; for (i = 0; i < SGE_FREELQ_N; i++) { - struct freelQ *Q = &sge->freelQ[i]; + struct freelQ *q = &sge->freelQ[i]; - Q->genbit = 1; - Q->entries_n = p->freelQ_size[i]; - Q->dma_offset = SGE_RX_OFFSET - sge->rx_pkt_pad; - size = sizeof(struct freelQ_e) * Q->entries_n; - Q->entries = (struct freelQ_e *) - pci_alloc_consistent(pdev, size, &Q->dma_addr); - if (!Q->entries) + q->genbit = 1; + q->size = p->freelQ_size[i]; + q->dma_offset = sge->rx_pkt_pad ? 0 : NET_IP_ALIGN; + size = sizeof(struct freelQ_e) * q->size; + q->entries = (struct freelQ_e *) + pci_alloc_consistent(pdev, size, &q->dma_addr); + if (!q->entries) goto err_no_mem; - memset(Q->entries, 0, size); - Q->centries = kcalloc(Q->entries_n, sizeof(struct freelQ_ce), - GFP_KERNEL); - if (!Q->centries) + memset(q->entries, 0, size); + size = sizeof(struct freelQ_ce) * q->size; + q->centries = kmalloc(size, GFP_KERNEL); + if (!q->centries) goto err_no_mem; + memset(q->centries, 0, size); } /* @@ -440,10 +355,17 @@ static int alloc_rx_resources(struct sge *sge, struct sge_params *p) sge->freelQ[sge->jumbo_fl].rx_buffer_size = (16 * 1024) - SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); + /* + * Setup which skb recycle Q should be used when recycling buffers from + * each free list. 
+ */ + sge->freelQ[!sge->jumbo_fl].recycleq_idx = 0; + sge->freelQ[sge->jumbo_fl].recycleq_idx = 1; + sge->respQ.genbit = 1; - sge->respQ.entries_n = SGE_RESPQ_E_N; - sge->respQ.credits = SGE_RESPQ_E_N; - size = sizeof(struct respQ_e) * sge->respQ.entries_n; + sge->respQ.size = SGE_RESPQ_E_N; + sge->respQ.credits = 0; + size = sizeof(struct respQ_e) * sge->respQ.size; sge->respQ.entries = (struct respQ_e *) pci_alloc_consistent(pdev, size, &sge->respQ.dma_addr); if (!sge->respQ.entries) @@ -457,48 +379,37 @@ err_no_mem: } /* - * Frees 'credits_pend' TX buffers and returns the credits to Q->credits. - * - * The adaptive algorithm receives the total size of the buffers freed - * accumulated in @*totpayload. No initialization of this argument here. - * + * Reclaims n TX descriptors and frees the buffers associated with them. */ -static void free_cmdQ_buffers(struct sge *sge, struct cmdQ *Q, - unsigned int credits_pend, unsigned int *totpayload) +static void free_cmdQ_buffers(struct sge *sge, struct cmdQ *q, unsigned int n) { + struct cmdQ_ce *ce; struct pci_dev *pdev = sge->adapter->pdev; - struct sk_buff *skb; - struct cmdQ_ce *ce, *cq = Q->centries; - unsigned int entries_n = Q->entries_n, cidx = Q->cidx, - i = credits_pend; + unsigned int cidx = q->cidx; - - ce = &cq[cidx]; - while (i--) { - if (ce->single) + q->in_use -= n; + ce = &q->centries[cidx]; + while (n--) { + if (q->sop) pci_unmap_single(pdev, pci_unmap_addr(ce, dma_addr), - pci_unmap_len(ce, dma_len), + pci_unmap_len(ce, dma_len), PCI_DMA_TODEVICE); else pci_unmap_page(pdev, pci_unmap_addr(ce, dma_addr), - pci_unmap_len(ce, dma_len), + pci_unmap_len(ce, dma_len), PCI_DMA_TODEVICE); - if (totpayload) - *totpayload += pci_unmap_len(ce, dma_len); - - skb = ce->skb; - if (skb) - dev_kfree_skb_irq(skb); - + q->sop = 0; + if (ce->skb) { + dev_kfree_skb(ce->skb); + q->sop = 1; + } ce++; - if (++cidx == entries_n) { + if (++cidx == q->size) { cidx = 0; - ce = cq; + ce = q->centries; } } - - Q->cidx = cidx; - 
atomic_add(credits_pend, &Q->credits); + q->cidx = cidx; } /* @@ -512,20 +423,17 @@ static void free_tx_resources(struct sge *sge) unsigned int size, i; for (i = 0; i < SGE_CMDQ_N; i++) { - struct cmdQ *Q = &sge->cmdQ[i]; + struct cmdQ *q = &sge->cmdQ[i]; - if (Q->centries) { - unsigned int pending = Q->entries_n - - atomic_read(&Q->credits); - - if (pending) - free_cmdQ_buffers(sge, Q, pending, NULL); - kfree(Q->centries); + if (q->centries) { + if (q->in_use) + free_cmdQ_buffers(sge, q, q->in_use); + kfree(q->centries); } - if (Q->entries) { - size = sizeof(struct cmdQ_e) * Q->entries_n; - pci_free_consistent(pdev, size, Q->entries, - Q->dma_addr); + if (q->entries) { + size = sizeof(struct cmdQ_e) * q->size; + pci_free_consistent(pdev, size, q->entries, + q->dma_addr); } } } @@ -539,25 +447,38 @@ static int alloc_tx_resources(struct sge *sge, struct sge_params *p) unsigned int size, i; for (i = 0; i < SGE_CMDQ_N; i++) { - struct cmdQ *Q = &sge->cmdQ[i]; + struct cmdQ *q = &sge->cmdQ[i]; - Q->genbit = 1; - Q->entries_n = p->cmdQ_size[i]; - atomic_set(&Q->credits, Q->entries_n); - atomic_set(&Q->asleep, 1); - spin_lock_init(&Q->Qlock); - size = sizeof(struct cmdQ_e) * Q->entries_n; - Q->entries = (struct cmdQ_e *) - pci_alloc_consistent(pdev, size, &Q->dma_addr); - if (!Q->entries) + q->genbit = 1; + q->sop = 1; + q->size = p->cmdQ_size[i]; + q->in_use = 0; + q->status = 0; + q->processed = q->cleaned = 0; + q->stop_thres = 0; + spin_lock_init(&q->lock); + size = sizeof(struct cmdQ_e) * q->size; + q->entries = (struct cmdQ_e *) + pci_alloc_consistent(pdev, size, &q->dma_addr); + if (!q->entries) goto err_no_mem; - memset(Q->entries, 0, size); - Q->centries = kcalloc(Q->entries_n, sizeof(struct cmdQ_ce), - GFP_KERNEL); - if (!Q->centries) + memset(q->entries, 0, size); + size = sizeof(struct cmdQ_ce) * q->size; + q->centries = kmalloc(size, GFP_KERNEL); + if (!q->centries) goto err_no_mem; + memset(q->centries, 0, size); } + /* + * CommandQ 0 handles Ethernet and 
TOE packets, while queue 1 is TOE + * only. For queue 0 set the stop threshold so we can handle one more + * packet from each port, plus reserve an additional 24 entries for + * Ethernet packets only. Queue 1 never suspends nor do we reserve + * space for Ethernet packets. + */ + sge->cmdQ[0].stop_thres = sge->adapter->params.nports * + (MAX_SKB_FRAGS + 1); return 0; err_no_mem: @@ -569,9 +490,9 @@ static inline void setup_ring_params(struct adapter *adapter, u64 addr, u32 size, int base_reg_lo, int base_reg_hi, int size_reg) { - t1_write_reg_4(adapter, base_reg_lo, (u32)addr); - t1_write_reg_4(adapter, base_reg_hi, addr >> 32); - t1_write_reg_4(adapter, size_reg, size); + writel((u32)addr, adapter->regs + base_reg_lo); + writel(addr >> 32, adapter->regs + base_reg_hi); + writel(size, adapter->regs + size_reg); } /* @@ -585,29 +506,11 @@ void t1_set_vlan_accel(struct adapter *adapter, int on_off) if (on_off) sge->sge_control |= F_VLAN_XTRACT; if (adapter->open_device_map) { - t1_write_reg_4(adapter, A_SG_CONTROL, sge->sge_control); - t1_read_reg_4(adapter, A_SG_CONTROL); /* flush */ + writel(sge->sge_control, adapter->regs + A_SG_CONTROL); + readl(adapter->regs + A_SG_CONTROL); /* flush */ } } -/* - * Sets the interrupt latency timer when the adaptive Rx coalescing - * is turned off. Do nothing when it is turned on again. - * - * This routine relies on the fact that the caller has already set - * the adaptive policy in adapter->sge_params before calling it. -*/ -int t1_sge_set_coalesce_params(struct sge *sge, struct sge_params *p) -{ - if (!p->coalesce_enable) { - u32 newTimer = p->rx_coalesce_usecs * - (board_info(sge->adapter)->clock_core / 1000000); - - t1_write_reg_4(sge->adapter, A_SG_INTRTIMER, newTimer); - } - return 0; -} - /* * Programs the various SGE registers. However, the engine is not yet enabled, * but sge->sge_control is setup and ready to go. 
@@ -615,67 +518,40 @@ int t1_sge_set_coalesce_params(struct sge *sge, struct sge_params *p) static void configure_sge(struct sge *sge, struct sge_params *p) { struct adapter *ap = sge->adapter; - int i; - - t1_write_reg_4(ap, A_SG_CONTROL, 0); - setup_ring_params(ap, sge->cmdQ[0].dma_addr, sge->cmdQ[0].entries_n, + + writel(0, ap->regs + A_SG_CONTROL); + setup_ring_params(ap, sge->cmdQ[0].dma_addr, sge->cmdQ[0].size, A_SG_CMD0BASELWR, A_SG_CMD0BASEUPR, A_SG_CMD0SIZE); - setup_ring_params(ap, sge->cmdQ[1].dma_addr, sge->cmdQ[1].entries_n, + setup_ring_params(ap, sge->cmdQ[1].dma_addr, sge->cmdQ[1].size, A_SG_CMD1BASELWR, A_SG_CMD1BASEUPR, A_SG_CMD1SIZE); setup_ring_params(ap, sge->freelQ[0].dma_addr, - sge->freelQ[0].entries_n, A_SG_FL0BASELWR, + sge->freelQ[0].size, A_SG_FL0BASELWR, A_SG_FL0BASEUPR, A_SG_FL0SIZE); setup_ring_params(ap, sge->freelQ[1].dma_addr, - sge->freelQ[1].entries_n, A_SG_FL1BASELWR, + sge->freelQ[1].size, A_SG_FL1BASELWR, A_SG_FL1BASEUPR, A_SG_FL1SIZE); /* The threshold comparison uses <. 
*/ - t1_write_reg_4(ap, A_SG_FLTHRESHOLD, SGE_RX_SM_BUF_SIZE + 1); + writel(SGE_RX_SM_BUF_SIZE + 1, ap->regs + A_SG_FLTHRESHOLD); - setup_ring_params(ap, sge->respQ.dma_addr, sge->respQ.entries_n, - A_SG_RSPBASELWR, A_SG_RSPBASEUPR, A_SG_RSPSIZE); - t1_write_reg_4(ap, A_SG_RSPQUEUECREDIT, (u32)sge->respQ.entries_n); + setup_ring_params(ap, sge->respQ.dma_addr, sge->respQ.size, + A_SG_RSPBASELWR, A_SG_RSPBASEUPR, A_SG_RSPSIZE); + writel((u32)sge->respQ.size - 1, ap->regs + A_SG_RSPQUEUECREDIT); sge->sge_control = F_CMDQ0_ENABLE | F_CMDQ1_ENABLE | F_FL0_ENABLE | F_FL1_ENABLE | F_CPL_ENABLE | F_RESPONSE_QUEUE_ENABLE | V_CMDQ_PRIORITY(2) | F_DISABLE_CMDQ1_GTS | F_ISCSI_COALESCE | + F_DISABLE_FL0_GTS | F_DISABLE_FL1_GTS | V_RX_PKT_OFFSET(sge->rx_pkt_pad); #if defined(__BIG_ENDIAN_BITFIELD) sge->sge_control |= F_ENABLE_BIG_ENDIAN; #endif - /* - * Initialize the SGE Interrupt Timer arrray: - * intrtimer[0] = (SGE_INTRTIMER0) usec - * intrtimer[0<i<5] = (SGE_INTRTIMER0 + i*2) usec - * intrtimer[4<i<10] = ((i - 3) * 6) usec - * intrtimer[10] = (SGE_INTRTIMER1) usec - * - */ - sge->intrtimer[0] = board_info(sge->adapter)->clock_core / 1000000; - for (i = 1; i < SGE_INTR_LATBUCKETS; ++i) { - sge->intrtimer[i] = SGE_INTRTIMER0 + (2 * i); - sge->intrtimer[i] *= sge->intrtimer[0]; - } - for (i = SGE_INTR_LATBUCKETS; i < SGE_INTR_MAXBUCKETS - 1; ++i) { - sge->intrtimer[i] = (i - 3) * 6; - sge->intrtimer[i] *= sge->intrtimer[0]; - } - sge->intrtimer[SGE_INTR_MAXBUCKETS - 1] = - sge->intrtimer[0] * SGE_INTRTIMER1; - /* Initialize resource timer */ - sge->intrtimer_nres = sge->intrtimer[0] * SGE_INTRTIMER_NRES; - /* Finally finish initialization of intrtimer[0] */ - sge->intrtimer[0] *= SGE_INTRTIMER0; - /* Initialize for a throughput oriented workload */ - sge->currIndex = SGE_INTR_MAXBUCKETS - 1; + /* Initialize no-resource timer */ + sge->intrtimer_nres = SGE_INTRTIMER_NRES * core_ticks_per_usec(ap); - if (p->coalesce_enable) - t1_write_reg_4(ap, A_SG_INTRTIMER, - sge->intrtimer[sge->currIndex]); - else - t1_sge_set_coalesce_params(sge, p); + t1_sge_set_coalesce_params(sge, p); } /* @@ -684,7 +560,998 @@ static 
void configure_sge(struct sge *sge, struct sge_params *p) static inline unsigned int jumbo_payload_capacity(const struct sge *sge) { return sge->freelQ[sge->jumbo_fl].rx_buffer_size - - sizeof(struct cpl_rx_data) - SGE_RX_OFFSET + sge->rx_pkt_pad; + sge->freelQ[sge->jumbo_fl].dma_offset - + sizeof(struct cpl_rx_data); +} + +/* + * Frees all SGE related resources and the sge structure itself + */ +void t1_sge_destroy(struct sge *sge) +{ + if (sge->espibug_skb) + kfree_skb(sge->espibug_skb); + + free_tx_resources(sge); + free_rx_resources(sge); + kfree(sge); +} + +/* + * Allocates new RX buffers on the freelist Q (and tracks them on the freelist + * context Q) until the Q is full or alloc_skb fails. + * + * It is possible that the generation bits already match, indicating that the + * buffer is already valid and nothing needs to be done. This happens when we + * copied a received buffer into a new sk_buff during the interrupt processing. + * + * If the SGE doesn't automatically align packets properly (!sge->rx_pkt_pad), + * we specify a RX_OFFSET in order to make sure that the IP header is 4B + * aligned. 
+ */ +static void refill_free_list(struct sge *sge, struct freelQ *q) +{ + struct pci_dev *pdev = sge->adapter->pdev; + struct freelQ_ce *ce = &q->centries[q->pidx]; + struct freelQ_e *e = &q->entries[q->pidx]; + unsigned int dma_len = q->rx_buffer_size - q->dma_offset; + + + while (q->credits < q->size) { + struct sk_buff *skb; + dma_addr_t mapping; + + skb = alloc_skb(q->rx_buffer_size, GFP_ATOMIC); + if (!skb) + break; + + skb_reserve(skb, q->dma_offset); + mapping = pci_map_single(pdev, skb->data, dma_len, + PCI_DMA_FROMDEVICE); + ce->skb = skb; + pci_unmap_addr_set(ce, dma_addr, mapping); + pci_unmap_len_set(ce, dma_len, dma_len); + e->addr_lo = (u32)mapping; + e->addr_hi = (u64)mapping >> 32; + e->len_gen = V_CMD_LEN(dma_len) | V_CMD_GEN1(q->genbit); + wmb(); + e->gen2 = V_CMD_GEN2(q->genbit); + + e++; + ce++; + if (++q->pidx == q->size) { + q->pidx = 0; + q->genbit ^= 1; + ce = q->centries; + e = q->entries; + } + q->credits++; + } + +} + +/* + * Calls refill_free_list for both free lists. If we cannot fill at least 1/4 + * of both rings, we go into 'few interrupt mode' in order to give the system + * time to free up resources. 
+ */ +static void freelQs_empty(struct sge *sge) +{ + struct adapter *adapter = sge->adapter; + u32 irq_reg = readl(adapter->regs + A_SG_INT_ENABLE); + u32 irqholdoff_reg; + + refill_free_list(sge, &sge->freelQ[0]); + refill_free_list(sge, &sge->freelQ[1]); + + if (sge->freelQ[0].credits > (sge->freelQ[0].size >> 2) && + sge->freelQ[1].credits > (sge->freelQ[1].size >> 2)) { + irq_reg |= F_FL_EXHAUSTED; + irqholdoff_reg = sge->fixed_intrtimer; + } else { + /* Clear the F_FL_EXHAUSTED interrupts for now */ + irq_reg &= ~F_FL_EXHAUSTED; + irqholdoff_reg = sge->intrtimer_nres; + } + writel(irqholdoff_reg, adapter->regs + A_SG_INTRTIMER); + writel(irq_reg, adapter->regs + A_SG_INT_ENABLE); + + /* We reenable the Qs to force a freelist GTS interrupt later */ + doorbell_pio(adapter, F_FL0_ENABLE | F_FL1_ENABLE); +} + +#define SGE_PL_INTR_MASK (F_PL_INTR_SGE_ERR | F_PL_INTR_SGE_DATA) +#define SGE_INT_FATAL (F_RESPQ_OVERFLOW | F_PACKET_TOO_BIG | F_PACKET_MISMATCH) +#define SGE_INT_ENABLE (F_RESPQ_EXHAUSTED | F_RESPQ_OVERFLOW | \ + F_FL_EXHAUSTED | F_PACKET_TOO_BIG | F_PACKET_MISMATCH) + +/* + * Disable SGE Interrupts + */ +void t1_sge_intr_disable(struct sge *sge) +{ + u32 val = readl(sge->adapter->regs + A_PL_ENABLE); + + writel(val & ~SGE_PL_INTR_MASK, sge->adapter->regs + A_PL_ENABLE); + writel(0, sge->adapter->regs + A_SG_INT_ENABLE); +} + +/* + * Enable SGE interrupts. + */ +void t1_sge_intr_enable(struct sge *sge) +{ + u32 en = SGE_INT_ENABLE; + u32 val = readl(sge->adapter->regs + A_PL_ENABLE); + + if (sge->adapter->flags & TSO_CAPABLE) + en &= ~F_PACKET_TOO_BIG; + writel(en, sge->adapter->regs + A_SG_INT_ENABLE); + writel(val | SGE_PL_INTR_MASK, sge->adapter->regs + A_PL_ENABLE); +} + +/* + * Clear SGE interrupts. 
+ */ +void t1_sge_intr_clear(struct sge *sge) +{ + writel(SGE_PL_INTR_MASK, sge->adapter->regs + A_PL_CAUSE); + writel(0xffffffff, sge->adapter->regs + A_SG_INT_CAUSE); +} + +/* + * SGE 'Error' interrupt handler + */ +int t1_sge_intr_error_handler(struct sge *sge) +{ + struct adapter *adapter = sge->adapter; + u32 cause = readl(adapter->regs + A_SG_INT_CAUSE); + + if (adapter->flags & TSO_CAPABLE) + cause &= ~F_PACKET_TOO_BIG; + if (cause & F_RESPQ_EXHAUSTED) + sge->stats.respQ_empty++; + if (cause & F_RESPQ_OVERFLOW) { + sge->stats.respQ_overflow++; + CH_ALERT("%s: SGE response queue overflow\n", + adapter->name); + } + if (cause & F_FL_EXHAUSTED) { + sge->stats.freelistQ_empty++; + freelQs_empty(sge); + } + if (cause & F_PACKET_TOO_BIG) { + sge->stats.pkt_too_big++; + CH_ALERT("%s: SGE max packet size exceeded\n", + adapter->name); + } + if (cause & F_PACKET_MISMATCH) { + sge->stats.pkt_mismatch++; + CH_ALERT("%s: SGE packet mismatch\n", adapter->name); + } + if (cause & SGE_INT_FATAL) + t1_fatal_err(adapter); + + writel(cause, adapter->regs + A_SG_INT_CAUSE); + return 0; +} + +const struct sge_intr_counts *t1_sge_get_intr_counts(struct sge *sge) +{ + return &sge->stats; +} + +const struct sge_port_stats *t1_sge_get_port_stats(struct sge *sge, int port) +{ + return &sge->port_stats[port]; +} + +/** + * recycle_fl_buf - recycle a free list buffer + * @fl: the free list + * @idx: index of buffer to recycle + * + * Recycles the specified buffer on the given free list by adding it at + * the next available slot on the list. 
+ */ +static void recycle_fl_buf(struct freelQ *fl, int idx) +{ + struct freelQ_e *from = &fl->entries[idx]; + struct freelQ_e *to = &fl->entries[fl->pidx]; + + fl->centries[fl->pidx] = fl->centries[idx]; + to->addr_lo = from->addr_lo; + to->addr_hi = from->addr_hi; + to->len_gen = G_CMD_LEN(from->len_gen) | V_CMD_GEN1(fl->genbit); + wmb(); + to->gen2 = V_CMD_GEN2(fl->genbit); + fl->credits++; + + if (++fl->pidx == fl->size) { + fl->pidx = 0; + fl->genbit ^= 1; + } +} + +/** + * get_packet - return the next ingress packet buffer + * @pdev: the PCI device that received the packet + * @fl: the SGE free list holding the packet + * @len: the actual packet length, excluding any SGE padding + * @dma_pad: padding at beginning of buffer left by SGE DMA + * @skb_pad: padding to be used if the packet is copied + * @copy_thres: length threshold under which a packet should be copied + * @drop_thres: # of remaining buffers before we start dropping packets + * + * Get the next packet from a free list and complete setup of the + * sk_buff. If the packet is small we make a copy and recycle the + * original buffer, otherwise we use the original buffer itself. If a + * positive drop threshold is supplied packets are dropped and their + * buffers recycled if (a) the number of remaining buffers is under the + * threshold and the packet is too big to copy, or (b) the packet should + * be copied but there is no memory for the copy. 
+ */ +static inline struct sk_buff *get_packet(struct pci_dev *pdev, + struct freelQ *fl, unsigned int len, + int dma_pad, int skb_pad, + unsigned int copy_thres, + unsigned int drop_thres) +{ + struct sk_buff *skb; + struct freelQ_ce *ce = &fl->centries[fl->cidx]; + + if (len < copy_thres) { + skb = alloc_skb(len + skb_pad, GFP_ATOMIC); + if (likely(skb != NULL)) { + skb_reserve(skb, skb_pad); + skb_put(skb, len); + pci_dma_sync_single_for_cpu(pdev, + pci_unmap_addr(ce, dma_addr), + pci_unmap_len(ce, dma_len), + PCI_DMA_FROMDEVICE); + memcpy(skb->data, ce->skb->data + dma_pad, len); + pci_dma_sync_single_for_device(pdev, + pci_unmap_addr(ce, dma_addr), + pci_unmap_len(ce, dma_len), + PCI_DMA_FROMDEVICE); + } else if (!drop_thres) + goto use_orig_buf; + + recycle_fl_buf(fl, fl->cidx); + return skb; + } + + if (fl->credits < drop_thres) { + recycle_fl_buf(fl, fl->cidx); + return NULL; + } + +use_orig_buf: + pci_unmap_single(pdev, pci_unmap_addr(ce, dma_addr), + pci_unmap_len(ce, dma_len), PCI_DMA_FROMDEVICE); + skb = ce->skb; + skb_reserve(skb, dma_pad); + skb_put(skb, len); + return skb; +} + +/** + * unexpected_offload - handle an unexpected offload packet + * @adapter: the adapter + * @fl: the free list that received the packet + * + * Called when we receive an unexpected offload packet (e.g., the TOE + * function is disabled or the card is a NIC). Prints a message and + * recycles the buffer. + */ +static void unexpected_offload(struct adapter *adapter, struct freelQ *fl) +{ + struct freelQ_ce *ce = &fl->centries[fl->cidx]; + struct sk_buff *skb = ce->skb; + + pci_dma_sync_single_for_cpu(adapter->pdev, pci_unmap_addr(ce, dma_addr), + pci_unmap_len(ce, dma_len), PCI_DMA_FROMDEVICE); + CH_ERR("%s: unexpected offload packet, cmd %u\n", + adapter->name, *skb->data); + recycle_fl_buf(fl, fl->cidx); +} + +/* + * Write the command descriptors to transmit the given skb starting at + * descriptor pidx with the given generation. 
+ */ +static inline void write_tx_descs(struct adapter *adapter, struct sk_buff *skb, + unsigned int pidx, unsigned int gen, + struct cmdQ *q) +{ + dma_addr_t mapping; + struct cmdQ_e *e, *e1; + struct cmdQ_ce *ce; + unsigned int i, flags, nfrags = skb_shinfo(skb)->nr_frags; + + mapping = pci_map_single(adapter->pdev, skb->data, + skb->len - skb->data_len, PCI_DMA_TODEVICE); + ce = &q->centries[pidx]; + ce->skb = NULL; + pci_unmap_addr_set(ce, dma_addr, mapping); + pci_unmap_len_set(ce, dma_len, skb->len - skb->data_len); + + flags = F_CMD_DATAVALID | F_CMD_SOP | V_CMD_EOP(nfrags == 0) | + V_CMD_GEN2(gen); + e = &q->entries[pidx]; + e->addr_lo = (u32)mapping; + e->addr_hi = (u64)mapping >> 32; + e->len_gen = V_CMD_LEN(skb->len - skb->data_len) | V_CMD_GEN1(gen); + for (e1 = e, i = 0; nfrags--; i++) { + skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; + + ce++; + e1++; + if (++pidx == q->size) { + pidx = 0; + gen ^= 1; + ce = q->centries; + e1 = q->entries; + } + + mapping = pci_map_page(adapter->pdev, frag->page, + frag->page_offset, frag->size, + PCI_DMA_TODEVICE); + ce->skb = NULL; + pci_unmap_addr_set(ce, dma_addr, mapping); + pci_unmap_len_set(ce, dma_len, frag->size); + + e1->addr_lo = (u32)mapping; + e1->addr_hi = (u64)mapping >> 32; + e1->len_gen = V_CMD_LEN(frag->size) | V_CMD_GEN1(gen); + e1->flags = F_CMD_DATAVALID | V_CMD_EOP(nfrags == 0) | + V_CMD_GEN2(gen); + } + + ce->skb = skb; + wmb(); + e->flags = flags; +} + +/* + * Clean up completed Tx buffers. 
+ */ +static inline void reclaim_completed_tx(struct sge *sge, struct cmdQ *q) +{ + unsigned int reclaim = q->processed - q->cleaned; + + if (reclaim) { + free_cmdQ_buffers(sge, q, reclaim); + q->cleaned += reclaim; + } +} + +#ifndef SET_ETHTOOL_OPS +# define __netif_rx_complete(dev) netif_rx_complete(dev) +#endif + +/* + * We cannot use the standard netif_rx_schedule_prep() because we have multiple + * ports plus the TOE all multiplexing onto a single response queue, therefore + * accepting new responses cannot depend on the state of any particular port. + * So define our own equivalent that omits the netif_running() test. + */ +static inline int napi_schedule_prep(struct net_device *dev) +{ + return !test_and_set_bit(__LINK_STATE_RX_SCHED, &dev->state); +} + + +/** + * sge_rx - process an ingress ethernet packet + * @sge: the sge structure + * @fl: the free list that contains the packet buffer + * @len: the packet length + * + * Process an ingress ethernet packet and deliver it to the stack. 
+ */ +static int sge_rx(struct sge *sge, struct freelQ *fl, unsigned int len) +{ + struct sk_buff *skb; + struct cpl_rx_pkt *p; + struct adapter *adapter = sge->adapter; + + sge->stats.ethernet_pkts++; + skb = get_packet(adapter->pdev, fl, len - sge->rx_pkt_pad, + sge->rx_pkt_pad, 2, SGE_RX_COPY_THRES, + SGE_RX_DROP_THRES); + if (!skb) { + sge->port_stats[0].rx_drops++; /* charge only port 0 for now */ + return 0; + } + + p = (struct cpl_rx_pkt *)skb->data; + skb_pull(skb, sizeof(*p)); + skb->dev = adapter->port[p->iff].dev; + skb->dev->last_rx = jiffies; + skb->protocol = eth_type_trans(skb, skb->dev); + if ((adapter->flags & RX_CSUM_ENABLED) && p->csum == 0xffff && + skb->protocol == htons(ETH_P_IP) && + (skb->data[9] == IPPROTO_TCP || skb->data[9] == IPPROTO_UDP)) { + sge->port_stats[p->iff].rx_cso_good++; + skb->ip_summed = CHECKSUM_UNNECESSARY; + } else + skb->ip_summed = CHECKSUM_NONE; + + if (unlikely(adapter->vlan_grp && p->vlan_valid)) { + sge->port_stats[p->iff].vlan_xtract++; + if (adapter->params.sge.polling) + vlan_hwaccel_receive_skb(skb, adapter->vlan_grp, + ntohs(p->vlan)); + else + vlan_hwaccel_rx(skb, adapter->vlan_grp, + ntohs(p->vlan)); + } else if (adapter->params.sge.polling) + netif_receive_skb(skb); + else + netif_rx(skb); + return 0; +} + +/* + * Returns true if a command queue has enough available descriptors that + * we can resume Tx operation after temporarily disabling its packet queue. + */ +static inline int enough_free_Tx_descs(const struct cmdQ *q) +{ + unsigned int r = q->processed - q->cleaned; + + return q->in_use - r < (q->size >> 1); +} + +/* + * Called when sufficient space has become available in the SGE command queues + * after the Tx packet schedulers have been suspended to restart the Tx path. 
+ */ +static void restart_tx_queues(struct sge *sge) +{ + struct adapter *adap = sge->adapter; + + if (enough_free_Tx_descs(&sge->cmdQ[0])) { + int i; + + for_each_port(adap, i) { + struct net_device *nd = adap->port[i].dev; + + if (test_and_clear_bit(nd->if_port, + &sge->stopped_tx_queues) && + netif_running(nd)) { + sge->stats.cmdQ_restarted[3]++; + netif_wake_queue(nd); + } + } + } +} + +/* + * update_tx_info is called from the interrupt handler/NAPI to return cmdQ0 + * information. + */ +static unsigned int update_tx_info(struct adapter *adapter, + unsigned int flags, + unsigned int pr0) +{ + struct sge *sge = adapter->sge; + struct cmdQ *cmdq = &sge->cmdQ[0]; + + cmdq->processed += pr0; + + if (flags & F_CMDQ0_ENABLE) { + clear_bit(CMDQ_STAT_RUNNING, &cmdq->status); + + if (cmdq->cleaned + cmdq->in_use != cmdq->processed && + !test_and_set_bit(CMDQ_STAT_LAST_PKT_DB, &cmdq->status)) { + set_bit(CMDQ_STAT_RUNNING, &cmdq->status); + writel(F_CMDQ0_ENABLE, adapter->regs + A_SG_DOORBELL); + } + flags &= ~F_CMDQ0_ENABLE; + } + + if (unlikely(sge->stopped_tx_queues != 0)) + restart_tx_queues(sge); + + return flags; +} + +/* + * Process SGE responses, up to the supplied budget. Returns the number of + * responses processed. A negative budget is effectively unlimited. + */ +static int process_responses(struct adapter *adapter, int budget) +{ + struct sge *sge = adapter->sge; + struct respQ *q = &sge->respQ; + struct respQ_e *e = &q->entries[q->cidx]; + int budget_left = budget; + unsigned int flags = 0; + unsigned int cmdq_processed[SGE_CMDQ_N] = {0, 0}; + + + while (likely(budget_left && e->GenerationBit == q->genbit)) { + flags |= e->Qsleeping; + + cmdq_processed[0] += e->Cmdq0CreditReturn; + cmdq_processed[1] += e->Cmdq1CreditReturn; + + /* We batch updates to the TX side to avoid cacheline + * ping-pong of TX state information on MP where the sender + * might run on a different CPU than this function... 
+ */ + if (unlikely(flags & F_CMDQ0_ENABLE || cmdq_processed[0] > 64)) { + flags = update_tx_info(adapter, flags, cmdq_processed[0]); + cmdq_processed[0] = 0; + } + if (unlikely(cmdq_processed[1] > 16)) { + sge->cmdQ[1].processed += cmdq_processed[1]; + cmdq_processed[1] = 0; + } + if (likely(e->DataValid)) { + struct freelQ *fl = &sge->freelQ[e->FreelistQid]; + + if (unlikely(!e->Sop || !e->Eop)) + BUG(); + if (unlikely(e->Offload)) + unexpected_offload(adapter, fl); + else + sge_rx(sge, fl, e->BufferLength); + + /* + * Note: this depends on each packet consuming a + * single free-list buffer; cf. the BUG above. + */ + if (++fl->cidx == fl->size) + fl->cidx = 0; + if (unlikely(--fl->credits < + fl->size - SGE_FREEL_REFILL_THRESH)) + refill_free_list(sge, fl); + } else + sge->stats.pure_rsps++; + + e++; + if (unlikely(++q->cidx == q->size)) { + q->cidx = 0; + q->genbit ^= 1; + e = q->entries; + } + prefetch(e); + + if (++q->credits > SGE_RESPQ_REPLENISH_THRES) { + writel(q->credits, adapter->regs + A_SG_RSPQUEUECREDIT); + q->credits = 0; + } + --budget_left; + } + + flags = update_tx_info(adapter, flags, cmdq_processed[0]); + sge->cmdQ[1].processed += cmdq_processed[1]; + + budget -= budget_left; + return budget; +} + +/* + * A simpler version of process_responses() that handles only pure (i.e., + * non data-carrying) responses. Such responses are too light-weight to justify + * calling a softirq when using NAPI, so we handle them specially in hard + * interrupt context. The function is called with a pointer to a response, + * which the caller must ensure is a valid pure response. Returns 1 if it + * encounters a valid data-carrying response, 0 otherwise. 
+ */ +static int process_pure_responses(struct adapter *adapter, struct respQ_e *e) +{ + struct sge *sge = adapter->sge; + struct respQ *q = &sge->respQ; + unsigned int flags = 0; + unsigned int cmdq_processed[SGE_CMDQ_N] = {0, 0}; + + do { + flags |= e->Qsleeping; + + cmdq_processed[0] += e->Cmdq0CreditReturn; + cmdq_processed[1] += e->Cmdq1CreditReturn; + + e++; + if (unlikely(++q->cidx == q->size)) { + q->cidx = 0; + q->genbit ^= 1; + e = q->entries; + } + prefetch(e); + + if (++q->credits > SGE_RESPQ_REPLENISH_THRES) { + writel(q->credits, adapter->regs + A_SG_RSPQUEUECREDIT); + q->credits = 0; + } + sge->stats.pure_rsps++; + } while (e->GenerationBit == q->genbit && !e->DataValid); + + flags = update_tx_info(adapter, flags, cmdq_processed[0]); + sge->cmdQ[1].processed += cmdq_processed[1]; + + return e->GenerationBit == q->genbit; +} + +/* + * Handler for new data events when using NAPI. This does not need any locking + * or protection from interrupts as data interrupts are off at this point and + * other adapter interrupts do not interfere. + */ +static int t1_poll(struct net_device *dev, int *budget) +{ + struct adapter *adapter = dev->priv; + int effective_budget = min(*budget, dev->quota); + + int work_done = process_responses(adapter, effective_budget); + *budget -= work_done; + dev->quota -= work_done; + + if (work_done >= effective_budget) + return 1; + + __netif_rx_complete(dev); + + /* + * Because we don't atomically flush the following write it is + * possible that in very rare cases it can reach the device in a way + * that races with a new response being written plus an error interrupt + * causing the NAPI interrupt handler below to return unhandled status + * to the OS. To protect against this would require flushing the write + * and doing both the write and the flush with interrupts off. Way too + * expensive and unjustifiable given the rarity of the race. 
+ */ + writel(adapter->sge->respQ.cidx, adapter->regs + A_SG_SLEEPING); + return 0; +} + +/* + * Returns true if the device is already scheduled for polling. + */ +static inline int napi_is_scheduled(struct net_device *dev) +{ + return test_bit(__LINK_STATE_RX_SCHED, &dev->state); +} + +/* + * NAPI version of the main interrupt handler. + */ +static irqreturn_t t1_interrupt_napi(int irq, void *data, struct pt_regs *regs) +{ + int handled; + struct adapter *adapter = data; + struct sge *sge = adapter->sge; + struct respQ *q = &adapter->sge->respQ; + + /* + * Clear the SGE_DATA interrupt first thing. Normally the NAPI + * handler has control of the response queue and the interrupt handler + * can look at the queue reliably only once it knows NAPI is off. + * We can't wait that long to clear the SGE_DATA interrupt because we + * could race with t1_poll rearming the SGE interrupt, so we need to + * clear the interrupt speculatively and really early on. + */ + writel(F_PL_INTR_SGE_DATA, adapter->regs + A_PL_CAUSE); + + spin_lock(&adapter->async_lock); + if (!napi_is_scheduled(sge->netdev)) { + struct respQ_e *e = &q->entries[q->cidx]; + + if (e->GenerationBit == q->genbit) { + if (e->DataValid || + process_pure_responses(adapter, e)) { + if (likely(napi_schedule_prep(sge->netdev))) + __netif_rx_schedule(sge->netdev); + else + printk(KERN_CRIT + "NAPI schedule failure!\n"); + } else + writel(q->cidx, adapter->regs + A_SG_SLEEPING); + handled = 1; + goto unlock; + } else + writel(q->cidx, adapter->regs + A_SG_SLEEPING); + } else + if (readl(adapter->regs + A_PL_CAUSE) & F_PL_INTR_SGE_DATA) + printk(KERN_ERR "data interrupt while NAPI running\n"); + + handled = t1_slow_intr_handler(adapter); + if (!handled) + sge->stats.unhandled_irqs++; + unlock: + spin_unlock(&adapter->async_lock); + return IRQ_RETVAL(handled != 0); +} + +/* + * Main interrupt handler, optimized assuming that we took a 'DATA' + * interrupt. + * + * 1. Clear the interrupt + * 2. 
Loop while we find valid descriptors and process them; accumulate + * information that can be processed after the loop + * 3. Tell the SGE at which index we stopped processing descriptors + * 4. Bookkeeping; free TX buffers, ring doorbell if there are any + * outstanding TX buffers waiting, replenish RX buffers, potentially + * reenable upper layers if they were turned off due to lack of TX + * resources which are available again. + * 5. If we took an interrupt, but no valid respQ descriptor was found we + * let the slow_intr_handler run and do error handling. + */ +static irqreturn_t t1_interrupt(int irq, void *cookie, struct pt_regs *regs) +{ + int work_done; + struct respQ_e *e; + struct adapter *adapter = cookie; + struct respQ *Q = &adapter->sge->respQ; + + spin_lock(&adapter->async_lock); + e = &Q->entries[Q->cidx]; + prefetch(e); + + writel(F_PL_INTR_SGE_DATA, adapter->regs + A_PL_CAUSE); + + if (likely(e->GenerationBit == Q->genbit)) + work_done = process_responses(adapter, -1); + else + work_done = t1_slow_intr_handler(adapter); + + /* + * The unconditional clearing of the PL_CAUSE above may have raced + * with DMA completion and the corresponding generation of a response + * to cause us to miss the resulting data interrupt. The next write + * is also unconditional to recover the missed interrupt and render + * this race harmless. + */ + writel(Q->cidx, adapter->regs + A_SG_SLEEPING); + + if (!work_done) + adapter->sge->stats.unhandled_irqs++; + spin_unlock(&adapter->async_lock); + return IRQ_RETVAL(work_done != 0); +} + +intr_handler_t t1_select_intr_handler(adapter_t *adapter) +{ + return adapter->params.sge.polling ? t1_interrupt_napi : t1_interrupt; +} + +/* + * Enqueues the sk_buff onto the cmdQ[qid] and has hardware fetch it. + * + * The code figures out how many entries the sk_buff will require in the + * cmdQ and updates the cmdQ data structure with the state once the enqueue + * has completed. 
Then, it doesn't access the global structure anymore, but + * uses the corresponding fields on the stack. In conjunction with a spinlock + * around that code, we can make the function reentrant without holding the + * lock when we actually enqueue (which might be expensive, especially on + * architectures with IO MMUs). + * + * This runs with softirqs disabled. + */ +unsigned int t1_sge_tx(struct sk_buff *skb, struct adapter *adapter, + unsigned int qid, struct net_device *dev) +{ + struct sge *sge = adapter->sge; + struct cmdQ *q = &sge->cmdQ[qid]; + unsigned int credits, pidx, genbit, count; + + spin_lock(&q->lock); + reclaim_completed_tx(sge, q); + + pidx = q->pidx; + credits = q->size - q->in_use; + count = 1 + skb_shinfo(skb)->nr_frags; + + { /* Ethernet packet */ + if (unlikely(credits < count)) { + netif_stop_queue(dev); + set_bit(dev->if_port, &sge->stopped_tx_queues); + sge->stats.cmdQ_full[3]++; + spin_unlock(&q->lock); + CH_ERR("%s: Tx ring full while queue awake!\n", + adapter->name); + return 1; + } + if (unlikely(credits - count < q->stop_thres)) { + sge->stats.cmdQ_full[3]++; + netif_stop_queue(dev); + set_bit(dev->if_port, &sge->stopped_tx_queues); + } + } + q->in_use += count; + genbit = q->genbit; + q->pidx += count; + if (q->pidx >= q->size) { + q->pidx -= q->size; + q->genbit ^= 1; + } + spin_unlock(&q->lock); + + write_tx_descs(adapter, skb, pidx, genbit, q); + + /* + * We always ring the doorbell for cmdQ1. For cmdQ0, we only ring + * the doorbell if the Q is asleep. There is a natural race, where + * the hardware is going to sleep just after we checked, however, + * then the interrupt handler will detect the outstanding TX packet + * and ring the doorbell for us.
+ */ + if (qid) + doorbell_pio(adapter, F_CMDQ1_ENABLE); + else { + clear_bit(CMDQ_STAT_LAST_PKT_DB, &q->status); + if (test_and_set_bit(CMDQ_STAT_RUNNING, &q->status) == 0) { + set_bit(CMDQ_STAT_LAST_PKT_DB, &q->status); + writel(F_CMDQ0_ENABLE, adapter->regs + A_SG_DOORBELL); + } + } + return 0; +} + +#define MK_ETH_TYPE_MSS(type, mss) (((mss) & 0x3FFF) | ((type) << 14)) + +/* + * eth_hdr_len - return the length of an Ethernet header + * @data: pointer to the start of the Ethernet header + * + * Returns the length of an Ethernet header, including optional VLAN tag. + */ +static inline int eth_hdr_len(const void *data) +{ + const struct ethhdr *e = data; + + return e->h_proto == htons(ETH_P_8021Q) ? VLAN_ETH_HLEN : ETH_HLEN; +} + +/* + * Adds the CPL header to the sk_buff and passes it to t1_sge_tx. + */ +int t1_start_xmit(struct sk_buff *skb, struct net_device *dev) +{ + struct adapter *adapter = dev->priv; + struct sge_port_stats *st = &adapter->sge->port_stats[dev->if_port]; + struct sge *sge = adapter->sge; + struct cpl_tx_pkt *cpl; + +#ifdef NETIF_F_TSO + if (skb_shinfo(skb)->tso_size) { + int eth_type; + struct cpl_tx_pkt_lso *hdr; + + st->tso++; + + eth_type = skb->nh.raw - skb->data == ETH_HLEN ? + CPL_ETH_II : CPL_ETH_II_VLAN; + + hdr = (struct cpl_tx_pkt_lso *)skb_push(skb, sizeof(*hdr)); + hdr->opcode = CPL_TX_PKT_LSO; + hdr->ip_csum_dis = hdr->l4_csum_dis = 0; + hdr->ip_hdr_words = skb->nh.iph->ihl; + hdr->tcp_hdr_words = skb->h.th->doff; + hdr->eth_type_mss = htons(MK_ETH_TYPE_MSS(eth_type, + skb_shinfo(skb)->tso_size)); + hdr->len = htonl(skb->len - sizeof(*hdr)); + cpl = (struct cpl_tx_pkt *)hdr; + sge->stats.tx_lso_pkts++; + } else +#endif + { + /* + * Packets shorter than ETH_HLEN can break the MAC, drop them + * early. Also, we may get oversized packets because some + * parts of the kernel don't handle our unusual hard_header_len + * right, drop those too. 
+ */ + if (unlikely(skb->len < ETH_HLEN || + skb->len > dev->mtu + eth_hdr_len(skb->data))) { + dev_kfree_skb_any(skb); + return NET_XMIT_SUCCESS; + } + + /* + * We are using a non-standard hard_header_len and some kernel + * components, such as pktgen, do not handle it right. + * Complain when this happens but try to fix things up. + */ + if (unlikely(skb_headroom(skb) < + dev->hard_header_len - ETH_HLEN)) { + struct sk_buff *orig_skb = skb; + + if (net_ratelimit()) + printk(KERN_ERR "%s: inadequate headroom in " + "Tx packet\n", dev->name); + skb = skb_realloc_headroom(skb, sizeof(*cpl)); + dev_kfree_skb_any(orig_skb); + if (!skb) + return -ENOMEM; + } + + if (!(adapter->flags & UDP_CSUM_CAPABLE) && + skb->ip_summed == CHECKSUM_HW && + skb->nh.iph->protocol == IPPROTO_UDP) + if (unlikely(skb_checksum_help(skb, 0))) { + dev_kfree_skb_any(skb); + return -ENOMEM; + } + + /* Hmmm, assuming to catch the gratuitous ARP... and we'll use + * it to flush out stuck espi packets... + */ + if (unlikely(!adapter->sge->espibug_skb)) { + if (skb->protocol == htons(ETH_P_ARP) && + skb->nh.arph->ar_op == htons(ARPOP_REQUEST)) { + adapter->sge->espibug_skb = skb; + /* We want to re-use this skb later. We + * simply bump the reference count and it + * will not be freed... + */ + skb = skb_get(skb); + } + } + + cpl = (struct cpl_tx_pkt *)__skb_push(skb, sizeof(*cpl)); + cpl->opcode = CPL_TX_PKT; + cpl->ip_csum_dis = 1; /* SW calculates IP csum */ + cpl->l4_csum_dis = skb->ip_summed == CHECKSUM_HW ?
0 : 1; + /* the length field isn't used so don't bother setting it */ + + st->tx_cso += (skb->ip_summed == CHECKSUM_HW); + sge->stats.tx_do_cksum += (skb->ip_summed == CHECKSUM_HW); + sge->stats.tx_reg_pkts++; + } + cpl->iff = dev->if_port; + +#if defined(CONFIG_VLAN_8021Q) || defined(CONFIG_VLAN_8021Q_MODULE) + if (adapter->vlan_grp && vlan_tx_tag_present(skb)) { + cpl->vlan_valid = 1; + cpl->vlan = htons(vlan_tx_tag_get(skb)); + st->vlan_insert++; + } else +#endif + cpl->vlan_valid = 0; + + dev->trans_start = jiffies; + return t1_sge_tx(skb, adapter, 0, dev); +} + +/* + * Callback for the Tx buffer reclaim timer. Runs with softirqs disabled. + */ +static void sge_tx_reclaim_cb(unsigned long data) +{ + int i; + struct sge *sge = (struct sge *)data; + + for (i = 0; i < SGE_CMDQ_N; ++i) { + struct cmdQ *q = &sge->cmdQ[i]; + + if (!spin_trylock(&q->lock)) + continue; + + reclaim_completed_tx(sge, q); + if (i == 0 && q->in_use) /* flush pending credits */ + writel(F_CMDQ0_ENABLE, + sge->adapter->regs + A_SG_DOORBELL); + + spin_unlock(&q->lock); + } + mod_timer(&sge->tx_reclaim_timer, jiffies + TX_RECLAIM_PERIOD); +} + +/* + * Propagate changes of the SGE coalescing parameters to the HW. + */ +int t1_sge_set_coalesce_params(struct sge *sge, struct sge_params *p) +{ + sge->netdev->poll = t1_poll; + sge->fixed_intrtimer = p->rx_coalesce_usecs * + core_ticks_per_usec(sge->adapter); + writel(sge->fixed_intrtimer, sge->adapter->regs + A_SG_INTRTIMER); + return 0; } /* @@ -712,740 +1579,106 @@ int t1_sge_configure(struct sge *sge, struct sge_params *p) } /* - * Frees all SGE related resources and the sge structure itself + * Disables the DMA engine. 
*/ -void t1_sge_destroy(struct sge *sge) +void t1_sge_stop(struct sge *sge) { - if (sge->pskb) - dev_kfree_skb(sge->pskb); - free_tx_resources(sge); - free_rx_resources(sge); - kfree(sge); + writel(0, sge->adapter->regs + A_SG_CONTROL); + (void) readl(sge->adapter->regs + A_SG_CONTROL); /* flush */ + if (is_T2(sge->adapter)) + del_timer_sync(&sge->espibug_timer); + del_timer_sync(&sge->tx_reclaim_timer); } /* - * Allocates new RX buffers on the freelist Q (and tracks them on the freelist - * context Q) until the Q is full or alloc_skb fails. - * - * It is possible that the generation bits already match, indicating that the - * buffer is already valid and nothing needs to be done. This happens when we - * copied a received buffer into a new sk_buff during the interrupt processing. - * - * If the SGE doesn't automatically align packets properly (!sge->rx_pkt_pad), - * we specify a RX_OFFSET in order to make sure that the IP header is 4B - * aligned. + * Enables the DMA engine. */ -static void refill_free_list(struct sge *sge, struct freelQ *Q) +void t1_sge_start(struct sge *sge) { - struct pci_dev *pdev = sge->adapter->pdev; - struct freelQ_ce *ce = &Q->centries[Q->pidx]; - struct freelQ_e *e = &Q->entries[Q->pidx]; - unsigned int dma_len = Q->rx_buffer_size - Q->dma_offset; - - - while (Q->credits < Q->entries_n) { - if (e->GenerationBit != Q->genbit) { - struct sk_buff *skb; - dma_addr_t mapping; - - skb = alloc_skb(Q->rx_buffer_size, GFP_ATOMIC); - if (!skb) - break; - if (Q->dma_offset) - skb_reserve(skb, Q->dma_offset); - mapping = pci_map_single(pdev, skb->data, dma_len, - PCI_DMA_FROMDEVICE); - ce->skb = skb; - pci_unmap_addr_set(ce, dma_addr, mapping); - pci_unmap_len_set(ce, dma_len, dma_len); - e->AddrLow = (u32)mapping; - e->AddrHigh = (u64)mapping >> 32; - e->BufferLength = dma_len; - e->GenerationBit = e->GenerationBit2 = Q->genbit; - } - - e++; - ce++; - if (++Q->pidx == Q->entries_n) { - Q->pidx = 0; - Q->genbit ^= 1; - ce = Q->centries; - e = 
Q->entries; - } - Q->credits++; - } - -} - -/* - * Calls refill_free_list for both freelist Qs. If we cannot - * fill at least 1/4 of both Qs, we go into 'few interrupt mode' in order - * to give the system time to free up resources. - */ -static void freelQs_empty(struct sge *sge) -{ - u32 irq_reg = t1_read_reg_4(sge->adapter, A_SG_INT_ENABLE); - u32 irqholdoff_reg; - refill_free_list(sge, &sge->freelQ[0]); refill_free_list(sge, &sge->freelQ[1]); - if (sge->freelQ[0].credits > (sge->freelQ[0].entries_n >> 2) && - sge->freelQ[1].credits > (sge->freelQ[1].entries_n >> 2)) { - irq_reg |= F_FL_EXHAUSTED; - irqholdoff_reg = sge->intrtimer[sge->currIndex]; - } else { - /* Clear the F_FL_EXHAUSTED interrupts for now */ - irq_reg &= ~F_FL_EXHAUSTED; - irqholdoff_reg = sge->intrtimer_nres; - } - t1_write_reg_4(sge->adapter, A_SG_INTRTIMER, irqholdoff_reg); - t1_write_reg_4(sge->adapter, A_SG_INT_ENABLE, irq_reg); + writel(sge->sge_control, sge->adapter->regs + A_SG_CONTROL); + doorbell_pio(sge->adapter, F_FL0_ENABLE | F_FL1_ENABLE); + (void) readl(sge->adapter->regs + A_SG_CONTROL); /* flush */ - /* We reenable the Qs to force a freelist GTS interrupt later */ - doorbell_pio(sge, F_FL0_ENABLE | F_FL1_ENABLE); -} + mod_timer(&sge->tx_reclaim_timer, jiffies + TX_RECLAIM_PERIOD); -#define SGE_PL_INTR_MASK (F_PL_INTR_SGE_ERR | F_PL_INTR_SGE_DATA) -#define SGE_INT_FATAL (F_RESPQ_OVERFLOW | F_PACKET_TOO_BIG | F_PACKET_MISMATCH) -#define SGE_INT_ENABLE (F_RESPQ_EXHAUSTED | F_RESPQ_OVERFLOW | \ - F_FL_EXHAUSTED | F_PACKET_TOO_BIG | F_PACKET_MISMATCH) - -/* - * Disable SGE Interrupts - */ -void t1_sge_intr_disable(struct sge *sge) -{ - u32 val = t1_read_reg_4(sge->adapter, A_PL_ENABLE); - - t1_write_reg_4(sge->adapter, A_PL_ENABLE, val & ~SGE_PL_INTR_MASK); - t1_write_reg_4(sge->adapter, A_SG_INT_ENABLE, 0); + if (is_T2(sge->adapter)) + mod_timer(&sge->espibug_timer, jiffies + sge->espibug_timeout); } /* - * Enable SGE interrupts. 
+ * Callback for the T2 ESPI 'stuck packet feature' workaround */ -void t1_sge_intr_enable(struct sge *sge) +static void espibug_workaround(void *data) { - u32 en = SGE_INT_ENABLE; - u32 val = t1_read_reg_4(sge->adapter, A_PL_ENABLE); - - if (sge->adapter->flags & TSO_CAPABLE) - en &= ~F_PACKET_TOO_BIG; - t1_write_reg_4(sge->adapter, A_SG_INT_ENABLE, en); - t1_write_reg_4(sge->adapter, A_PL_ENABLE, val | SGE_PL_INTR_MASK); -} - -/* - * Clear SGE interrupts. - */ -void t1_sge_intr_clear(struct sge *sge) -{ - t1_write_reg_4(sge->adapter, A_PL_CAUSE, SGE_PL_INTR_MASK); - t1_write_reg_4(sge->adapter, A_SG_INT_CAUSE, 0xffffffff); -} - -/* - * SGE 'Error' interrupt handler - */ -int t1_sge_intr_error_handler(struct sge *sge) -{ - struct adapter *adapter = sge->adapter; - u32 cause = t1_read_reg_4(adapter, A_SG_INT_CAUSE); - - if (adapter->flags & TSO_CAPABLE) - cause &= ~F_PACKET_TOO_BIG; - if (cause & F_RESPQ_EXHAUSTED) - sge->intr_cnt.respQ_empty++; - if (cause & F_RESPQ_OVERFLOW) { - sge->intr_cnt.respQ_overflow++; - CH_ALERT("%s: SGE response queue overflow\n", - adapter->name); - } - if (cause & F_FL_EXHAUSTED) { - sge->intr_cnt.freelistQ_empty++; - freelQs_empty(sge); - } - if (cause & F_PACKET_TOO_BIG) { - sge->intr_cnt.pkt_too_big++; - CH_ALERT("%s: SGE max packet size exceeded\n", - adapter->name); - } - if (cause & F_PACKET_MISMATCH) { - sge->intr_cnt.pkt_mismatch++; - CH_ALERT("%s: SGE packet mismatch\n", adapter->name); - } - if (cause & SGE_INT_FATAL) - t1_fatal_err(adapter); - - t1_write_reg_4(adapter, A_SG_INT_CAUSE, cause); - return 0; -} - -/* - * The following code is copied from 2.6, where the skb_pull is doing the - * right thing and only pulls ETH_HLEN. - * - * Determine the packet's protocol ID. The rule here is that we - * assume 802.3 if the type field is short enough to be a length. - * This is normal practice and works for any 'now in use' protocol.
- */ -static unsigned short sge_eth_type_trans(struct sk_buff *skb, - struct net_device *dev) -{ - struct ethhdr *eth; - unsigned char *rawp; - - skb->mac.raw = skb->data; - skb_pull(skb, ETH_HLEN); - eth = (struct ethhdr *)skb->mac.raw; - - if (*eth->h_dest&1) { - if(memcmp(eth->h_dest, dev->broadcast, ETH_ALEN) == 0) - skb->pkt_type = PACKET_BROADCAST; - else - skb->pkt_type = PACKET_MULTICAST; - } - - /* - * This ALLMULTI check should be redundant by 1.4 - * so don't forget to remove it. - * - * Seems, you forgot to remove it. All silly devices - * seems to set IFF_PROMISC. - */ - - else if (1 /*dev->flags&IFF_PROMISC*/) - { - if(memcmp(eth->h_dest,dev->dev_addr, ETH_ALEN)) - skb->pkt_type=PACKET_OTHERHOST; - } - - if (ntohs(eth->h_proto) >= 1536) - return eth->h_proto; - - rawp = skb->data; - - /* - * This is a magic hack to spot IPX packets. Older Novell breaks - * the protocol design and runs IPX over 802.3 without an 802.2 LLC - * layer. We look for FFFF which isn't a used 802.2 SSAP/DSAP. This - * won't work for fault tolerant netware but does for the rest. - */ - if (*(unsigned short *)rawp == 0xFFFF) - return htons(ETH_P_802_3); - - /* - * Real 802.2 LLC - */ - return htons(ETH_P_802_2); -} - -/* - * Prepare the received buffer and pass it up the stack. If it is small enough - * and allocation doesn't fail, we use a new sk_buff and copy the content. 
- */ -static unsigned int t1_sge_rx(struct sge *sge, struct freelQ *Q, - unsigned int len, unsigned int offload) -{ - struct sk_buff *skb; - struct adapter *adapter = sge->adapter; - struct freelQ_ce *ce = &Q->centries[Q->cidx]; - - if (len <= SGE_RX_COPY_THRESHOLD && - (skb = alloc_skb(len + NET_IP_ALIGN, GFP_ATOMIC))) { - struct freelQ_e *e; - char *src = ce->skb->data; - - pci_dma_sync_single_for_cpu(adapter->pdev, - pci_unmap_addr(ce, dma_addr), - pci_unmap_len(ce, dma_len), - PCI_DMA_FROMDEVICE); - if (!offload) { - skb_reserve(skb, NET_IP_ALIGN); - src += sge->rx_pkt_pad; - } - memcpy(skb->data, src, len); - - /* Reuse the entry. */ - e = &Q->entries[Q->cidx]; - e->GenerationBit ^= 1; - e->GenerationBit2 ^= 1; - } else { - pci_unmap_single(adapter->pdev, pci_unmap_addr(ce, dma_addr), - pci_unmap_len(ce, dma_len), - PCI_DMA_FROMDEVICE); - skb = ce->skb; - if (!offload && sge->rx_pkt_pad) - __skb_pull(skb, sge->rx_pkt_pad); - } - - skb_put(skb, len); - - - if (unlikely(offload)) { - { - printk(KERN_ERR - "%s: unexpected offloaded packet, cmd %u\n", - adapter->name, *skb->data); - dev_kfree_skb_any(skb); - } - } else { - struct cpl_rx_pkt *p = (struct cpl_rx_pkt *)skb->data; - - skb_pull(skb, sizeof(*p)); - skb->dev = adapter->port[p->iff].dev; - skb->dev->last_rx = jiffies; - skb->protocol = sge_eth_type_trans(skb, skb->dev); - if ((adapter->flags & RX_CSUM_ENABLED) && p->csum == 0xffff && - skb->protocol == htons(ETH_P_IP) && - (skb->data[9] == IPPROTO_TCP || - skb->data[9] == IPPROTO_UDP)) - skb->ip_summed = CHECKSUM_UNNECESSARY; - else - skb->ip_summed = CHECKSUM_NONE; - if (adapter->vlan_grp && p->vlan_valid) - vlan_hwaccel_rx(skb, adapter->vlan_grp, - ntohs(p->vlan)); - else - netif_rx(skb); - } - - if (++Q->cidx == Q->entries_n) - Q->cidx = 0; - - if (unlikely(--Q->credits < Q->entries_n - SGE_FREEL_REFILL_THRESH)) - refill_free_list(sge, Q); - return 1; -} - - -/* - * Adaptive interrupt timer logic to keep the CPU utilization to - * manageable levels. 
Basically, as the Average Packet Size (APS) - * gets higher, the interrupt latency setting gets longer. Every - * SGE_INTR_BUCKETSIZE (of 100B) causes a bump of 2usec to the - * base value of SGE_INTRTIMER0. At large values of payload the - * latency hits the ceiling value of SGE_INTRTIMER1 stored at - * index SGE_INTR_MAXBUCKETS-1 in sge->intrtimer[]. - * - * sge->currIndex caches the last index to save unneeded PIOs. - */ -static inline void update_intr_timer(struct sge *sge, unsigned int avg_payload) -{ - unsigned int newIndex; - - newIndex = avg_payload / SGE_INTR_BUCKETSIZE; - if (newIndex > SGE_INTR_MAXBUCKETS - 1) { - newIndex = SGE_INTR_MAXBUCKETS - 1; - } - /* Save a PIO with this check....maybe */ - if (newIndex != sge->currIndex) { - t1_write_reg_4(sge->adapter, A_SG_INTRTIMER, - sge->intrtimer[newIndex]); - sge->currIndex = newIndex; - sge->adapter->params.sge.last_rx_coalesce_raw = - sge->intrtimer[newIndex]; - } -} - -/* - * Returns true if command queue q_num has enough available descriptors that - * we can resume Tx operation after temporarily disabling its packet queue. - */ -static inline int enough_free_Tx_descs(struct sge *sge, int q_num) -{ - return atomic_read(&sge->cmdQ[q_num].credits) > - (sge->cmdQ[q_num].entries_n >> 2); -} - -/* - * Main interrupt handler, optimized assuming that we took a 'DATA' - * interrupt. - * - * 1. Clear the interrupt - * 2. Loop while we find valid descriptors and process them; accumulate - * information that can be processed after the loop - * 3. Tell the SGE at which index we stopped processing descriptors - * 4. Bookkeeping; free TX buffers, ring doorbell if there are any - * outstanding TX buffers waiting, replenish RX buffers, potentially - * reenable upper layers if they were turned off due to lack of TX - * resources which are available again. - * 5. If we took an interrupt, but no valid respQ descriptors was found we - * let the slow_intr_handler run and do error handling. 
- */ -irqreturn_t t1_interrupt(int irq, void *cookie, struct pt_regs *regs) -{ - struct net_device *netdev; - struct adapter *adapter = cookie; + struct adapter *adapter = (struct adapter *)data; struct sge *sge = adapter->sge; - struct respQ *Q = &sge->respQ; - unsigned int credits = Q->credits, flags = 0, ret = 0; - unsigned int tot_rxpayload = 0, tot_txpayload = 0, n_rx = 0, n_tx = 0; - unsigned int credits_pend[SGE_CMDQ_N] = { 0, 0 }; - struct respQ_e *e = &Q->entries[Q->cidx]; - prefetch(e); + if (netif_running(adapter->port[0].dev)) { + struct sk_buff *skb = sge->espibug_skb; - t1_write_reg_4(adapter, A_PL_CAUSE, F_PL_INTR_SGE_DATA); + u32 seop = t1_espi_get_mon(adapter, 0x930, 0); - - while (e->GenerationBit == Q->genbit) { - if (--credits < SGE_RESPQ_REPLENISH_THRES) { - u32 n = Q->entries_n - credits - 1; - - t1_write_reg_4(adapter, A_SG_RSPQUEUECREDIT, n); - credits += n; - } - if (likely(e->DataValid)) { - if (!e->Sop || !e->Eop) - BUG(); - t1_sge_rx(sge, &sge->freelQ[e->FreelistQid], - e->BufferLength, e->Offload); - tot_rxpayload += e->BufferLength; - ++n_rx; - } - flags |= e->Qsleeping; - credits_pend[0] += e->Cmdq0CreditReturn; - credits_pend[1] += e->Cmdq1CreditReturn; - -#ifdef CONFIG_SMP - /* - * If enough cmdQ0 buffers have finished DMAing free them so - * anyone that may be waiting for their release can continue. - * We do this only on MP systems to allow other CPUs to proceed - * promptly. UP systems can wait for the free_cmdQ_buffers() - * calls after this loop as the sole CPU is currently busy in - * this loop. 
- */ - if (unlikely(credits_pend[0] > SGE_FREEL_REFILL_THRESH)) { - free_cmdQ_buffers(sge, &sge->cmdQ[0], credits_pend[0], - &tot_txpayload); - n_tx += credits_pend[0]; - credits_pend[0] = 0; - } -#endif - ret++; - e++; - if (unlikely(++Q->cidx == Q->entries_n)) { - Q->cidx = 0; - Q->genbit ^= 1; - e = Q->entries; - } - } - - Q->credits = credits; - t1_write_reg_4(adapter, A_SG_SLEEPING, Q->cidx); - - if (credits_pend[0]) - free_cmdQ_buffers(sge, &sge->cmdQ[0], credits_pend[0], &tot_txpayload); - if (credits_pend[1]) - free_cmdQ_buffers(sge, &sge->cmdQ[1], credits_pend[1], &tot_txpayload); - - /* Do any coalescing and interrupt latency timer adjustments */ - if (adapter->params.sge.coalesce_enable) { - unsigned int avg_txpayload = 0, avg_rxpayload = 0; - - n_tx += credits_pend[0] + credits_pend[1]; - - /* - * Choose larger avg. payload size to increase - * throughput and reduce [CPU util., intr/s.] - * - * Throughput behavior favored in mixed-mode. - */ - if (n_tx) - avg_txpayload = tot_txpayload/n_tx; - if (n_rx) - avg_rxpayload = tot_rxpayload/n_rx; - - if (n_tx && avg_txpayload > avg_rxpayload){ - update_intr_timer(sge, avg_txpayload); - } else if (n_rx) { - update_intr_timer(sge, avg_rxpayload); - } - } - - if (flags & F_CMDQ0_ENABLE) { - struct cmdQ *cmdQ = &sge->cmdQ[0]; - - atomic_set(&cmdQ->asleep, 1); - if (atomic_read(&cmdQ->pio_pidx) != cmdQ->pidx) { - doorbell_pio(sge, F_CMDQ0_ENABLE); - atomic_set(&cmdQ->pio_pidx, cmdQ->pidx); - } - } - if (unlikely(flags & (F_FL0_ENABLE | F_FL1_ENABLE))) - freelQs_empty(sge); - - netdev = adapter->port[0].dev; - if (unlikely(netif_queue_stopped(netdev) && netif_carrier_ok(netdev) && - enough_free_Tx_descs(sge, 0) && - enough_free_Tx_descs(sge, 1))) { - netif_wake_queue(netdev); - } - if (unlikely(!ret)) - ret = t1_slow_intr_handler(adapter); - - return IRQ_RETVAL(ret != 0); -} - -/* - * Enqueues the sk_buff onto the cmdQ[qid] and has hardware fetch it. 
- * - * The code figures out how many entries the sk_buff will require in the - * cmdQ and updates the cmdQ data structure with the state once the enqueue - * has complete. Then, it doesn't access the global structure anymore, but - * uses the corresponding fields on the stack. In conjuction with a spinlock - * around that code, we can make the function reentrant without holding the - * lock when we actually enqueue (which might be expensive, especially on - * architectures with IO MMUs). - */ -static unsigned int t1_sge_tx(struct sk_buff *skb, struct adapter *adapter, - unsigned int qid) -{ - struct sge *sge = adapter->sge; - struct cmdQ *Q = &sge->cmdQ[qid]; - struct cmdQ_e *e; - struct cmdQ_ce *ce; - dma_addr_t mapping; - unsigned int credits, pidx, genbit; - - unsigned int count = 1 + skb_shinfo(skb)->nr_frags; - - /* - * Coming from the timer - */ - if ((skb == sge->pskb)) { - /* - * Quit if any cmdQ activities - */ - if (!spin_trylock(&Q->Qlock)) - return 0; - if (atomic_read(&Q->credits) != Q->entries_n) { - spin_unlock(&Q->Qlock); - return 0; - } - } - else - spin_lock(&Q->Qlock); - - genbit = Q->genbit; - pidx = Q->pidx; - credits = atomic_read(&Q->credits); - - credits -= count; - atomic_sub(count, &Q->credits); - Q->pidx += count; - if (Q->pidx >= Q->entries_n) { - Q->pidx -= Q->entries_n; - Q->genbit ^= 1; - } - - if (unlikely(credits < (MAX_SKB_FRAGS + 1))) { - sge->intr_cnt.cmdQ_full[qid]++; - netif_stop_queue(adapter->port[0].dev); - } - spin_unlock(&Q->Qlock); - - mapping = pci_map_single(adapter->pdev, skb->data, - skb->len - skb->data_len, PCI_DMA_TODEVICE); - ce = &Q->centries[pidx]; - ce->skb = NULL; - pci_unmap_addr_set(ce, dma_addr, mapping); - pci_unmap_len_set(ce, dma_len, skb->len - skb->data_len); - ce->single = 1; - - e = &Q->entries[pidx]; - e->Sop = 1; - e->DataValid = 1; - e->BufferLength = skb->len - skb->data_len; - e->AddrHigh = (u64)mapping >> 32; - e->AddrLow = (u32)mapping; - - if (--count > 0) { - unsigned int i; - - e->Eop = 0; 
- wmb(); - e->GenerationBit = e->GenerationBit2 = genbit; - - for (i = 0; i < count; i++) { - skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; - - ce++; e++; - if (++pidx == Q->entries_n) { - pidx = 0; - genbit ^= 1; - ce = Q->centries; - e = Q->entries; + if ((seop & 0xfff0fff) == 0xfff && skb) { + if (!skb->cb[0]) { + u8 ch_mac_addr[ETH_ALEN] = + {0x0, 0x7, 0x43, 0x0, 0x0, 0x0}; + memcpy(skb->data + sizeof(struct cpl_tx_pkt), + ch_mac_addr, ETH_ALEN); + memcpy(skb->data + skb->len - 10, ch_mac_addr, + ETH_ALEN); + skb->cb[0] = 0xff; } - mapping = pci_map_page(adapter->pdev, frag->page, - frag->page_offset, - frag->size, - PCI_DMA_TODEVICE); - ce->skb = NULL; - pci_unmap_addr_set(ce, dma_addr, mapping); - pci_unmap_len_set(ce, dma_len, frag->size); - ce->single = 0; - - e->Sop = 0; - e->DataValid = 1; - e->BufferLength = frag->size; - e->AddrHigh = (u64)mapping >> 32; - e->AddrLow = (u32)mapping; - - if (i < count - 1) { - e->Eop = 0; - wmb(); - e->GenerationBit = e->GenerationBit2 = genbit; - } + /* bump the reference count to avoid freeing of the + * skb once the DMA has completed. + */ + skb = skb_get(skb); + t1_sge_tx(skb, adapter, 0, adapter->port[0].dev); } } - - if (skb != sge->pskb) - ce->skb = skb; - e->Eop = 1; - wmb(); - e->GenerationBit = e->GenerationBit2 = genbit; - - /* - * We always ring the doorbell for cmdQ1. For cmdQ0, we only ring - * the doorbell if the Q is asleep. There is a natural race, where - * the hardware is going to sleep just after we checked, however, - * then the interrupt handler will detect the outstanding TX packet - * and ring the doorbell for us. 
- */ - if (qid) { - doorbell_pio(sge, F_CMDQ1_ENABLE); - } else if (atomic_read(&Q->asleep)) { - atomic_set(&Q->asleep, 0); - doorbell_pio(sge, F_CMDQ0_ENABLE); - atomic_set(&Q->pio_pidx, Q->pidx); - } - return 0; + mod_timer(&sge->espibug_timer, jiffies + sge->espibug_timeout); } -#define MK_ETH_TYPE_MSS(type, mss) (((mss) & 0x3FFF) | ((type) << 14)) - /* - * Adds the CPL header to the sk_buff and passes it to t1_sge_tx. + * Creates a t1_sge structure and returns suggested resource parameters. */ -int t1_start_xmit(struct sk_buff *skb, struct net_device *dev) +struct sge * __devinit t1_sge_create(struct adapter *adapter, + struct sge_params *p) { - struct adapter *adapter = dev->priv; - struct cpl_tx_pkt *cpl; - struct ethhdr *eth; - size_t max_len; + struct sge *sge = kmalloc(sizeof(*sge), GFP_KERNEL); - /* - * We are using a non-standard hard_header_len and some kernel - * components, such as pktgen, do not handle it right. Complain - * when this happens but try to fix things up. - */ - if (unlikely(skb_headroom(skb) < dev->hard_header_len - ETH_HLEN)) { - struct sk_buff *orig_skb = skb; + if (!sge) + return NULL; + memset(sge, 0, sizeof(*sge)); - if (net_ratelimit()) - printk(KERN_ERR - "%s: Tx packet has inadequate headroom\n", - dev->name); - skb = skb_realloc_headroom(skb, sizeof(struct cpl_tx_pkt_lso)); - dev_kfree_skb_any(orig_skb); - if (!skb) - return -ENOMEM; + sge->adapter = adapter; + sge->netdev = adapter->port[0].dev; + sge->rx_pkt_pad = t1_is_T1B(adapter) ? 0 : 2; + sge->jumbo_fl = t1_is_T1B(adapter) ? 
1 : 0; + + init_timer(&sge->tx_reclaim_timer); + sge->tx_reclaim_timer.data = (unsigned long)sge; + sge->tx_reclaim_timer.function = sge_tx_reclaim_cb; + + if (is_T2(sge->adapter)) { + init_timer(&sge->espibug_timer); + sge->espibug_timer.function = (void *)&espibug_workaround; + sge->espibug_timer.data = (unsigned long)sge->adapter; + sge->espibug_timeout = 1; } + - if (skb_shinfo(skb)->tso_size) { - int eth_type; - struct cpl_tx_pkt_lso *hdr; + p->cmdQ_size[0] = SGE_CMDQ0_E_N; + p->cmdQ_size[1] = SGE_CMDQ1_E_N; + p->freelQ_size[!sge->jumbo_fl] = SGE_FREEL_SIZE; + p->freelQ_size[sge->jumbo_fl] = SGE_JUMBO_FREEL_SIZE; + p->rx_coalesce_usecs = 50; + p->coalesce_enable = 0; + p->sample_interval_usecs = 0; + p->polling = 0; - eth_type = skb->nh.raw - skb->data == ETH_HLEN ? - CPL_ETH_II : CPL_ETH_II_VLAN; - - hdr = (struct cpl_tx_pkt_lso *)skb_push(skb, sizeof(*hdr)); - hdr->opcode = CPL_TX_PKT_LSO; - hdr->ip_csum_dis = hdr->l4_csum_dis = 0; - hdr->ip_hdr_words = skb->nh.iph->ihl; - hdr->tcp_hdr_words = skb->h.th->doff; - hdr->eth_type_mss = htons(MK_ETH_TYPE_MSS(eth_type, - skb_shinfo(skb)->tso_size)); - hdr->len = htonl(skb->len - sizeof(*hdr)); - cpl = (struct cpl_tx_pkt *)hdr; - } else - { - /* - * An Ethernet packet must have at least space for - * the DIX Ethernet header and be no greater than - * the device set MTU. Otherwise trash the packet. 
- */ - if (skb->len < ETH_HLEN) - goto t1_start_xmit_fail2; - eth = (struct ethhdr *)skb->data; - if (eth->h_proto == htons(ETH_P_8021Q)) - max_len = dev->mtu + VLAN_ETH_HLEN; - else - max_len = dev->mtu + ETH_HLEN; - if (skb->len > max_len) - goto t1_start_xmit_fail2; - - if (!(adapter->flags & UDP_CSUM_CAPABLE) && - skb->ip_summed == CHECKSUM_HW && - skb->nh.iph->protocol == IPPROTO_UDP && - skb_checksum_help(skb, 0)) - goto t1_start_xmit_fail3; - - - if (!adapter->sge->pskb) { - if (skb->protocol == htons(ETH_P_ARP) && - skb->nh.arph->ar_op == htons(ARPOP_REQUEST)) - adapter->sge->pskb = skb; - } - cpl = (struct cpl_tx_pkt *)skb_push(skb, sizeof(*cpl)); - cpl->opcode = CPL_TX_PKT; - cpl->ip_csum_dis = 1; /* SW calculates IP csum */ - cpl->l4_csum_dis = skb->ip_summed == CHECKSUM_HW ? 0 : 1; - /* the length field isn't used so don't bother setting it */ - } - cpl->iff = dev->if_port; - -#if defined(CONFIG_VLAN_8021Q) || defined(CONFIG_VLAN_8021Q_MODULE) - if (adapter->vlan_grp && vlan_tx_tag_present(skb)) { - cpl->vlan_valid = 1; - cpl->vlan = htons(vlan_tx_tag_get(skb)); - } else -#endif - cpl->vlan_valid = 0; - - dev->trans_start = jiffies; - return t1_sge_tx(skb, adapter, 0); - -t1_start_xmit_fail3: - printk(KERN_INFO "%s: Unable to complete checksum\n", dev->name); - goto t1_start_xmit_fail1; - -t1_start_xmit_fail2: - printk(KERN_INFO "%s: Invalid packet length %d, dropping\n", - dev->name, skb->len); - -t1_start_xmit_fail1: - dev_kfree_skb_any(skb); - return 0; + return sge; } - -void t1_sge_set_ptimeout(adapter_t *adapter, u32 val) -{ - struct sge *sge = adapter->sge; - - if (is_T2(adapter)) - sge->ptimeout = max((u32)((HZ * val) / 1000), (u32)1); -} - -u32 t1_sge_get_ptimeout(adapter_t *adapter) -{ - struct sge *sge = adapter->sge; - - return (is_T2(adapter) ? 
((sge->ptimeout * 1000) / HZ) : 0); -} - diff --git a/drivers/net/chelsio/sge.h b/drivers/net/chelsio/sge.h index 140f896def60..434b25586851 100644 --- a/drivers/net/chelsio/sge.h +++ b/drivers/net/chelsio/sge.h @@ -1,8 +1,8 @@ /***************************************************************************** * * * File: sge.h * - * $Revision: 1.7 $ * - * $Date: 2005/03/23 07:15:59 $ * + * $Revision: 1.11 $ * + * $Date: 2005/06/21 22:10:55 $ * * Description: * * part of the Chelsio 10Gb Ethernet Driver. * * * @@ -36,25 +36,50 @@ * * ****************************************************************************/ -#ifndef _CHELSIO_LINUX_SGE_H_ -#define _CHELSIO_LINUX_SGE_H_ +#ifndef _CXGB_SGE_H_ +#define _CXGB_SGE_H_ #include #include #include +#ifndef IRQ_RETVAL +#define IRQ_RETVAL(x) +typedef void irqreturn_t; +#endif + +typedef irqreturn_t (*intr_handler_t)(int, void *, struct pt_regs *); + struct sge_intr_counts { unsigned int respQ_empty; /* # times respQ empty */ unsigned int respQ_overflow; /* # respQ overflow (fatal) */ unsigned int freelistQ_empty; /* # times freelist empty */ unsigned int pkt_too_big; /* packet too large (fatal) */ unsigned int pkt_mismatch; - unsigned int cmdQ_full[2]; /* not HW interrupt, host cmdQ[] full */ + unsigned int cmdQ_full[3]; /* not HW IRQ, host cmdQ[] full */ + unsigned int cmdQ_restarted[3];/* # of times cmdQ X was restarted */ + unsigned int ethernet_pkts; /* # of Ethernet packets received */ + unsigned int offload_pkts; /* # of offload packets received */ + unsigned int offload_bundles; /* # of offload pkt bundles delivered */ + unsigned int pure_rsps; /* # of non-payload responses */ + unsigned int unhandled_irqs; /* # of unhandled interrupts */ + unsigned int tx_ipfrags; + unsigned int tx_reg_pkts; + unsigned int tx_lso_pkts; + unsigned int tx_do_cksum; +}; + +struct sge_port_stats { + unsigned long rx_cso_good; /* # of successful RX csum offloads */ + unsigned long tx_cso; /* # of TX checksum offloads */ + unsigned long 
vlan_xtract; /* # of VLAN tag extractions */ + unsigned long vlan_insert; /* # of VLAN tag insertions */ + unsigned long tso; /* # of TSO requests */ + unsigned long rx_drops; /* # of packets dropped due to no mem */ }; struct sk_buff; struct net_device; -struct cxgbdev; struct adapter; struct sge_params; struct sge; @@ -63,7 +88,9 @@ struct sge *t1_sge_create(struct adapter *, struct sge_params *); int t1_sge_configure(struct sge *, struct sge_params *); int t1_sge_set_coalesce_params(struct sge *, struct sge_params *); void t1_sge_destroy(struct sge *); -irqreturn_t t1_interrupt(int, void *, struct pt_regs *); +intr_handler_t t1_select_intr_handler(adapter_t *adapter); +unsigned int t1_sge_tx(struct sk_buff *skb, struct adapter *adapter, + unsigned int qid, struct net_device *netdev); int t1_start_xmit(struct sk_buff *skb, struct net_device *dev); void t1_set_vlan_accel(struct adapter *adapter, int on_off); void t1_sge_start(struct sge *); @@ -72,8 +99,7 @@ int t1_sge_intr_error_handler(struct sge *); void t1_sge_intr_enable(struct sge *); void t1_sge_intr_disable(struct sge *); void t1_sge_intr_clear(struct sge *); +const struct sge_intr_counts *t1_sge_get_intr_counts(struct sge *sge); +const struct sge_port_stats *t1_sge_get_port_stats(struct sge *sge, int port); -void t1_sge_set_ptimeout(adapter_t *adapter, u32 val); -u32 t1_sge_get_ptimeout(adapter_t *adapter); - -#endif /* _CHELSIO_LINUX_SGE_H_ */ +#endif /* _CXGB_SGE_H_ */ diff --git a/drivers/net/chelsio/subr.c b/drivers/net/chelsio/subr.c index a90a3f95fcac..1ebb5d149aef 100644 --- a/drivers/net/chelsio/subr.c +++ b/drivers/net/chelsio/subr.c @@ -1,8 +1,8 @@ /***************************************************************************** * * * File: subr.c * - * $Revision: 1.12 $ * - * $Date: 2005/03/23 07:41:27 $ * + * $Revision: 1.27 $ * + * $Date: 2005/06/22 01:08:36 $ * * Description: * * Various subroutines (intr,pio,etc.) used by Chelsio 10G Ethernet driver.
* * part of the Chelsio 10Gb Ethernet Driver. * @@ -40,11 +40,9 @@ #include "common.h" #include "elmer0.h" #include "regs.h" - #include "gmac.h" #include "cphy.h" #include "sge.h" -#include "tp.h" #include "espi.h" /** @@ -64,7 +62,7 @@ static int t1_wait_op_done(adapter_t *adapter, int reg, u32 mask, int polarity, int attempts, int delay) { while (1) { - u32 val = t1_read_reg_4(adapter, reg) & mask; + u32 val = readl(adapter->regs + reg) & mask; if (!!val == polarity) return 0; @@ -84,9 +82,9 @@ static int __t1_tpi_write(adapter_t *adapter, u32 addr, u32 value) { int tpi_busy; - t1_write_reg_4(adapter, A_TPI_ADDR, addr); - t1_write_reg_4(adapter, A_TPI_WR_DATA, value); - t1_write_reg_4(adapter, A_TPI_CSR, F_TPIWR); + writel(addr, adapter->regs + A_TPI_ADDR); + writel(value, adapter->regs + A_TPI_WR_DATA); + writel(F_TPIWR, adapter->regs + A_TPI_CSR); tpi_busy = t1_wait_op_done(adapter, A_TPI_CSR, F_TPIRDY, 1, TPI_ATTEMPTS, 3); @@ -100,9 +98,9 @@ int t1_tpi_write(adapter_t *adapter, u32 addr, u32 value) { int ret; - TPI_LOCK(adapter); + spin_lock(&(adapter)->tpi_lock); ret = __t1_tpi_write(adapter, addr, value); - TPI_UNLOCK(adapter); + spin_unlock(&(adapter)->tpi_lock); return ret; } @@ -113,8 +111,8 @@ static int __t1_tpi_read(adapter_t *adapter, u32 addr, u32 *valp) { int tpi_busy; - t1_write_reg_4(adapter, A_TPI_ADDR, addr); - t1_write_reg_4(adapter, A_TPI_CSR, 0); + writel(addr, adapter->regs + A_TPI_ADDR); + writel(0, adapter->regs + A_TPI_CSR); tpi_busy = t1_wait_op_done(adapter, A_TPI_CSR, F_TPIRDY, 1, TPI_ATTEMPTS, 3); @@ -122,7 +120,7 @@ static int __t1_tpi_read(adapter_t *adapter, u32 addr, u32 *valp) CH_ALERT("%s: TPI read from 0x%x failed\n", adapter->name, addr); else - *valp = t1_read_reg_4(adapter, A_TPI_RD_DATA); + *valp = readl(adapter->regs + A_TPI_RD_DATA); return tpi_busy; } @@ -130,20 +128,12 @@ int t1_tpi_read(adapter_t *adapter, u32 addr, u32 *valp) { int ret; - TPI_LOCK(adapter); + spin_lock(&(adapter)->tpi_lock); ret = 
__t1_tpi_read(adapter, addr, valp); - TPI_UNLOCK(adapter); + spin_unlock(&(adapter)->tpi_lock); return ret; } -/* - * Set a TPI parameter. - */ -static void t1_tpi_par(adapter_t *adapter, u32 value) -{ - t1_write_reg_4(adapter, A_TPI_PAR, V_TPIPAR(value)); -} - /* * Called when a port's link settings change to propagate the new values to the * associated PHY and MAC. After performing the common tasks it invokes an @@ -227,7 +217,7 @@ static int mi1_mdio_ext_read(adapter_t *adapter, int phy_addr, int mmd_addr, { u32 addr = V_MI1_REG_ADDR(mmd_addr) | V_MI1_PHY_ADDR(phy_addr); - TPI_LOCK(adapter); + spin_lock(&(adapter)->tpi_lock); /* Write the address we want. */ __t1_tpi_write(adapter, A_ELMER0_PORT0_MI1_ADDR, addr); @@ -242,7 +232,7 @@ static int mi1_mdio_ext_read(adapter_t *adapter, int phy_addr, int mmd_addr, /* Read the data. */ __t1_tpi_read(adapter, A_ELMER0_PORT0_MI1_DATA, valp); - TPI_UNLOCK(adapter); + spin_unlock(&(adapter)->tpi_lock); return 0; } @@ -251,7 +241,7 @@ static int mi1_mdio_ext_write(adapter_t *adapter, int phy_addr, int mmd_addr, { u32 addr = V_MI1_REG_ADDR(mmd_addr) | V_MI1_PHY_ADDR(phy_addr); - TPI_LOCK(adapter); + spin_lock(&(adapter)->tpi_lock); /* Write the address we want. */ __t1_tpi_write(adapter, A_ELMER0_PORT0_MI1_ADDR, addr); @@ -264,7 +254,7 @@ static int mi1_mdio_ext_write(adapter_t *adapter, int phy_addr, int mmd_addr, __t1_tpi_write(adapter, A_ELMER0_PORT0_MI1_DATA, val); __t1_tpi_write(adapter, A_ELMER0_PORT0_MI1_OP, MI1_OP_INDIRECT_WRITE); mi1_wait_until_ready(adapter, A_ELMER0_PORT0_MI1_OP); - TPI_UNLOCK(adapter); + spin_unlock(&(adapter)->tpi_lock); return 0; } @@ -277,7 +267,6 @@ static struct mdio_ops mi1_mdio_ext_ops = { enum { CH_BRD_N110_1F, CH_BRD_N210_1F, - CH_BRD_T210_1F, }; static struct board_info t1_board[] = { @@ -308,13 +297,15 @@ struct pci_device_id t1_pci_tbl[] = { { 0, } }; +MODULE_DEVICE_TABLE(pci, t1_pci_tbl); + /* * Return the board_info structure with a given index. Out-of-range indices * return NULL. 
*/ const struct board_info *t1_get_board_info(unsigned int board_id) { - return board_id < DIMOF(t1_board) ? &t1_board[board_id] : NULL; + return board_id < ARRAY_SIZE(t1_board) ? &t1_board[board_id] : NULL; } struct chelsio_vpd_t { @@ -436,7 +427,6 @@ int elmer0_ext_intr_handler(adapter_t *adapter) t1_tpi_read(adapter, A_ELMER0_INT_CAUSE, &cause); switch (board_info(adapter)->board) { - case CHBT_BOARD_CHT210: case CHBT_BOARD_N210: case CHBT_BOARD_N110: if (cause & ELMER0_GP_BIT6) { /* Marvell 88x2010 interrupt */ @@ -446,23 +436,6 @@ int elmer0_ext_intr_handler(adapter_t *adapter) link_changed(adapter, 0); } break; - case CHBT_BOARD_8000: - case CHBT_BOARD_CHT110: - CH_DBG(adapter, INTR, "External interrupt cause 0x%x\n", - cause); - if (cause & ELMER0_GP_BIT1) { /* PMC3393 INTB */ - struct cmac *mac = adapter->port[0].mac; - - mac->ops->interrupt_handler(mac); - } - if (cause & ELMER0_GP_BIT5) { /* XPAK MOD_DETECT */ - u32 mod_detect; - - t1_tpi_read(adapter, A_ELMER0_GPI_STAT, &mod_detect); - CH_MSG(adapter, INFO, LINK, "XPAK %s\n", - mod_detect ? "removed" : "inserted"); - } - break; } t1_tpi_write(adapter, A_ELMER0_INT_CAUSE, cause); return 0; @@ -472,11 +445,11 @@ int elmer0_ext_intr_handler(adapter_t *adapter) void t1_interrupts_enable(adapter_t *adapter) { unsigned int i; + u32 pl_intr; - adapter->slow_intr_mask = F_PL_INTR_SGE_ERR | F_PL_INTR_TP; + adapter->slow_intr_mask = F_PL_INTR_SGE_ERR; t1_sge_intr_enable(adapter->sge); - t1_tp_intr_enable(adapter->tp); if (adapter->espi) { adapter->slow_intr_mask |= F_PL_INTR_ESPI; t1_espi_intr_enable(adapter->espi); @@ -489,17 +462,15 @@ void t1_interrupts_enable(adapter_t *adapter) } /* Enable PCIX & external chip interrupts on ASIC boards. 
*/ - if (t1_is_asic(adapter)) { - u32 pl_intr = t1_read_reg_4(adapter, A_PL_ENABLE); + pl_intr = readl(adapter->regs + A_PL_ENABLE); - /* PCI-X interrupts */ - pci_write_config_dword(adapter->pdev, A_PCICFG_INTR_ENABLE, - 0xffffffff); + /* PCI-X interrupts */ + pci_write_config_dword(adapter->pdev, A_PCICFG_INTR_ENABLE, + 0xffffffff); - adapter->slow_intr_mask |= F_PL_INTR_EXT | F_PL_INTR_PCIX; - pl_intr |= F_PL_INTR_EXT | F_PL_INTR_PCIX; - t1_write_reg_4(adapter, A_PL_ENABLE, pl_intr); - } + adapter->slow_intr_mask |= F_PL_INTR_EXT | F_PL_INTR_PCIX; + pl_intr |= F_PL_INTR_EXT | F_PL_INTR_PCIX; + writel(pl_intr, adapter->regs + A_PL_ENABLE); } /* Disables all interrupts. */ @@ -508,7 +479,6 @@ void t1_interrupts_disable(adapter_t* adapter) unsigned int i; t1_sge_intr_disable(adapter->sge); - t1_tp_intr_disable(adapter->tp); if (adapter->espi) t1_espi_intr_disable(adapter->espi); @@ -519,8 +489,7 @@ void t1_interrupts_disable(adapter_t* adapter) } /* Disable PCIX & external chip interrupts. */ - if (t1_is_asic(adapter)) - t1_write_reg_4(adapter, A_PL_ENABLE, 0); + writel(0, adapter->regs + A_PL_ENABLE); /* PCI-X interrupts */ pci_write_config_dword(adapter->pdev, A_PCICFG_INTR_ENABLE, 0); @@ -532,9 +501,10 @@ void t1_interrupts_disable(adapter_t* adapter) void t1_interrupts_clear(adapter_t* adapter) { unsigned int i; + u32 pl_intr; + t1_sge_intr_clear(adapter->sge); - t1_tp_intr_clear(adapter->tp); if (adapter->espi) t1_espi_intr_clear(adapter->espi); @@ -545,12 +515,10 @@ void t1_interrupts_clear(adapter_t* adapter) } /* Enable interrupts for external devices. 
*/ - if (t1_is_asic(adapter)) { - u32 pl_intr = t1_read_reg_4(adapter, A_PL_CAUSE); + pl_intr = readl(adapter->regs + A_PL_CAUSE); - t1_write_reg_4(adapter, A_PL_CAUSE, - pl_intr | F_PL_INTR_EXT | F_PL_INTR_PCIX); - } + writel(pl_intr | F_PL_INTR_EXT | F_PL_INTR_PCIX, + adapter->regs + A_PL_CAUSE); /* PCI-X interrupts */ pci_write_config_dword(adapter->pdev, A_PCICFG_INTR_CAUSE, 0xffffffff); @@ -559,17 +527,15 @@ void t1_interrupts_clear(adapter_t* adapter) /* * Slow path interrupt handler for ASICs. */ -static int asic_slow_intr(adapter_t *adapter) +int t1_slow_intr_handler(adapter_t *adapter) { - u32 cause = t1_read_reg_4(adapter, A_PL_CAUSE); + u32 cause = readl(adapter->regs + A_PL_CAUSE); cause &= adapter->slow_intr_mask; if (!cause) return 0; if (cause & F_PL_INTR_SGE_ERR) t1_sge_intr_error_handler(adapter->sge); - if (cause & F_PL_INTR_TP) - t1_tp_intr_handler(adapter->tp); if (cause & F_PL_INTR_ESPI) t1_espi_intr_handler(adapter->espi); if (cause & F_PL_INTR_PCIX) @@ -578,41 +544,82 @@ static int asic_slow_intr(adapter_t *adapter) t1_elmer0_ext_intr(adapter); /* Clear the interrupts just processed. */ - t1_write_reg_4(adapter, A_PL_CAUSE, cause); - (void)t1_read_reg_4(adapter, A_PL_CAUSE); /* flush writes */ + writel(cause, adapter->regs + A_PL_CAUSE); + (void)readl(adapter->regs + A_PL_CAUSE); /* flush writes */ return 1; } -int t1_slow_intr_handler(adapter_t *adapter) +/* Pause deadlock avoidance parameters */ +#define DROP_MSEC 16 +#define DROP_PKTS_CNT 1 + +static void set_csum_offload(adapter_t *adapter, u32 csum_bit, int enable) { - return asic_slow_intr(adapter); + u32 val = readl(adapter->regs + A_TP_GLOBAL_CONFIG); + + if (enable) + val |= csum_bit; + else + val &= ~csum_bit; + writel(val, adapter->regs + A_TP_GLOBAL_CONFIG); } -/* Power sequencing is a work-around for Intel's XPAKs. 
*/ -static void power_sequence_xpak(adapter_t* adapter) +void t1_tp_set_ip_checksum_offload(adapter_t *adapter, int enable) { - u32 mod_detect; - u32 gpo; + set_csum_offload(adapter, F_IP_CSUM, enable); +} - /* Check for XPAK */ - t1_tpi_read(adapter, A_ELMER0_GPI_STAT, &mod_detect); - if (!(ELMER0_GP_BIT5 & mod_detect)) { - /* XPAK is present */ - t1_tpi_read(adapter, A_ELMER0_GPO, &gpo); - gpo |= ELMER0_GP_BIT18; - t1_tpi_write(adapter, A_ELMER0_GPO, gpo); +void t1_tp_set_udp_checksum_offload(adapter_t *adapter, int enable) +{ + set_csum_offload(adapter, F_UDP_CSUM, enable); +} + +void t1_tp_set_tcp_checksum_offload(adapter_t *adapter, int enable) +{ + set_csum_offload(adapter, F_TCP_CSUM, enable); +} + +static void t1_tp_reset(adapter_t *adapter, unsigned int tp_clk) +{ + u32 val; + + val = F_TP_IN_CSPI_CPL | F_TP_IN_CSPI_CHECK_IP_CSUM | + F_TP_IN_CSPI_CHECK_TCP_CSUM | F_TP_IN_ESPI_ETHERNET; + val |= F_TP_IN_ESPI_CHECK_IP_CSUM | + F_TP_IN_ESPI_CHECK_TCP_CSUM; + writel(val, adapter->regs + A_TP_IN_CONFIG); + writel(F_TP_OUT_CSPI_CPL | + F_TP_OUT_ESPI_ETHERNET | + F_TP_OUT_ESPI_GENERATE_IP_CSUM | + F_TP_OUT_ESPI_GENERATE_TCP_CSUM, + adapter->regs + A_TP_OUT_CONFIG); + + val = readl(adapter->regs + A_TP_GLOBAL_CONFIG); + val &= ~(F_IP_CSUM | F_UDP_CSUM | F_TCP_CSUM); + writel(val, adapter->regs + A_TP_GLOBAL_CONFIG); + + /* + * Enable pause frame deadlock prevention. 
+ */ + if (is_T2(adapter)) { + u32 drop_ticks = DROP_MSEC * (tp_clk / 1000); + + writel(F_ENABLE_TX_DROP | F_ENABLE_TX_ERROR | + V_DROP_TICKS_CNT(drop_ticks) | + V_NUM_PKTS_DROPPED(DROP_PKTS_CNT), + adapter->regs + A_TP_TX_DROP_CONFIG); } + + writel(F_TP_RESET, adapter->regs + A_TP_RESET); } int __devinit t1_get_board_rev(adapter_t *adapter, const struct board_info *bi, struct adapter_params *p) { p->chip_version = bi->chip_term; - p->is_asic = (p->chip_version != CHBT_TERM_FPGA); if (p->chip_version == CHBT_TERM_T1 || - p->chip_version == CHBT_TERM_T2 || - p->chip_version == CHBT_TERM_FPGA) { - u32 val = t1_read_reg_4(adapter, A_TP_PC_CONFIG); + p->chip_version == CHBT_TERM_T2) { + u32 val = readl(adapter->regs + A_TP_PC_CONFIG); val = G_TP_PC_REV(val); if (val == 2) @@ -633,23 +640,11 @@ int __devinit t1_get_board_rev(adapter_t *adapter, const struct board_info *bi, static int board_init(adapter_t *adapter, const struct board_info *bi) { switch (bi->board) { - case CHBT_BOARD_8000: case CHBT_BOARD_N110: case CHBT_BOARD_N210: - case CHBT_BOARD_CHT210: - case CHBT_BOARD_COUGAR: - t1_tpi_par(adapter, 0xf); + writel(V_TPIPAR(0xf), adapter->regs + A_TPI_PAR); t1_tpi_write(adapter, A_ELMER0_GPO, 0x800); break; - case CHBT_BOARD_CHT110: - t1_tpi_par(adapter, 0xf); - t1_tpi_write(adapter, A_ELMER0_GPO, 0x1800); - - /* TBD XXX Might not need. This fixes a problem - * described in the Intel SR XPAK errata. 
- */ - power_sequence_xpak(adapter); - break; } return 0; } @@ -663,20 +658,19 @@ int t1_init_hw_modules(adapter_t *adapter) int err = -EIO; const struct board_info *bi = board_info(adapter); - if (!adapter->mc4) { - u32 val = t1_read_reg_4(adapter, A_MC4_CFG); + if (!bi->clock_mc4) { + u32 val = readl(adapter->regs + A_MC4_CFG); - t1_write_reg_4(adapter, A_MC4_CFG, val | F_READY | F_MC4_SLOW); - t1_write_reg_4(adapter, A_MC5_CONFIG, - F_M_BUS_ENABLE | F_TCAM_RESET); + writel(val | F_READY | F_MC4_SLOW, adapter->regs + A_MC4_CFG); + writel(F_M_BUS_ENABLE | F_TCAM_RESET, + adapter->regs + A_MC5_CONFIG); } if (adapter->espi && t1_espi_init(adapter->espi, bi->chip_mac, bi->espi_nports)) goto out_err; - if (t1_tp_reset(adapter->tp, &adapter->params.tp, bi->clock_core)) - goto out_err; + t1_tp_reset(adapter, bi->clock_core); err = t1_sge_configure(adapter->sge, &adapter->params.sge); if (err) @@ -690,7 +684,7 @@ int t1_init_hw_modules(adapter_t *adapter) /* * Determine a card's PCI mode. */ -static void __devinit get_pci_mode(adapter_t *adapter, struct pci_params *p) +static void __devinit get_pci_mode(adapter_t *adapter, struct chelsio_pci_params *p) { static unsigned short speed_map[] = { 33, 66, 100, 133 }; u32 pci_mode; @@ -720,8 +714,6 @@ void t1_free_sw_modules(adapter_t *adapter) if (adapter->sge) t1_sge_destroy(adapter->sge); - if (adapter->tp) - t1_tp_destroy(adapter->tp); if (adapter->espi) t1_espi_destroy(adapter->espi); } @@ -764,21 +756,12 @@ int __devinit t1_init_sw_modules(adapter_t *adapter, goto error; } - - if (bi->espi_nports && !(adapter->espi = t1_espi_create(adapter))) { CH_ERR("%s: ESPI initialization failed\n", adapter->name); goto error; } - adapter->tp = t1_tp_create(adapter, &adapter->params.tp); - if (!adapter->tp) { - CH_ERR("%s: TP initialization failed\n", - adapter->name); - goto error; - } - board_init(adapter, bi); bi->mdio_ops->init(adapter, bi); if (bi->gphy->reset) @@ -810,14 +793,12 @@ int __devinit t1_init_sw_modules(adapter_t 
*adapter, * Get the port's MAC addresses either from the EEPROM if one * exists or the one hardcoded in the MAC. */ - if (!t1_is_asic(adapter) || bi->chip_mac == CHBT_MAC_DUMMY) - mac->ops->macaddress_get(mac, hw_addr); - else if (vpd_macaddress_get(adapter, i, hw_addr)) { + if (vpd_macaddress_get(adapter, i, hw_addr)) { CH_ERR("%s: could not read MAC address from VPD ROM\n", - port_name(adapter, i)); + adapter->port[i].dev->name); goto error; } - t1_set_hw_addr(adapter, i, hw_addr); + memcpy(adapter->port[i].dev->dev_addr, hw_addr, ETH_ALEN); init_link_config(&adapter->port[i].link_config, bi); } diff --git a/drivers/net/chelsio/suni1x10gexp_regs.h b/drivers/net/chelsio/suni1x10gexp_regs.h index 98352bdda89b..81816c2b708a 100644 --- a/drivers/net/chelsio/suni1x10gexp_regs.h +++ b/drivers/net/chelsio/suni1x10gexp_regs.h @@ -1,8 +1,8 @@ /***************************************************************************** * * * File: suni1x10gexp_regs.h * - * $Revision: 1.4 $ * - * $Date: 2005/03/23 07:15:59 $ * + * $Revision: 1.9 $ * + * $Date: 2005/06/22 00:17:04 $ * * Description: * * PMC/SIERRA (pm3393) MAC-PHY functionality. * * part of the Chelsio 10Gb Ethernet Driver. * @@ -21,24 +21,16 @@ * * * http://www.chelsio.com * * * - * Copyright (c) 2003 - 2005 Chelsio Communications, Inc. * - * All rights reserved. 
* - * * * Maintainers: maintainers@chelsio.com * * * - * Authors: Dimitrios Michailidis * - * Tina Yang * - * Felix Marti * - * Scott Bardone * - * Kurt Ottaway * - * Frank DiMambro * + * Authors: PMC/SIERRA * * * * History: * * * ****************************************************************************/ -#ifndef _SUNI1x10GEXP_REGS_H -#define _SUNI1x10GEXP_REGS_H +#ifndef _CXGB_SUNI1x10GEXP_REGS_H_ +#define _CXGB_SUNI1x10GEXP_REGS_H_ /******************************************************************************/ /** S/UNI-1x10GE-XP REGISTER ADDRESS MAP **/ @@ -217,5 +209,5 @@ #define SUNI1x10GEXP_BITMSK_TXXG_FCRX 0x0004 #define SUNI1x10GEXP_BITMSK_TXXG_PADEN 0x0002 -#endif /* _SUNI1x10GEXP_REGS_H */ +#endif /* _CXGB_SUNI1x10GEXP_REGS_H_ */ diff --git a/drivers/net/chelsio/tp.c b/drivers/net/chelsio/tp.c deleted file mode 100644 index 9ad5c539fd28..000000000000 --- a/drivers/net/chelsio/tp.c +++ /dev/null @@ -1,188 +0,0 @@ -/***************************************************************************** - * * - * File: tp.c * - * $Revision: 1.6 $ * - * $Date: 2005/03/23 07:15:59 $ * - * Description: * - * Core ASIC Management. * - * part of the Chelsio 10Gb Ethernet Driver. * - * * - * This program is free software; you can redistribute it and/or modify * - * it under the terms of the GNU General Public License, version 2, as * - * published by the Free Software Foundation. * - * * - * You should have received a copy of the GNU General Public License along * - * with this program; if not, write to the Free Software Foundation, Inc., * - * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. * - * * - * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED * - * WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF * - * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. * - * * - * http://www.chelsio.com * - * * - * Copyright (c) 2003 - 2005 Chelsio Communications, Inc. * - * All rights reserved. 
* - * * - * Maintainers: maintainers@chelsio.com * - * * - * Authors: Dimitrios Michailidis * - * Tina Yang * - * Felix Marti * - * Scott Bardone * - * Kurt Ottaway * - * Frank DiMambro * - * * - * History: * - * * - ****************************************************************************/ - -#include "common.h" -#include "regs.h" -#include "tp.h" - -struct petp { - adapter_t *adapter; -}; - -/* Pause deadlock avoidance parameters */ -#define DROP_MSEC 16 -#define DROP_PKTS_CNT 1 - - -static void tp_init(adapter_t *ap, const struct tp_params *p, - unsigned int tp_clk) -{ - if (t1_is_asic(ap)) { - u32 val; - - val = F_TP_IN_CSPI_CPL | F_TP_IN_CSPI_CHECK_IP_CSUM | - F_TP_IN_CSPI_CHECK_TCP_CSUM | F_TP_IN_ESPI_ETHERNET; - if (!p->pm_size) - val |= F_OFFLOAD_DISABLE; - else - val |= F_TP_IN_ESPI_CHECK_IP_CSUM | - F_TP_IN_ESPI_CHECK_TCP_CSUM; - t1_write_reg_4(ap, A_TP_IN_CONFIG, val); - t1_write_reg_4(ap, A_TP_OUT_CONFIG, F_TP_OUT_CSPI_CPL | - F_TP_OUT_ESPI_ETHERNET | - F_TP_OUT_ESPI_GENERATE_IP_CSUM | - F_TP_OUT_ESPI_GENERATE_TCP_CSUM); - t1_write_reg_4(ap, A_TP_GLOBAL_CONFIG, V_IP_TTL(64) | - F_PATH_MTU /* IP DF bit */ | - V_5TUPLE_LOOKUP(p->use_5tuple_mode) | - V_SYN_COOKIE_PARAMETER(29)); - - /* - * Enable pause frame deadlock prevention. 
- */ - if (is_T2(ap)) { - u32 drop_ticks = DROP_MSEC * (tp_clk / 1000); - - t1_write_reg_4(ap, A_TP_TX_DROP_CONFIG, - F_ENABLE_TX_DROP | F_ENABLE_TX_ERROR | - V_DROP_TICKS_CNT(drop_ticks) | - V_NUM_PKTS_DROPPED(DROP_PKTS_CNT)); - } - - } -} - -void t1_tp_destroy(struct petp *tp) -{ - kfree(tp); -} - -struct petp * __devinit t1_tp_create(adapter_t *adapter, struct tp_params *p) -{ - struct petp *tp = kmalloc(sizeof(*tp), GFP_KERNEL); - if (!tp) - return NULL; - memset(tp, 0, sizeof(*tp)); - tp->adapter = adapter; - - return tp; -} - -void t1_tp_intr_enable(struct petp *tp) -{ - u32 tp_intr = t1_read_reg_4(tp->adapter, A_PL_ENABLE); - - { - /* We don't use any TP interrupts */ - t1_write_reg_4(tp->adapter, A_TP_INT_ENABLE, 0); - t1_write_reg_4(tp->adapter, A_PL_ENABLE, - tp_intr | F_PL_INTR_TP); - } -} - -void t1_tp_intr_disable(struct petp *tp) -{ - u32 tp_intr = t1_read_reg_4(tp->adapter, A_PL_ENABLE); - - { - t1_write_reg_4(tp->adapter, A_TP_INT_ENABLE, 0); - t1_write_reg_4(tp->adapter, A_PL_ENABLE, - tp_intr & ~F_PL_INTR_TP); - } -} - -void t1_tp_intr_clear(struct petp *tp) -{ - t1_write_reg_4(tp->adapter, A_TP_INT_CAUSE, 0xffffffff); - t1_write_reg_4(tp->adapter, A_PL_CAUSE, F_PL_INTR_TP); -} - -int t1_tp_intr_handler(struct petp *tp) -{ - u32 cause; - - - cause = t1_read_reg_4(tp->adapter, A_TP_INT_CAUSE); - t1_write_reg_4(tp->adapter, A_TP_INT_CAUSE, cause); - return 0; -} - -static void set_csum_offload(struct petp *tp, u32 csum_bit, int enable) -{ - u32 val = t1_read_reg_4(tp->adapter, A_TP_GLOBAL_CONFIG); - - if (enable) - val |= csum_bit; - else - val &= ~csum_bit; - t1_write_reg_4(tp->adapter, A_TP_GLOBAL_CONFIG, val); -} - -void t1_tp_set_ip_checksum_offload(struct petp *tp, int enable) -{ - set_csum_offload(tp, F_IP_CSUM, enable); -} - -void t1_tp_set_udp_checksum_offload(struct petp *tp, int enable) -{ - set_csum_offload(tp, F_UDP_CSUM, enable); -} - -void t1_tp_set_tcp_checksum_offload(struct petp *tp, int enable) -{ - set_csum_offload(tp, F_TCP_CSUM, 
enable); -} - -/* - * Initialize TP state. tp_params contains initial settings for some TP - * parameters, particularly the one-time PM and CM settings. - */ -int t1_tp_reset(struct petp *tp, struct tp_params *p, unsigned int tp_clk) -{ - int busy = 0; - adapter_t *adapter = tp->adapter; - - tp_init(adapter, p, tp_clk); - if (!busy) - t1_write_reg_4(adapter, A_TP_RESET, F_TP_RESET); - else - CH_ERR("%s: TP initialization timed out\n", - adapter->name); - return busy; -} diff --git a/drivers/net/chelsio/tp.h b/drivers/net/chelsio/tp.h deleted file mode 100644 index 2ebc5c0d62e7..000000000000 --- a/drivers/net/chelsio/tp.h +++ /dev/null @@ -1,110 +0,0 @@ -/***************************************************************************** - * * - * File: tp.h * - * $Revision: 1.3 $ * - * $Date: 2005/03/23 07:15:59 $ * - * Description: * - * part of the Chelsio 10Gb Ethernet Driver. * - * * - * This program is free software; you can redistribute it and/or modify * - * it under the terms of the GNU General Public License, version 2, as * - * published by the Free Software Foundation. * - * * - * You should have received a copy of the GNU General Public License along * - * with this program; if not, write to the Free Software Foundation, Inc., * - * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. * - * * - * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED * - * WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF * - * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. * - * * - * http://www.chelsio.com * - * * - * Copyright (c) 2003 - 2005 Chelsio Communications, Inc. * - * All rights reserved. 
* - * * - * Maintainers: maintainers@chelsio.com * - * * - * Authors: Dimitrios Michailidis * - * Tina Yang * - * Felix Marti * - * Scott Bardone * - * Kurt Ottaway * - * Frank DiMambro * - * * - * History: * - * * - ****************************************************************************/ - -#ifndef CHELSIO_TP_H -#define CHELSIO_TP_H - -#include "common.h" - -#define TP_MAX_RX_COALESCING_SIZE 16224U - -struct tp_mib_statistics { - - /* IP */ - u32 ipInReceive_hi; - u32 ipInReceive_lo; - u32 ipInHdrErrors_hi; - u32 ipInHdrErrors_lo; - u32 ipInAddrErrors_hi; - u32 ipInAddrErrors_lo; - u32 ipInUnknownProtos_hi; - u32 ipInUnknownProtos_lo; - u32 ipInDiscards_hi; - u32 ipInDiscards_lo; - u32 ipInDelivers_hi; - u32 ipInDelivers_lo; - u32 ipOutRequests_hi; - u32 ipOutRequests_lo; - u32 ipOutDiscards_hi; - u32 ipOutDiscards_lo; - u32 ipOutNoRoutes_hi; - u32 ipOutNoRoutes_lo; - u32 ipReasmTimeout; - u32 ipReasmReqds; - u32 ipReasmOKs; - u32 ipReasmFails; - - u32 reserved[8]; - - /* TCP */ - u32 tcpActiveOpens; - u32 tcpPassiveOpens; - u32 tcpAttemptFails; - u32 tcpEstabResets; - u32 tcpOutRsts; - u32 tcpCurrEstab; - u32 tcpInSegs_hi; - u32 tcpInSegs_lo; - u32 tcpOutSegs_hi; - u32 tcpOutSegs_lo; - u32 tcpRetransSeg_hi; - u32 tcpRetransSeg_lo; - u32 tcpInErrs_hi; - u32 tcpInErrs_lo; - u32 tcpRtoMin; - u32 tcpRtoMax; -}; - -struct petp; -struct tp_params; - -struct petp *t1_tp_create(adapter_t *adapter, struct tp_params *p); -void t1_tp_destroy(struct petp *tp); - -void t1_tp_intr_disable(struct petp *tp); -void t1_tp_intr_enable(struct petp *tp); -void t1_tp_intr_clear(struct petp *tp); -int t1_tp_intr_handler(struct petp *tp); - -void t1_tp_get_mib_statistics(adapter_t *adap, struct tp_mib_statistics *tps); -void t1_tp_set_udp_checksum_offload(struct petp *tp, int enable); -void t1_tp_set_tcp_checksum_offload(struct petp *tp, int enable); -void t1_tp_set_ip_checksum_offload(struct petp *tp, int enable); -int t1_tp_set_coalescing_size(struct petp *tp, unsigned int size); 
-int t1_tp_reset(struct petp *tp, struct tp_params *p, unsigned int tp_clk); -#endif diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index 63e89e47b8e9..41e35a72b964 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -2120,6 +2120,7 @@ #define PCI_DEVICE_ID_ENE_1225 0x1225 #define PCI_DEVICE_ID_ENE_1410 0x1410 #define PCI_DEVICE_ID_ENE_1420 0x1420 +#define PCI_VENDOR_ID_CHELSIO 0x1425 #define PCI_VENDOR_ID_SYBA 0x1592 #define PCI_DEVICE_ID_SYBA_2P_EPP 0x0782 From 2d0f9eaff8e1d08b9707f5d24fe6b0ac95d231e3 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Sun, 10 Jul 2005 14:34:13 +1000 Subject: [PATCH 003/584] drm: add _DRM_CONSISTENT map type Added a new DRM map type _DRM_CONSISTENT for consistent PCI memory. It uses drm_pci_alloc/free for allocating/freeing the memory. From: Felix Kuhling Signed-off-by: David Airlie --- drivers/char/drm/drm.h | 3 ++- drivers/char/drm/drm_bufs.c | 20 +++++++++++++++++++- drivers/char/drm/drm_drv.c | 4 ++++ drivers/char/drm/drm_proc.c | 13 ++++++++----- drivers/char/drm/drm_vm.c | 7 +++++++ 5 files changed, 40 insertions(+), 7 deletions(-) diff --git a/drivers/char/drm/drm.h b/drivers/char/drm/drm.h index e8371dd87fbc..50c4d981c497 100644 --- a/drivers/char/drm/drm.h +++ b/drivers/char/drm/drm.h @@ -209,7 +209,8 @@ typedef enum drm_map_type { _DRM_REGISTERS = 1, /**< no caching, no core dump */ _DRM_SHM = 2, /**< shared, cached */ _DRM_AGP = 3, /**< AGP/GART */ - _DRM_SCATTER_GATHER = 4 /**< Scatter/gather memory for PCI DMA */ + _DRM_SCATTER_GATHER = 4, /**< Scatter/gather memory for PCI DMA */ + _DRM_CONSISTENT = 5, /**< Consistent memory for PCI DMA */ } drm_map_type_t; diff --git a/drivers/char/drm/drm_bufs.c b/drivers/char/drm/drm_bufs.c index 4c6191d231b8..89f301ffd97e 100644 --- a/drivers/char/drm/drm_bufs.c +++ b/drivers/char/drm/drm_bufs.c @@ -180,7 +180,22 @@ int drm_addmap( struct inode *inode, struct file *filp, } map->offset += dev->sg->handle; break; - + case _DRM_CONSISTENT: + { + /* 
dma_addr_t is 64bit on i386 with CONFIG_HIGHMEM64G, + * As we limit the address to 2^32-1 (or less), + * casting it down to 32 bits is no problem, but we + * need to point to a 64bit variable first. */ + dma_addr_t bus_addr; + map->handle = drm_pci_alloc(dev, map->size, map->size, + 0xffffffffUL, &bus_addr); + map->offset = (unsigned long)bus_addr; + if (!map->handle) { + drm_free(map, sizeof(*map), DRM_MEM_MAPS); + return -ENOMEM; + } + break; + } default: drm_free( map, sizeof(*map), DRM_MEM_MAPS ); return -EINVAL; @@ -291,6 +306,9 @@ int drm_rmmap(struct inode *inode, struct file *filp, case _DRM_AGP: case _DRM_SCATTER_GATHER: break; + case _DRM_CONSISTENT: + drm_pci_free(dev, map->size, map->handle, map->offset); + break; } drm_free(map, sizeof(*map), DRM_MEM_MAPS); } diff --git a/drivers/char/drm/drm_drv.c b/drivers/char/drm/drm_drv.c index 3333c250c4d9..f4046c8c70b5 100644 --- a/drivers/char/drm/drm_drv.c +++ b/drivers/char/drm/drm_drv.c @@ -228,6 +228,10 @@ int drm_takedown( drm_device_t *dev ) dev->sg = NULL; } break; + case _DRM_CONSISTENT: + drm_pci_free(dev, map->size, + map->handle, map->offset); + break; } drm_free(map, sizeof(*map), DRM_MEM_MAPS); } diff --git a/drivers/char/drm/drm_proc.c b/drivers/char/drm/drm_proc.c index 4774087d2e9e..f4154cc71abb 100644 --- a/drivers/char/drm/drm_proc.c +++ b/drivers/char/drm/drm_proc.c @@ -210,8 +210,8 @@ static int drm__vm_info(char *buf, char **start, off_t offset, int request, /* Hardcoded from _DRM_FRAME_BUFFER, _DRM_REGISTERS, _DRM_SHM, _DRM_AGP, and - _DRM_SCATTER_GATHER.
*/ - const char *types[] = { "FB", "REG", "SHM", "AGP", "SG" }; + _DRM_SCATTER_GATHER and _DRM_CONSISTENT */ + const char *types[] = { "FB", "REG", "SHM", "AGP", "SG", "PCI" }; const char *type; int i; @@ -229,9 +229,12 @@ static int drm__vm_info(char *buf, char **start, off_t offset, int request, if (dev->maplist != NULL) list_for_each(list, &dev->maplist->head) { r_list = list_entry(list, drm_map_list_t, head); map = r_list->map; - if(!map) continue; - if (map->type < 0 || map->type > 4) type = "??"; - else type = types[map->type]; + if(!map) + continue; + if (map->type < 0 || map->type > 5) + type = "??"; + else + type = types[map->type]; DRM_PROC_PRINT("%4d 0x%08lx 0x%08lx %4.4s 0x%02x 0x%08lx ", i, map->offset, diff --git a/drivers/char/drm/drm_vm.c b/drivers/char/drm/drm_vm.c index 621220f3f372..644ec9dadc05 100644 --- a/drivers/char/drm/drm_vm.c +++ b/drivers/char/drm/drm_vm.c @@ -228,6 +228,10 @@ static void drm_vm_shm_close(struct vm_area_struct *vma) case _DRM_AGP: case _DRM_SCATTER_GATHER: break; + case _DRM_CONSISTENT: + drm_pci_free(dev, map->size, map->handle, + map->offset); + break; } drm_free(map, sizeof(*map), DRM_MEM_MAPS); } @@ -645,6 +649,9 @@ int drm_mmap(struct file *filp, struct vm_area_struct *vma) vma->vm_ops = &drm_vm_ops; break; case _DRM_SHM: + case _DRM_CONSISTENT: + /* Consistent memory is really like shared memory. It's only + * allocated in a different way */ vma->vm_ops = &drm_vm_shm_ops; vma->vm_private_data = (void *)map; /* Don't let this area swap. Change when From b84397d6390ef04e8080d66bf528418ab5e75dc0 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Sun, 10 Jul 2005 14:46:12 +1000 Subject: [PATCH 004/584] drm: add framebuffer maps This patch lets drmAddBufs/drmMapBufs handle buffers in video memory The attached patch adds a new buffer type DRM_FB_BUFFER. It works like AGP memory but uses video memory.
From: Austin Yuan Signed-off-by: Dave Airlie --- drivers/char/drm/drm.h | 3 +- drivers/char/drm/drmP.h | 4 +- drivers/char/drm/drm_bufs.c | 175 +++++++++++++++++++++++++++++++++++- 3 files changed, 178 insertions(+), 4 deletions(-) diff --git a/drivers/char/drm/drm.h b/drivers/char/drm/drm.h index 50c4d981c497..3a4745f76cf0 100644 --- a/drivers/char/drm/drm.h +++ b/drivers/char/drm/drm.h @@ -369,7 +369,8 @@ typedef struct drm_buf_desc { enum { _DRM_PAGE_ALIGN = 0x01, /**< Align on page boundaries for DMA */ _DRM_AGP_BUFFER = 0x02, /**< Buffer is in AGP space */ - _DRM_SG_BUFFER = 0x04 /**< Scatter/gather memory buffer */ + _DRM_SG_BUFFER = 0x04, /**< Scatter/gather memory buffer */ + _DRM_FB_BUFFER = 0x08 /**< Buffer is in frame buffer */ } flags; unsigned long agp_start; /**< * Start address of where the AGP buffers are diff --git a/drivers/char/drm/drmP.h b/drivers/char/drm/drmP.h index 5df09cc8c6db..804e622436a9 100644 --- a/drivers/char/drm/drmP.h +++ b/drivers/char/drm/drmP.h @@ -96,6 +96,7 @@ #define DRIVER_IRQ_SHARED 0x80 #define DRIVER_IRQ_VBL 0x100 #define DRIVER_DMA_QUEUE 0x200 +#define DRIVER_FB_DMA 0x400 /***********************************************************************/ /** \name Begin the DRM... 
*/ @@ -474,7 +475,8 @@ typedef struct drm_device_dma { unsigned long byte_count; enum { _DRM_DMA_USE_AGP = 0x01, - _DRM_DMA_USE_SG = 0x02 + _DRM_DMA_USE_SG = 0x02, + _DRM_DMA_USE_FB = 0x04 } flags; } drm_device_dma_t; diff --git a/drivers/char/drm/drm_bufs.c b/drivers/char/drm/drm_bufs.c index 89f301ffd97e..7c20dc344b99 100644 --- a/drivers/char/drm/drm_bufs.c +++ b/drivers/char/drm/drm_bufs.c @@ -935,6 +935,172 @@ static int drm_addbufs_sg( struct inode *inode, struct file *filp, return 0; } +int drm_addbufs_fb(struct inode *inode, struct file *filp, + unsigned int cmd, unsigned long arg) +{ + drm_file_t *priv = filp->private_data; + drm_device_t *dev = priv->head->dev; + drm_device_dma_t *dma = dev->dma; + drm_buf_desc_t request; + drm_buf_entry_t *entry; + drm_buf_t *buf; + unsigned long offset; + unsigned long agp_offset; + int count; + int order; + int size; + int alignment; + int page_order; + int total; + int byte_count; + int i; + drm_buf_t **temp_buflist; + drm_buf_desc_t __user *argp = (void __user *)arg; + + if (!drm_core_check_feature(dev, DRIVER_FB_DMA)) + return -EINVAL; + + if (!dma) + return -EINVAL; + + if (copy_from_user(&request, argp, sizeof(request))) + return -EFAULT; + + count = request.count; + order = drm_order(request.size); + size = 1 << order; + + alignment = (request.flags & _DRM_PAGE_ALIGN) + ? PAGE_ALIGN(size) : size; + page_order = order - PAGE_SHIFT > 0 ? 
order - PAGE_SHIFT : 0; + total = PAGE_SIZE << page_order; + + byte_count = 0; + agp_offset = request.agp_start; + + DRM_DEBUG("count: %d\n", count); + DRM_DEBUG("order: %d\n", order); + DRM_DEBUG("size: %d\n", size); + DRM_DEBUG("agp_offset: %lu\n", agp_offset); + DRM_DEBUG("alignment: %d\n", alignment); + DRM_DEBUG("page_order: %d\n", page_order); + DRM_DEBUG("total: %d\n", total); + + if (order < DRM_MIN_ORDER || order > DRM_MAX_ORDER) + return -EINVAL; + if (dev->queue_count) + return -EBUSY; /* Not while in use */ + + spin_lock(&dev->count_lock); + if (dev->buf_use) { + spin_unlock(&dev->count_lock); + return -EBUSY; + } + atomic_inc(&dev->buf_alloc); + spin_unlock(&dev->count_lock); + + down(&dev->struct_sem); + entry = &dma->bufs[order]; + if (entry->buf_count) { + up(&dev->struct_sem); + atomic_dec(&dev->buf_alloc); + return -ENOMEM; /* May only call once for each order */ + } + + if (count < 0 || count > 4096) { + up(&dev->struct_sem); + atomic_dec(&dev->buf_alloc); + return -EINVAL; + } + + entry->buflist = drm_alloc(count * sizeof(*entry->buflist), + DRM_MEM_BUFS); + if (!entry->buflist) { + up(&dev->struct_sem); + atomic_dec(&dev->buf_alloc); + return -ENOMEM; + } + memset(entry->buflist, 0, count * sizeof(*entry->buflist)); + + entry->buf_size = size; + entry->page_order = page_order; + + offset = 0; + + while (entry->buf_count < count) { + buf = &entry->buflist[entry->buf_count]; + buf->idx = dma->buf_count + entry->buf_count; + buf->total = alignment; + buf->order = order; + buf->used = 0; + + buf->offset = (dma->byte_count + offset); + buf->bus_address = agp_offset + offset; + buf->address = (void *)(agp_offset + offset); + buf->next = NULL; + buf->waiting = 0; + buf->pending = 0; + init_waitqueue_head(&buf->dma_wait); + buf->filp = NULL; + + buf->dev_priv_size = dev->driver->dev_priv_size; + buf->dev_private = drm_alloc(buf->dev_priv_size, DRM_MEM_BUFS); + if (!buf->dev_private) { + /* Set count correctly so we free the proper amount. 
*/ + entry->buf_count = count; + drm_cleanup_buf_error(dev, entry); + up(&dev->struct_sem); + atomic_dec(&dev->buf_alloc); + return -ENOMEM; + } + memset(buf->dev_private, 0, buf->dev_priv_size); + + DRM_DEBUG("buffer %d @ %p\n", entry->buf_count, buf->address); + + offset += alignment; + entry->buf_count++; + byte_count += PAGE_SIZE << page_order; + } + + DRM_DEBUG("byte_count: %d\n", byte_count); + + temp_buflist = drm_realloc(dma->buflist, + dma->buf_count * sizeof(*dma->buflist), + (dma->buf_count + entry->buf_count) + * sizeof(*dma->buflist), DRM_MEM_BUFS); + if (!temp_buflist) { + /* Free the entry because it isn't valid */ + drm_cleanup_buf_error(dev, entry); + up(&dev->struct_sem); + atomic_dec(&dev->buf_alloc); + return -ENOMEM; + } + dma->buflist = temp_buflist; + + for (i = 0; i < entry->buf_count; i++) { + dma->buflist[i + dma->buf_count] = &entry->buflist[i]; + } + + dma->buf_count += entry->buf_count; + dma->byte_count += byte_count; + + DRM_DEBUG("dma->buf_count : %d\n", dma->buf_count); + DRM_DEBUG("entry->buf_count : %d\n", entry->buf_count); + + up(&dev->struct_sem); + + request.count = entry->buf_count; + request.size = size; + + if (copy_to_user(argp, &request, sizeof(request))) + return -EFAULT; + + dma->flags = _DRM_DMA_USE_FB; + + atomic_dec(&dev->buf_alloc); + return 0; +} + /** * Add buffers for DMA transfers (ioctl). 
* @@ -970,6 +1136,8 @@ int drm_addbufs( struct inode *inode, struct file *filp, #endif if ( request.flags & _DRM_SG_BUFFER ) return drm_addbufs_sg( inode, filp, cmd, arg ); + else if ( request.flags & _DRM_FB_BUFFER) + return drm_addbufs_fb( inode, filp, cmd, arg ); else return drm_addbufs_pci( inode, filp, cmd, arg ); } @@ -1214,8 +1382,11 @@ int drm_mapbufs( struct inode *inode, struct file *filp, return -EFAULT; if ( request.count >= dma->buf_count ) { - if ((drm_core_has_AGP(dev) && (dma->flags & _DRM_DMA_USE_AGP)) || - (drm_core_check_feature(dev, DRIVER_SG) && (dma->flags & _DRM_DMA_USE_SG)) ) { + if ((drm_core_has_AGP(dev) && (dma->flags & _DRM_DMA_USE_AGP)) + || (drm_core_check_feature(dev, DRIVER_SG) + && (dma->flags & _DRM_DMA_USE_SG)) + || (drm_core_check_feature(dev, DRIVER_FB_DMA) + && (dma->flags & _DRM_DMA_USE_FB))) { drm_map_t *map = dev->agp_buffer_map; if ( !map ) { From d59431bf96d1e8a3d6d240343f559f5e2ace7f1d Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Sun, 10 Jul 2005 15:00:06 +1000 Subject: [PATCH 005/584] Refactor common, boilerplate ioctl code from drm_addbufs_* functions into drm_addbufs. This makes the code more like the BSD code, and makes the drm_addbufs_* functions callable in-kernel. From: Ian Romanick Signed-off-by: Dave Airlie --- drivers/char/drm/drm_bufs.c | 126 +++++++++++++----------------------- 1 file changed, 44 insertions(+), 82 deletions(-) diff --git a/drivers/char/drm/drm_bufs.c b/drivers/char/drm/drm_bufs.c index 7c20dc344b99..eb3cf550626d 100644 --- a/drivers/char/drm/drm_bufs.c +++ b/drivers/char/drm/drm_bufs.c @@ -362,25 +362,19 @@ static void drm_cleanup_buf_error(drm_device_t *dev, drm_buf_entry_t *entry) #if __OS_HAS_AGP /** - * Add AGP buffers for DMA transfers (ioctl). + * Add AGP buffers for DMA transfers. * - * \param inode device inode. - * \param filp file pointer. - * \param cmd command. - * \param arg pointer to a drm_buf_desc_t request. + * \param dev drm_device_t to which the buffers are to be added. 
+ * \param request pointer to a drm_buf_desc_t describing the request. * \return zero on success or a negative number on failure. * * After some sanity checks creates a drm_buf structure for each buffer and * reallocates the buffer list of the same size order to accommodate the new * buffers. */ -static int drm_addbufs_agp( struct inode *inode, struct file *filp, - unsigned int cmd, unsigned long arg ) +static int drm_addbufs_agp(drm_device_t *dev, drm_buf_desc_t *request) { - drm_file_t *priv = filp->private_data; - drm_device_t *dev = priv->head->dev; drm_device_dma_t *dma = dev->dma; - drm_buf_desc_t request; drm_buf_entry_t *entry; drm_buf_t *buf; unsigned long offset; @@ -394,25 +388,20 @@ static int drm_addbufs_agp( struct inode *inode, struct file *filp, int byte_count; int i; drm_buf_t **temp_buflist; - drm_buf_desc_t __user *argp = (void __user *)arg; if ( !dma ) return -EINVAL; - if ( copy_from_user( &request, argp, - sizeof(request) ) ) - return -EFAULT; - - count = request.count; - order = drm_order( request.size ); + count = request->count; + order = drm_order(request->size); size = 1 << order; - alignment = (request.flags & _DRM_PAGE_ALIGN) + alignment = (request->flags & _DRM_PAGE_ALIGN) ? PAGE_ALIGN(size) : size; page_order = order - PAGE_SHIFT > 0 ? 
order - PAGE_SHIFT : 0; total = PAGE_SIZE << page_order; byte_count = 0; - agp_offset = dev->agp->base + request.agp_start; + agp_offset = dev->agp->base + request->agp_start; DRM_DEBUG( "count: %d\n", count ); DRM_DEBUG( "order: %d\n", order ); @@ -526,11 +515,8 @@ static int drm_addbufs_agp( struct inode *inode, struct file *filp, up( &dev->struct_sem ); - request.count = entry->buf_count; - request.size = size; - - if ( copy_to_user( argp, &request, sizeof(request) ) ) - return -EFAULT; + request->count = entry->buf_count; + request->size = size; dma->flags = _DRM_DMA_USE_AGP; @@ -539,13 +525,9 @@ static int drm_addbufs_agp( struct inode *inode, struct file *filp, } #endif /* __OS_HAS_AGP */ -static int drm_addbufs_pci( struct inode *inode, struct file *filp, - unsigned int cmd, unsigned long arg ) +static int drm_addbufs_pci(drm_device_t *dev, drm_buf_desc_t *request) { - drm_file_t *priv = filp->private_data; - drm_device_t *dev = priv->head->dev; drm_device_dma_t *dma = dev->dma; - drm_buf_desc_t request; int count; int order; int size; @@ -561,26 +543,22 @@ static int drm_addbufs_pci( struct inode *inode, struct file *filp, int page_count; unsigned long *temp_pagelist; drm_buf_t **temp_buflist; - drm_buf_desc_t __user *argp = (void __user *)arg; if (!drm_core_check_feature(dev, DRIVER_PCI_DMA)) return -EINVAL; if ( !dma ) return -EINVAL; - if ( copy_from_user( &request, argp, sizeof(request) ) ) - return -EFAULT; - - count = request.count; - order = drm_order( request.size ); + count = request->count; + order = drm_order(request->size); size = 1 << order; DRM_DEBUG( "count=%d, size=%d (%d), order=%d, queue_count=%d\n", - request.count, request.size, size, + request->count, request->size, size, order, dev->queue_count ); if ( order < DRM_MIN_ORDER || order > DRM_MAX_ORDER ) return -EINVAL; if ( dev->queue_count ) return -EBUSY; /* Not while in use */ - alignment = (request.flags & _DRM_PAGE_ALIGN) + alignment = (request->flags & _DRM_PAGE_ALIGN) ? 
PAGE_ALIGN(size) : size; page_order = order - PAGE_SHIFT > 0 ? order - PAGE_SHIFT : 0; total = PAGE_SIZE << page_order; @@ -758,25 +736,17 @@ static int drm_addbufs_pci( struct inode *inode, struct file *filp, up( &dev->struct_sem ); - request.count = entry->buf_count; - request.size = size; - - if ( copy_to_user( argp, &request, sizeof(request) ) ) - return -EFAULT; + request->count = entry->buf_count; + request->size = size; atomic_dec( &dev->buf_alloc ); return 0; } -static int drm_addbufs_sg( struct inode *inode, struct file *filp, - unsigned int cmd, unsigned long arg ) +static int drm_addbufs_sg(drm_device_t *dev, drm_buf_desc_t *request) { - drm_file_t *priv = filp->private_data; - drm_device_t *dev = priv->head->dev; drm_device_dma_t *dma = dev->dma; - drm_buf_desc_t __user *argp = (void __user *)arg; - drm_buf_desc_t request; drm_buf_entry_t *entry; drm_buf_t *buf; unsigned long offset; @@ -795,20 +765,17 @@ static int drm_addbufs_sg( struct inode *inode, struct file *filp, if ( !dma ) return -EINVAL; - if ( copy_from_user( &request, argp, sizeof(request) ) ) - return -EFAULT; - - count = request.count; - order = drm_order( request.size ); + count = request->count; + order = drm_order(request->size); size = 1 << order; - alignment = (request.flags & _DRM_PAGE_ALIGN) + alignment = (request->flags & _DRM_PAGE_ALIGN) ? PAGE_ALIGN(size) : size; page_order = order - PAGE_SHIFT > 0 ? 
order - PAGE_SHIFT : 0; total = PAGE_SIZE << page_order; byte_count = 0; - agp_offset = request.agp_start; + agp_offset = request->agp_start; DRM_DEBUG( "count: %d\n", count ); DRM_DEBUG( "order: %d\n", order ); @@ -923,11 +890,8 @@ static int drm_addbufs_sg( struct inode *inode, struct file *filp, up( &dev->struct_sem ); - request.count = entry->buf_count; - request.size = size; - - if ( copy_to_user( argp, &request, sizeof(request) ) ) - return -EFAULT; + request->count = entry->buf_count; + request->size = size; dma->flags = _DRM_DMA_USE_SG; @@ -935,13 +899,9 @@ static int drm_addbufs_sg( struct inode *inode, struct file *filp, return 0; } -int drm_addbufs_fb(struct inode *inode, struct file *filp, - unsigned int cmd, unsigned long arg) +int drm_addbufs_fb(drm_device_t *dev, drm_buf_desc_t *request) { - drm_file_t *priv = filp->private_data; - drm_device_t *dev = priv->head->dev; drm_device_dma_t *dma = dev->dma; - drm_buf_desc_t request; drm_buf_entry_t *entry; drm_buf_t *buf; unsigned long offset; @@ -955,7 +915,6 @@ int drm_addbufs_fb(struct inode *inode, struct file *filp, int byte_count; int i; drm_buf_t **temp_buflist; - drm_buf_desc_t __user *argp = (void __user *)arg; if (!drm_core_check_feature(dev, DRIVER_FB_DMA)) return -EINVAL; @@ -963,20 +922,17 @@ int drm_addbufs_fb(struct inode *inode, struct file *filp, if (!dma) return -EINVAL; - if (copy_from_user(&request, argp, sizeof(request))) - return -EFAULT; - - count = request.count; - order = drm_order(request.size); + count = request->count; + order = drm_order(request->size); size = 1 << order; - alignment = (request.flags & _DRM_PAGE_ALIGN) + alignment = (request->flags & _DRM_PAGE_ALIGN) ? PAGE_ALIGN(size) : size; page_order = order - PAGE_SHIFT > 0 ? 
order - PAGE_SHIFT : 0; + total = PAGE_SIZE << page_order; byte_count = 0; - agp_offset = request.agp_start; + agp_offset = request->agp_start; DRM_DEBUG("count: %d\n", count); DRM_DEBUG("order: %d\n", order); @@ -1089,11 +1045,8 @@ int drm_addbufs_fb(struct inode *inode, struct file *filp, up(&dev->struct_sem); - request.count = entry->buf_count; - request.size = size; - - if (copy_to_user(argp, &request, sizeof(request))) - return -EFAULT; + request->count = entry->buf_count; + request->size = size; dma->flags = _DRM_DMA_USE_FB; @@ -1121,6 +1074,7 @@ int drm_addbufs( struct inode *inode, struct file *filp, drm_buf_desc_t request; drm_file_t *priv = filp->private_data; drm_device_t *dev = priv->head->dev; + int ret; if (!drm_core_check_feature(dev, DRIVER_HAVE_DMA)) return -EINVAL; @@ -1131,15 +1085,23 @@ int drm_addbufs( struct inode *inode, struct file *filp, #if __OS_HAS_AGP if ( request.flags & _DRM_AGP_BUFFER ) - return drm_addbufs_agp( inode, filp, cmd, arg ); + ret=drm_addbufs_agp(dev, &request); else #endif if ( request.flags & _DRM_SG_BUFFER ) - return drm_addbufs_sg( inode, filp, cmd, arg ); + ret=drm_addbufs_sg(dev, &request); else if ( request.flags & _DRM_FB_BUFFER) - return drm_addbufs_fb( inode, filp, cmd, arg ); + ret=drm_addbufs_fb(dev, &request); else - return drm_addbufs_pci( inode, filp, cmd, arg ); + ret=drm_addbufs_pci(dev, &request); + + if (ret==0) { + if (copy_to_user((void __user *)arg, &request, + sizeof(request))) { + ret = -EFAULT; + } + } + return ret; } From 9c8da5ebbf6f87293cf8555182da271449889a69 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Sun, 10 Jul 2005 15:38:56 +1000 Subject: [PATCH 006/584] drm: update support for drm pci buffers The DRM needs to change the drm_pci interface for FreeBSD compatibility; this patch introduces the drm_dma_handle_t and uses it in the Linux code.
From: Tonnerre Lombard, Eric Anholt, and Sergey Vlasov Signed-off-by: David Airlie --- drivers/char/drm/drmP.h | 16 ++++++++----- drivers/char/drm/drm_bufs.c | 22 ++++++++++-------- drivers/char/drm/drm_drv.c | 8 +++++-- drivers/char/drm/drm_pci.c | 45 +++++++++++++++++++++++++++---------- drivers/char/drm/drm_vm.c | 8 +++++-- drivers/char/drm/i915_dma.c | 15 +++++++------ drivers/char/drm/i915_drv.h | 3 ++- 7 files changed, 78 insertions(+), 39 deletions(-) diff --git a/drivers/char/drm/drmP.h b/drivers/char/drm/drmP.h index 804e622436a9..7e633a9ce933 100644 --- a/drivers/char/drm/drmP.h +++ b/drivers/char/drm/drmP.h @@ -527,6 +527,12 @@ typedef struct drm_sigdata { drm_hw_lock_t *lock; } drm_sigdata_t; +typedef struct drm_dma_handle { + dma_addr_t busaddr; + void *vaddr; + size_t size; +} drm_dma_handle_t; + /** * Mappings list */ @@ -978,12 +984,10 @@ extern int drm_ati_pcigart_cleanup(drm_device_t *dev, unsigned long addr, dma_addr_t bus_addr); -extern void *drm_pci_alloc(drm_device_t * dev, size_t size, - size_t align, dma_addr_t maxaddr, - dma_addr_t * busaddr); - -extern void drm_pci_free(drm_device_t * dev, size_t size, - void *vaddr, dma_addr_t busaddr); +extern drm_dma_handle_t *drm_pci_alloc(drm_device_t *dev, size_t size, + size_t align, dma_addr_t maxaddr); +extern void __drm_pci_free(drm_device_t *dev, drm_dma_handle_t *dmah); +extern void drm_pci_free(drm_device_t *dev, drm_dma_handle_t *dmah); /* sysfs support (drm_sysfs.c) */ struct drm_sysfs_class; diff --git a/drivers/char/drm/drm_bufs.c b/drivers/char/drm/drm_bufs.c index eb3cf550626d..be54efbefe84 100644 --- a/drivers/char/drm/drm_bufs.c +++ b/drivers/char/drm/drm_bufs.c @@ -90,6 +90,7 @@ int drm_addmap( struct inode *inode, struct file *filp, drm_map_t *map; drm_map_t __user *argp = (void __user *)arg; drm_map_list_t *list; + drm_dma_handle_t *dmah; if ( !(filp->f_mode & 3) ) return -EACCES; /* Require read/write */ @@ -181,21 +182,19 @@ int drm_addmap( struct inode *inode, struct file *filp, 
map->offset += dev->sg->handle; break; case _DRM_CONSISTENT: - { /* dma_addr_t is 64bit on i386 with CONFIG_HIGHMEM64G, - * As we're limit the address to 2^32-1 (or lses), + * As we're limiting the address to 2^32-1 (or less), * casting it down to 32 bits is no problem, but we * need to point to a 64bit variable first. */ - dma_addr_t bus_addr; - map->handle = drm_pci_alloc(dev, map->size, map->size, - 0xffffffffUL, &bus_addr); - map->offset = (unsigned long)bus_addr; - if (!map->handle) { + dmah = drm_pci_alloc(dev, map->size, map->size, 0xffffffffUL); + if (!dmah) { drm_free(map, sizeof(*map), DRM_MEM_MAPS); return -ENOMEM; } + map->handle = dmah->vaddr; + map->offset = (unsigned long)dmah->busaddr; + kfree(dmah); break; - } default: drm_free( map, sizeof(*map), DRM_MEM_MAPS ); return -EINVAL; @@ -286,6 +285,8 @@ int drm_rmmap(struct inode *inode, struct file *filp, } if(!found_maps) { + drm_dma_handle_t dmah; + switch (map->type) { case _DRM_REGISTERS: case _DRM_FRAME_BUFFER: @@ -307,7 +308,10 @@ int drm_rmmap(struct inode *inode, struct file *filp, case _DRM_SCATTER_GATHER: break; case _DRM_CONSISTENT: - drm_pci_free(dev, map->size, map->handle, map->offset); + dmah.vaddr = map->handle; + dmah.busaddr = map->offset; + dmah.size = map->size; + __drm_pci_free(dev, &dmah); break; } drm_free(map, sizeof(*map), DRM_MEM_MAPS); diff --git a/drivers/char/drm/drm_drv.c b/drivers/char/drm/drm_drv.c index f4046c8c70b5..ab172ea8e98a 100644 --- a/drivers/char/drm/drm_drv.c +++ b/drivers/char/drm/drm_drv.c @@ -198,6 +198,8 @@ int drm_takedown( drm_device_t *dev ) r_list = (drm_map_list_t *)list; if ( ( map = r_list->map ) ) { + drm_dma_handle_t dmah; + switch ( map->type ) { case _DRM_REGISTERS: case _DRM_FRAME_BUFFER: @@ -229,8 +231,10 @@ int drm_takedown( drm_device_t *dev ) } break; case _DRM_CONSISTENT: - drm_pci_free(dev, map->size, - map->handle, map->offset); + dmah.vaddr = map->handle; + dmah.busaddr = map->offset; + dmah.size = map->size; + __drm_pci_free(dev, 
&dmah); break; } drm_free(map, sizeof(*map), DRM_MEM_MAPS); diff --git a/drivers/char/drm/drm_pci.c b/drivers/char/drm/drm_pci.c index 192e8762571c..3e452e8967fa 100644 --- a/drivers/char/drm/drm_pci.c +++ b/drivers/char/drm/drm_pci.c @@ -46,10 +46,10 @@ /** * \brief Allocate a PCI consistent memory block, for DMA. */ -void *drm_pci_alloc(drm_device_t * dev, size_t size, size_t align, - dma_addr_t maxaddr, dma_addr_t * busaddr) +drm_dma_handle_t *drm_pci_alloc(drm_device_t * dev, size_t size, size_t align, + dma_addr_t maxaddr) { - void *address; + drm_dma_handle_t *dmah; #if DRM_DEBUG_MEMORY int area = DRM_MEM_DMA; @@ -74,13 +74,19 @@ void *drm_pci_alloc(drm_device_t * dev, size_t size, size_t align, return NULL; } - address = pci_alloc_consistent(dev->pdev, size, busaddr); + dmah = kmalloc(sizeof(drm_dma_handle_t), GFP_KERNEL); + if (!dmah) + return NULL; + + dmah->size = size; + dmah->vaddr = pci_alloc_consistent(dev->pdev, size, &dmah->busaddr); #if DRM_DEBUG_MEMORY - if (address == NULL) { + if (dmah->vaddr == NULL) { spin_lock(&drm_mem_lock); ++drm_mem_stats[area].fail_count; spin_unlock(&drm_mem_lock); + kfree(dmah); return NULL; } @@ -90,21 +96,25 @@ void *drm_pci_alloc(drm_device_t * dev, size_t size, size_t align, drm_ram_used += size; spin_unlock(&drm_mem_lock); #else - if (address == NULL) + if (dmah->vaddr == NULL) { + kfree(dmah); return NULL; + } #endif - memset(address, 0, size); + memset(dmah->vaddr, 0, size); - return address; + return dmah; } EXPORT_SYMBOL(drm_pci_alloc); /** - * \brief Free a PCI consistent memory block. + * \brief Free a PCI consistent memory block with freeing its descriptor. + * + * This function is for internal use in the Linux-specific DRM core code. 
*/ void -drm_pci_free(drm_device_t * dev, size_t size, void *vaddr, dma_addr_t busaddr) +__drm_pci_free(drm_device_t * dev, drm_dma_handle_t *dmah) { #if DRM_DEBUG_MEMORY int area = DRM_MEM_DMA; @@ -112,12 +122,13 @@ drm_pci_free(drm_device_t * dev, size_t size, void *vaddr, dma_addr_t busaddr) int free_count; #endif - if (!vaddr) { + if (!dmah->vaddr) { #if DRM_DEBUG_MEMORY DRM_MEM_ERROR(area, "Attempt to free address 0\n"); #endif } else { - pci_free_consistent(dev->pdev, size, vaddr, busaddr); + pci_free_consistent(dev->pdev, dmah->size, dmah->vaddr, + dmah->busaddr); } #if DRM_DEBUG_MEMORY @@ -135,6 +146,16 @@ drm_pci_free(drm_device_t * dev, size_t size, void *vaddr, dma_addr_t busaddr) #endif } + +/** + * \brief Free a PCI consistent memory block + */ +void +drm_pci_free(drm_device_t *dev, drm_dma_handle_t *dmah) +{ + __drm_pci_free(dev, dmah); + kfree(dmah); +} EXPORT_SYMBOL(drm_pci_free); /*@}*/ diff --git a/drivers/char/drm/drm_vm.c b/drivers/char/drm/drm_vm.c index 644ec9dadc05..675d2397def9 100644 --- a/drivers/char/drm/drm_vm.c +++ b/drivers/char/drm/drm_vm.c @@ -210,6 +210,8 @@ static void drm_vm_shm_close(struct vm_area_struct *vma) } if(!found_maps) { + drm_dma_handle_t dmah; + switch (map->type) { case _DRM_REGISTERS: case _DRM_FRAME_BUFFER: @@ -229,8 +231,10 @@ static void drm_vm_shm_close(struct vm_area_struct *vma) case _DRM_SCATTER_GATHER: break; case _DRM_CONSISTENT: - drm_pci_free(dev, map->size, map->handle, - map->offset); + dmah.vaddr = map->handle; + dmah.busaddr = map->offset; + dmah.size = map->size; + __drm_pci_free(dev, &dmah); break; } drm_free(map, sizeof(*map), DRM_MEM_MAPS); diff --git a/drivers/char/drm/i915_dma.c b/drivers/char/drm/i915_dma.c index acf9e52a9507..759f22943eb1 100644 --- a/drivers/char/drm/i915_dma.c +++ b/drivers/char/drm/i915_dma.c @@ -95,9 +95,8 @@ static int i915_dma_cleanup(drm_device_t * dev) drm_core_ioremapfree( &dev_priv->ring.map, dev); } - if (dev_priv->hw_status_page) { - drm_pci_free(dev, PAGE_SIZE, 
dev_priv->hw_status_page, - dev_priv->dma_status_page); + if (dev_priv->status_page_dmah) { + drm_pci_free(dev, dev_priv->status_page_dmah); /* Need to rewrite hardware status page */ I915_WRITE(0x02080, 0x1ffff000); } @@ -174,16 +173,18 @@ static int i915_initialize(drm_device_t * dev, dev_priv->allow_batchbuffer = 1; /* Program Hardware Status Page */ - dev_priv->hw_status_page = drm_pci_alloc(dev, PAGE_SIZE, PAGE_SIZE, - 0xffffffff, - &dev_priv->dma_status_page); + dev_priv->status_page_dmah = drm_pci_alloc(dev, PAGE_SIZE, PAGE_SIZE, + 0xffffffff); - if (!dev_priv->hw_status_page) { + if (!dev_priv->status_page_dmah) { dev->dev_private = (void *)dev_priv; i915_dma_cleanup(dev); DRM_ERROR("Can not allocate hardware status page\n"); return DRM_ERR(ENOMEM); } + dev_priv->hw_status_page = dev_priv->status_page_dmah->vaddr; + dev_priv->dma_status_page = dev_priv->status_page_dmah->busaddr; + memset(dev_priv->hw_status_page, 0, PAGE_SIZE); DRM_DEBUG("hw status page @ %p\n", dev_priv->hw_status_page); diff --git a/drivers/char/drm/i915_drv.h b/drivers/char/drm/i915_drv.h index 9c37d2367dd5..93080868d18f 100644 --- a/drivers/char/drm/i915_drv.h +++ b/drivers/char/drm/i915_drv.h @@ -79,9 +79,10 @@ typedef struct drm_i915_private { drm_i915_sarea_t *sarea_priv; drm_i915_ring_buffer_t ring; + drm_dma_handle_t *status_page_dmah; void *hw_status_page; - unsigned long counter; dma_addr_t dma_status_page; + unsigned long counter; int back_offset; int front_offset; From 7ab984012a879a53abb56abfe03b0c686f42b281 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Sun, 10 Jul 2005 16:56:52 +1000 Subject: [PATCH 007/584] drm: update some function so a driver can call them This patch splits some ioctl functions so that they can be called in-kernel by a DRM driver. The driver will use them later. 
From: Ian Romanick Signed-off-by: Dave Airlie --- drivers/char/drm/drmP.h | 34 +++++--- drivers/char/drm/drm_agpsupport.c | 137 +++++++++++++++++------------- drivers/char/drm/drm_bufs.c | 84 ++++++++++++------ drivers/char/drm/drm_drv.c | 15 ++-- 4 files changed, 168 insertions(+), 102 deletions(-) diff --git a/drivers/char/drm/drmP.h b/drivers/char/drm/drmP.h index 7e633a9ce933..d16d07e28b4c 100644 --- a/drivers/char/drm/drmP.h +++ b/drivers/char/drm/drmP.h @@ -889,11 +889,16 @@ extern int drm_lock_free(drm_device_t *dev, unsigned int context); /* Buffer management support (drm_bufs.h) */ +extern int drm_addmap(drm_device_t *dev, unsigned int offset, + unsigned int size, drm_map_type_t type, + drm_map_flags_t flags, drm_map_t **map_ptr); +extern int drm_addmap_ioctl(struct inode *inode, struct file *filp, + unsigned int cmd, unsigned long arg); +extern int drm_rmmap(drm_device_t *dev, void *handle); +extern int drm_rmmap_ioctl(struct inode *inode, struct file *filp, + unsigned int cmd, unsigned long arg); + extern int drm_order( unsigned long size ); -extern int drm_addmap( struct inode *inode, struct file *filp, - unsigned int cmd, unsigned long arg ); -extern int drm_rmmap( struct inode *inode, struct file *filp, - unsigned int cmd, unsigned long arg ); extern int drm_addbufs( struct inode *inode, struct file *filp, unsigned int cmd, unsigned long arg ); extern int drm_infobufs( struct inode *inode, struct file *filp, @@ -927,15 +932,18 @@ extern void drm_vbl_send_signals( drm_device_t *dev ); /* AGP/GART support (drm_agpsupport.h) */ extern drm_agp_head_t *drm_agp_init(drm_device_t *dev); -extern int drm_agp_acquire(struct inode *inode, struct file *filp, - unsigned int cmd, unsigned long arg); -extern void drm_agp_do_release(drm_device_t *dev); -extern int drm_agp_release(struct inode *inode, struct file *filp, - unsigned int cmd, unsigned long arg); -extern int drm_agp_enable(struct inode *inode, struct file *filp, - unsigned int cmd, unsigned long arg); 
-extern int drm_agp_info(struct inode *inode, struct file *filp, - unsigned int cmd, unsigned long arg); +extern int drm_agp_acquire(drm_device_t * dev); +extern int drm_agp_acquire_ioctl(struct inode *inode, struct file *filp, + unsigned int cmd, unsigned long arg); +extern int drm_agp_release(drm_device_t *dev); +extern int drm_agp_release_ioctl(struct inode *inode, struct file *filp, + unsigned int cmd, unsigned long arg); +extern int drm_agp_enable(drm_device_t *dev, drm_agp_mode_t mode); +extern int drm_agp_enable_ioctl(struct inode *inode, struct file *filp, + unsigned int cmd, unsigned long arg); +extern int drm_agp_info(drm_device_t * dev, drm_agp_info_t *info); +extern int drm_agp_info_ioctl(struct inode *inode, struct file *filp, + unsigned int cmd, unsigned long arg); extern int drm_agp_alloc(struct inode *inode, struct file *filp, unsigned int cmd, unsigned long arg); extern int drm_agp_free(struct inode *inode, struct file *filp, diff --git a/drivers/char/drm/drm_agpsupport.c b/drivers/char/drm/drm_agpsupport.c index 8d94c0b5fa44..10c8b4daec51 100644 --- a/drivers/char/drm/drm_agpsupport.c +++ b/drivers/char/drm/drm_agpsupport.c @@ -37,7 +37,7 @@ #if __OS_HAS_AGP /** - * AGP information ioctl. + * Get AGP information. * * \param inode device inode. * \param filp file pointer. @@ -48,51 +48,56 @@ * Verifies the AGP device has been initialized and acquired and fills in the * drm_agp_info structure with the information in drm_agp_head::agp_info. 
*/ -int drm_agp_info(struct inode *inode, struct file *filp, - unsigned int cmd, unsigned long arg) +int drm_agp_info(drm_device_t *dev, drm_agp_info_t *info) { - drm_file_t *priv = filp->private_data; - drm_device_t *dev = priv->head->dev; DRM_AGP_KERN *kern; - drm_agp_info_t info; if (!dev->agp || !dev->agp->acquired) return -EINVAL; kern = &dev->agp->agp_info; - info.agp_version_major = kern->version.major; - info.agp_version_minor = kern->version.minor; - info.mode = kern->mode; - info.aperture_base = kern->aper_base; - info.aperture_size = kern->aper_size * 1024 * 1024; - info.memory_allowed = kern->max_memory << PAGE_SHIFT; - info.memory_used = kern->current_memory << PAGE_SHIFT; - info.id_vendor = kern->device->vendor; - info.id_device = kern->device->device; + info->agp_version_major = kern->version.major; + info->agp_version_minor = kern->version.minor; + info->mode = kern->mode; + info->aperture_base = kern->aper_base; + info->aperture_size = kern->aper_size * 1024 * 1024; + info->memory_allowed = kern->max_memory << PAGE_SHIFT; + info->memory_used = kern->current_memory << PAGE_SHIFT; + info->id_vendor = kern->device->vendor; + info->id_device = kern->device->device; - if (copy_to_user((drm_agp_info_t __user *)arg, &info, sizeof(info))) + return 0; +} +EXPORT_SYMBOL(drm_agp_info); + +int drm_agp_info_ioctl(struct inode *inode, struct file *filp, + unsigned int cmd, unsigned long arg) +{ + drm_file_t *priv = filp->private_data; + drm_device_t *dev = priv->head->dev; + drm_agp_info_t info; + int err; + + err = drm_agp_info(dev, &info); + if (err) + return err; + + if (copy_to_user((drm_agp_info_t __user *) arg, &info, sizeof(info))) return -EFAULT; return 0; } /** - * Acquire the AGP device (ioctl). + * Acquire the AGP device. * - * \param inode device inode. - * \param filp file pointer. - * \param cmd command. - * \param arg user argument. + * \param dev DRM device that is to acquire AGP * \return zero on success or a negative number on failure. 
* * Verifies the AGP device hasn't been acquired before and calls - * agp_acquire(). + * \c agp_backend_acquire. */ -int drm_agp_acquire(struct inode *inode, struct file *filp, - unsigned int cmd, unsigned long arg) +int drm_agp_acquire(drm_device_t *dev) { - drm_file_t *priv = filp->private_data; - drm_device_t *dev = priv->head->dev; - if (!dev->agp) return -ENODEV; if (dev->agp->acquired) @@ -102,9 +107,10 @@ int drm_agp_acquire(struct inode *inode, struct file *filp, dev->agp->acquired = 1; return 0; } +EXPORT_SYMBOL(drm_agp_acquire); /** - * Release the AGP device (ioctl). + * Acquire the AGP device (ioctl). * * \param inode device inode. * \param filp file pointer. @@ -112,63 +118,80 @@ int drm_agp_acquire(struct inode *inode, struct file *filp, * \param arg user argument. * \return zero on success or a negative number on failure. * - * Verifies the AGP device has been acquired and calls agp_backend_release(). + * Verifies the AGP device hasn't been acquired before and calls + * \c agp_backend_acquire. */ -int drm_agp_release(struct inode *inode, struct file *filp, - unsigned int cmd, unsigned long arg) +int drm_agp_acquire_ioctl(struct inode *inode, struct file *filp, + unsigned int cmd, unsigned long arg) { - drm_file_t *priv = filp->private_data; - drm_device_t *dev = priv->head->dev; - - if (!dev->agp || !dev->agp->acquired) - return -EINVAL; - agp_backend_release(dev->agp->bridge); - dev->agp->acquired = 0; - return 0; - + drm_file_t *priv = filp->private_data; + + return drm_agp_acquire( (drm_device_t *) priv->head->dev ); } /** * Release the AGP device. * - * Calls agp_backend_release(). + * \param dev DRM device that is to release AGP + * \return zero on success or a negative number on failure. + * + * Verifies the AGP device has been acquired and calls \c agp_backend_release. 
*/ -void drm_agp_do_release(drm_device_t *dev) +int drm_agp_release(drm_device_t *dev) { - agp_backend_release(dev->agp->bridge); + if (!dev->agp || !dev->agp->acquired) + return -EINVAL; + agp_backend_release(dev->agp->bridge); + dev->agp->acquired = 0; + return 0; +} +EXPORT_SYMBOL(drm_agp_release); + +int drm_agp_release_ioctl(struct inode *inode, struct file *filp, + unsigned int cmd, unsigned long arg) +{ + drm_file_t *priv = filp->private_data; + drm_device_t *dev = priv->head->dev; + + return drm_agp_release(dev); } /** * Enable the AGP bus. * - * \param inode device inode. - * \param filp file pointer. - * \param cmd command. - * \param arg pointer to a drm_agp_mode structure. + * \param dev DRM device that has previously acquired AGP. + * \param mode Requested AGP mode. * \return zero on success or a negative number on failure. * * Verifies the AGP device has been acquired but not enabled, and calls - * agp_enable(). + * \c agp_enable. */ -int drm_agp_enable(struct inode *inode, struct file *filp, - unsigned int cmd, unsigned long arg) +int drm_agp_enable(drm_device_t *dev, drm_agp_mode_t mode) { - drm_file_t *priv = filp->private_data; - drm_device_t *dev = priv->head->dev; - drm_agp_mode_t mode; - if (!dev->agp || !dev->agp->acquired) return -EINVAL; - if (copy_from_user(&mode, (drm_agp_mode_t __user *)arg, sizeof(mode))) - return -EFAULT; - dev->agp->mode = mode.mode; agp_enable(dev->agp->bridge, mode.mode); dev->agp->base = dev->agp->agp_info.aper_base; dev->agp->enabled = 1; return 0; } +EXPORT_SYMBOL(drm_agp_enable); + +int drm_agp_enable_ioctl(struct inode *inode, struct file *filp, + unsigned int cmd, unsigned long arg) +{ + drm_file_t *priv = filp->private_data; + drm_device_t *dev = priv->head->dev; + drm_agp_mode_t mode; + + + if (copy_from_user(&mode, (drm_agp_mode_t __user *) arg, sizeof(mode))) + return -EFAULT; + + return drm_agp_enable(dev, mode); +} /** * Allocate AGP memory. 
diff --git a/drivers/char/drm/drm_bufs.c b/drivers/char/drm/drm_bufs.c index be54efbefe84..cd4636f7f187 100644 --- a/drivers/char/drm/drm_bufs.c +++ b/drivers/char/drm/drm_bufs.c @@ -82,26 +82,22 @@ static unsigned int map32_handle = 0x10000000; * type. Adds the map to the map list drm_device::maplist. Adds MTRR's where * applicable and if supported by the kernel. */ -int drm_addmap( struct inode *inode, struct file *filp, - unsigned int cmd, unsigned long arg ) +int drm_addmap(drm_device_t * dev, unsigned int offset, + unsigned int size, drm_map_type_t type, + drm_map_flags_t flags, drm_local_map_t ** map_ptr) { - drm_file_t *priv = filp->private_data; - drm_device_t *dev = priv->head->dev; drm_map_t *map; - drm_map_t __user *argp = (void __user *)arg; drm_map_list_t *list; drm_dma_handle_t *dmah; - if ( !(filp->f_mode & 3) ) return -EACCES; /* Require read/write */ - map = drm_alloc( sizeof(*map), DRM_MEM_MAPS ); if ( !map ) return -ENOMEM; - if ( copy_from_user( map, argp, sizeof(*map) ) ) { - drm_free( map, sizeof(*map), DRM_MEM_MAPS ); - return -EFAULT; - } + map->offset = offset; + map->size = size; + map->flags = flags; + map->type = type; /* Only allow shared memory to be removable since we only keep enough * book keeping information about shared memory to allow for removal @@ -218,10 +214,42 @@ int drm_addmap( struct inode *inode, struct file *filp, #endif up(&dev->struct_sem); - if ( copy_to_user( argp, map, sizeof(*map) ) ) + *map_ptr = map; + return 0; +} +EXPORT_SYMBOL(drm_addmap); + +int drm_addmap_ioctl(struct inode *inode, struct file *filp, + unsigned int cmd, unsigned long arg) +{ + drm_file_t *priv = filp->private_data; + drm_device_t *dev = priv->head->dev; + drm_map_t map; + drm_map_t *map_ptr; + drm_map_t __user *argp = (void __user *)arg; + int err; + + if (!(filp->f_mode & 3)) + return -EACCES; /* Require read/write */ + + if (copy_from_user(& map, argp, sizeof(map))) { return -EFAULT; - if (copy_to_user(&argp->handle, &map->offset, 
sizeof(map->offset))) + } + + err = drm_addmap( dev, map.offset, map.size, map.type, map.flags, + &map_ptr ); + + if (err) { + return err; + } + + if (copy_to_user(argp, map_ptr, sizeof(*map_ptr))) return -EFAULT; + if (map_ptr->type != _DRM_SHM) { + if (copy_to_user(&argp->handle, &map_ptr->offset, + sizeof(map_ptr->offset))) + return -EFAULT; + } return 0; } @@ -240,32 +268,23 @@ int drm_addmap( struct inode *inode, struct file *filp, * its being used, and free any associate resource (such as MTRR's) if it's not * being on use. * - * \sa addmap(). + * \sa drm_addmap */ -int drm_rmmap(struct inode *inode, struct file *filp, - unsigned int cmd, unsigned long arg) +int drm_rmmap(drm_device_t *dev, void *handle) { - drm_file_t *priv = filp->private_data; - drm_device_t *dev = priv->head->dev; struct list_head *list; drm_map_list_t *r_list = NULL; drm_vma_entry_t *pt, *prev; drm_map_t *map; - drm_map_t request; int found_maps = 0; - if (copy_from_user(&request, (drm_map_t __user *)arg, - sizeof(request))) { - return -EFAULT; - } - down(&dev->struct_sem); list = &dev->maplist->head; list_for_each(list, &dev->maplist->head) { r_list = list_entry(list, drm_map_list_t, head); if(r_list->map && - r_list->map->offset == (unsigned long) request.handle && + r_list->map->handle == handle && r_list->map->flags & _DRM_REMOVABLE) break; } @@ -319,6 +338,21 @@ int drm_rmmap(struct inode *inode, struct file *filp, up(&dev->struct_sem); return 0; } +EXPORT_SYMBOL(drm_rmmap); + +int drm_rmmap_ioctl(struct inode *inode, struct file *filp, + unsigned int cmd, unsigned long arg) +{ + drm_file_t *priv = filp->private_data; + drm_device_t *dev = priv->head->dev; + drm_map_t request; + + if (copy_from_user(&request, (drm_map_t __user *)arg, sizeof(request))) { + return -EFAULT; + } + + return drm_rmmap(dev, request.handle); +} /** * Cleanup after an error on one of the addbufs() functions. 
diff --git a/drivers/char/drm/drm_drv.c b/drivers/char/drm/drm_drv.c index ab172ea8e98a..9b09b105e1d6 100644 --- a/drivers/char/drm/drm_drv.c +++ b/drivers/char/drm/drm_drv.c @@ -70,8 +70,8 @@ static drm_ioctl_desc_t drm_ioctls[] = { [DRM_IOCTL_NR(DRM_IOCTL_UNBLOCK)] = { drm_noop, 1, 1 }, [DRM_IOCTL_NR(DRM_IOCTL_AUTH_MAGIC)] = { drm_authmagic, 1, 1 }, - [DRM_IOCTL_NR(DRM_IOCTL_ADD_MAP)] = { drm_addmap, 1, 1 }, - [DRM_IOCTL_NR(DRM_IOCTL_RM_MAP)] = { drm_rmmap, 1, 0 }, + [DRM_IOCTL_NR(DRM_IOCTL_ADD_MAP)] = { drm_addmap_ioctl,1, 1 }, + [DRM_IOCTL_NR(DRM_IOCTL_RM_MAP)] = { drm_rmmap_ioctl, 1, 0 }, [DRM_IOCTL_NR(DRM_IOCTL_SET_SAREA_CTX)] = { drm_setsareactx, 1, 1 }, [DRM_IOCTL_NR(DRM_IOCTL_GET_SAREA_CTX)] = { drm_getsareactx, 1, 0 }, @@ -102,10 +102,10 @@ static drm_ioctl_desc_t drm_ioctls[] = { [DRM_IOCTL_NR(DRM_IOCTL_CONTROL)] = { drm_control, 1, 1 }, #if __OS_HAS_AGP - [DRM_IOCTL_NR(DRM_IOCTL_AGP_ACQUIRE)] = { drm_agp_acquire, 1, 1 }, - [DRM_IOCTL_NR(DRM_IOCTL_AGP_RELEASE)] = { drm_agp_release, 1, 1 }, - [DRM_IOCTL_NR(DRM_IOCTL_AGP_ENABLE)] = { drm_agp_enable, 1, 1 }, - [DRM_IOCTL_NR(DRM_IOCTL_AGP_INFO)] = { drm_agp_info, 1, 0 }, + [DRM_IOCTL_NR(DRM_IOCTL_AGP_ACQUIRE)] = { drm_agp_acquire_ioctl, 1, 1 }, + [DRM_IOCTL_NR(DRM_IOCTL_AGP_RELEASE)] = { drm_agp_release_ioctl, 1, 1 }, + [DRM_IOCTL_NR(DRM_IOCTL_AGP_ENABLE)] = { drm_agp_enable_ioctl, 1, 1 }, + [DRM_IOCTL_NR(DRM_IOCTL_AGP_INFO)] = { drm_agp_info_ioctl, 1, 0 }, [DRM_IOCTL_NR(DRM_IOCTL_AGP_ALLOC)] = { drm_agp_alloc, 1, 1 }, [DRM_IOCTL_NR(DRM_IOCTL_AGP_FREE)] = { drm_agp_free, 1, 1 }, [DRM_IOCTL_NR(DRM_IOCTL_AGP_BIND)] = { drm_agp_bind, 1, 1 }, @@ -178,7 +178,8 @@ int drm_takedown( drm_device_t *dev ) } dev->agp->memory = NULL; - if ( dev->agp->acquired ) drm_agp_do_release(dev); + if (dev->agp->acquired) + drm_agp_release(dev); dev->agp->acquired = 0; dev->agp->enabled = 0; From aff138ab8ec340c23e7c6e1a95c1518ee832a8c6 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Sun, 10 Jul 2005 16:58:40 +1000 Subject: 
[PATCH 008/584] drm: fix minor function header issue From: Ian Romanick Signed-off-by: Dave Airlie --- drivers/char/drm/drm_drv.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/char/drm/drm_drv.c b/drivers/char/drm/drm_drv.c index 9b09b105e1d6..733af58fb3ac 100644 --- a/drivers/char/drm/drm_drv.c +++ b/drivers/char/drm/drm_drv.c @@ -127,7 +127,7 @@ static drm_ioctl_desc_t drm_ioctls[] = { * * Frees every resource in \p dev. * - * \sa drm_device and setup(). + * \sa drm_device */ int drm_takedown( drm_device_t *dev ) { @@ -321,7 +321,7 @@ EXPORT_SYMBOL(drm_init); * * Cleans up all DRM device, calling takedown(). * - * \sa drm_init(). + * \sa drm_init */ static void drm_cleanup( drm_device_t *dev ) { From d84f76d37c5eebb94c48337958d5a2ff2965c02d Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Sun, 10 Jul 2005 17:04:22 +1000 Subject: [PATCH 009/584] drm: export symbols for use by drivers This just exports symbols for use in drivers. From: Ian Romanick Signed-off-by: Dave Airlie --- drivers/char/drm/drm_agpsupport.c | 1 + drivers/char/drm/drm_bufs.c | 2 ++ drivers/char/drm/drm_memory.c | 4 ++++ 3 files changed, 7 insertions(+) diff --git a/drivers/char/drm/drm_agpsupport.c b/drivers/char/drm/drm_agpsupport.c index 10c8b4daec51..d413da00329d 100644 --- a/drivers/char/drm/drm_agpsupport.c +++ b/drivers/char/drm/drm_agpsupport.c @@ -459,6 +459,7 @@ int drm_agp_bind_memory(DRM_AGP_MEM *handle, off_t start) return -EINVAL; return agp_bind_memory(handle, start); } +EXPORT_SYMBOL(drm_agp_bind_memory); /** Calls agp_unbind_memory() */ int drm_agp_unbind_memory(DRM_AGP_MEM *handle) diff --git a/drivers/char/drm/drm_bufs.c b/drivers/char/drm/drm_bufs.c index cd4636f7f187..d0b1eb6ba6ca 100644 --- a/drivers/char/drm/drm_bufs.c +++ b/drivers/char/drm/drm_bufs.c @@ -561,6 +561,7 @@ static int drm_addbufs_agp(drm_device_t *dev, drm_buf_desc_t *request) atomic_dec( &dev->buf_alloc ); return 0; } +EXPORT_SYMBOL(drm_addbufs_agp); #endif /* __OS_HAS_AGP */ 
static int drm_addbufs_pci(drm_device_t *dev, drm_buf_desc_t *request) @@ -781,6 +782,7 @@ static int drm_addbufs_pci(drm_device_t *dev, drm_buf_desc_t *request) return 0; } +EXPORT_SYMBOL(drm_addbufs_pci); static int drm_addbufs_sg(drm_device_t *dev, drm_buf_desc_t *request) { diff --git a/drivers/char/drm/drm_memory.c b/drivers/char/drm/drm_memory.c index ace3d42f4407..ef66494b854d 100644 --- a/drivers/char/drm/drm_memory.c +++ b/drivers/char/drm/drm_memory.c @@ -146,23 +146,27 @@ DRM_AGP_MEM *drm_alloc_agp(struct agp_bridge_data *bridge, int pages, u32 type) { return drm_agp_allocate_memory(bridge, pages, type); } +EXPORT_SYMBOL(drm_alloc_agp); /** Wrapper around agp_free_memory() */ int drm_free_agp(DRM_AGP_MEM *handle, int pages) { return drm_agp_free_memory(handle) ? 0 : -EINVAL; } +EXPORT_SYMBOL(drm_free_agp); /** Wrapper around agp_bind_memory() */ int drm_bind_agp(DRM_AGP_MEM *handle, unsigned int start) { return drm_agp_bind_memory(handle, start); } +EXPORT_SYMBOL(drm_bind_agp); /** Wrapper around agp_unbind_memory() */ int drm_unbind_agp(DRM_AGP_MEM *handle) { return drm_agp_unbind_memory(handle); } +EXPORT_SYMBOL(drm_unbind_agp); #endif /* agp */ #endif /* debug_memory */ From ceb9c27aa7d61c70f4c75f017d9fbc9de50034f1 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Sun, 10 Jul 2005 17:07:23 +1000 Subject: [PATCH 010/584] drm: destatic exported function. 
Signed-off-by: Dave Airlie --- drivers/char/drm/drmP.h | 2 ++ drivers/char/drm/drm_bufs.c | 4 ++-- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/char/drm/drmP.h b/drivers/char/drm/drmP.h index d16d07e28b4c..8e060a2cc3db 100644 --- a/drivers/char/drm/drmP.h +++ b/drivers/char/drm/drmP.h @@ -889,6 +889,8 @@ extern int drm_lock_free(drm_device_t *dev, unsigned int context); /* Buffer management support (drm_bufs.h) */ +extern int drm_addbufs_agp(drm_device_t *dev, drm_buf_desc_t *request); +extern int drm_addbufs_pci(drm_device_t *dev, drm_buf_desc_t *request); extern int drm_addmap(drm_device_t *dev, unsigned int offset, unsigned int size, drm_map_type_t type, drm_map_flags_t flags, drm_map_t **map_ptr); diff --git a/drivers/char/drm/drm_bufs.c b/drivers/char/drm/drm_bufs.c index d0b1eb6ba6ca..06b01215fdf3 100644 --- a/drivers/char/drm/drm_bufs.c +++ b/drivers/char/drm/drm_bufs.c @@ -410,7 +410,7 @@ static void drm_cleanup_buf_error(drm_device_t *dev, drm_buf_entry_t *entry) * reallocates the buffer list of the same size order to accommodate the new * buffers. */ -static int drm_addbufs_agp(drm_device_t *dev, drm_buf_desc_t *request) +int drm_addbufs_agp(drm_device_t *dev, drm_buf_desc_t *request) { drm_device_dma_t *dma = dev->dma; drm_buf_entry_t *entry; @@ -564,7 +564,7 @@ static int drm_addbufs_agp(drm_device_t *dev, drm_buf_desc_t *request) EXPORT_SYMBOL(drm_addbufs_agp); #endif /* __OS_HAS_AGP */ -static int drm_addbufs_pci(drm_device_t *dev, drm_buf_desc_t *request) +int drm_addbufs_pci(drm_device_t *dev, drm_buf_desc_t *request) { drm_device_dma_t *dma = dev->dma; int count; From cda173806644d2af22ffd9896eed8ef99b97d356 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Sun, 10 Jul 2005 17:31:26 +1000 Subject: [PATCH 011/584] drm: add test for AGP devices and driver override for it. Added device_is_agp callback to drm_driver. This function is called by the platform-specific drm_device_is_agp function. 
Added implementation of this function to the Linux-specific portion of the MGA driver to detect PCI G450 cards. Added code to the Linux-specific portion of the generic DRM layer to not initialize AGP infrastructure if the card is not AGP (this matches what already existed in BSD). Fix up i810/i830 and i915 drivers to always return AGP as they don't always report the capability. Fix the MGA to not report AGP for a card that has an AGP chip behind a PCI bridge. From: Ian Romanick, Dave Airlie, Alan Hourihane Signed-off-by: Dave Airlie --- drivers/char/drm/drmP.h | 28 ++++++++++++++++++++++++++++ drivers/char/drm/drm_stub.c | 3 ++- drivers/char/drm/i810_dma.c | 16 ++++++++++++++++ drivers/char/drm/i810_drv.c | 1 + drivers/char/drm/i810_drv.h | 1 + drivers/char/drm/i830_dma.c | 16 ++++++++++++++++ drivers/char/drm/i830_drv.c | 1 + drivers/char/drm/i830_drv.h | 1 + drivers/char/drm/i915_dma.c | 16 ++++++++++++++++ drivers/char/drm/i915_drv.c | 1 + drivers/char/drm/i915_drv.h | 1 + drivers/char/drm/mga_drv.c | 37 +++++++++++++++++++++++++++++++++++++ drivers/char/drm/mga_drv.h | 4 ++-- 13 files changed, 123 insertions(+), 3 deletions(-) diff --git a/drivers/char/drm/drmP.h b/drivers/char/drm/drmP.h index 8e060a2cc3db..a9b61864feba 100644 --- a/drivers/char/drm/drmP.h +++ b/drivers/char/drm/drmP.h @@ -586,7 +586,22 @@ struct drm_driver { int (*kernel_context_switch)(struct drm_device *dev, int old, int new); void (*kernel_context_switch_unlock)(struct drm_device *dev, drm_lock_t *lock); int (*vblank_wait)(struct drm_device *dev, unsigned int *sequence); + + /** + * Called by \c drm_device_is_agp. Typically used to determine if a + * card is really attached to AGP or not. + * + * \param dev DRM device handle + * + * \returns + * One of three values is returned depending on whether or not the + * card is absolutely \b not AGP (return of 0), absolutely \b is AGP + * (return of 1), or may or may not be AGP (return of 2).
+ */ + int (*device_is_agp) (struct drm_device * dev); + /* these have to be filled in */ + int (*postinit)(struct drm_device *, unsigned long flags); irqreturn_t (*irq_handler)( DRM_IRQ_ARGS ); void (*irq_preinstall)(struct drm_device *dev); @@ -1041,6 +1056,19 @@ static __inline__ struct drm_map *drm_core_findmap(struct drm_device *dev, unsig return NULL; } +static __inline__ int drm_device_is_agp(drm_device_t *dev) +{ + if ( dev->driver->device_is_agp != NULL ) { + int err = (*dev->driver->device_is_agp)( dev ); + + if (err != 2) { + return err; + } + } + + return pci_find_capability(dev->pdev, PCI_CAP_ID_AGP); +} + static __inline__ void drm_core_dropmap(struct drm_map *map) { } diff --git a/drivers/char/drm/drm_stub.c b/drivers/char/drm/drm_stub.c index 48829a1a086a..068ca9a8b0b4 100644 --- a/drivers/char/drm/drm_stub.c +++ b/drivers/char/drm/drm_stub.c @@ -91,7 +91,8 @@ static int drm_fill_in_dev(drm_device_t *dev, struct pci_dev *pdev, const struct goto error_out_unreg; if (drm_core_has_AGP(dev)) { - dev->agp = drm_agp_init(dev); + if (drm_device_is_agp(dev)) + dev->agp = drm_agp_init(dev); if (drm_core_check_feature(dev, DRIVER_REQUIRE_AGP) && (dev->agp == NULL)) { DRM_ERROR( "Cannot initialize the agpgart module.\n" ); retcode = -EINVAL; diff --git a/drivers/char/drm/i810_dma.c b/drivers/char/drm/i810_dma.c index 18e0b7622893..0a9ac1f2e215 100644 --- a/drivers/char/drm/i810_dma.c +++ b/drivers/char/drm/i810_dma.c @@ -1383,3 +1383,19 @@ drm_ioctl_desc_t i810_ioctls[] = { }; int i810_max_ioctl = DRM_ARRAY_SIZE(i810_ioctls); + +/** + * Determine if the device really is AGP or not. + * + * All Intel graphics chipsets are treated as AGP, even if they are really + * PCI-e. + * + * \param dev The device to be tested. + * + * \returns + * A value of 1 is always retured to indictate every i810 is AGP. 
+ */ +int i810_driver_device_is_agp(drm_device_t * dev) +{ + return 1; +} diff --git a/drivers/char/drm/i810_drv.c b/drivers/char/drm/i810_drv.c index ff51b3259af9..00609329d578 100644 --- a/drivers/char/drm/i810_drv.c +++ b/drivers/char/drm/i810_drv.c @@ -84,6 +84,7 @@ static struct drm_driver driver = { .dev_priv_size = sizeof(drm_i810_buf_priv_t), .pretakedown = i810_driver_pretakedown, .prerelease = i810_driver_prerelease, + .device_is_agp = i810_driver_device_is_agp, .release = i810_driver_release, .dma_quiescent = i810_driver_dma_quiescent, .reclaim_buffers = i810_reclaim_buffers, diff --git a/drivers/char/drm/i810_drv.h b/drivers/char/drm/i810_drv.h index 1b40538d1725..62ee4f58c59a 100644 --- a/drivers/char/drm/i810_drv.h +++ b/drivers/char/drm/i810_drv.h @@ -120,6 +120,7 @@ extern int i810_driver_dma_quiescent(drm_device_t *dev); extern void i810_driver_release(drm_device_t *dev, struct file *filp); extern void i810_driver_pretakedown(drm_device_t *dev); extern void i810_driver_prerelease(drm_device_t *dev, DRMFILE filp); +extern int i810_driver_device_is_agp(drm_device_t * dev); #define I810_BASE(reg) ((unsigned long) \ dev_priv->mmio_map->handle) diff --git a/drivers/char/drm/i830_dma.c b/drivers/char/drm/i830_dma.c index dc7733035864..80d8966397c1 100644 --- a/drivers/char/drm/i830_dma.c +++ b/drivers/char/drm/i830_dma.c @@ -1586,3 +1586,19 @@ drm_ioctl_desc_t i830_ioctls[] = { }; int i830_max_ioctl = DRM_ARRAY_SIZE(i830_ioctls); + +/** + * Determine if the device really is AGP or not. + * + * All Intel graphics chipsets are treated as AGP, even if they are really + * PCI-e. + * + * \param dev The device to be tested. + * + * \returns + * A value of 1 is always retured to indictate every i8xx is AGP. 
+ */ +int i830_driver_device_is_agp(drm_device_t * dev) +{ + return 1; +} diff --git a/drivers/char/drm/i830_drv.c b/drivers/char/drm/i830_drv.c index bc36be76b8b2..0da9cd19919e 100644 --- a/drivers/char/drm/i830_drv.c +++ b/drivers/char/drm/i830_drv.c @@ -88,6 +88,7 @@ static struct drm_driver driver = { .dev_priv_size = sizeof(drm_i830_buf_priv_t), .pretakedown = i830_driver_pretakedown, .prerelease = i830_driver_prerelease, + .device_is_agp = i830_driver_device_is_agp, .release = i830_driver_release, .dma_quiescent = i830_driver_dma_quiescent, .reclaim_buffers = i830_reclaim_buffers, diff --git a/drivers/char/drm/i830_drv.h b/drivers/char/drm/i830_drv.h index df7746131dea..63f96a8b6a4a 100644 --- a/drivers/char/drm/i830_drv.h +++ b/drivers/char/drm/i830_drv.h @@ -137,6 +137,7 @@ extern void i830_driver_pretakedown(drm_device_t *dev); extern void i830_driver_release(drm_device_t *dev, struct file *filp); extern int i830_driver_dma_quiescent(drm_device_t *dev); extern void i830_driver_prerelease(drm_device_t *dev, DRMFILE filp); +extern int i830_driver_device_is_agp(drm_device_t * dev); #define I830_BASE(reg) ((unsigned long) \ dev_priv->mmio_map->handle) diff --git a/drivers/char/drm/i915_dma.c b/drivers/char/drm/i915_dma.c index 759f22943eb1..34f552f90c4a 100644 --- a/drivers/char/drm/i915_dma.c +++ b/drivers/char/drm/i915_dma.c @@ -732,3 +732,19 @@ drm_ioctl_desc_t i915_ioctls[] = { }; int i915_max_ioctl = DRM_ARRAY_SIZE(i915_ioctls); + +/** + * Determine if the device really is AGP or not. + * + * All Intel graphics chipsets are treated as AGP, even if they are really + * PCI-e. + * + * \param dev The device to be tested. + * + * \returns + * A value of 1 is always retured to indictate every i9x5 is AGP. 
+ */ +int i915_driver_device_is_agp(drm_device_t * dev) +{ + return 1; +} diff --git a/drivers/char/drm/i915_drv.c b/drivers/char/drm/i915_drv.c index 1f59d3fc79bc..106b9ec02213 100644 --- a/drivers/char/drm/i915_drv.c +++ b/drivers/char/drm/i915_drv.c @@ -79,6 +79,7 @@ static struct drm_driver driver = { DRIVER_HAVE_IRQ | DRIVER_IRQ_SHARED, .pretakedown = i915_driver_pretakedown, .prerelease = i915_driver_prerelease, + .device_is_agp = i915_driver_device_is_agp, .irq_preinstall = i915_driver_irq_preinstall, .irq_postinstall = i915_driver_irq_postinstall, .irq_uninstall = i915_driver_irq_uninstall, diff --git a/drivers/char/drm/i915_drv.h b/drivers/char/drm/i915_drv.h index 93080868d18f..70ed4e68eac8 100644 --- a/drivers/char/drm/i915_drv.h +++ b/drivers/char/drm/i915_drv.h @@ -103,6 +103,7 @@ typedef struct drm_i915_private { extern void i915_kernel_lost_context(drm_device_t * dev); extern void i915_driver_pretakedown(drm_device_t *dev); extern void i915_driver_prerelease(drm_device_t *dev, DRMFILE filp); +extern int i915_driver_device_is_agp(drm_device_t *dev); /* i915_irq.c */ extern int i915_irq_emit(DRM_IOCTL_ARGS); diff --git a/drivers/char/drm/mga_drv.c b/drivers/char/drm/mga_drv.c index 844cca9cb29d..94af13bc66a4 100644 --- a/drivers/char/drm/mga_drv.c +++ b/drivers/char/drm/mga_drv.c @@ -38,6 +38,7 @@ #include "drm_pciids.h" +static int mga_driver_device_is_agp(drm_device_t * dev); static int postinit( struct drm_device *dev, unsigned long flags ) { dev->counters += 3; @@ -81,6 +82,7 @@ static struct drm_driver driver = { .driver_features = DRIVER_USE_AGP | DRIVER_REQUIRE_AGP | DRIVER_USE_MTRR | DRIVER_HAVE_DMA | DRIVER_HAVE_IRQ | DRIVER_IRQ_SHARED | DRIVER_IRQ_VBL, .pretakedown = mga_driver_pretakedown, .dma_quiescent = mga_driver_dma_quiescent, + .device_is_agp = mga_driver_device_is_agp, .vblank_wait = mga_driver_vblank_wait, .irq_preinstall = mga_driver_irq_preinstall, .irq_postinstall = mga_driver_irq_postinstall, @@ -128,3 +130,38 @@ 
module_exit(mga_exit); MODULE_AUTHOR( DRIVER_AUTHOR ); MODULE_DESCRIPTION( DRIVER_DESC ); MODULE_LICENSE("GPL and additional rights"); + +/** + * Determine if the device really is AGP or not. + * + * In addition to the usual tests performed by \c drm_device_is_agp, this + * function detects PCI G450 cards that appear to the system exactly like + * AGP G450 cards. + * + * \param dev The device to be tested. + * + * \returns + * If the device is a PCI G450, zero is returned. Otherwise 2 is returned. + */ +int mga_driver_device_is_agp(drm_device_t * dev) +{ + const struct pci_dev * const pdev = dev->pdev; + + + /* There are PCI versions of the G450. These cards have the + * same PCI ID as the AGP G450, but have an additional PCI-to-PCI + * bridge chip. We detect these cards, which are not currently + * supported by this driver, by looking at the device ID of the + * bus the "card" is on. If vendor is 0x3388 (Hint Corp) and the + * device is 0x0021 (HB6 Universal PCI-PCI bridge), we reject the + * device. 
+ */ + + if ( (pdev->device == 0x0525) + && (pdev->bus->self->vendor == 0x3388) + && (pdev->bus->self->device == 0x0021) ) { + return 0; + } + + return 2; +} diff --git a/drivers/char/drm/mga_drv.h b/drivers/char/drm/mga_drv.h index 9412e2816eb7..38f913905e04 100644 --- a/drivers/char/drm/mga_drv.h +++ b/drivers/char/drm/mga_drv.h @@ -38,11 +38,11 @@ #define DRIVER_NAME "mga" #define DRIVER_DESC "Matrox G200/G400" -#define DRIVER_DATE "20021029" +#define DRIVER_DATE "20051013" #define DRIVER_MAJOR 3 #define DRIVER_MINOR 1 -#define DRIVER_PATCHLEVEL 0 +#define DRIVER_PATCHLEVEL 1 typedef struct drm_mga_primary_buffer { u8 *start; From 93f453f3ffd8f4dbb0311b58b854e7655da3d601 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Sun, 10 Jul 2005 17:45:34 +1000 Subject: [PATCH 012/584] drm: add new mga ids and types From: Ian Romanick Signed-off-by: Dave Airlie --- drivers/char/drm/drm_pciids.h | 7 ++++--- drivers/char/drm/mga_drm.h | 3 ++- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/char/drm/drm_pciids.h b/drivers/char/drm/drm_pciids.h index 70ca4fa55c9d..4f317ec092ee 100644 --- a/drivers/char/drm/drm_pciids.h +++ b/drivers/char/drm/drm_pciids.h @@ -116,9 +116,10 @@ {0, 0, 0} #define mga_PCI_IDS \ - {0x102b, 0x0521, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0}, \ - {0x102b, 0x0525, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0}, \ - {0x102b, 0x2527, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0}, \ + {0x102b, 0x0520, PCI_ANY_ID, PCI_ANY_ID, 0, 0, MGA_CARD_TYPE_G200}, \ + {0x102b, 0x0521, PCI_ANY_ID, PCI_ANY_ID, 0, 0, MGA_CARD_TYPE_G200}, \ + {0x102b, 0x0525, PCI_ANY_ID, PCI_ANY_ID, 0, 0, MGA_CARD_TYPE_G400}, \ + {0x102b, 0x2527, PCI_ANY_ID, PCI_ANY_ID, 0, 0, MGA_CARD_TYPE_G550}, \ {0, 0, 0} #define mach64_PCI_IDS \ diff --git a/drivers/char/drm/mga_drm.h b/drivers/char/drm/mga_drm.h index 521d4451d012..4500e6e4920a 100644 --- a/drivers/char/drm/mga_drm.h +++ b/drivers/char/drm/mga_drm.h @@ -73,7 +73,8 @@ #define MGA_CARD_TYPE_G200 1 #define MGA_CARD_TYPE_G400 2 - +#define 
MGA_CARD_TYPE_G450 3 /* not currently used */ +#define MGA_CARD_TYPE_G550 4 #define MGA_FRONT 0x1 #define MGA_BACK 0x2 From b5d499cfdeebcb71f00f3513045796ccae718140 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Sun, 10 Jul 2005 18:17:42 +1000 Subject: [PATCH 013/584] drm: make drm_alloc_agp take a dev arg. Signed-off-by: Dave Airlie --- drivers/char/drm/drmP.h | 2 +- drivers/char/drm/drm_agpsupport.c | 2 +- drivers/char/drm/drm_memory.c | 4 ++-- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/char/drm/drmP.h b/drivers/char/drm/drmP.h index a9b61864feba..a7fdcece6129 100644 --- a/drivers/char/drm/drmP.h +++ b/drivers/char/drm/drmP.h @@ -829,7 +829,7 @@ extern void *drm_ioremap_nocache(unsigned long offset, unsigned long size, drm_device_t *dev); extern void drm_ioremapfree(void *pt, unsigned long size, drm_device_t *dev); -extern DRM_AGP_MEM *drm_alloc_agp(struct agp_bridge_data *bridge, int pages, u32 type); +extern DRM_AGP_MEM *drm_alloc_agp(drm_device_t *dev, int pages, u32 type); extern int drm_free_agp(DRM_AGP_MEM *handle, int pages); extern int drm_bind_agp(DRM_AGP_MEM *handle, unsigned int start); extern int drm_unbind_agp(DRM_AGP_MEM *handle); diff --git a/drivers/char/drm/drm_agpsupport.c b/drivers/char/drm/drm_agpsupport.c index d413da00329d..ffb4acaefe84 100644 --- a/drivers/char/drm/drm_agpsupport.c +++ b/drivers/char/drm/drm_agpsupport.c @@ -229,7 +229,7 @@ int drm_agp_alloc(struct inode *inode, struct file *filp, pages = (request.size + PAGE_SIZE - 1) / PAGE_SIZE; type = (u32) request.type; - if (!(memory = drm_alloc_agp(dev->agp->bridge, pages, type))) { + if (!(memory = drm_alloc_agp(dev, pages, type))) { drm_free(entry, sizeof(*entry), DRM_MEM_AGPLISTS); return -ENOMEM; } diff --git a/drivers/char/drm/drm_memory.c b/drivers/char/drm/drm_memory.c index ef66494b854d..ff483fb418aa 100644 --- a/drivers/char/drm/drm_memory.c +++ b/drivers/char/drm/drm_memory.c @@ -142,9 +142,9 @@ void drm_free_pages(unsigned long address, int 
order, int area) #if __OS_HAS_AGP /** Wrapper around agp_allocate_memory() */ -DRM_AGP_MEM *drm_alloc_agp(struct agp_bridge_data *bridge, int pages, u32 type) +DRM_AGP_MEM *drm_alloc_agp(drm_device_t *dev, int pages, u32 type) { - return drm_agp_allocate_memory(bridge, pages, type); + return drm_agp_allocate_memory(dev->agp->bridge, pages, type); } EXPORT_SYMBOL(drm_alloc_agp); From 6795c985a648d1e90b367cc1387c18205ecca4b8 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Sun, 10 Jul 2005 18:20:09 +1000 Subject: [PATCH 014/584] Add support for PCI MGA cards to MGA DRM. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch adds several new ioctls and a new get_param query to support PCI MGA cards. Two ioctls were added to implement interrupt based waiting. With this change, the client-side driver no longer needs to map the primary DMA region or the MMIO region. Previously, end-of-frame waiting was done by busy waiting in the client-side driver until one of the MMIO registers (the current DMA pointer) matched a pointer to the end of primary DMA space. By using interrupts, the busy waiting and the extra mappings are removed. A third ioctl was added to bootstrap DMA. This ioctl, which is used by the X-server, moves a *LOT* of code from the X-server into the kernel. This allows the kernel to do whatever needs to be done to set up DMA buffers. The entire process and the locations of the buffers are hidden from user-mode. Additionally, a get_param query was added to differentiate between G4x0 cards and G550 cards. A gap was left in the numbering sequence so that, if needed, G450 cards could be distinguished from G400 cards. According to Ville Syrjälä, the G4x0 cards and the G550 cards handle anisotropic filtering differently. This seems the most compatible way to let the client-side driver know which card it's on. Doing this very small change now eliminates the need to bump the DRM minor version twice.
http://marc.theaimsgroup.com/?l=dri-devel&m=106625815319773&w=2 (airlied - this may not work at this point, I think the follow on buffer cleanup patches will be needed) From: Ian Romanick Signed-off-by: Dave Airlie --- drivers/char/drm/Kconfig | 2 +- drivers/char/drm/mga_dma.c | 601 ++++++++++++++++++++++++++++------- drivers/char/drm/mga_drm.h | 95 +++++- drivers/char/drm/mga_drv.h | 90 ++++-- drivers/char/drm/mga_irq.c | 72 ++++- drivers/char/drm/mga_state.c | 158 ++++++--- drivers/char/drm/mga_warp.c | 139 ++++---- 7 files changed, 884 insertions(+), 273 deletions(-) diff --git a/drivers/char/drm/Kconfig b/drivers/char/drm/Kconfig index 123417e43040..d6c50312aec6 100644 --- a/drivers/char/drm/Kconfig +++ b/drivers/char/drm/Kconfig @@ -82,7 +82,7 @@ endchoice config DRM_MGA tristate "Matrox g200/g400" - depends on DRM && AGP + depends on DRM help Choose this option if you have a Matrox G200, G400 or G450 graphics card. If M is selected, the module will be called mga. AGP diff --git a/drivers/char/drm/mga_dma.c b/drivers/char/drm/mga_dma.c index 832eaf8a5068..7899e281d062 100644 --- a/drivers/char/drm/mga_dma.c +++ b/drivers/char/drm/mga_dma.c @@ -23,18 +23,21 @@ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER * DEALINGS IN THE SOFTWARE. - * - * Authors: - * Rickard E. (Rik) Faith - * Jeff Hartmann - * Keith Whitwell - * - * Rewritten by: - * Gareth Hughes + */ + +/** + * \file mga_dma.c + * DMA support for MGA G200 / G400. + * + * \author Rickard E. 
(Rik) Faith + * \author Jeff Hartmann + * \author Keith Whitwell + * \author Gareth Hughes */ #include "drmP.h" #include "drm.h" +#include "drm_sarea.h" #include "mga_drm.h" #include "mga_drv.h" @@ -148,7 +151,7 @@ void mga_do_dma_flush( drm_mga_private_t *dev_priv ) DRM_DEBUG( " space = 0x%06x\n", primary->space ); mga_flush_write_combine(); - MGA_WRITE( MGA_PRIMEND, tail | MGA_PAGPXFER ); + MGA_WRITE(MGA_PRIMEND, tail | dev_priv->dma_access); DRM_DEBUG( "done.\n" ); } @@ -190,7 +193,7 @@ void mga_do_dma_wrap_start( drm_mga_private_t *dev_priv ) DRM_DEBUG( " space = 0x%06x\n", primary->space ); mga_flush_write_combine(); - MGA_WRITE( MGA_PRIMEND, tail | MGA_PAGPXFER ); + MGA_WRITE(MGA_PRIMEND, tail | dev_priv->dma_access); set_bit( 0, &primary->wrapped ); DRM_DEBUG( "done.\n" ); @@ -396,23 +399,383 @@ int mga_freelist_put( drm_device_t *dev, drm_buf_t *buf ) * DMA initialization, cleanup */ + +int mga_driver_preinit(drm_device_t *dev, unsigned long flags) +{ + drm_mga_private_t * dev_priv; + + dev_priv = drm_alloc(sizeof(drm_mga_private_t), DRM_MEM_DRIVER); + if (!dev_priv) + return DRM_ERR(ENOMEM); + + dev->dev_private = (void *)dev_priv; + memset(dev_priv, 0, sizeof(drm_mga_private_t)); + + dev_priv->usec_timeout = MGA_DEFAULT_USEC_TIMEOUT; + dev_priv->chipset = flags; + + return 0; +} + +/** + * Bootstrap the driver for AGP DMA. + * + * \todo + * Investigate whether there is any benifit to storing the WARP microcode in + * AGP memory. If not, the microcode may as well always be put in PCI + * memory. + * + * \todo + * This routine needs to set dma_bs->agp_mode to the mode actually configured + * in the hardware. Looking just at the Linux AGP driver code, I don't see + * an easy way to determine this. 
+ * + * \sa mga_do_dma_bootstrap, mga_do_pci_dma_bootstrap + */ +static int mga_do_agp_dma_bootstrap(drm_device_t * dev, + drm_mga_dma_bootstrap_t * dma_bs) +{ + drm_mga_private_t * const dev_priv = (drm_mga_private_t *) dev->dev_private; + const unsigned int warp_size = mga_warp_microcode_size(dev_priv); + int err; + unsigned offset; + const unsigned secondary_size = dma_bs->secondary_bin_count + * dma_bs->secondary_bin_size; + const unsigned agp_size = (dma_bs->agp_size << 20); + drm_buf_desc_t req; + drm_agp_mode_t mode; + drm_agp_info_t info; + + + /* Acquire AGP. */ + err = drm_agp_acquire(dev); + if (err) { + DRM_ERROR("Unable to acquire AGP\n"); + return err; + } + + err = drm_agp_info(dev, &info); + if (err) { + DRM_ERROR("Unable to get AGP info\n"); + return err; + } + + mode.mode = (info.mode & ~0x07) | dma_bs->agp_mode; + err = drm_agp_enable(dev, mode); + if (err) { + DRM_ERROR("Unable to enable AGP (mode = 0x%lx)\n", mode.mode); + return err; + } + + + /* In addition to the usual AGP mode configuration, the G200 AGP cards + * need to have the AGP mode "manually" set. + */ + + if (dev_priv->chipset == MGA_CARD_TYPE_G200) { + if (mode.mode & 0x02) { + MGA_WRITE(MGA_AGP_PLL, MGA_AGP2XPLL_ENABLE); + } + else { + MGA_WRITE(MGA_AGP_PLL, MGA_AGP2XPLL_DISABLE); + } + } + + + /* Allocate and bind AGP memory. 
*/ + dev_priv->agp_pages = agp_size / PAGE_SIZE; + dev_priv->agp_mem = drm_alloc_agp( dev, dev_priv->agp_pages, 0 ); + if (dev_priv->agp_mem == NULL) { + dev_priv->agp_pages = 0; + DRM_ERROR("Unable to allocate %uMB AGP memory\n", + dma_bs->agp_size); + return DRM_ERR(ENOMEM); + } + + err = drm_bind_agp( dev_priv->agp_mem, 0 ); + if (err) { + DRM_ERROR("Unable to bind AGP memory\n"); + return err; + } + + offset = 0; + err = drm_addmap( dev, offset, warp_size, + _DRM_AGP, _DRM_READ_ONLY, & dev_priv->warp ); + if (err) { + DRM_ERROR("Unable to map WARP microcode\n"); + return err; + } + + offset += warp_size; + err = drm_addmap( dev, offset, dma_bs->primary_size, + _DRM_AGP, _DRM_READ_ONLY, & dev_priv->primary ); + if (err) { + DRM_ERROR("Unable to map primary DMA region\n"); + return err; + } + + offset += dma_bs->primary_size; + err = drm_addmap( dev, offset, secondary_size, + _DRM_AGP, 0, & dev->agp_buffer_map ); + if (err) { + DRM_ERROR("Unable to map secondary DMA region\n"); + return err; + } + + (void) memset( &req, 0, sizeof(req) ); + req.count = dma_bs->secondary_bin_count; + req.size = dma_bs->secondary_bin_size; + req.flags = _DRM_AGP_BUFFER; + req.agp_start = offset; + + err = drm_addbufs_agp( dev, & req ); + if (err) { + DRM_ERROR("Unable to add secondary DMA buffers\n"); + return err; + } + + offset += secondary_size; + err = drm_addmap( dev, offset, agp_size - offset, + _DRM_AGP, 0, & dev_priv->agp_textures ); + if (err) { + DRM_ERROR("Unable to map AGP texture region\n"); + return err; + } + + drm_core_ioremap(dev_priv->warp, dev); + drm_core_ioremap(dev_priv->primary, dev); + drm_core_ioremap(dev->agp_buffer_map, dev); + + if (!dev_priv->warp->handle || + !dev_priv->primary->handle || !dev->agp_buffer_map->handle) { + DRM_ERROR("failed to ioremap agp regions! 
(%p, %p, %p)\n", + dev_priv->warp->handle, dev_priv->primary->handle, + dev->agp_buffer_map->handle); + return DRM_ERR(ENOMEM); + } + + dev_priv->dma_access = MGA_PAGPXFER; + dev_priv->wagp_enable = MGA_WAGP_ENABLE; + + DRM_INFO("Initialized card for AGP DMA.\n"); + return 0; +} + +/** + * Bootstrap the driver for PCI DMA. + * + * \todo + * The algorithm for decreasing the size of the primary DMA buffer could be + * better. The size should be rounded up to the nearest page size, then + * decrease the request size by a single page each pass through the loop. + * + * \todo + * Determine whether the maximum address passed to drm_pci_alloc is correct. + * The same goes for drm_addbufs_pci. + * + * \sa mga_do_dma_bootstrap, mga_do_agp_dma_bootstrap + */ +static int mga_do_pci_dma_bootstrap(drm_device_t * dev, + drm_mga_dma_bootstrap_t * dma_bs) +{ + drm_mga_private_t * const dev_priv = (drm_mga_private_t *) dev->dev_private; + const unsigned int warp_size = mga_warp_microcode_size(dev_priv); + unsigned int primary_size; + unsigned int bin_count; + int err; + drm_buf_desc_t req; + + + if (dev->dma == NULL) { + DRM_ERROR("dev->dma is NULL\n"); + return DRM_ERR(EFAULT); + } + + /* The proper alignment is 0x100 for this mapping */ + err = drm_addmap(dev, 0, warp_size, _DRM_CONSISTENT, + _DRM_READ_ONLY, &dev_priv->warp); + if (err != 0) { + DRM_ERROR("Unable to create mapping for WARP microcode\n"); + return err; + } + + /* Other than the bottom two bits being used to encode other + * information, there don't appear to be any restrictions on the + * alignment of the primary or secondary DMA buffers. 
+ */ + + for ( primary_size = dma_bs->primary_size + ; primary_size != 0 + ; primary_size >>= 1 ) { + /* The proper alignment for this mapping is 0x04 */ + err = drm_addmap(dev, 0, primary_size, _DRM_CONSISTENT, + _DRM_READ_ONLY, &dev_priv->primary); + if (!err) + break; + } + + if (err != 0) { + DRM_ERROR("Unable to allocate primary DMA region\n"); + return DRM_ERR(ENOMEM); + } + + if (dev_priv->primary->size != dma_bs->primary_size) { + DRM_INFO("Primary DMA buffer size reduced from %u to %u.\n", + dma_bs->primary_size, + (unsigned) dev_priv->primary->size); + dma_bs->primary_size = dev_priv->primary->size; + } + + for ( bin_count = dma_bs->secondary_bin_count + ; bin_count > 0 + ; bin_count-- ) { + (void) memset( &req, 0, sizeof(req) ); + req.count = bin_count; + req.size = dma_bs->secondary_bin_size; + + err = drm_addbufs_pci( dev, & req ); + if (!err) { + break; + } + } + + if (bin_count == 0) { + DRM_ERROR("Unable to add secondary DMA buffers\n"); + return err; + } + + if (bin_count != dma_bs->secondary_bin_count) { + DRM_INFO("Secondary PCI DMA buffer bin count reduced from %u " + "to %u.\n", dma_bs->secondary_bin_count, bin_count); + + dma_bs->secondary_bin_count = bin_count; + } + + dev_priv->dma_access = 0; + dev_priv->wagp_enable = 0; + + dma_bs->agp_mode = 0; + + DRM_INFO("Initialized card for PCI DMA.\n"); + return 0; +} + + +static int mga_do_dma_bootstrap(drm_device_t * dev, + drm_mga_dma_bootstrap_t * dma_bs) +{ + const int is_agp = (dma_bs->agp_mode != 0) && drm_device_is_agp(dev); + int err; + drm_mga_private_t * const dev_priv = + (drm_mga_private_t *) dev->dev_private; + + + dev_priv->used_new_dma_init = 1; + + /* The first steps are the same for both PCI and AGP based DMA. Map + * the cards MMIO registers and map a status page. 
+ */ + err = drm_addmap( dev, dev_priv->mmio_base, dev_priv->mmio_size, + _DRM_REGISTERS, _DRM_READ_ONLY, & dev_priv->mmio ); + if (err) { + DRM_ERROR("Unable to map MMIO region\n"); + return err; + } + + + err = drm_addmap( dev, 0, SAREA_MAX, _DRM_SHM, + _DRM_READ_ONLY | _DRM_LOCKED | _DRM_KERNEL, + & dev_priv->status ); + if (err) { + DRM_ERROR("Unable to map status region\n"); + return err; + } + + + /* The DMA initialization procedure is slightly different for PCI and + * AGP cards. AGP cards just allocate a large block of AGP memory and + * carve off portions of it for internal uses. The remaining memory + * is returned to user-mode to be used for AGP textures. + */ + + if (is_agp) { + err = mga_do_agp_dma_bootstrap(dev, dma_bs); + } + + /* If we attempted to initialize the card for AGP DMA but failed, + * clean-up any mess that may have been created. + */ + + if (err) { + mga_do_cleanup_dma(dev); + } + + + /* Not only do we want to try and initialized PCI cards for PCI DMA, + * but we also try to initialized AGP cards that could not be + * initialized for AGP DMA. This covers the case where we have an AGP + * card in a system with an unsupported AGP chipset. In that case the + * card will be detected as AGP, but we won't be able to allocate any + * AGP memory, etc. + */ + + if (!is_agp || err) { + err = mga_do_pci_dma_bootstrap(dev, dma_bs); + } + + + return err; +} + +int mga_dma_bootstrap(DRM_IOCTL_ARGS) +{ + DRM_DEVICE; + drm_mga_dma_bootstrap_t bootstrap; + int err; + + + DRM_COPY_FROM_USER_IOCTL(bootstrap, + (drm_mga_dma_bootstrap_t __user *) data, + sizeof(bootstrap)); + + err = mga_do_dma_bootstrap(dev, & bootstrap); + if (! 
err) { + static const int modes[] = { 0, 1, 2, 2, 4, 4, 4, 4 }; + const drm_mga_private_t * const dev_priv = + (drm_mga_private_t *) dev->dev_private; + + if (dev_priv->agp_textures != NULL) { + bootstrap.texture_handle = dev_priv->agp_textures->offset; + bootstrap.texture_size = dev_priv->agp_textures->size; + } + else { + bootstrap.texture_handle = 0; + bootstrap.texture_size = 0; + } + + bootstrap.agp_mode = modes[ bootstrap.agp_mode & 0x07 ]; + if (DRM_COPY_TO_USER( (void __user *) data, & bootstrap, + sizeof(bootstrap))) { + err = DRM_ERR(EFAULT); + } + } + else { + mga_do_cleanup_dma(dev); + } + + return err; +} + static int mga_do_init_dma( drm_device_t *dev, drm_mga_init_t *init ) { drm_mga_private_t *dev_priv; int ret; DRM_DEBUG( "\n" ); - dev_priv = drm_alloc( sizeof(drm_mga_private_t), DRM_MEM_DRIVER ); - if ( !dev_priv ) - return DRM_ERR(ENOMEM); - memset( dev_priv, 0, sizeof(drm_mga_private_t) ); + dev_priv = dev->dev_private; - dev_priv->chipset = init->chipset; - - dev_priv->usec_timeout = MGA_DEFAULT_USEC_TIMEOUT; - - if ( init->sgram ) { + if (init->sgram) { dev_priv->clear_cmd = MGA_DWGCTL_CLEAR | MGA_ATYPE_BLK; } else { dev_priv->clear_cmd = MGA_DWGCTL_CLEAR | MGA_ATYPE_RSTR; @@ -436,88 +799,65 @@ static int mga_do_init_dma( drm_device_t *dev, drm_mga_init_t *init ) DRM_GETSAREA(); - if(!dev_priv->sarea) { - DRM_ERROR( "failed to find sarea!\n" ); - /* Assign dev_private so we can do cleanup. */ - dev->dev_private = (void *)dev_priv; - mga_do_cleanup_dma( dev ); + if (!dev_priv->sarea) { + DRM_ERROR("failed to find sarea!\n"); return DRM_ERR(EINVAL); } - dev_priv->mmio = drm_core_findmap(dev, init->mmio_offset); - if(!dev_priv->mmio) { - DRM_ERROR( "failed to find mmio region!\n" ); - /* Assign dev_private so we can do cleanup. 
*/ - dev->dev_private = (void *)dev_priv; - mga_do_cleanup_dma( dev ); - return DRM_ERR(EINVAL); - } - dev_priv->status = drm_core_findmap(dev, init->status_offset); - if(!dev_priv->status) { - DRM_ERROR( "failed to find status page!\n" ); - /* Assign dev_private so we can do cleanup. */ - dev->dev_private = (void *)dev_priv; - mga_do_cleanup_dma( dev ); - return DRM_ERR(EINVAL); - } - dev_priv->warp = drm_core_findmap(dev, init->warp_offset); - if(!dev_priv->warp) { - DRM_ERROR( "failed to find warp microcode region!\n" ); - /* Assign dev_private so we can do cleanup. */ - dev->dev_private = (void *)dev_priv; - mga_do_cleanup_dma( dev ); - return DRM_ERR(EINVAL); - } - dev_priv->primary = drm_core_findmap(dev, init->primary_offset); - if(!dev_priv->primary) { - DRM_ERROR( "failed to find primary dma region!\n" ); - /* Assign dev_private so we can do cleanup. */ - dev->dev_private = (void *)dev_priv; - mga_do_cleanup_dma( dev ); - return DRM_ERR(EINVAL); - } - dev->agp_buffer_map = drm_core_findmap(dev, init->buffers_offset); - if(!dev->agp_buffer_map) { - DRM_ERROR( "failed to find dma buffer region!\n" ); - /* Assign dev_private so we can do cleanup. */ - dev->dev_private = (void *)dev_priv; - mga_do_cleanup_dma( dev ); - return DRM_ERR(EINVAL); + if (! 
dev_priv->used_new_dma_init) { + dev_priv->status = drm_core_findmap(dev, init->status_offset); + if (!dev_priv->status) { + DRM_ERROR("failed to find status page!\n"); + return DRM_ERR(EINVAL); + } + dev_priv->mmio = drm_core_findmap(dev, init->mmio_offset); + if (!dev_priv->mmio) { + DRM_ERROR("failed to find mmio region!\n"); + return DRM_ERR(EINVAL); + } + dev_priv->warp = drm_core_findmap(dev, init->warp_offset); + if (!dev_priv->warp) { + DRM_ERROR("failed to find warp microcode region!\n"); + return DRM_ERR(EINVAL); + } + dev_priv->primary = drm_core_findmap(dev, init->primary_offset); + if (!dev_priv->primary) { + DRM_ERROR("failed to find primary dma region!\n"); + return DRM_ERR(EINVAL); + } + dev->agp_buffer_map = drm_core_findmap(dev, init->buffers_offset); + if (!dev->agp_buffer_map) { + DRM_ERROR("failed to find dma buffer region!\n"); + return DRM_ERR(EINVAL); + } + + drm_core_ioremap(dev_priv->warp, dev); + drm_core_ioremap(dev_priv->primary, dev); + drm_core_ioremap(dev->agp_buffer_map, dev); } dev_priv->sarea_priv = (drm_mga_sarea_t *)((u8 *)dev_priv->sarea->handle + init->sarea_priv_offset); - drm_core_ioremap( dev_priv->warp, dev ); - drm_core_ioremap( dev_priv->primary, dev ); - drm_core_ioremap( dev->agp_buffer_map, dev ); - - if(!dev_priv->warp->handle || - !dev_priv->primary->handle || - !dev->agp_buffer_map->handle ) { - DRM_ERROR( "failed to ioremap agp regions!\n" ); - /* Assign dev_private so we can do cleanup. */ - dev->dev_private = (void *)dev_priv; - mga_do_cleanup_dma( dev ); + if (!dev_priv->warp->handle || + !dev_priv->primary->handle || + ((dev_priv->dma_access != 0) && + ((dev->agp_buffer_map == NULL) || + (dev->agp_buffer_map->handle == NULL)))) { + DRM_ERROR("failed to ioremap agp regions!\n"); return DRM_ERR(ENOMEM); } - ret = mga_warp_install_microcode( dev_priv ); - if ( ret < 0 ) { - DRM_ERROR( "failed to install WARP ucode!\n" ); - /* Assign dev_private so we can do cleanup. 
*/ - dev->dev_private = (void *)dev_priv; - mga_do_cleanup_dma( dev ); + ret = mga_warp_install_microcode(dev_priv); + if (ret < 0) { + DRM_ERROR("failed to install WARP ucode!\n"); return ret; } - ret = mga_warp_init( dev_priv ); - if ( ret < 0 ) { - DRM_ERROR( "failed to init WARP engine!\n" ); - /* Assign dev_private so we can do cleanup. */ - dev->dev_private = (void *)dev_priv; - mga_do_cleanup_dma( dev ); + ret = mga_warp_init(dev_priv); + if (ret < 0) { + DRM_ERROR("failed to init WARP engine!\n"); return ret; } @@ -557,22 +897,18 @@ static int mga_do_init_dma( drm_device_t *dev, drm_mga_init_t *init ) dev_priv->sarea_priv->last_frame.head = 0; dev_priv->sarea_priv->last_frame.wrap = 0; - if ( mga_freelist_init( dev, dev_priv ) < 0 ) { - DRM_ERROR( "could not initialize freelist\n" ); - /* Assign dev_private so we can do cleanup. */ - dev->dev_private = (void *)dev_priv; - mga_do_cleanup_dma( dev ); + if (mga_freelist_init(dev, dev_priv) < 0) { + DRM_ERROR("could not initialize freelist\n"); return DRM_ERR(ENOMEM); } - /* Make dev_private visable to others. 
*/ - dev->dev_private = (void *)dev_priv; return 0; } static int mga_do_cleanup_dma( drm_device_t *dev ) { - DRM_DEBUG( "\n" ); + int err = 0; + DRM_DEBUG("\n"); /* Make sure interrupts are disabled here because the uninstall ioctl * may not have been called from userspace and after dev_private @@ -583,20 +919,49 @@ static int mga_do_cleanup_dma( drm_device_t *dev ) if ( dev->dev_private ) { drm_mga_private_t *dev_priv = dev->dev_private; - if ( dev_priv->warp != NULL ) - drm_core_ioremapfree( dev_priv->warp, dev ); - if ( dev_priv->primary != NULL ) - drm_core_ioremapfree( dev_priv->primary, dev ); - if ( dev->agp_buffer_map != NULL ) - drm_core_ioremapfree( dev->agp_buffer_map, dev ); + if ((dev_priv->warp != NULL) + && (dev_priv->mmio->type != _DRM_CONSISTENT)) + drm_core_ioremapfree(dev_priv->warp, dev); - if ( dev_priv->head != NULL ) { - mga_freelist_cleanup( dev ); + if ((dev_priv->primary != NULL) + && (dev_priv->primary->type != _DRM_CONSISTENT)) + drm_core_ioremapfree(dev_priv->primary, dev); + + if (dev->agp_buffer_map != NULL) + drm_core_ioremapfree(dev->agp_buffer_map, dev); + + if (dev_priv->used_new_dma_init) { + if (dev_priv->agp_mem != NULL) { + dev_priv->agp_textures = NULL; + drm_unbind_agp(dev_priv->agp_mem); + + drm_free_agp(dev_priv->agp_mem, dev_priv->agp_pages); + dev_priv->agp_pages = 0; + dev_priv->agp_mem = NULL; + } + + if ((dev->agp != NULL) && dev->agp->acquired) { + err = drm_agp_release(dev); + } + + dev_priv->used_new_dma_init = 0; } - drm_free( dev->dev_private, sizeof(drm_mga_private_t), - DRM_MEM_DRIVER ); - dev->dev_private = NULL; + dev_priv->warp = NULL; + dev_priv->primary = NULL; + dev_priv->mmio = NULL; + dev_priv->status = NULL; + dev_priv->sarea = NULL; + dev_priv->sarea_priv = NULL; + dev->agp_buffer_map = NULL; + + memset(&dev_priv->prim, 0, sizeof(dev_priv->prim)); + dev_priv->warp_pipe = 0; + memset(dev_priv->warp_pipe_phys, 0, sizeof(dev_priv->warp_pipe_phys)); + + if (dev_priv->head != NULL) { + 
mga_freelist_cleanup(dev); + } } return 0; @@ -606,14 +971,20 @@ int mga_dma_init( DRM_IOCTL_ARGS ) { DRM_DEVICE; drm_mga_init_t init; + int err; LOCK_TEST_WITH_RETURN( dev, filp ); - DRM_COPY_FROM_USER_IOCTL( init, (drm_mga_init_t __user *)data, sizeof(init) ); + DRM_COPY_FROM_USER_IOCTL(init, (drm_mga_init_t __user *) data, + sizeof(init)); switch ( init.func ) { case MGA_INIT_DMA: - return mga_do_init_dma( dev, &init ); + err = mga_do_init_dma(dev, &init); + if (err) { + (void) mga_do_cleanup_dma(dev); + } + return err; case MGA_CLEANUP_DMA: return mga_do_cleanup_dma( dev ); } @@ -742,7 +1113,21 @@ int mga_dma_buffers( DRM_IOCTL_ARGS ) return ret; } -void mga_driver_pretakedown(drm_device_t *dev) +/** + * Called just before the module is unloaded. + */ +int mga_driver_postcleanup(drm_device_t * dev) +{ + drm_free(dev->dev_private, sizeof(drm_mga_private_t), DRM_MEM_DRIVER); + dev->dev_private = NULL; + + return 0; +} + +/** + * Called when the last opener of the device is closed. + */ +void mga_driver_pretakedown(drm_device_t * dev) { mga_do_cleanup_dma( dev ); } diff --git a/drivers/char/drm/mga_drm.h b/drivers/char/drm/mga_drm.h index 4500e6e4920a..2d8aa790379e 100644 --- a/drivers/char/drm/mga_drm.h +++ b/drivers/char/drm/mga_drm.h @@ -226,10 +226,6 @@ typedef struct _drm_mga_sarea { } drm_mga_sarea_t; -/* WARNING: If you change any of these defines, make sure to change the - * defines in the Xserver file (xf86drmMga.h) - */ - /* MGA specific ioctls * The device specific ioctl range is 0x40 to 0x79. */ @@ -244,6 +240,14 @@ typedef struct _drm_mga_sarea { #define DRM_MGA_BLIT 0x08 #define DRM_MGA_GETPARAM 0x09 +/* 3.2: + * ioctls for operating on fences. 
+ */ +#define DRM_MGA_SET_FENCE 0x0a +#define DRM_MGA_WAIT_FENCE 0x0b +#define DRM_MGA_DMA_BOOTSTRAP 0x0c + + #define DRM_IOCTL_MGA_INIT DRM_IOW( DRM_COMMAND_BASE + DRM_MGA_INIT, drm_mga_init_t) #define DRM_IOCTL_MGA_FLUSH DRM_IOW( DRM_COMMAND_BASE + DRM_MGA_FLUSH, drm_lock_t) #define DRM_IOCTL_MGA_RESET DRM_IO( DRM_COMMAND_BASE + DRM_MGA_RESET) @@ -254,6 +258,9 @@ typedef struct _drm_mga_sarea { #define DRM_IOCTL_MGA_ILOAD DRM_IOW( DRM_COMMAND_BASE + DRM_MGA_ILOAD, drm_mga_iload_t) #define DRM_IOCTL_MGA_BLIT DRM_IOW( DRM_COMMAND_BASE + DRM_MGA_BLIT, drm_mga_blit_t) #define DRM_IOCTL_MGA_GETPARAM DRM_IOWR(DRM_COMMAND_BASE + DRM_MGA_GETPARAM, drm_mga_getparam_t) +#define DRM_IOCTL_MGA_SET_FENCE DRM_IOW( DRM_COMMAND_BASE + DRM_MGA_SET_FENCE, uint32_t) +#define DRM_IOCTL_MGA_WAIT_FENCE DRM_IOWR(DRM_COMMAND_BASE + DRM_MGA_WAIT_FENCE, uint32_t) +#define DRM_IOCTL_MGA_DMA_BOOTSTRAP DRM_IOWR(DRM_COMMAND_BASE + DRM_MGA_DMA_BOOTSTRAP, drm_mga_dma_bootstrap_t) typedef struct _drm_mga_warp_index { int installed; @@ -292,12 +299,72 @@ typedef struct drm_mga_init { unsigned long buffers_offset; } drm_mga_init_t; -typedef struct drm_mga_fullscreen { - enum { - MGA_INIT_FULLSCREEN = 0x01, - MGA_CLEANUP_FULLSCREEN = 0x02 - } func; -} drm_mga_fullscreen_t; +typedef struct drm_mga_dma_bootstrap { + /** + * \name AGP texture region + * + * On return from the DRM_MGA_DMA_BOOTSTRAP ioctl, these fields will + * be filled in with the actual AGP texture settings. + * + * \warning + * If these fields are non-zero, but drm_mga_dma_bootstrap::agp_mode + * is zero, it means that PCI memory (most likely through the use of + * an IOMMU) is being used for "AGP" textures. + */ + /*@{*/ + drm_handle_t texture_handle; /**< Handle used to map AGP textures. */ + uint32_t texture_size; /**< Size of the AGP texture region. */ + /*@}*/ + + + /** + * Requested size of the primary DMA region. + * + * On return from the DRM_MGA_DMA_BOOTSTRAP ioctl, this field will be + * filled in with the actual size allocated.
PCI DMA may reduce it. + */ + uint32_t primary_size; + + + /** + * Requested number of secondary DMA buffers. + * + * On return from the DRM_MGA_DMA_BOOTSTRAP ioctl, this field will be + * filled in with the actual number of secondary DMA buffers + * allocated. Particularly when PCI DMA is used, this may be + * (substantially) less than the number requested. + */ + uint32_t secondary_bin_count; + + + /** + * Requested size of each secondary DMA buffer. + * + * While the kernel \b is free to reduce + * drm_mga_dma_bootstrap::secondary_bin_count, it is \b not allowed + * to reduce drm_mga_dma_bootstrap::secondary_bin_size. + */ + uint32_t secondary_bin_size; + + + /** + * Bit-wise mask of AGPSTAT2_* values. Currently only \c AGPSTAT2_1X, + * \c AGPSTAT2_2X, and \c AGPSTAT2_4X are supported. If this value is + * zero, it means that PCI DMA should be used, even if AGP is + * possible. + * + * On return from the DRM_MGA_DMA_BOOTSTRAP ioctl, this field will be + * filled in with the actual AGP mode. If AGP was not available + * (i.e., PCI DMA was used), this value will be zero. + */ + uint32_t agp_mode; + + + /** + * Desired AGP GART size, measured in megabytes. + */ + uint8_t agp_size; +} drm_mga_dma_bootstrap_t; typedef struct drm_mga_clear { unsigned int flags; @@ -342,6 +409,14 @@ typedef struct _drm_mga_blit { */ #define MGA_PARAM_IRQ_NR 1 +/* 3.2: Query the actual card type. The DDX only distinguishes between + * G200 chips and non-G200 chips, which it calls G400. It turns out that + * there are some very subtle differences between the G4x0 chips and the G550 + * chips. Using this parameter query, a client-side driver can detect the + * difference between a G4x0 and a G550.
+ */ +#define MGA_PARAM_CARD_TYPE 2 + typedef struct drm_mga_getparam { int param; void __user *value; diff --git a/drivers/char/drm/mga_drv.h b/drivers/char/drm/mga_drv.h index 38f913905e04..b22fdbd4f830 100644 --- a/drivers/char/drm/mga_drv.h +++ b/drivers/char/drm/mga_drv.h @@ -38,11 +38,11 @@ #define DRIVER_NAME "mga" #define DRIVER_DESC "Matrox G200/G400" -#define DRIVER_DATE "20051013" +#define DRIVER_DATE "20050607" #define DRIVER_MAJOR 3 -#define DRIVER_MINOR 1 -#define DRIVER_PATCHLEVEL 1 +#define DRIVER_MINOR 2 +#define DRIVER_PATCHLEVEL 0 typedef struct drm_mga_primary_buffer { u8 *start; @@ -87,9 +87,43 @@ typedef struct drm_mga_private { int chipset; int usec_timeout; + /** + * If set, the new DMA initialization sequence was used. This is + * primarily used to select how the driver should uninitialize its + * internal DMA structures. + */ + int used_new_dma_init; + + /** + * If AGP memory is used for DMA buffers, this will be the value + * \c MGA_PAGPXFER. Otherwise, it will be zero (for a PCI transfer). + */ + u32 dma_access; + + /** + * If AGP memory is used for DMA buffers, this will be the value + * \c MGA_WAGP_ENABLE. Otherwise, it will be zero (for a PCI + * transfer). + */ + u32 wagp_enable; + + /** + * \name MMIO region parameters. + * + * \sa drm_mga_private_t::mmio + */ + /*@{*/ + u32 mmio_base; /**< Bus address of base of MMIO. */ + u32 mmio_size; /**< Size of the MMIO region.
*/ + /*@}*/ + u32 clear_cmd; u32 maccess; + wait_queue_head_t fence_queue; + atomic_t last_fence_retired; + u32 next_fence_to_post; + unsigned int fb_cpp; unsigned int front_offset; unsigned int front_pitch; @@ -108,35 +142,43 @@ typedef struct drm_mga_private { drm_local_map_t *status; drm_local_map_t *warp; drm_local_map_t *primary; - drm_local_map_t *buffers; drm_local_map_t *agp_textures; + + DRM_AGP_MEM *agp_mem; + unsigned int agp_pages; } drm_mga_private_t; /* mga_dma.c */ -extern int mga_dma_init( DRM_IOCTL_ARGS ); -extern int mga_dma_flush( DRM_IOCTL_ARGS ); -extern int mga_dma_reset( DRM_IOCTL_ARGS ); -extern int mga_dma_buffers( DRM_IOCTL_ARGS ); -extern void mga_driver_pretakedown(drm_device_t *dev); -extern int mga_driver_dma_quiescent(drm_device_t *dev); +extern int mga_driver_preinit(drm_device_t * dev, unsigned long flags); +extern int mga_dma_bootstrap(DRM_IOCTL_ARGS); +extern int mga_dma_init(DRM_IOCTL_ARGS); +extern int mga_dma_flush(DRM_IOCTL_ARGS); +extern int mga_dma_reset(DRM_IOCTL_ARGS); +extern int mga_dma_buffers(DRM_IOCTL_ARGS); +extern int mga_driver_postcleanup(drm_device_t * dev); +extern void mga_driver_pretakedown(drm_device_t * dev); +extern int mga_driver_dma_quiescent(drm_device_t * dev); -extern int mga_do_wait_for_idle( drm_mga_private_t *dev_priv ); +extern int mga_do_wait_for_idle(drm_mga_private_t * dev_priv); -extern void mga_do_dma_flush( drm_mga_private_t *dev_priv ); -extern void mga_do_dma_wrap_start( drm_mga_private_t *dev_priv ); -extern void mga_do_dma_wrap_end( drm_mga_private_t *dev_priv ); +extern void mga_do_dma_flush(drm_mga_private_t * dev_priv); +extern void mga_do_dma_wrap_start(drm_mga_private_t * dev_priv); +extern void mga_do_dma_wrap_end(drm_mga_private_t * dev_priv); extern int mga_freelist_put( drm_device_t *dev, drm_buf_t *buf ); /* mga_warp.c */ -extern int mga_warp_install_microcode( drm_mga_private_t *dev_priv ); -extern int mga_warp_init( drm_mga_private_t *dev_priv ); +extern unsigned int 
mga_warp_microcode_size(const drm_mga_private_t * dev_priv); +extern int mga_warp_install_microcode(drm_mga_private_t * dev_priv); +extern int mga_warp_init(drm_mga_private_t * dev_priv); -extern int mga_driver_vblank_wait(drm_device_t *dev, unsigned int *sequence); -extern irqreturn_t mga_driver_irq_handler( DRM_IRQ_ARGS ); -extern void mga_driver_irq_preinstall( drm_device_t *dev ); -extern void mga_driver_irq_postinstall( drm_device_t *dev ); -extern void mga_driver_irq_uninstall( drm_device_t *dev ); + /* mga_irq.c */ +extern int mga_driver_fence_wait(drm_device_t * dev, unsigned int *sequence); +extern int mga_driver_vblank_wait(drm_device_t * dev, unsigned int *sequence); +extern irqreturn_t mga_driver_irq_handler(DRM_IRQ_ARGS); +extern void mga_driver_irq_preinstall(drm_device_t * dev); +extern void mga_driver_irq_postinstall(drm_device_t * dev); +extern void mga_driver_irq_uninstall(drm_device_t * dev); extern long mga_compat_ioctl(struct file *filp, unsigned int cmd, unsigned long arg); @@ -527,6 +569,12 @@ do { \ */ #define MGA_EXEC 0x0100 +/* AGP PLL encoding (for G200 only). 
+ */ +#define MGA_AGP_PLL 0x1e4c +# define MGA_AGP2XPLL_DISABLE (0 << 0) +# define MGA_AGP2XPLL_ENABLE (1 << 0) + /* Warp registers */ #define MGA_WR0 0x2d00 diff --git a/drivers/char/drm/mga_irq.c b/drivers/char/drm/mga_irq.c index bc0b6b5d43a6..52eaa4e788f9 100644 --- a/drivers/char/drm/mga_irq.c +++ b/drivers/char/drm/mga_irq.c @@ -41,15 +41,40 @@ irqreturn_t mga_driver_irq_handler( DRM_IRQ_ARGS ) drm_mga_private_t *dev_priv = (drm_mga_private_t *)dev->dev_private; int status; + int handled = 0; + + status = MGA_READ(MGA_STATUS); - status = MGA_READ( MGA_STATUS ); - /* VBLANK interrupt */ if ( status & MGA_VLINEPEN ) { MGA_WRITE( MGA_ICLEAR, MGA_VLINEICLR ); atomic_inc(&dev->vbl_received); DRM_WAKEUP(&dev->vbl_queue); - drm_vbl_send_signals( dev ); + drm_vbl_send_signals(dev); + handled = 1; + } + + /* SOFTRAP interrupt */ + if (status & MGA_SOFTRAPEN) { + const u32 prim_start = MGA_READ(MGA_PRIMADDRESS); + const u32 prim_end = MGA_READ(MGA_PRIMEND); + + + MGA_WRITE(MGA_ICLEAR, MGA_SOFTRAPICLR); + + /* In addition to clearing the interrupt-pending bit, we + * have to write to MGA_PRIMEND to re-start the DMA operation. + */ + if ( (prim_start & ~0x03) != (prim_end & ~0x03) ) { + MGA_WRITE(MGA_PRIMEND, prim_end); + } + + atomic_inc(&dev_priv->last_fence_retired); + DRM_WAKEUP(&dev_priv->fence_queue); + handled = 1; + } + + if ( handled ) { return IRQ_HANDLED; } return IRQ_NONE; @@ -73,9 +98,28 @@ int mga_driver_vblank_wait(drm_device_t *dev, unsigned int *sequence) return ret; } -void mga_driver_irq_preinstall( drm_device_t *dev ) { - drm_mga_private_t *dev_priv = - (drm_mga_private_t *)dev->dev_private; +int mga_driver_fence_wait(drm_device_t * dev, unsigned int *sequence) +{ + drm_mga_private_t *dev_priv = (drm_mga_private_t *) dev->dev_private; + unsigned int cur_fence; + int ret = 0; + + /* Assume that the user has missed the current sequence number + * by about a day rather than she wants to wait for years + * using fences. 
+ */ + DRM_WAIT_ON(ret, dev_priv->fence_queue, 3 * DRM_HZ, + (((cur_fence = atomic_read(&dev_priv->last_fence_retired)) + - *sequence) <= (1 << 23))); + + *sequence = cur_fence; + + return ret; +} + +void mga_driver_irq_preinstall(drm_device_t * dev) +{ + drm_mga_private_t *dev_priv = (drm_mga_private_t *) dev->dev_private; /* Disable *all* interrupts */ MGA_WRITE( MGA_IEN, 0 ); @@ -83,12 +127,14 @@ void mga_driver_irq_preinstall( drm_device_t *dev ) { MGA_WRITE( MGA_ICLEAR, ~0 ); } -void mga_driver_irq_postinstall( drm_device_t *dev ) { - drm_mga_private_t *dev_priv = - (drm_mga_private_t *)dev->dev_private; +void mga_driver_irq_postinstall(drm_device_t * dev) +{ + drm_mga_private_t *dev_priv = (drm_mga_private_t *) dev->dev_private; - /* Turn on VBL interrupt */ - MGA_WRITE( MGA_IEN, MGA_VLINEIEN ); + DRM_INIT_WAITQUEUE( &dev_priv->fence_queue ); + + /* Turn on vertical blank interrupt and soft trap interrupt. */ + MGA_WRITE(MGA_IEN, MGA_VLINEIEN | MGA_SOFTRAPEN); } void mga_driver_irq_uninstall( drm_device_t *dev ) { @@ -98,5 +144,7 @@ void mga_driver_irq_uninstall( drm_device_t *dev ) { return; /* Disable *all* interrupts */ - MGA_WRITE( MGA_IEN, 0 ); + MGA_WRITE(MGA_IEN, 0); + + dev->irq_enabled = 0; } diff --git a/drivers/char/drm/mga_state.c b/drivers/char/drm/mga_state.c index 3c7a8f5ba501..05bbb4719376 100644 --- a/drivers/char/drm/mga_state.c +++ b/drivers/char/drm/mga_state.c @@ -53,16 +53,16 @@ static void mga_emit_clip_rect( drm_mga_private_t *dev_priv, /* Force reset of DWGCTL on G400 (eliminates clip disable bit). 
*/ - if ( dev_priv->chipset == MGA_CARD_TYPE_G400 ) { - DMA_BLOCK( MGA_DWGCTL, ctx->dwgctl, - MGA_LEN + MGA_EXEC, 0x80000000, - MGA_DWGCTL, ctx->dwgctl, - MGA_LEN + MGA_EXEC, 0x80000000 ); + if (dev_priv->chipset == MGA_CARD_TYPE_G400) { + DMA_BLOCK(MGA_DWGCTL, ctx->dwgctl, + MGA_LEN + MGA_EXEC, 0x80000000, + MGA_DWGCTL, ctx->dwgctl, + MGA_LEN + MGA_EXEC, 0x80000000); } - DMA_BLOCK( MGA_DMAPAD, 0x00000000, - MGA_CXBNDRY, (box->x2 << 16) | box->x1, - MGA_YTOP, box->y1 * pitch, - MGA_YBOT, box->y2 * pitch ); + DMA_BLOCK(MGA_DMAPAD, 0x00000000, + MGA_CXBNDRY, ((box->x2 - 1) << 16) | box->x1, + MGA_YTOP, box->y1 * pitch, + MGA_YBOT, (box->y2 - 1) * pitch); ADVANCE_DMA(); } @@ -260,12 +260,11 @@ static __inline__ void mga_g200_emit_pipe( drm_mga_private_t *dev_priv ) /* Padding required to to hardware bug. */ - DMA_BLOCK( MGA_DMAPAD, 0xffffffff, - MGA_DMAPAD, 0xffffffff, - MGA_DMAPAD, 0xffffffff, - MGA_WIADDR, (dev_priv->warp_pipe_phys[pipe] | - MGA_WMODE_START | - MGA_WAGP_ENABLE) ); + DMA_BLOCK(MGA_DMAPAD, 0xffffffff, + MGA_DMAPAD, 0xffffffff, + MGA_DMAPAD, 0xffffffff, + MGA_WIADDR, (dev_priv->warp_pipe_phys[pipe] | + MGA_WMODE_START | dev_priv->wagp_enable)); ADVANCE_DMA(); } @@ -342,12 +341,11 @@ static __inline__ void mga_g400_emit_pipe( drm_mga_private_t *dev_priv ) MGA_WR60, MGA_G400_WR_MAGIC ); /* tex1 height */ /* Padding required to to hardware bug */ - DMA_BLOCK( MGA_DMAPAD, 0xffffffff, - MGA_DMAPAD, 0xffffffff, - MGA_DMAPAD, 0xffffffff, - MGA_WIADDR2, (dev_priv->warp_pipe_phys[pipe] | - MGA_WMODE_START | - MGA_WAGP_ENABLE) ); + DMA_BLOCK(MGA_DMAPAD, 0xffffffff, + MGA_DMAPAD, 0xffffffff, + MGA_DMAPAD, 0xffffffff, + MGA_WIADDR2, (dev_priv->warp_pipe_phys[pipe] | + MGA_WMODE_START | dev_priv->wagp_enable)); ADVANCE_DMA(); } @@ -459,9 +457,9 @@ static int mga_verify_state( drm_mga_private_t *dev_priv ) if ( dirty & MGA_UPLOAD_TEX0 ) ret |= mga_verify_tex( dev_priv, 0 ); - if ( dev_priv->chipset == MGA_CARD_TYPE_G400 ) { - if ( dirty & MGA_UPLOAD_TEX1 ) - ret |= 
mga_verify_tex( dev_priv, 1 ); + if (dev_priv->chipset >= MGA_CARD_TYPE_G400) { + if (dirty & MGA_UPLOAD_TEX1) + ret |= mga_verify_tex(dev_priv, 1); if ( dirty & MGA_UPLOAD_PIPE ) ret |= ( sarea_priv->warp_pipe > MGA_MAX_G400_PIPES ); @@ -686,12 +684,12 @@ static void mga_dma_dispatch_vertex( drm_device_t *dev, drm_buf_t *buf ) BEGIN_DMA( 1 ); - DMA_BLOCK( MGA_DMAPAD, 0x00000000, - MGA_DMAPAD, 0x00000000, - MGA_SECADDRESS, (address | - MGA_DMA_VERTEX), - MGA_SECEND, ((address + length) | - MGA_PAGPXFER) ); + DMA_BLOCK(MGA_DMAPAD, 0x00000000, + MGA_DMAPAD, 0x00000000, + MGA_SECADDRESS, (address | + MGA_DMA_VERTEX), + MGA_SECEND, ((address + length) | + dev_priv->dma_access)); ADVANCE_DMA(); } while ( ++i < sarea_priv->nbox ); @@ -733,11 +731,11 @@ static void mga_dma_dispatch_indices( drm_device_t *dev, drm_buf_t *buf, BEGIN_DMA( 1 ); - DMA_BLOCK( MGA_DMAPAD, 0x00000000, - MGA_DMAPAD, 0x00000000, - MGA_SETUPADDRESS, address + start, - MGA_SETUPEND, ((address + end) | - MGA_PAGPXFER) ); + DMA_BLOCK(MGA_DMAPAD, 0x00000000, + MGA_DMAPAD, 0x00000000, + MGA_SETUPADDRESS, address + start, + MGA_SETUPEND, ((address + end) | + dev_priv->dma_access)); ADVANCE_DMA(); } while ( ++i < sarea_priv->nbox ); @@ -764,7 +762,7 @@ static void mga_dma_dispatch_iload( drm_device_t *dev, drm_buf_t *buf, drm_mga_private_t *dev_priv = dev->dev_private; drm_mga_buf_priv_t *buf_priv = buf->dev_private; drm_mga_context_regs_t *ctx = &dev_priv->sarea_priv->context_state; - u32 srcorg = buf->bus_address | MGA_SRCACC_AGP | MGA_SRCMAP_SYSMEM; + u32 srcorg = buf->bus_address | dev_priv->dma_access | MGA_SRCMAP_SYSMEM; u32 y2; DMA_LOCALS; DRM_DEBUG( "buf=%d used=%d\n", buf->idx, buf->used ); @@ -1095,6 +1093,9 @@ static int mga_getparam( DRM_IOCTL_ARGS ) case MGA_PARAM_IRQ_NR: value = dev->irq; break; + case MGA_PARAM_CARD_TYPE: + value = dev_priv->chipset; + break; default: return DRM_ERR(EINVAL); } @@ -1107,17 +1108,82 @@ static int mga_getparam( DRM_IOCTL_ARGS ) return 0; } +static int 
mga_set_fence(DRM_IOCTL_ARGS) +{ + DRM_DEVICE; + drm_mga_private_t *dev_priv = dev->dev_private; + u32 temp; + DMA_LOCALS; + + if (!dev_priv) { + DRM_ERROR("%s called with no initialization\n", __FUNCTION__); + return DRM_ERR(EINVAL); + } + + DRM_DEBUG("pid=%d\n", DRM_CURRENTPID); + + /* I would normally do this assignment in the declaration of temp, + * but dev_priv may be NULL. + */ + + temp = dev_priv->next_fence_to_post; + dev_priv->next_fence_to_post++; + + BEGIN_DMA(1); + DMA_BLOCK(MGA_DMAPAD, 0x00000000, + MGA_DMAPAD, 0x00000000, + MGA_DMAPAD, 0x00000000, + MGA_SOFTRAP, 0x00000000); + ADVANCE_DMA(); + + if (DRM_COPY_TO_USER( (u32 __user *) data, & temp, sizeof(u32))) { + DRM_ERROR("copy_to_user\n"); + return DRM_ERR(EFAULT); + } + + return 0; +} + +static int mga_wait_fence(DRM_IOCTL_ARGS) +{ + DRM_DEVICE; + drm_mga_private_t *dev_priv = dev->dev_private; + u32 fence; + + if (!dev_priv) { + DRM_ERROR("%s called with no initialization\n", __FUNCTION__); + return DRM_ERR(EINVAL); + } + + DRM_COPY_FROM_USER_IOCTL(fence, (u32 __user *) data, sizeof(u32)); + + DRM_DEBUG("pid=%d\n", DRM_CURRENTPID); + + mga_driver_fence_wait(dev, & fence); + + if (DRM_COPY_TO_USER( (u32 __user *) data, & fence, sizeof(u32))) { + DRM_ERROR("copy_to_user\n"); + return DRM_ERR(EFAULT); + } + + return 0; +} + drm_ioctl_desc_t mga_ioctls[] = { - [DRM_IOCTL_NR(DRM_MGA_INIT)] = { mga_dma_init, 1, 1 }, - [DRM_IOCTL_NR(DRM_MGA_FLUSH)] = { mga_dma_flush, 1, 0 }, - [DRM_IOCTL_NR(DRM_MGA_RESET)] = { mga_dma_reset, 1, 0 }, - [DRM_IOCTL_NR(DRM_MGA_SWAP)] = { mga_dma_swap, 1, 0 }, - [DRM_IOCTL_NR(DRM_MGA_CLEAR)] = { mga_dma_clear, 1, 0 }, - [DRM_IOCTL_NR(DRM_MGA_VERTEX)] = { mga_dma_vertex, 1, 0 }, - [DRM_IOCTL_NR(DRM_MGA_INDICES)] = { mga_dma_indices, 1, 0 }, - [DRM_IOCTL_NR(DRM_MGA_ILOAD)] = { mga_dma_iload, 1, 0 }, - [DRM_IOCTL_NR(DRM_MGA_BLIT)] = { mga_dma_blit, 1, 0 }, - [DRM_IOCTL_NR(DRM_MGA_GETPARAM)]= { mga_getparam, 1, 0 }, + [DRM_IOCTL_NR(DRM_MGA_INIT)] = {mga_dma_init, 1, 1}, +
[DRM_IOCTL_NR(DRM_MGA_FLUSH)] = {mga_dma_flush, 1, 0}, + [DRM_IOCTL_NR(DRM_MGA_RESET)] = {mga_dma_reset, 1, 0}, + [DRM_IOCTL_NR(DRM_MGA_SWAP)] = {mga_dma_swap, 1, 0}, + [DRM_IOCTL_NR(DRM_MGA_CLEAR)] = {mga_dma_clear, 1, 0}, + [DRM_IOCTL_NR(DRM_MGA_VERTEX)] = {mga_dma_vertex, 1, 0}, + [DRM_IOCTL_NR(DRM_MGA_INDICES)] = {mga_dma_indices, 1, 0}, + [DRM_IOCTL_NR(DRM_MGA_ILOAD)] = {mga_dma_iload, 1, 0}, + [DRM_IOCTL_NR(DRM_MGA_BLIT)] = {mga_dma_blit, 1, 0}, + [DRM_IOCTL_NR(DRM_MGA_GETPARAM)] = {mga_getparam, 1, 0}, + [DRM_IOCTL_NR(DRM_MGA_SET_FENCE)] = {mga_set_fence, 1, 0}, + [DRM_IOCTL_NR(DRM_MGA_WAIT_FENCE)] = {mga_wait_fence, 1, 0}, + [DRM_IOCTL_NR(DRM_MGA_DMA_BOOTSTRAP)] = {mga_dma_bootstrap, 1, 1}, + }; int mga_max_ioctl = DRM_ARRAY_SIZE(mga_ioctls); diff --git a/drivers/char/drm/mga_warp.c b/drivers/char/drm/mga_warp.c index 0a3a0cc700dc..55ccc8a0ac29 100644 --- a/drivers/char/drm/mga_warp.c +++ b/drivers/char/drm/mga_warp.c @@ -48,65 +48,52 @@ do { \ vcbase += WARP_UCODE_SIZE( which ); \ } while (0) +static const unsigned int mga_warp_g400_microcode_size = + (WARP_UCODE_SIZE(warp_g400_tgz) + + WARP_UCODE_SIZE(warp_g400_tgza) + + WARP_UCODE_SIZE(warp_g400_tgzaf) + + WARP_UCODE_SIZE(warp_g400_tgzf) + + WARP_UCODE_SIZE(warp_g400_tgzs) + + WARP_UCODE_SIZE(warp_g400_tgzsa) + + WARP_UCODE_SIZE(warp_g400_tgzsaf) + + WARP_UCODE_SIZE(warp_g400_tgzsf) + + WARP_UCODE_SIZE(warp_g400_t2gz) + + WARP_UCODE_SIZE(warp_g400_t2gza) + + WARP_UCODE_SIZE(warp_g400_t2gzaf) + + WARP_UCODE_SIZE(warp_g400_t2gzf) + + WARP_UCODE_SIZE(warp_g400_t2gzs) + + WARP_UCODE_SIZE(warp_g400_t2gzsa) + + WARP_UCODE_SIZE(warp_g400_t2gzsaf) + + WARP_UCODE_SIZE(warp_g400_t2gzsf)); -static unsigned int mga_warp_g400_microcode_size( drm_mga_private_t *dev_priv ) +static const unsigned int mga_warp_g200_microcode_size = + (WARP_UCODE_SIZE(warp_g200_tgz) + + WARP_UCODE_SIZE(warp_g200_tgza) + + WARP_UCODE_SIZE(warp_g200_tgzaf) + + WARP_UCODE_SIZE(warp_g200_tgzf) + + WARP_UCODE_SIZE(warp_g200_tgzs) + + 
WARP_UCODE_SIZE(warp_g200_tgzsa) + + WARP_UCODE_SIZE(warp_g200_tgzsaf) + + WARP_UCODE_SIZE(warp_g200_tgzsf)); + + +unsigned int mga_warp_microcode_size(const drm_mga_private_t * dev_priv) { - unsigned int size; - - size = ( WARP_UCODE_SIZE( warp_g400_tgz ) + - WARP_UCODE_SIZE( warp_g400_tgza ) + - WARP_UCODE_SIZE( warp_g400_tgzaf ) + - WARP_UCODE_SIZE( warp_g400_tgzf ) + - WARP_UCODE_SIZE( warp_g400_tgzs ) + - WARP_UCODE_SIZE( warp_g400_tgzsa ) + - WARP_UCODE_SIZE( warp_g400_tgzsaf ) + - WARP_UCODE_SIZE( warp_g400_tgzsf ) + - WARP_UCODE_SIZE( warp_g400_t2gz ) + - WARP_UCODE_SIZE( warp_g400_t2gza ) + - WARP_UCODE_SIZE( warp_g400_t2gzaf ) + - WARP_UCODE_SIZE( warp_g400_t2gzf ) + - WARP_UCODE_SIZE( warp_g400_t2gzs ) + - WARP_UCODE_SIZE( warp_g400_t2gzsa ) + - WARP_UCODE_SIZE( warp_g400_t2gzsaf ) + - WARP_UCODE_SIZE( warp_g400_t2gzsf ) ); - - size = PAGE_ALIGN( size ); - - DRM_DEBUG( "G400 ucode size = %d bytes\n", size ); - return size; -} - -static unsigned int mga_warp_g200_microcode_size( drm_mga_private_t *dev_priv ) -{ - unsigned int size; - - size = ( WARP_UCODE_SIZE( warp_g200_tgz ) + - WARP_UCODE_SIZE( warp_g200_tgza ) + - WARP_UCODE_SIZE( warp_g200_tgzaf ) + - WARP_UCODE_SIZE( warp_g200_tgzf ) + - WARP_UCODE_SIZE( warp_g200_tgzs ) + - WARP_UCODE_SIZE( warp_g200_tgzsa ) + - WARP_UCODE_SIZE( warp_g200_tgzsaf ) + - WARP_UCODE_SIZE( warp_g200_tgzsf ) ); - - size = PAGE_ALIGN( size ); - - DRM_DEBUG( "G200 ucode size = %d bytes\n", size ); - return size; + switch (dev_priv->chipset) { + case MGA_CARD_TYPE_G400: + case MGA_CARD_TYPE_G550: + return PAGE_ALIGN(mga_warp_g400_microcode_size); + case MGA_CARD_TYPE_G200: + return PAGE_ALIGN(mga_warp_g200_microcode_size); + default: + return 0; + } } static int mga_warp_install_g400_microcode( drm_mga_private_t *dev_priv ) { unsigned char *vcbase = dev_priv->warp->handle; unsigned long pcbase = dev_priv->warp->offset; - unsigned int size; - - size = mga_warp_g400_microcode_size( dev_priv ); - if ( size > 
dev_priv->warp->size ) { - DRM_ERROR( "microcode too large! (%u > %lu)\n", - size, dev_priv->warp->size ); - return DRM_ERR(ENOMEM); - } memset( dev_priv->warp_pipe_phys, 0, sizeof(dev_priv->warp_pipe_phys) ); @@ -136,35 +123,36 @@ static int mga_warp_install_g200_microcode( drm_mga_private_t *dev_priv ) { unsigned char *vcbase = dev_priv->warp->handle; unsigned long pcbase = dev_priv->warp->offset; - unsigned int size; - size = mga_warp_g200_microcode_size( dev_priv ); - if ( size > dev_priv->warp->size ) { - DRM_ERROR( "microcode too large! (%u > %lu)\n", - size, dev_priv->warp->size ); - return DRM_ERR(ENOMEM); - } + memset(dev_priv->warp_pipe_phys, 0, sizeof(dev_priv->warp_pipe_phys)); - memset( dev_priv->warp_pipe_phys, 0, - sizeof(dev_priv->warp_pipe_phys) ); - - WARP_UCODE_INSTALL( warp_g200_tgz, MGA_WARP_TGZ ); - WARP_UCODE_INSTALL( warp_g200_tgzf, MGA_WARP_TGZF ); - WARP_UCODE_INSTALL( warp_g200_tgza, MGA_WARP_TGZA ); - WARP_UCODE_INSTALL( warp_g200_tgzaf, MGA_WARP_TGZAF ); - WARP_UCODE_INSTALL( warp_g200_tgzs, MGA_WARP_TGZS ); - WARP_UCODE_INSTALL( warp_g200_tgzsf, MGA_WARP_TGZSF ); - WARP_UCODE_INSTALL( warp_g200_tgzsa, MGA_WARP_TGZSA ); - WARP_UCODE_INSTALL( warp_g200_tgzsaf, MGA_WARP_TGZSAF ); + WARP_UCODE_INSTALL(warp_g200_tgz, MGA_WARP_TGZ); + WARP_UCODE_INSTALL(warp_g200_tgzf, MGA_WARP_TGZF); + WARP_UCODE_INSTALL(warp_g200_tgza, MGA_WARP_TGZA); + WARP_UCODE_INSTALL(warp_g200_tgzaf, MGA_WARP_TGZAF); + WARP_UCODE_INSTALL(warp_g200_tgzs, MGA_WARP_TGZS); + WARP_UCODE_INSTALL(warp_g200_tgzsf, MGA_WARP_TGZSF); + WARP_UCODE_INSTALL(warp_g200_tgzsa, MGA_WARP_TGZSA); + WARP_UCODE_INSTALL(warp_g200_tgzsaf, MGA_WARP_TGZSAF); return 0; } int mga_warp_install_microcode( drm_mga_private_t *dev_priv ) { - switch ( dev_priv->chipset ) { + const unsigned int size = mga_warp_microcode_size(dev_priv); + + DRM_DEBUG("MGA ucode size = %d bytes\n", size); + if (size > dev_priv->warp->size) { + DRM_ERROR("microcode too large! 
(%u > %lu)\n", + size, dev_priv->warp->size); + return DRM_ERR(ENOMEM); + } + + switch (dev_priv->chipset) { case MGA_CARD_TYPE_G400: - return mga_warp_install_g400_microcode( dev_priv ); + case MGA_CARD_TYPE_G550: + return mga_warp_install_g400_microcode(dev_priv); case MGA_CARD_TYPE_G200: return mga_warp_install_g200_microcode( dev_priv ); default: @@ -182,10 +170,11 @@ int mga_warp_init( drm_mga_private_t *dev_priv ) */ switch ( dev_priv->chipset ) { case MGA_CARD_TYPE_G400: - MGA_WRITE( MGA_WIADDR2, MGA_WMODE_SUSPEND ); - MGA_WRITE( MGA_WGETMSB, 0x00000E00 ); - MGA_WRITE( MGA_WVRTXSZ, 0x00001807 ); - MGA_WRITE( MGA_WACCEPTSEQ, 0x18000000 ); + case MGA_CARD_TYPE_G550: + MGA_WRITE(MGA_WIADDR2, MGA_WMODE_SUSPEND); + MGA_WRITE(MGA_WGETMSB, 0x00000E00); + MGA_WRITE(MGA_WVRTXSZ, 0x00001807); + MGA_WRITE(MGA_WACCEPTSEQ, 0x18000000); break; case MGA_CARD_TYPE_G200: MGA_WRITE( MGA_WIADDR, MGA_WMODE_SUSPEND ); From d01cff408057fa925b2f766fa1fd5a305fd1acbf Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Sun, 10 Jul 2005 19:24:35 +1000 Subject: [PATCH 015/584] drm: add mga driver callbacks Add some missing driver callback for the PCI support Signed-off-by: Dave Airlie --- drivers/char/drm/mga_drv.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/char/drm/mga_drv.c b/drivers/char/drm/mga_drv.c index 94af13bc66a4..daabbba3b297 100644 --- a/drivers/char/drm/mga_drv.c +++ b/drivers/char/drm/mga_drv.c @@ -41,6 +41,12 @@ static int mga_driver_device_is_agp(drm_device_t * dev); static int postinit( struct drm_device *dev, unsigned long flags ) { + drm_mga_private_t * const dev_priv = + (drm_mga_private_t *) dev->dev_private; + + dev_priv->mmio_base = pci_resource_start(dev->pdev, 1); + dev_priv->mmio_size = pci_resource_len(dev->pdev, 1); + dev->counters += 3; dev->types[6] = _DRM_STAT_IRQ; dev->types[7] = _DRM_STAT_PRIMARY; @@ -80,6 +86,8 @@ extern int mga_max_ioctl; static struct drm_driver driver = { .driver_features = DRIVER_USE_AGP | DRIVER_REQUIRE_AGP | 
DRIVER_USE_MTRR | DRIVER_HAVE_DMA | DRIVER_HAVE_IRQ | DRIVER_IRQ_SHARED | DRIVER_IRQ_VBL, + .preinit = mga_driver_preinit, + .postcleanup = mga_driver_postcleanup, .pretakedown = mga_driver_pretakedown, .dma_quiescent = mga_driver_dma_quiescent, .device_is_agp = mga_driver_device_is_agp, From 836cf0465c422ee6d654060edd7c620d9cf0c09c Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Sun, 10 Jul 2005 19:27:04 +1000 Subject: [PATCH 016/584] drm: cleanup buffer/map code This is a patch from DRM CVS that cleans up some code that was in CVS that I never moved to the kernel, this patch produces the result of the cleanups and puts it into the kernel drm. From: Eric Anholt , Jon Smirl, Dave Airlie Signed-off-by: Dave Airlie --- drivers/char/drm/drmP.h | 9 +- drivers/char/drm/drm_bufs.c | 245 +++++++++++++++++++++++----------- drivers/char/drm/drm_drv.c | 68 +++------- drivers/char/drm/drm_fops.c | 6 - drivers/char/drm/drm_stub.c | 5 + drivers/char/drm/radeon_cp.c | 21 +++ drivers/char/drm/radeon_drv.c | 1 + drivers/char/drm/radeon_drv.h | 1 + 8 files changed, 218 insertions(+), 138 deletions(-) diff --git a/drivers/char/drm/drmP.h b/drivers/char/drm/drmP.h index a7fdcece6129..fb2af9279148 100644 --- a/drivers/char/drm/drmP.h +++ b/drivers/char/drm/drmP.h @@ -908,10 +908,11 @@ extern int drm_addbufs_agp(drm_device_t *dev, drm_buf_desc_t *request); extern int drm_addbufs_pci(drm_device_t *dev, drm_buf_desc_t *request); extern int drm_addmap(drm_device_t *dev, unsigned int offset, unsigned int size, drm_map_type_t type, - drm_map_flags_t flags, drm_map_t **map_ptr); + drm_map_flags_t flags, drm_local_map_t **map_ptr); extern int drm_addmap_ioctl(struct inode *inode, struct file *filp, unsigned int cmd, unsigned long arg); -extern int drm_rmmap(drm_device_t *dev, void *handle); +extern int drm_rmmap(drm_device_t *dev, drm_local_map_t *map); +extern int drm_rmmap_locked(drm_device_t *dev, drm_local_map_t *map); extern int drm_rmmap_ioctl(struct inode *inode, struct file *filp, 
unsigned int cmd, unsigned long arg); @@ -926,6 +927,10 @@ extern int drm_freebufs( struct inode *inode, struct file *filp, unsigned int cmd, unsigned long arg ); extern int drm_mapbufs( struct inode *inode, struct file *filp, unsigned int cmd, unsigned long arg ); +extern unsigned long drm_get_resource_start(drm_device_t *dev, + unsigned int resource); +extern unsigned long drm_get_resource_len(drm_device_t *dev, + unsigned int resource); /* DMA support (drm_dma.h) */ extern int drm_dma_setup(drm_device_t *dev); diff --git a/drivers/char/drm/drm_bufs.c b/drivers/char/drm/drm_bufs.c index 06b01215fdf3..fcc8d244f46f 100644 --- a/drivers/char/drm/drm_bufs.c +++ b/drivers/char/drm/drm_bufs.c @@ -36,29 +36,33 @@ #include #include "drmP.h" -/** - * Compute size order. Returns the exponent of the smaller power of two which - * is greater or equal to given number. - * - * \param size size. - * \return order. - * - * \todo Can be made faster. - */ -int drm_order( unsigned long size ) +unsigned long drm_get_resource_start(drm_device_t *dev, unsigned int resource) { - int order; - unsigned long tmp; - - for (order = 0, tmp = size >> 1; tmp; tmp >>= 1, order++) - ; - - if (size & (size - 1)) - ++order; - - return order; + return pci_resource_start(dev->pdev, resource); +} +EXPORT_SYMBOL(drm_get_resource_start); + +unsigned long drm_get_resource_len(drm_device_t *dev, unsigned int resource) +{ + return pci_resource_len(dev->pdev, resource); +} +EXPORT_SYMBOL(drm_get_resource_len); + +static drm_local_map_t *drm_find_matching_map(drm_device_t *dev, + drm_local_map_t *map) +{ + struct list_head *list; + + list_for_each(list, &dev->maplist->head) { + drm_map_list_t *entry = list_entry(list, drm_map_list_t, head); + if (entry->map && map->type == entry->map->type && + entry->map->offset == map->offset) { + return entry->map; + } + } + + return NULL; } -EXPORT_SYMBOL(drm_order); #ifdef CONFIG_COMPAT /* @@ -89,6 +93,7 @@ int drm_addmap(drm_device_t * dev, unsigned int offset, 
drm_map_t *map; drm_map_list_t *list; drm_dma_handle_t *dmah; + drm_local_map_t *found_map; map = drm_alloc( sizeof(*map), DRM_MEM_MAPS ); if ( !map ) @@ -129,6 +134,24 @@ int drm_addmap(drm_device_t * dev, unsigned int offset, #ifdef __alpha__ map->offset += dev->hose->mem_space->start; #endif + /* Some drivers preinitialize some maps, without the X Server + * needing to be aware of it. Therefore, we just return success + * when the server tries to create a duplicate map. + */ + found_map = drm_find_matching_map(dev, map); + if (found_map != NULL) { + if (found_map->size != map->size) { + DRM_DEBUG("Matching maps of type %d with " + "mismatched sizes, (%ld vs %ld)\n", + map->type, map->size, found_map->size); + found_map->size = map->size; + } + + drm_free(map, sizeof(*map), DRM_MEM_MAPS); + *map_ptr = found_map; + return 0; + } + if (drm_core_has_MTRR(dev)) { if ( map->type == _DRM_FRAME_BUFFER || (map->flags & _DRM_WRITE_COMBINING) ) { @@ -270,93 +293,136 @@ int drm_addmap_ioctl(struct inode *inode, struct file *filp, * * \sa drm_addmap */ -int drm_rmmap(drm_device_t *dev, void *handle) +int drm_rmmap_locked(drm_device_t *dev, drm_local_map_t *map) { struct list_head *list; drm_map_list_t *r_list = NULL; - drm_vma_entry_t *pt, *prev; - drm_map_t *map; - int found_maps = 0; + drm_dma_handle_t dmah; - down(&dev->struct_sem); - list = &dev->maplist->head; + /* Find the list entry for the map and remove it */ list_for_each(list, &dev->maplist->head) { r_list = list_entry(list, drm_map_list_t, head); - if(r_list->map && - r_list->map->handle == handle && - r_list->map->flags & _DRM_REMOVABLE) break; - } - - /* List has wrapped around to the head pointer, or its empty we didn't - * find anything. 
- */ - if(list == (&dev->maplist->head)) { - up(&dev->struct_sem); - return -EINVAL; - } - map = r_list->map; - list_del(list); - drm_free(list, sizeof(*list), DRM_MEM_MAPS); - - for (pt = dev->vmalist, prev = NULL; pt; prev = pt, pt = pt->next) { - if (pt->vma->vm_private_data == map) found_maps++; - } - - if(!found_maps) { - drm_dma_handle_t dmah; - - switch (map->type) { - case _DRM_REGISTERS: - case _DRM_FRAME_BUFFER: - if (drm_core_has_MTRR(dev)) { - if (map->mtrr >= 0) { - int retcode; - retcode = mtrr_del(map->mtrr, - map->offset, - map->size); - DRM_DEBUG("mtrr_del = %d\n", retcode); - } - } - drm_ioremapfree(map->handle, map->size, dev); - break; - case _DRM_SHM: - vfree(map->handle); - break; - case _DRM_AGP: - case _DRM_SCATTER_GATHER: - break; - case _DRM_CONSISTENT: - dmah.vaddr = map->handle; - dmah.busaddr = map->offset; - dmah.size = map->size; - __drm_pci_free(dev, &dmah); + if (r_list->map == map) { + list_del(list); + drm_free(list, sizeof(*list), DRM_MEM_MAPS); break; } - drm_free(map, sizeof(*map), DRM_MEM_MAPS); } - up(&dev->struct_sem); + + /* List has wrapped around to the head pointer, or it's empty and we + * didn't find anything. 
+ */ + if (list == (&dev->maplist->head)) { + return -EINVAL; + } + + switch (map->type) { + case _DRM_REGISTERS: + drm_ioremapfree(map->handle, map->size, dev); + /* FALLTHROUGH */ + case _DRM_FRAME_BUFFER: + if (drm_core_has_MTRR(dev) && map->mtrr >= 0) { + int retcode; + retcode = mtrr_del(map->mtrr, map->offset, + map->size); + DRM_DEBUG ("mtrr_del=%d\n", retcode); + } + break; + case _DRM_SHM: + vfree(map->handle); + break; + case _DRM_AGP: + case _DRM_SCATTER_GATHER: + break; + case _DRM_CONSISTENT: + dmah.vaddr = map->handle; + dmah.busaddr = map->offset; + dmah.size = map->size; + __drm_pci_free(dev, &dmah); + break; + } + drm_free(map, sizeof(*map), DRM_MEM_MAPS); + return 0; } +EXPORT_SYMBOL(drm_rmmap_locked); + +int drm_rmmap(drm_device_t *dev, drm_local_map_t *map) +{ + int ret; + + down(&dev->struct_sem); + ret = drm_rmmap_locked(dev, map); + up(&dev->struct_sem); + + return ret; +} EXPORT_SYMBOL(drm_rmmap); +/* The rmmap ioctl appears to be unnecessary. All mappings are torn down on + * the last close of the device, and this is necessary for cleanup when things + * exit uncleanly. Therefore, having userland manually remove mappings seems + * like a pointless exercise since they're going away anyway. + * + * One use case might be after addmap is allowed for normal users for SHM and + * gets used by drivers that the server doesn't need to care about. This seems + * unlikely. 
+ */ int drm_rmmap_ioctl(struct inode *inode, struct file *filp, unsigned int cmd, unsigned long arg) { drm_file_t *priv = filp->private_data; drm_device_t *dev = priv->head->dev; drm_map_t request; + drm_local_map_t *map = NULL; + struct list_head *list; + int ret; if (copy_from_user(&request, (drm_map_t __user *)arg, sizeof(request))) { return -EFAULT; } - return drm_rmmap(dev, request.handle); + down(&dev->struct_sem); + list_for_each(list, &dev->maplist->head) { + drm_map_list_t *r_list = list_entry(list, drm_map_list_t, head); + + if (r_list->map && + r_list->map->handle == request.handle && + r_list->map->flags & _DRM_REMOVABLE) { + map = r_list->map; + break; + } + } + + /* List has wrapped around to the head pointer, or it's empty and we + * didn't find anything. + */ + if (list == (&dev->maplist->head)) { + up(&dev->struct_sem); + return -EINVAL; + } + + if (!map) + return -EINVAL; + + /* Register and framebuffer maps are permanent */ + if ((map->type == _DRM_REGISTERS) || (map->type == _DRM_FRAME_BUFFER)) { + up(&dev->struct_sem); + return 0; + } + + ret = drm_rmmap_locked(dev, map); + + up(&dev->struct_sem); + + return ret; } /** * Cleanup after an error on one of the addbufs() functions. * + * \param dev DRM device. * \param entry buffer entry where the error occurred. * * Frees any pages and buffers associated with the given entry. @@ -1470,3 +1536,26 @@ int drm_mapbufs( struct inode *inode, struct file *filp, return retcode; } +/** + * Compute size order. Returns the exponent of the smaller power of two which + * is greater or equal to given number. + * + * \param size size. + * \return order. + * + * \todo Can be made faster.
+ */ +int drm_order( unsigned long size ) +{ + int order; + unsigned long tmp; + + for (order = 0, tmp = size >> 1; tmp; tmp >>= 1, order++) + ; + + if (size & (size - 1)) + ++order; + + return order; +} +EXPORT_SYMBOL(drm_order); diff --git a/drivers/char/drm/drm_drv.c b/drivers/char/drm/drm_drv.c index 733af58fb3ac..6ba48f346fcf 100644 --- a/drivers/char/drm/drm_drv.c +++ b/drivers/char/drm/drm_drv.c @@ -132,9 +132,7 @@ static drm_ioctl_desc_t drm_ioctls[] = { int drm_takedown( drm_device_t *dev ) { drm_magic_entry_t *pt, *next; - drm_map_t *map; drm_map_list_t *r_list; - struct list_head *list, *list_next; drm_vma_entry_t *vma, *vma_next; int i; @@ -142,6 +140,7 @@ int drm_takedown( drm_device_t *dev ) if (dev->driver->pretakedown) dev->driver->pretakedown(dev); + DRM_DEBUG("driver pretakedown completed\n"); if (dev->unique) { drm_free(dev->unique, strlen(dev->unique) + 1, DRM_MEM_DRIVER); @@ -184,6 +183,10 @@ int drm_takedown( drm_device_t *dev ) dev->agp->acquired = 0; dev->agp->enabled = 0; } + if (drm_core_check_feature(dev, DRIVER_SG) && dev->sg) { + drm_sg_cleanup(dev->sg); + dev->sg = NULL; + } /* Clear vma list (only built for debugging) */ if ( dev->vmalist ) { @@ -195,56 +198,11 @@ int drm_takedown( drm_device_t *dev ) } if( dev->maplist ) { - list_for_each_safe( list, list_next, &dev->maplist->head ) { - r_list = (drm_map_list_t *)list; - - if ( ( map = r_list->map ) ) { - drm_dma_handle_t dmah; - - switch ( map->type ) { - case _DRM_REGISTERS: - case _DRM_FRAME_BUFFER: - if (drm_core_has_MTRR(dev)) { - if ( map->mtrr >= 0 ) { - int retcode; - retcode = mtrr_del( map->mtrr, - map->offset, - map->size ); - DRM_DEBUG( "mtrr_del=%d\n", retcode ); - } - } - drm_ioremapfree( map->handle, map->size, dev ); - break; - case _DRM_SHM: - vfree(map->handle); - break; - - case _DRM_AGP: - /* Do nothing here, because this is all - * handled in the AGP/GART driver. 
- */ - break; - case _DRM_SCATTER_GATHER: - /* Handle it */ - if (drm_core_check_feature(dev, DRIVER_SG) && dev->sg) { - drm_sg_cleanup(dev->sg); - dev->sg = NULL; - } - break; - case _DRM_CONSISTENT: - dmah.vaddr = map->handle; - dmah.busaddr = map->offset; - dmah.size = map->size; - __drm_pci_free(dev, &dmah); - break; - } - drm_free(map, sizeof(*map), DRM_MEM_MAPS); - } - list_del( list ); - drm_free(r_list, sizeof(*r_list), DRM_MEM_MAPS); - } - drm_free(dev->maplist, sizeof(*dev->maplist), DRM_MEM_MAPS); - dev->maplist = NULL; + while (!list_empty(&dev->maplist->head)) { + struct list_head *list = dev->maplist->head.next; + r_list = list_entry(list, drm_map_list_t, head); + drm_rmmap_locked(dev, r_list->map); + } } if (drm_core_check_feature(dev, DRIVER_DMA_QUEUE) && dev->queuelist ) { @@ -273,6 +231,7 @@ int drm_takedown( drm_device_t *dev ) } up( &dev->struct_sem ); + DRM_DEBUG("takedown completed\n"); return 0; } @@ -334,6 +293,11 @@ static void drm_cleanup( drm_device_t *dev ) drm_takedown( dev ); + if (dev->maplist) { + drm_free(dev->maplist, sizeof(*dev->maplist), DRM_MEM_MAPS); + dev->maplist = NULL; + } + drm_ctxbitmap_cleanup( dev ); if (drm_core_has_MTRR(dev) && drm_core_has_AGP(dev) && diff --git a/drivers/char/drm/drm_fops.c b/drivers/char/drm/drm_fops.c index 10e64fde8d78..a1f4e9cd64ed 100644 --- a/drivers/char/drm/drm_fops.c +++ b/drivers/char/drm/drm_fops.c @@ -71,12 +71,6 @@ static int drm_setup( drm_device_t *dev ) dev->magiclist[i].tail = NULL; } - dev->maplist = drm_alloc(sizeof(*dev->maplist), - DRM_MEM_MAPS); - if(dev->maplist == NULL) return -ENOMEM; - memset(dev->maplist, 0, sizeof(*dev->maplist)); - INIT_LIST_HEAD(&dev->maplist->head); - dev->ctxlist = drm_alloc(sizeof(*dev->ctxlist), DRM_MEM_CTXLIST); if(dev->ctxlist == NULL) return -ENOMEM; diff --git a/drivers/char/drm/drm_stub.c b/drivers/char/drm/drm_stub.c index 068ca9a8b0b4..95a976c96eb8 100644 --- a/drivers/char/drm/drm_stub.c +++ b/drivers/char/drm/drm_stub.c @@ -75,6 +75,11 @@ 
static int drm_fill_in_dev(drm_device_t *dev, struct pci_dev *pdev, const struct dev->pci_func = PCI_FUNC(pdev->devfn); dev->irq = pdev->irq; + dev->maplist = drm_calloc(1, sizeof(*dev->maplist), DRM_MEM_MAPS); + if (dev->maplist == NULL) + return -ENOMEM; + INIT_LIST_HEAD(&dev->maplist->head); + /* the DRM has 6 basic counters */ dev->counters = 6; dev->types[0] = _DRM_STAT_LOCK; diff --git a/drivers/char/drm/radeon_cp.c b/drivers/char/drm/radeon_cp.c index 20bcf872b348..8255cc6fdc28 100644 --- a/drivers/char/drm/radeon_cp.c +++ b/drivers/char/drm/radeon_cp.c @@ -2048,6 +2048,27 @@ int radeon_driver_preinit(struct drm_device *dev, unsigned long flags) return ret; } +int radeon_presetup(struct drm_device *dev) +{ + int ret; + drm_local_map_t *map; + drm_radeon_private_t *dev_priv = dev->dev_private; + + ret = drm_addmap(dev, drm_get_resource_start(dev, 2), + drm_get_resource_len(dev, 2), _DRM_REGISTERS, + _DRM_READ_ONLY, &dev_priv->mmio); + if (ret != 0) + return ret; + + ret = drm_addmap(dev, drm_get_resource_start(dev, 0), + drm_get_resource_len(dev, 0), _DRM_FRAME_BUFFER, + _DRM_WRITE_COMBINING, &map); + if (ret != 0) + return ret; + + return 0; +} + int radeon_driver_postcleanup(struct drm_device *dev) { drm_radeon_private_t *dev_priv = dev->dev_private; diff --git a/drivers/char/drm/radeon_drv.c b/drivers/char/drm/radeon_drv.c index 18e4e5b0952f..e0682f64b400 100644 --- a/drivers/char/drm/radeon_drv.c +++ b/drivers/char/drm/radeon_drv.c @@ -76,6 +76,7 @@ static struct drm_driver driver = { .driver_features = DRIVER_USE_AGP | DRIVER_USE_MTRR | DRIVER_PCI_DMA | DRIVER_SG | DRIVER_HAVE_IRQ | DRIVER_HAVE_DMA | DRIVER_IRQ_SHARED | DRIVER_IRQ_VBL, .dev_priv_size = sizeof(drm_radeon_buf_priv_t), .preinit = radeon_driver_preinit, + .presetup = radeon_presetup, .postcleanup = radeon_driver_postcleanup, .prerelease = radeon_driver_prerelease, .pretakedown = radeon_driver_pretakedown, diff --git a/drivers/char/drm/radeon_drv.h b/drivers/char/drm/radeon_drv.h index 
771aa80a5e8c..e701dffe978d 100644 --- a/drivers/char/drm/radeon_drv.h +++ b/drivers/char/drm/radeon_drv.h @@ -290,6 +290,7 @@ extern int radeon_wait_ring( drm_radeon_private_t *dev_priv, int n ); extern int radeon_do_cp_idle( drm_radeon_private_t *dev_priv ); extern int radeon_driver_preinit(struct drm_device *dev, unsigned long flags); +extern int radeon_presetup(struct drm_device *dev); extern int radeon_driver_postcleanup(struct drm_device *dev); extern int radeon_mem_alloc( DRM_IOCTL_ARGS ); From 1c7c1fe51629d82e1dff22b2c4d409b252fb2b05 Mon Sep 17 00:00:00 2001 From: Russell King Date: Wed, 27 Jul 2005 11:31:19 +0100 Subject: [PATCH 017/584] [SERIAL] Rename pci_board to pciserial_board. Signed-off-by: Russell King --- drivers/serial/8250_pci.c | 33 ++++++++++++++++++--------------- 1 file changed, 18 insertions(+), 15 deletions(-) diff --git a/drivers/serial/8250_pci.c b/drivers/serial/8250_pci.c index 356f5556759a..84c8f8f592ca 100644 --- a/drivers/serial/8250_pci.c +++ b/drivers/serial/8250_pci.c @@ -54,7 +54,7 @@ /* Use the Base address register size to cap number of ports */ #define FL_REGION_SZ_CAP 0x0100 -struct pci_board { +struct pciserial_board { unsigned int flags; unsigned int num_ports; unsigned int base_baud; @@ -75,7 +75,7 @@ struct pci_serial_quirk { u32 subvendor; u32 subdevice; int (*init)(struct pci_dev *dev); - int (*setup)(struct pci_dev *dev, struct pci_board *board, + int (*setup)(struct pci_dev *dev, struct pciserial_board *, struct uart_port *port, int idx); void (*exit)(struct pci_dev *dev); }; @@ -136,7 +136,7 @@ setup_port(struct pci_dev *dev, struct uart_port *port, * Not that ugly ;) -- HW */ static int -afavlab_setup(struct pci_dev *dev, struct pci_board *board, +afavlab_setup(struct pci_dev *dev, struct pciserial_board *board, struct uart_port *port, int idx) { unsigned int bar, offset = board->first_offset; @@ -189,7 +189,7 @@ static int __devinit pci_hp_diva_init(struct pci_dev *dev) * some serial ports are supposed to be hidden 
on certain models. */ static int -pci_hp_diva_setup(struct pci_dev *dev, struct pci_board *board, +pci_hp_diva_setup(struct pci_dev *dev, struct pciserial_board *board, struct uart_port *port, int idx) { unsigned int offset = board->first_offset; @@ -307,7 +307,7 @@ static void __devexit pci_plx9050_exit(struct pci_dev *dev) /* SBS Technologies Inc. PMC-OCTPRO and P-OCTAL cards */ static int -sbs_setup(struct pci_dev *dev, struct pci_board *board, +sbs_setup(struct pci_dev *dev, struct pciserial_board *board, struct uart_port *port, int idx) { unsigned int bar, offset = board->first_offset; @@ -523,7 +523,7 @@ static int __devinit pci_timedia_init(struct pci_dev *dev) * Ugh, this is ugly as all hell --- TYT */ static int -pci_timedia_setup(struct pci_dev *dev, struct pci_board *board, +pci_timedia_setup(struct pci_dev *dev, struct pciserial_board *board, struct uart_port *port, int idx) { unsigned int bar = 0, offset = board->first_offset; @@ -556,7 +556,8 @@ pci_timedia_setup(struct pci_dev *dev, struct pci_board *board, * Some Titan cards are also a little weird */ static int -titan_400l_800l_setup(struct pci_dev *dev, struct pci_board *board, +titan_400l_800l_setup(struct pci_dev *dev, + struct pciserial_board *board, struct uart_port *port, int idx) { unsigned int bar, offset = board->first_offset; @@ -593,7 +594,7 @@ static int __devinit pci_netmos_init(struct pci_dev *dev) } static int -pci_default_setup(struct pci_dev *dev, struct pci_board *board, +pci_default_setup(struct pci_dev *dev, struct pciserial_board *board, struct uart_port *port, int idx) { unsigned int bar, offset = board->first_offset, maxnr; @@ -990,7 +991,7 @@ static struct pci_serial_quirk *find_quirk(struct pci_dev *dev) } static _INLINE_ int -get_pci_irq(struct pci_dev *dev, struct pci_board *board, int idx) +get_pci_irq(struct pci_dev *dev, struct pciserial_board *board, int idx) { if (board->flags & FL_NOIRQ) return 0; @@ -1113,7 +1114,7 @@ enum pci_board_num_t { * see first lines of 
serial_in() and serial_out() in 8250.c */ -static struct pci_board pci_boards[] __devinitdata = { +static struct pciserial_board pci_boards[] __devinitdata = { [pbn_default] = { .flags = FL_BASE0, .num_ports = 1, @@ -1565,7 +1566,7 @@ static struct pci_board pci_boards[] __devinitdata = { * serial specs. Returns 0 on success, 1 on failure. */ static int __devinit -serial_pci_guess_board(struct pci_dev *dev, struct pci_board *board) +serial_pci_guess_board(struct pci_dev *dev, struct pciserial_board *board) { int num_iomem, num_port, first_port = -1, i; @@ -1630,7 +1631,8 @@ serial_pci_guess_board(struct pci_dev *dev, struct pci_board *board) } static inline int -serial_pci_matches(struct pci_board *board, struct pci_board *guessed) +serial_pci_matches(struct pciserial_board *board, + struct pciserial_board *guessed) { return board->num_ports == guessed->num_ports && @@ -1648,7 +1650,7 @@ static int __devinit pciserial_init_one(struct pci_dev *dev, const struct pci_device_id *ent) { struct serial_private *priv; - struct pci_board *board, tmp; + struct pciserial_board *board, tmp; struct pci_serial_quirk *quirk; int rc, nr_ports, i; @@ -1669,7 +1671,7 @@ pciserial_init_one(struct pci_dev *dev, const struct pci_device_id *ent) * Use a copy of the pci_board entry for this; * avoid changing entries in the table. */ - memcpy(&tmp, board, sizeof(struct pci_board)); + memcpy(&tmp, board, sizeof(struct pciserial_board)); board = &tmp; /* @@ -1685,7 +1687,8 @@ pciserial_init_one(struct pci_dev *dev, const struct pci_device_id *ent) * detect this boards settings with our heuristic, * then we no longer need this entry. 
*/ - memcpy(&tmp, &pci_boards[pbn_default], sizeof(struct pci_board)); + memcpy(&tmp, &pci_boards[pbn_default], + sizeof(struct pciserial_board)); rc = serial_pci_guess_board(dev, &tmp); if (rc == 0 && serial_pci_matches(board, &tmp)) moan_device("Redundant entry in serial pci_table.", From 72ce9a83331afdd4df944f210a5210bf5acb7d6a Mon Sep 17 00:00:00 2001 From: Russell King Date: Wed, 27 Jul 2005 11:32:04 +0100 Subject: [PATCH 018/584] [SERIAL] Factor out the common setup from the per-serial port loop. Signed-off-by: Russell King --- drivers/serial/8250_pci.c | 28 ++++++++++++++++------------ 1 file changed, 16 insertions(+), 12 deletions(-) diff --git a/drivers/serial/8250_pci.c b/drivers/serial/8250_pci.c index 84c8f8f592ca..8f2617206e8f 100644 --- a/drivers/serial/8250_pci.c +++ b/drivers/serial/8250_pci.c @@ -110,8 +110,9 @@ setup_port(struct pci_dev *dev, struct uart_port *port, if (bar >= PCI_NUM_BAR_RESOURCES) return -EINVAL; + base = pci_resource_start(dev, bar); + if (pci_resource_flags(dev, bar) & IORESOURCE_MEM) { - base = pci_resource_start(dev, bar); len = pci_resource_len(dev, bar); if (!priv->remapped_bar[bar]) @@ -120,13 +121,16 @@ setup_port(struct pci_dev *dev, struct uart_port *port, return -ENOMEM; port->iotype = UPIO_MEM; + port->iobase = 0; port->mapbase = base + offset; port->membase = priv->remapped_bar[bar] + offset; port->regshift = regshift; } else { - base = pci_resource_start(dev, bar) + offset; port->iotype = UPIO_PORT; - port->iobase = base; + port->iobase = base + offset; + port->mapbase = 0; + port->membase = NULL; + port->regshift = 0; } return 0; } @@ -991,7 +995,7 @@ static struct pci_serial_quirk *find_quirk(struct pci_dev *dev) } static _INLINE_ int -get_pci_irq(struct pci_dev *dev, struct pciserial_board *board, int idx) +get_pci_irq(struct pci_dev *dev, struct pciserial_board *board) { if (board->flags & FL_NOIRQ) return 0; @@ -1649,6 +1653,7 @@ serial_pci_matches(struct pciserial_board *board, static int __devinit 
pciserial_init_one(struct pci_dev *dev, const struct pci_device_id *ent) { + struct uart_port serial_port; struct serial_private *priv; struct pciserial_board *board, tmp; struct pci_serial_quirk *quirk; @@ -1731,17 +1736,16 @@ pciserial_init_one(struct pci_dev *dev, const struct pci_device_id *ent) priv->quirk = quirk; pci_set_drvdata(dev, priv); - for (i = 0; i < nr_ports; i++) { - struct uart_port serial_port; - memset(&serial_port, 0, sizeof(struct uart_port)); + memset(&serial_port, 0, sizeof(struct uart_port)); + serial_port.flags = UPF_SKIP_TEST | UPF_BOOT_AUTOCONF | UPF_SHARE_IRQ; + serial_port.uartclk = board->base_baud * 16; + serial_port.irq = get_pci_irq(dev, board); + serial_port.dev = &dev->dev; - serial_port.flags = UPF_SKIP_TEST | UPF_BOOT_AUTOCONF | - UPF_SHARE_IRQ; - serial_port.uartclk = board->base_baud * 16; - serial_port.irq = get_pci_irq(dev, board, i); - serial_port.dev = &dev->dev; + for (i = 0; i < nr_ports; i++) { if (quirk->setup(dev, board, &serial_port, i)) break; + #ifdef SERIAL_DEBUG_PCI printk("Setup PCI port: port %x, irq %d, type %d\n", serial_port.iobase, serial_port.irq, serial_port.iotype); From 67d74b870725448e0108984eec551609771e6b73 Mon Sep 17 00:00:00 2001 From: Russell King Date: Wed, 27 Jul 2005 11:33:03 +0100 Subject: [PATCH 019/584] [SERIAL] Collapse the SIIG quirk entries Collapse all the SIIG quirk entries into one. SIIG10x cards all have PCI device IDs of 0x10xx, SIIG20x cards all have PCI device IDs of 0x20xx. 
Signed-off-by: Russell King --- drivers/serial/8250_pci.c | 157 +++++--------------------------------- 1 file changed, 18 insertions(+), 139 deletions(-) diff --git a/drivers/serial/8250_pci.c b/drivers/serial/8250_pci.c index 8f2617206e8f..c43de35a6c9d 100644 --- a/drivers/serial/8250_pci.c +++ b/drivers/serial/8250_pci.c @@ -393,6 +393,9 @@ static void __devexit sbs_exit(struct pci_dev *dev) * - 10x cards have control registers in IO and/or memory space; * - 20x cards have control registers in standard PCI configuration space. * + * Note: all 10x cards have PCI device ids 0x10.. + * all 20x cards have PCI device ids 0x20.. + * * There are also Quartet Serial cards which use Oxford Semiconductor * 16954 quad UART PCI chip clocked by 18.432 MHz quartz. * @@ -449,6 +452,19 @@ static int pci_siig20x_init(struct pci_dev *dev) return 0; } +static int pci_siig_init(struct pci_dev *dev) +{ + unsigned int type = dev->device & 0xff00; + + if (type == 0x1000) + return pci_siig10x_init(dev); + else if (type == 0x2000) + return pci_siig20x_init(dev); + + moan_device("Unknown SIIG card", dev); + return -ENODEV; +} + int pci_siig10x_fn(struct pci_dev *dev, int enable) { int ret = 0; @@ -759,152 +775,15 @@ static struct pci_serial_quirk pci_serial_quirks[] = { .setup = sbs_setup, .exit = __devexit_p(sbs_exit), }, - /* * SIIG cards. - * It is not clear whether these could be collapsed. 
*/ { .vendor = PCI_VENDOR_ID_SIIG, - .device = PCI_DEVICE_ID_SIIG_1S_10x_550, + .device = PCI_ANY_ID, .subvendor = PCI_ANY_ID, .subdevice = PCI_ANY_ID, - .init = pci_siig10x_init, - .setup = pci_default_setup, - }, - { - .vendor = PCI_VENDOR_ID_SIIG, - .device = PCI_DEVICE_ID_SIIG_1S_10x_650, - .subvendor = PCI_ANY_ID, - .subdevice = PCI_ANY_ID, - .init = pci_siig10x_init, - .setup = pci_default_setup, - }, - { - .vendor = PCI_VENDOR_ID_SIIG, - .device = PCI_DEVICE_ID_SIIG_1S_10x_850, - .subvendor = PCI_ANY_ID, - .subdevice = PCI_ANY_ID, - .init = pci_siig10x_init, - .setup = pci_default_setup, - }, - { - .vendor = PCI_VENDOR_ID_SIIG, - .device = PCI_DEVICE_ID_SIIG_2S_10x_550, - .subvendor = PCI_ANY_ID, - .subdevice = PCI_ANY_ID, - .init = pci_siig10x_init, - .setup = pci_default_setup, - }, - { - .vendor = PCI_VENDOR_ID_SIIG, - .device = PCI_DEVICE_ID_SIIG_2S_10x_650, - .subvendor = PCI_ANY_ID, - .subdevice = PCI_ANY_ID, - .init = pci_siig10x_init, - .setup = pci_default_setup, - }, - { - .vendor = PCI_VENDOR_ID_SIIG, - .device = PCI_DEVICE_ID_SIIG_2S_10x_850, - .subvendor = PCI_ANY_ID, - .subdevice = PCI_ANY_ID, - .init = pci_siig10x_init, - .setup = pci_default_setup, - }, - { - .vendor = PCI_VENDOR_ID_SIIG, - .device = PCI_DEVICE_ID_SIIG_4S_10x_550, - .subvendor = PCI_ANY_ID, - .subdevice = PCI_ANY_ID, - .init = pci_siig10x_init, - .setup = pci_default_setup, - }, - { - .vendor = PCI_VENDOR_ID_SIIG, - .device = PCI_DEVICE_ID_SIIG_4S_10x_650, - .subvendor = PCI_ANY_ID, - .subdevice = PCI_ANY_ID, - .init = pci_siig10x_init, - .setup = pci_default_setup, - }, - { - .vendor = PCI_VENDOR_ID_SIIG, - .device = PCI_DEVICE_ID_SIIG_4S_10x_850, - .subvendor = PCI_ANY_ID, - .subdevice = PCI_ANY_ID, - .init = pci_siig10x_init, - .setup = pci_default_setup, - }, - { - .vendor = PCI_VENDOR_ID_SIIG, - .device = PCI_DEVICE_ID_SIIG_1S_20x_550, - .subvendor = PCI_ANY_ID, - .subdevice = PCI_ANY_ID, - .init = pci_siig20x_init, - .setup = pci_default_setup, - }, - { - .vendor = 
PCI_VENDOR_ID_SIIG, - .device = PCI_DEVICE_ID_SIIG_1S_20x_650, - .subvendor = PCI_ANY_ID, - .subdevice = PCI_ANY_ID, - .init = pci_siig20x_init, - .setup = pci_default_setup, - }, - { - .vendor = PCI_VENDOR_ID_SIIG, - .device = PCI_DEVICE_ID_SIIG_1S_20x_850, - .subvendor = PCI_ANY_ID, - .subdevice = PCI_ANY_ID, - .init = pci_siig20x_init, - .setup = pci_default_setup, - }, - { - .vendor = PCI_VENDOR_ID_SIIG, - .device = PCI_DEVICE_ID_SIIG_2S_20x_550, - .subvendor = PCI_ANY_ID, - .subdevice = PCI_ANY_ID, - .init = pci_siig20x_init, - .setup = pci_default_setup, - }, - { .vendor = PCI_VENDOR_ID_SIIG, - .device = PCI_DEVICE_ID_SIIG_2S_20x_650, - .subvendor = PCI_ANY_ID, - .subdevice = PCI_ANY_ID, - .init = pci_siig20x_init, - .setup = pci_default_setup, - }, - { - .vendor = PCI_VENDOR_ID_SIIG, - .device = PCI_DEVICE_ID_SIIG_2S_20x_850, - .subvendor = PCI_ANY_ID, - .subdevice = PCI_ANY_ID, - .init = pci_siig20x_init, - .setup = pci_default_setup, - }, - { - .vendor = PCI_VENDOR_ID_SIIG, - .device = PCI_DEVICE_ID_SIIG_4S_20x_550, - .subvendor = PCI_ANY_ID, - .subdevice = PCI_ANY_ID, - .init = pci_siig20x_init, - .setup = pci_default_setup, - }, - { - .vendor = PCI_VENDOR_ID_SIIG, - .device = PCI_DEVICE_ID_SIIG_4S_20x_650, - .subvendor = PCI_ANY_ID, - .subdevice = PCI_ANY_ID, - .init = pci_siig20x_init, - .setup = pci_default_setup, - }, - { - .vendor = PCI_VENDOR_ID_SIIG, - .device = PCI_DEVICE_ID_SIIG_4S_20x_850, - .subvendor = PCI_ANY_ID, - .subdevice = PCI_ANY_ID, - .init = pci_siig20x_init, + .init = pci_siig_init, .setup = pci_default_setup, }, /* From 70db3d91a5228c98603c55fa06c87184a1f9f6db Mon Sep 17 00:00:00 2001 From: Russell King Date: Wed, 27 Jul 2005 11:34:27 +0100 Subject: [PATCH 020/584] [SERIAL] Pass around serial_private instead of pci_dev Pass the serial_private structure via the setup method instead of the pci_dev. We don't want to assume that the pci_dev's driver data is a pointer to serial_private. Instead, put the pci_dev inside serial_private. 
Signed-off-by: Russell King --- drivers/serial/8250_pci.c | 40 +++++++++++++++++++++------------------ 1 file changed, 22 insertions(+), 18 deletions(-) diff --git a/drivers/serial/8250_pci.c b/drivers/serial/8250_pci.c index c43de35a6c9d..7ca07651c10c 100644 --- a/drivers/serial/8250_pci.c +++ b/drivers/serial/8250_pci.c @@ -63,6 +63,8 @@ struct pciserial_board { unsigned int first_offset; }; +struct serial_private; + /* * init function returns: * > 0 - number of ports @@ -75,7 +77,7 @@ struct pci_serial_quirk { u32 subvendor; u32 subdevice; int (*init)(struct pci_dev *dev); - int (*setup)(struct pci_dev *dev, struct pciserial_board *, + int (*setup)(struct serial_private *, struct pciserial_board *, struct uart_port *port, int idx); void (*exit)(struct pci_dev *dev); }; @@ -83,6 +85,7 @@ struct pci_serial_quirk { #define PCI_NUM_BAR_RESOURCES 6 struct serial_private { + struct pci_dev *dev; unsigned int nr; void __iomem *remapped_bar[PCI_NUM_BAR_RESOURCES]; struct pci_serial_quirk *quirk; @@ -101,10 +104,10 @@ static void moan_device(const char *str, struct pci_dev *dev) } static int -setup_port(struct pci_dev *dev, struct uart_port *port, +setup_port(struct serial_private *priv, struct uart_port *port, int bar, int offset, int regshift) { - struct serial_private *priv = pci_get_drvdata(dev); + struct pci_dev *dev = priv->dev; unsigned long base, len; if (bar >= PCI_NUM_BAR_RESOURCES) @@ -140,7 +143,7 @@ setup_port(struct pci_dev *dev, struct uart_port *port, * Not that ugly ;) -- HW */ static int -afavlab_setup(struct pci_dev *dev, struct pciserial_board *board, +afavlab_setup(struct serial_private *priv, struct pciserial_board *board, struct uart_port *port, int idx) { unsigned int bar, offset = board->first_offset; @@ -153,7 +156,7 @@ afavlab_setup(struct pci_dev *dev, struct pciserial_board *board, offset += (idx - 4) * board->uart_offset; } - return setup_port(dev, port, bar, offset, board->reg_shift); + return setup_port(priv, port, bar, offset, 
board->reg_shift); } /* @@ -193,13 +196,13 @@ static int __devinit pci_hp_diva_init(struct pci_dev *dev) * some serial ports are supposed to be hidden on certain models. */ static int -pci_hp_diva_setup(struct pci_dev *dev, struct pciserial_board *board, +pci_hp_diva_setup(struct serial_private *priv, struct pciserial_board *board, struct uart_port *port, int idx) { unsigned int offset = board->first_offset; unsigned int bar = FL_GET_BASE(board->flags); - switch (dev->subsystem_device) { + switch (priv->dev->subsystem_device) { case PCI_DEVICE_ID_HP_DIVA_MAESTRO: if (idx == 3) idx++; @@ -216,7 +219,7 @@ pci_hp_diva_setup(struct pci_dev *dev, struct pciserial_board *board, offset += idx * board->uart_offset; - return setup_port(dev, port, bar, offset, board->reg_shift); + return setup_port(priv, port, bar, offset, board->reg_shift); } /* @@ -311,7 +314,7 @@ static void __devexit pci_plx9050_exit(struct pci_dev *dev) /* SBS Technologies Inc. PMC-OCTPRO and P-OCTAL cards */ static int -sbs_setup(struct pci_dev *dev, struct pciserial_board *board, +sbs_setup(struct serial_private *priv, struct pciserial_board *board, struct uart_port *port, int idx) { unsigned int bar, offset = board->first_offset; @@ -327,7 +330,7 @@ sbs_setup(struct pci_dev *dev, struct pciserial_board *board, } else /* we have only 8 ports on PMC-OCTALPRO */ return 1; - return setup_port(dev, port, bar, offset, board->reg_shift); + return setup_port(priv, port, bar, offset, board->reg_shift); } /* @@ -543,7 +546,7 @@ static int __devinit pci_timedia_init(struct pci_dev *dev) * Ugh, this is ugly as all hell --- TYT */ static int -pci_timedia_setup(struct pci_dev *dev, struct pciserial_board *board, +pci_timedia_setup(struct serial_private *priv, struct pciserial_board *board, struct uart_port *port, int idx) { unsigned int bar = 0, offset = board->first_offset; @@ -569,14 +572,14 @@ pci_timedia_setup(struct pci_dev *dev, struct pciserial_board *board, bar = idx - 2; } - return setup_port(dev, port, 
bar, offset, board->reg_shift); + return setup_port(priv, port, bar, offset, board->reg_shift); } /* * Some Titan cards are also a little weird */ static int -titan_400l_800l_setup(struct pci_dev *dev, +titan_400l_800l_setup(struct serial_private *priv, struct pciserial_board *board, struct uart_port *port, int idx) { @@ -594,7 +597,7 @@ titan_400l_800l_setup(struct pci_dev *dev, offset = (idx - 2) * board->uart_offset; } - return setup_port(dev, port, bar, offset, board->reg_shift); + return setup_port(priv, port, bar, offset, board->reg_shift); } static int __devinit pci_xircom_init(struct pci_dev *dev) @@ -614,7 +617,7 @@ static int __devinit pci_netmos_init(struct pci_dev *dev) } static int -pci_default_setup(struct pci_dev *dev, struct pciserial_board *board, +pci_default_setup(struct serial_private *priv, struct pciserial_board *board, struct uart_port *port, int idx) { unsigned int bar, offset = board->first_offset, maxnr; @@ -625,13 +628,13 @@ pci_default_setup(struct pci_dev *dev, struct pciserial_board *board, else offset += idx * board->uart_offset; - maxnr = (pci_resource_len(dev, bar) - board->first_offset) / + maxnr = (pci_resource_len(priv->dev, bar) - board->first_offset) / (8 << board->reg_shift); if (board->flags & FL_REGION_SZ_CAP && idx >= maxnr) return 1; - return setup_port(dev, port, bar, offset, board->reg_shift); + return setup_port(priv, port, bar, offset, board->reg_shift); } /* This should be in linux/pci_ids.h */ @@ -1612,6 +1615,7 @@ pciserial_init_one(struct pci_dev *dev, const struct pci_device_id *ent) memset(priv, 0, sizeof(struct serial_private) + sizeof(unsigned int) * nr_ports); + priv->dev = dev; priv->quirk = quirk; pci_set_drvdata(dev, priv); @@ -1622,7 +1626,7 @@ pciserial_init_one(struct pci_dev *dev, const struct pci_device_id *ent) serial_port.dev = &dev->dev; for (i = 0; i < nr_ports; i++) { - if (quirk->setup(dev, board, &serial_port, i)) + if (quirk->setup(priv, board, &serial_port, i)) break; #ifdef SERIAL_DEBUG_PCI 
From 241fc4367b3ca5d407b043599ed980304a70b91f Mon Sep 17 00:00:00 2001 From: Russell King Date: Wed, 27 Jul 2005 11:35:54 +0100 Subject: [PATCH 021/584] [SERIAL] Expose 8250_pci setup/removal/suspend/resume functions Re-jig the setup/removal/suspend/resume of 8250 pci ports so that they know slightly less about how they're attached to a PCI device. Expose this as the new interface for registering PCI serial ports, as well as the pciserial_board structure and associated flag definitions. Signed-off-by: Russell King --- drivers/serial/8250_pci.c | 273 ++++++++++++++++++++------------------ include/linux/8250_pci.h | 38 ++++++ 2 files changed, 179 insertions(+), 132 deletions(-) diff --git a/drivers/serial/8250_pci.c b/drivers/serial/8250_pci.c index 7ca07651c10c..4e9084edfc7e 100644 --- a/drivers/serial/8250_pci.c +++ b/drivers/serial/8250_pci.c @@ -33,38 +33,6 @@ #undef SERIAL_DEBUG_PCI -/* - * Definitions for PCI support. - */ -#define FL_BASE_MASK 0x0007 -#define FL_BASE0 0x0000 -#define FL_BASE1 0x0001 -#define FL_BASE2 0x0002 -#define FL_BASE3 0x0003 -#define FL_BASE4 0x0004 -#define FL_GET_BASE(x) (x & FL_BASE_MASK) - -/* Use successive BARs (PCI base address registers), - else use offset into some specified BAR */ -#define FL_BASE_BARS 0x0008 - -/* do not assign an irq */ -#define FL_NOIRQ 0x0080 - -/* Use the Base address register size to cap number of ports */ -#define FL_REGION_SZ_CAP 0x0100 - -struct pciserial_board { - unsigned int flags; - unsigned int num_ports; - unsigned int base_baud; - unsigned int uart_offset; - unsigned int reg_shift; - unsigned int first_offset; -}; - -struct serial_private; - /* * init function returns: * > 0 - number of ports @@ -1528,6 +1496,137 @@ serial_pci_matches(struct pciserial_board *board, board->first_offset == guessed->first_offset; } +struct serial_private * +pciserial_init_ports(struct pci_dev *dev, struct pciserial_board *board) +{ + struct uart_port serial_port; + struct serial_private *priv; + struct 
pci_serial_quirk *quirk; + int rc, nr_ports, i; + + nr_ports = board->num_ports; + + /* + * Find an init and setup quirks. + */ + quirk = find_quirk(dev); + + /* + * Run the new-style initialization function. + * The initialization function returns: + * <0 - error + * 0 - use board->num_ports + * >0 - number of ports + */ + if (quirk->init) { + rc = quirk->init(dev); + if (rc < 0) { + priv = ERR_PTR(rc); + goto err_out; + } + if (rc) + nr_ports = rc; + } + + priv = kmalloc(sizeof(struct serial_private) + + sizeof(unsigned int) * nr_ports, + GFP_KERNEL); + if (!priv) { + priv = ERR_PTR(-ENOMEM); + goto err_deinit; + } + + memset(priv, 0, sizeof(struct serial_private) + + sizeof(unsigned int) * nr_ports); + + priv->dev = dev; + priv->quirk = quirk; + + memset(&serial_port, 0, sizeof(struct uart_port)); + serial_port.flags = UPF_SKIP_TEST | UPF_BOOT_AUTOCONF | UPF_SHARE_IRQ; + serial_port.uartclk = board->base_baud * 16; + serial_port.irq = get_pci_irq(dev, board); + serial_port.dev = &dev->dev; + + for (i = 0; i < nr_ports; i++) { + if (quirk->setup(priv, board, &serial_port, i)) + break; + +#ifdef SERIAL_DEBUG_PCI + printk("Setup PCI port: port %x, irq %d, type %d\n", + serial_port.iobase, serial_port.irq, serial_port.iotype); +#endif + + priv->line[i] = serial8250_register_port(&serial_port); + if (priv->line[i] < 0) { + printk(KERN_WARNING "Couldn't register serial port %s: %d\n", pci_name(dev), priv->line[i]); + break; + } + } + + priv->nr = i; + + return priv; + + err_deinit: + if (quirk->exit) + quirk->exit(dev); + err_out: + return priv; +} +EXPORT_SYMBOL_GPL(pciserial_init_ports); + +void pciserial_remove_ports(struct serial_private *priv) +{ + struct pci_serial_quirk *quirk; + int i; + + for (i = 0; i < priv->nr; i++) + serial8250_unregister_port(priv->line[i]); + + for (i = 0; i < PCI_NUM_BAR_RESOURCES; i++) { + if (priv->remapped_bar[i]) + iounmap(priv->remapped_bar[i]); + priv->remapped_bar[i] = NULL; + } + + /* + * Find the exit quirks. 
+ */ + quirk = find_quirk(priv->dev); + if (quirk->exit) + quirk->exit(priv->dev); + + kfree(priv); +} +EXPORT_SYMBOL_GPL(pciserial_remove_ports); + +void pciserial_suspend_ports(struct serial_private *priv) +{ + int i; + + for (i = 0; i < priv->nr; i++) + if (priv->line[i] >= 0) + serial8250_suspend_port(priv->line[i]); +} +EXPORT_SYMBOL_GPL(pciserial_suspend_ports); + +void pciserial_resume_ports(struct serial_private *priv) +{ + int i; + + /* + * Ensure that the board is correctly configured. + */ + if (priv->quirk->init) + priv->quirk->init(priv->dev); + + for (i = 0; i < priv->nr; i++) + if (priv->line[i] >= 0) + serial8250_resume_port(priv->line[i]); +} +EXPORT_SYMBOL_GPL(pciserial_resume_ports); + /* * Probe one serial board. Unfortunately, there is no rhyme nor reason * to the arrangement of serial ports on a PCI card. @@ -1535,11 +1634,9 @@ serial_pci_matches(struct pciserial_board *board, static int __devinit pciserial_init_one(struct pci_dev *dev, const struct pci_device_id *ent) { - struct uart_port serial_port; struct serial_private *priv; struct pciserial_board *board, tmp; - struct pci_serial_quirk *quirk; - int rc, nr_ports, i; + int rc; if (ent->driver_data >= ARRAY_SIZE(pci_boards)) { printk(KERN_ERR "pci_init_one: invalid driver_data: %ld\n", @@ -1582,72 +1679,14 @@ pciserial_init_one(struct pci_dev *dev, const struct pci_device_id *ent) dev); } - nr_ports = board->num_ports; - - /* - * Find an init and setup quirks. - */ - quirk = find_quirk(dev); - - /* - * Run the new-style initialization function. 
- * The initialization function returns: - * <0 - error - * 0 - use board->num_ports - * >0 - number of ports - */ - if (quirk->init) { - rc = quirk->init(dev); - if (rc < 0) - goto disable; - if (rc) - nr_ports = rc; + priv = pciserial_init_ports(dev, board); + if (!IS_ERR(priv)) { + pci_set_drvdata(dev, priv); + return 0; } - priv = kmalloc(sizeof(struct serial_private) + - sizeof(unsigned int) * nr_ports, - GFP_KERNEL); - if (!priv) { - rc = -ENOMEM; - goto deinit; - } + rc = PTR_ERR(priv); - memset(priv, 0, sizeof(struct serial_private) + - sizeof(unsigned int) * nr_ports); - - priv->dev = dev; - priv->quirk = quirk; - pci_set_drvdata(dev, priv); - - memset(&serial_port, 0, sizeof(struct uart_port)); - serial_port.flags = UPF_SKIP_TEST | UPF_BOOT_AUTOCONF | UPF_SHARE_IRQ; - serial_port.uartclk = board->base_baud * 16; - serial_port.irq = get_pci_irq(dev, board); - serial_port.dev = &dev->dev; - - for (i = 0; i < nr_ports; i++) { - if (quirk->setup(priv, board, &serial_port, i)) - break; - -#ifdef SERIAL_DEBUG_PCI - printk("Setup PCI port: port %x, irq %d, type %d\n", - serial_port.iobase, serial_port.irq, serial_port.iotype); -#endif - - priv->line[i] = serial8250_register_port(&serial_port); - if (priv->line[i] < 0) { - printk(KERN_WARNING "Couldn't register serial port %s: %d\n", pci_name(dev), priv->line[i]); - break; - } - } - - priv->nr = i; - - return 0; - - deinit: - if (quirk->exit) - quirk->exit(dev); disable: pci_disable_device(dev); return rc; @@ -1656,42 +1695,21 @@ pciserial_init_one(struct pci_dev *dev, const struct pci_device_id *ent) static void __devexit pciserial_remove_one(struct pci_dev *dev) { struct serial_private *priv = pci_get_drvdata(dev); - struct pci_serial_quirk *quirk; - int i; pci_set_drvdata(dev, NULL); - for (i = 0; i < priv->nr; i++) - serial8250_unregister_port(priv->line[i]); - - for (i = 0; i < PCI_NUM_BAR_RESOURCES; i++) { - if (priv->remapped_bar[i]) - iounmap(priv->remapped_bar[i]); - priv->remapped_bar[i] = NULL; - } - - 
/* - * Find the exit quirks. - */ - quirk = find_quirk(dev); - if (quirk->exit) - quirk->exit(dev); + pciserial_remove_ports(priv); pci_disable_device(dev); - - kfree(priv); } static int pciserial_suspend_one(struct pci_dev *dev, pm_message_t state) { struct serial_private *priv = pci_get_drvdata(dev); - if (priv) { - int i; + if (priv) + pciserial_suspend_ports(priv); - for (i = 0; i < priv->nr; i++) - serial8250_suspend_port(priv->line[i]); - } pci_save_state(dev); pci_set_power_state(dev, pci_choose_state(dev, state)); return 0; @@ -1705,21 +1723,12 @@ static int pciserial_resume_one(struct pci_dev *dev) pci_restore_state(dev); if (priv) { - int i; - /* * The device may have been disabled. Re-enable it. */ pci_enable_device(dev); - /* - * Ensure that the board is correctly configured. - */ - if (priv->quirk->init) - priv->quirk->init(dev); - - for (i = 0; i < priv->nr; i++) - serial8250_resume_port(priv->line[i]); + pciserial_resume_ports(priv); } return 0; } diff --git a/include/linux/8250_pci.h b/include/linux/8250_pci.h index 5f3ab21b339b..192c0ff7a774 100644 --- a/include/linux/8250_pci.h +++ b/include/linux/8250_pci.h @@ -1,2 +1,40 @@ +/* + * Definitions for PCI support. 
+ */ +#define FL_BASE_MASK 0x0007 +#define FL_BASE0 0x0000 +#define FL_BASE1 0x0001 +#define FL_BASE2 0x0002 +#define FL_BASE3 0x0003 +#define FL_BASE4 0x0004 +#define FL_GET_BASE(x) (x & FL_BASE_MASK) + +/* Use successive BARs (PCI base address registers), + else use offset into some specified BAR */ +#define FL_BASE_BARS 0x0008 + +/* do not assign an irq */ +#define FL_NOIRQ 0x0080 + +/* Use the Base address register size to cap number of ports */ +#define FL_REGION_SZ_CAP 0x0100 + +struct pciserial_board { + unsigned int flags; + unsigned int num_ports; + unsigned int base_baud; + unsigned int uart_offset; + unsigned int reg_shift; + unsigned int first_offset; +}; + +struct serial_private; + +struct serial_private * +pciserial_init_ports(struct pci_dev *dev, struct pciserial_board *board); +void pciserial_remove_ports(struct serial_private *priv); +void pciserial_suspend_ports(struct serial_private *priv); +void pciserial_resume_ports(struct serial_private *priv); + int pci_siig10x_fn(struct pci_dev *dev, int enable); int pci_siig20x_fn(struct pci_dev *dev, int enable); From 05caac585f8abd6c0113856bc8858e3ef214d8a6 Mon Sep 17 00:00:00 2001 From: Russell King Date: Wed, 27 Jul 2005 11:41:18 +0100 Subject: [PATCH 022/584] [SERIAL] Convert parport_serial to use new 8250_pci interfaces Convert parport_serial to use the new 8250_pci interface, converting the table to a pciserial_board table. This also drops the last use of the SPCI_* definitions in serialP.h, which can now be removed.
Signed-off-by: Russell King --- drivers/parport/parport_serial.c | 341 +++++++++++++++---------------- drivers/serial/8250_pci.c | 21 +- include/linux/8250_pci.h | 3 - include/linux/serialP.h | 40 ---- 4 files changed, 168 insertions(+), 237 deletions(-) diff --git a/drivers/parport/parport_serial.c b/drivers/parport/parport_serial.c index 00498e2f1205..d3dad0aac7cb 100644 --- a/drivers/parport/parport_serial.c +++ b/drivers/parport/parport_serial.c @@ -23,13 +23,8 @@ #include #include #include -#include -#include -#include #include -#include - enum parport_pc_pci_cards { titan_110l = 0, titan_210l, @@ -168,182 +163,147 @@ static struct pci_device_id parport_serial_pci_tbl[] = { }; MODULE_DEVICE_TABLE(pci,parport_serial_pci_tbl); -struct pci_board_no_ids { - int flags; - int num_ports; - int base_baud; - int uart_offset; - int reg_shift; - int (*init_fn)(struct pci_dev *dev, struct pci_board_no_ids *board, - int enable); - int first_uart_offset; -}; - -static int __devinit siig10x_init_fn(struct pci_dev *dev, struct pci_board_no_ids *board, int enable) -{ - return pci_siig10x_fn(dev, enable); -} - -static int __devinit siig20x_init_fn(struct pci_dev *dev, struct pci_board_no_ids *board, int enable) -{ - return pci_siig20x_fn(dev, enable); -} - -static int __devinit netmos_serial_init(struct pci_dev *dev, struct pci_board_no_ids *board, int enable) -{ - board->num_ports = dev->subsystem_device & 0xf; - return 0; -} - -static struct pci_board_no_ids pci_boards[] __devinitdata = { - /* - * PCI Flags, Number of Ports, Base (Maximum) Baud Rate, - * Offset to get to next UART's registers, - * Register shift to use for memory-mapped I/O, - * Initialization function, first UART offset - */ - -// Cards not tested are marked n/t -// If you have one of these cards and it works for you, please tell me.. 
- -/* titan_110l */ { SPCI_FL_BASE1 | SPCI_FL_BASE_TABLE, 1, 921600 }, -/* titan_210l */ { SPCI_FL_BASE1 | SPCI_FL_BASE_TABLE, 2, 921600 }, -/* netmos_9xx5_combo */ { SPCI_FL_BASE0 | SPCI_FL_BASE_TABLE, 1, 115200, 0, 0, netmos_serial_init }, -/* netmos_9855 */ { SPCI_FL_BASE2 | SPCI_FL_BASE_TABLE, 1, 115200, 0, 0, netmos_serial_init }, -/* avlab_1s1p (n/t) */ { SPCI_FL_BASE0 | SPCI_FL_BASE_TABLE, 1, 115200 }, -/* avlab_1s1p_650 (nt)*/{ SPCI_FL_BASE0 | SPCI_FL_BASE_TABLE, 1, 115200 }, -/* avlab_1s1p_850 (nt)*/{ SPCI_FL_BASE0 | SPCI_FL_BASE_TABLE, 1, 115200 }, -/* avlab_1s2p (n/t) */ { SPCI_FL_BASE0 | SPCI_FL_BASE_TABLE, 1, 115200 }, -/* avlab_1s2p_650 (nt)*/{ SPCI_FL_BASE0 | SPCI_FL_BASE_TABLE, 1, 115200 }, -/* avlab_1s2p_850 (nt)*/{ SPCI_FL_BASE0 | SPCI_FL_BASE_TABLE, 1, 115200 }, -/* avlab_2s1p (n/t) */ { SPCI_FL_BASE0 | SPCI_FL_BASE_TABLE, 2, 115200 }, -/* avlab_2s1p_650 (nt)*/{ SPCI_FL_BASE0 | SPCI_FL_BASE_TABLE, 2, 115200 }, -/* avlab_2s1p_850 (nt)*/{ SPCI_FL_BASE0 | SPCI_FL_BASE_TABLE, 2, 115200 }, -/* siig_1s1p_10x */ { SPCI_FL_BASE2, 1, 460800, 0, 0, siig10x_init_fn }, -/* siig_2s1p_10x */ { SPCI_FL_BASE2, 1, 921600, 0, 0, siig10x_init_fn }, -/* siig_2p1s_20x */ { SPCI_FL_BASE0, 1, 921600, 0, 0, siig20x_init_fn }, -/* siig_1s1p_20x */ { SPCI_FL_BASE0, 1, 921600, 0, 0, siig20x_init_fn }, -/* siig_2s1p_20x */ { SPCI_FL_BASE0, 1, 921600, 0, 0, siig20x_init_fn }, +/* + * This table describes the serial "geometry" of these boards. Any + * quirks for these can be found in drivers/serial/8250_pci.c + * + * Cards not tested are marked n/t + * If you have one of these cards and it works for you, please tell me.. 
+ */ +static struct pciserial_board pci_parport_serial_boards[] __devinitdata = { + [titan_110l] = { + .flags = FL_BASE1 | FL_BASE_BARS, + .num_ports = 1, + .base_baud = 921600, + .uart_offset = 8, + }, + [titan_210l] = { + .flags = FL_BASE1 | FL_BASE_BARS, + .num_ports = 2, + .base_baud = 921600, + .uart_offset = 8, + }, + [netmos_9xx5_combo] = { + .flags = FL_BASE0 | FL_BASE_BARS, + .num_ports = 1, + .base_baud = 115200, + .uart_offset = 8, + }, + [netmos_9855] = { + .flags = FL_BASE2 | FL_BASE_BARS, + .num_ports = 1, + .base_baud = 115200, + .uart_offset = 8, + }, + [avlab_1s1p] = { /* n/t */ + .flags = FL_BASE0 | FL_BASE_BARS, + .num_ports = 1, + .base_baud = 115200, + .uart_offset = 8, + }, + [avlab_1s1p_650] = { /* nt */ + .flags = FL_BASE0 | FL_BASE_BARS, + .num_ports = 1, + .base_baud = 115200, + .uart_offset = 8, + }, + [avlab_1s1p_850] = { /* nt */ + .flags = FL_BASE0 | FL_BASE_BARS, + .num_ports = 1, + .base_baud = 115200, + .uart_offset = 8, + }, + [avlab_1s2p] = { /* n/t */ + .flags = FL_BASE0 | FL_BASE_BARS, + .num_ports = 1, + .base_baud = 115200, + .uart_offset = 8, + }, + [avlab_1s2p_650] = { /* nt */ + .flags = FL_BASE0 | FL_BASE_BARS, + .num_ports = 1, + .base_baud = 115200, + .uart_offset = 8, + }, + [avlab_1s2p_850] = { /* nt */ + .flags = FL_BASE0 | FL_BASE_BARS, + .num_ports = 1, + .base_baud = 115200, + .uart_offset = 8, + }, + [avlab_2s1p] = { /* n/t */ + .flags = FL_BASE0 | FL_BASE_BARS, + .num_ports = 2, + .base_baud = 115200, + .uart_offset = 8, + }, + [avlab_2s1p_650] = { /* nt */ + .flags = FL_BASE0 | FL_BASE_BARS, + .num_ports = 2, + .base_baud = 115200, + .uart_offset = 8, + }, + [avlab_2s1p_850] = { /* nt */ + .flags = FL_BASE0 | FL_BASE_BARS, + .num_ports = 2, + .base_baud = 115200, + .uart_offset = 8, + }, + [siig_1s1p_10x] = { + .flags = FL_BASE2, + .num_ports = 1, + .base_baud = 460800, + .uart_offset = 8, + }, + [siig_2s1p_10x] = { + .flags = FL_BASE2, + .num_ports = 1, + .base_baud = 921600, + .uart_offset = 8, + }, + 
[siig_2p1s_20x] = { + .flags = FL_BASE0, + .num_ports = 1, + .base_baud = 921600, + .uart_offset = 8, + }, + [siig_1s1p_20x] = { + .flags = FL_BASE0, + .num_ports = 1, + .base_baud = 921600, + .uart_offset = 8, + }, + [siig_2s1p_20x] = { + .flags = FL_BASE0, + .num_ports = 1, + .base_baud = 921600, + .uart_offset = 8, + }, }; struct parport_serial_private { - int num_ser; - int line[20]; - struct pci_board_no_ids ser; + struct serial_private *serial; int num_par; struct parport *port[PARPORT_MAX]; struct parport_pc_pci par; }; -static int __devinit get_pci_port (struct pci_dev *dev, - struct pci_board_no_ids *board, - struct serial_struct *req, - int idx) -{ - unsigned long port; - int base_idx; - int max_port; - int offset; - - base_idx = SPCI_FL_GET_BASE(board->flags); - if (board->flags & SPCI_FL_BASE_TABLE) - base_idx += idx; - - if (board->flags & SPCI_FL_REGION_SZ_CAP) { - max_port = pci_resource_len(dev, base_idx) / 8; - if (idx >= max_port) - return 1; - } - - offset = board->first_uart_offset; - - /* Timedia/SUNIX uses a mixture of BARs and offsets */ - /* Ugh, this is ugly as all hell --- TYT */ - if(dev->vendor == PCI_VENDOR_ID_TIMEDIA ) /* 0x1409 */ - switch(idx) { - case 0: base_idx=0; - break; - case 1: base_idx=0; offset=8; - break; - case 2: base_idx=1; - break; - case 3: base_idx=1; offset=8; - break; - case 4: /* BAR 2*/ - case 5: /* BAR 3 */ - case 6: /* BAR 4*/ - case 7: base_idx=idx-2; /* BAR 5*/ - } - - port = pci_resource_start(dev, base_idx) + offset; - - if ((board->flags & SPCI_FL_BASE_TABLE) == 0) - port += idx * (board->uart_offset ? 
board->uart_offset : 8); - - if (pci_resource_flags (dev, base_idx) & IORESOURCE_IO) { - int high_bits_offset = ((sizeof(long)-sizeof(int))*8); - req->port = port; - if (high_bits_offset) - req->port_high = port >> high_bits_offset; - else - req->port_high = 0; - return 0; - } - req->io_type = SERIAL_IO_MEM; - req->iomem_base = ioremap(port, board->uart_offset); - req->iomem_reg_shift = board->reg_shift; - req->port = 0; - return req->iomem_base ? 0 : 1; -} - /* Register the serial port(s) of a PCI card. */ static int __devinit serial_register (struct pci_dev *dev, const struct pci_device_id *id) { - struct pci_board_no_ids *board; struct parport_serial_private *priv = pci_get_drvdata (dev); - struct serial_struct serial_req; - int base_baud; - int k; - int success = 0; + struct pciserial_board *board; + struct serial_private *serial; - priv->ser = pci_boards[id->driver_data]; - board = &priv->ser; - if (board->init_fn && ((board->init_fn) (dev, board, 1) != 0)) - return 1; + board = &pci_parport_serial_boards[id->driver_data]; + serial = pciserial_init_ports(dev, board); - base_baud = board->base_baud; - if (!base_baud) - base_baud = BASE_BAUD; - memset (&serial_req, 0, sizeof (serial_req)); + if (IS_ERR(serial)) + return PTR_ERR(serial); - for (k = 0; k < board->num_ports; k++) { - int line; - - if (priv->num_ser == ARRAY_SIZE (priv->line)) { - printk (KERN_WARNING - "parport_serial: %s: only %u serial lines " - "supported (%d reported)\n", pci_name (dev), - ARRAY_SIZE (priv->line), board->num_ports); - break; - } - - serial_req.irq = dev->irq; - if (get_pci_port (dev, board, &serial_req, k)) - break; - serial_req.flags = ASYNC_SKIP_TEST | ASYNC_AUTOPROBE; - serial_req.baud_base = base_baud; - line = register_serial (&serial_req); - if (line < 0) { - printk (KERN_DEBUG - "parport_serial: register_serial failed\n"); - continue; - } - priv->line[priv->num_ser++] = line; - success = 1; - } - - return success ? 
0 : 1; + priv->serial = serial; + return 0; } /* Register the parallel port(s) of a PCI card. */ @@ -411,7 +371,7 @@ static int __devinit parport_serial_pci_probe (struct pci_dev *dev, priv = kmalloc (sizeof *priv, GFP_KERNEL); if (!priv) return -ENOMEM; - priv->num_ser = priv->num_par = 0; + memset(priv, 0, sizeof(struct parport_serial_private)); pci_set_drvdata (dev, priv); err = pci_enable_device (dev); @@ -444,15 +404,12 @@ static void __devexit parport_serial_pci_remove (struct pci_dev *dev) struct parport_serial_private *priv = pci_get_drvdata (dev); int i; - // Serial ports - for (i = 0; i < priv->num_ser; i++) { - unregister_serial (priv->line[i]); + pci_set_drvdata(dev, NULL); + + // Serial ports + if (priv->serial) + pciserial_remove_ports(priv->serial); - if (priv->ser.init_fn) - (priv->ser.init_fn) (dev, &priv->ser, 0); - } - pci_set_drvdata (dev, NULL); - // Parallel ports for (i = 0; i < priv->num_par; i++) parport_pc_unregister_port (priv->port[i]); @@ -461,11 +418,47 @@ static void __devexit parport_serial_pci_remove (struct pci_dev *dev) return; } +static int parport_serial_pci_suspend(struct pci_dev *dev, pm_message_t state) +{ + struct parport_serial_private *priv = pci_get_drvdata(dev); + + if (priv->serial) + pciserial_suspend_ports(priv->serial); + + /* FIXME: What about parport? */ + + pci_save_state(dev); + pci_set_power_state(dev, pci_choose_state(dev, state)); + return 0; +} + +static int parport_serial_pci_resume(struct pci_dev *dev) +{ + struct parport_serial_private *priv = pci_get_drvdata(dev); + + pci_set_power_state(dev, PCI_D0); + pci_restore_state(dev); + + /* + * The device may have been disabled. Re-enable it. + */ + pci_enable_device(dev); + + if (priv->serial) + pciserial_resume_ports(priv->serial); + + /* FIXME: What about parport? 
*/ + + return 0; +} + static struct pci_driver parport_serial_pci_driver = { .name = "parport_serial", .id_table = parport_serial_pci_tbl, .probe = parport_serial_pci_probe, .remove = __devexit_p(parport_serial_pci_remove), + .suspend = parport_serial_pci_suspend, + .resume = parport_serial_pci_resume, }; diff --git a/drivers/serial/8250_pci.c b/drivers/serial/8250_pci.c index 4e9084edfc7e..52b0a0558ed4 100644 --- a/drivers/serial/8250_pci.c +++ b/drivers/serial/8250_pci.c @@ -46,7 +46,7 @@ struct pci_serial_quirk { u32 subdevice; int (*init)(struct pci_dev *dev); int (*setup)(struct serial_private *, struct pciserial_board *, - struct uart_port *port, int idx); + struct uart_port *, int); void (*exit)(struct pci_dev *dev); }; @@ -436,25 +436,6 @@ static int pci_siig_init(struct pci_dev *dev) return -ENODEV; } -int pci_siig10x_fn(struct pci_dev *dev, int enable) -{ - int ret = 0; - if (enable) - ret = pci_siig10x_init(dev); - return ret; -} - -int pci_siig20x_fn(struct pci_dev *dev, int enable) -{ - int ret = 0; - if (enable) - ret = pci_siig20x_init(dev); - return ret; -} - -EXPORT_SYMBOL(pci_siig10x_fn); -EXPORT_SYMBOL(pci_siig20x_fn); - /* * Timedia has an explosion of boards, and to avoid the PCI table from * growing *huge*, we use this function to collapse some 70 entries diff --git a/include/linux/8250_pci.h b/include/linux/8250_pci.h index 192c0ff7a774..3209dd46ea7d 100644 --- a/include/linux/8250_pci.h +++ b/include/linux/8250_pci.h @@ -35,6 +35,3 @@ pciserial_init_ports(struct pci_dev *dev, struct pciserial_board *board); void pciserial_remove_ports(struct serial_private *priv); void pciserial_suspend_ports(struct serial_private *priv); void pciserial_resume_ports(struct serial_private *priv); - -int pci_siig10x_fn(struct pci_dev *dev, int enable); -int pci_siig20x_fn(struct pci_dev *dev, int enable); diff --git a/include/linux/serialP.h b/include/linux/serialP.h index 2b2f35a64d75..2b9e6b9554d5 100644 --- a/include/linux/serialP.h +++ 
b/include/linux/serialP.h @@ -140,44 +140,4 @@ struct rs_multiport_struct { #define ALPHA_KLUDGE_MCR 0 #endif -/* - * Definitions for PCI support. - */ -#define SPCI_FL_BASE_MASK 0x0007 -#define SPCI_FL_BASE0 0x0000 -#define SPCI_FL_BASE1 0x0001 -#define SPCI_FL_BASE2 0x0002 -#define SPCI_FL_BASE3 0x0003 -#define SPCI_FL_BASE4 0x0004 -#define SPCI_FL_GET_BASE(x) (x & SPCI_FL_BASE_MASK) - -#define SPCI_FL_IRQ_MASK (0x0007 << 4) -#define SPCI_FL_IRQBASE0 (0x0000 << 4) -#define SPCI_FL_IRQBASE1 (0x0001 << 4) -#define SPCI_FL_IRQBASE2 (0x0002 << 4) -#define SPCI_FL_IRQBASE3 (0x0003 << 4) -#define SPCI_FL_IRQBASE4 (0x0004 << 4) -#define SPCI_FL_GET_IRQBASE(x) ((x & SPCI_FL_IRQ_MASK) >> 4) - -/* Use successive BARs (PCI base address registers), - else use offset into some specified BAR */ -#define SPCI_FL_BASE_TABLE 0x0100 - -/* Use successive entries in the irq resource table */ -#define SPCI_FL_IRQ_TABLE 0x0200 - -/* Use the irq resource table instead of dev->irq */ -#define SPCI_FL_IRQRESOURCE 0x0400 - -/* Use the Base address register size to cap number of ports */ -#define SPCI_FL_REGION_SZ_CAP 0x0800 - -/* Do not use irq sharing for this device */ -#define SPCI_FL_NO_SHIRQ 0x1000 - -/* This is a PNP device */ -#define SPCI_FL_ISPNP 0x2000 - -#define SPCI_FL_PNPDEFAULT (SPCI_FL_IRQRESOURCE|SPCI_FL_ISPNP) - #endif /* _LINUX_SERIAL_H */ From 4689ced99b18937e28c0f6c190394ccc3c61d651 Mon Sep 17 00:00:00 2001 From: Peer Chen Date: Fri, 29 Jul 2005 15:33:58 -0400 Subject: [PATCH 023/584] [netdrvr] add 'uli526x' driver (a tulip clone) We want to extract our LAN card driver from tulip core driver and make a new file uli526x.c at tulip folder, because we have added some ethtool interface support and non-eprom support in our driver and may be other change in the futher. 
If support for our controllers stays in the tulip core driver, I think it will increase the complexity of maintenance: the tulip core driver consists of several files and supports many other controllers. Furthermore, I tested the newest kernel 2.6.12 and found that the tulip driver does not work on our LAN controller, and I have no time to debug it, so I would like to make a single uli526x.c file just for our controllers. Could you help us remove the ULi M5261/M5263 LAN controller support from the tulip core driver and add the new single uli526x.c file for us? Signed-off-by: Peer Chen Signed-off-by: Jeff Garzik --- drivers/net/tulip/Kconfig | 12 + drivers/net/tulip/Makefile | 1 + drivers/net/tulip/uli526x.c | 1770 +++++++++++++++++++++++++++++++++++ 3 files changed, 1783 insertions(+) create mode 100644 drivers/net/tulip/uli526x.c diff --git a/drivers/net/tulip/Kconfig b/drivers/net/tulip/Kconfig index e2cdaf876201..8c9634a98c11 100644 --- a/drivers/net/tulip/Kconfig +++ b/drivers/net/tulip/Kconfig @@ -135,6 +135,18 @@ config DM9102 <file:Documentation/networking/net-modules.txt>. The module will be called dmfe. +config ULI526X + tristate "ULi M526x controller support" + depends on NET_TULIP && PCI + select CRC32 + ---help--- + This driver is for ULi M5261/M5263 10/100M Ethernet Controller + (<http://www.uli.com.tw/>). + + To compile this driver as a module, choose M here and read + <file:Documentation/networking/net-modules.txt>. The module will + be called uli526x. + config PCMCIA_XIRCOM tristate "Xircom CardBus support (new driver)" depends on NET_TULIP && CARDBUS diff --git a/drivers/net/tulip/Makefile b/drivers/net/tulip/Makefile index 8bb9b4683979..451090d6fcca 100644 --- a/drivers/net/tulip/Makefile +++ b/drivers/net/tulip/Makefile @@ -9,6 +9,7 @@ obj-$(CONFIG_WINBOND_840) += winbond-840.o obj-$(CONFIG_DE2104X) += de2104x.o obj-$(CONFIG_TULIP) += tulip.o obj-$(CONFIG_DE4X5) += de4x5.o +obj-$(CONFIG_ULI526X) += uli526x.o # Declare multi-part drivers. 
diff --git a/drivers/net/tulip/uli526x.c b/drivers/net/tulip/uli526x.c new file mode 100644 index 000000000000..27f99e087f79 --- /dev/null +++ b/drivers/net/tulip/uli526x.c @@ -0,0 +1,1770 @@ +/* + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License + as published by the Free Software Foundation; either version 2 + of the License, or (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + +*/ + +#define DRV_NAME "uli526x" +#define DRV_VERSION "0.9.3" +#define DRV_RELDATE "2005-7-29" + +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + + +/* Board/System/Debug information/definition ---------------- */ +#define PCI_ULI5261_ID 0x526110B9 /* ULi M5261 ID*/ +#define PCI_ULI5263_ID 0x526310B9 /* ULi M5263 ID*/ + +#define ULI526X_IO_SIZE 0x100 +#define TX_DESC_CNT 0x20 /* Allocated Tx descriptors */ +#define RX_DESC_CNT 0x30 /* Allocated Rx descriptors */ +#define TX_FREE_DESC_CNT (TX_DESC_CNT - 2) /* Max TX packet count */ +#define TX_WAKE_DESC_CNT (TX_DESC_CNT - 3) /* TX wakeup count */ +#define DESC_ALL_CNT (TX_DESC_CNT + RX_DESC_CNT) +#define TX_BUF_ALLOC 0x600 +#define RX_ALLOC_SIZE 0x620 +#define ULI526X_RESET 1 +#define CR0_DEFAULT 0 +#define CR6_DEFAULT 0x00080000 /* HD */ +#define CR6_DEFAULT_A 0x22240000 +#define CR7_DEFAULT 0x180c1 +#define CR15_DEFAULT 0x06 /* TxJabber RxWatchdog */ +#define TDES0_ERR_MASK 0x4302 /* TXJT, LC, EC, FUE */ +#define MAX_PACKET_SIZE 1514 +#define ULI5261_MAX_MULTICAST 14 +#define RX_COPY_SIZE 100 +#define MAX_CHECK_PACKET 0x8000 + +#define ULI526X_10MHF 0 +#define 
ULI526X_100MHF 1 +#define ULI526X_10MFD 4 +#define ULI526X_100MFD 5 +#define ULI526X_AUTO 8 + +#define ULI526X_TXTH_72 0x400000 /* TX TH 72 byte */ +#define ULI526X_TXTH_96 0x404000 /* TX TH 96 byte */ +#define ULI526X_TXTH_128 0x0000 /* TX TH 128 byte */ +#define ULI526X_TXTH_256 0x4000 /* TX TH 256 byte */ +#define ULI526X_TXTH_512 0x8000 /* TX TH 512 byte */ +#define ULI526X_TXTH_1K 0xC000 /* TX TH 1K byte */ + +#define ULI526X_TIMER_WUT (jiffies + HZ * 1)/* timer wakeup time : 1 second */ +#define ULI526X_TX_TIMEOUT ((16*HZ)/2) /* tx packet time-out time 8 s" */ +#define ULI526X_TX_KICK (4*HZ/2) /* tx packet Kick-out time 2 s" */ + +#define ULI526X_DBUG(dbug_now, msg, value) if (uli526x_debug || (dbug_now)) printk(KERN_ERR DRV_NAME ": %s %lx\n", (msg), (long) (value)) + +#define SHOW_MEDIA_TYPE(mode) printk(KERN_ERR DRV_NAME ": Change Speed to %sMhz %s duplex\n",mode & 1 ?"100":"10", mode & 4 ? "full":"half"); + + +/* CR9 definition: SROM/MII */ +#define CR9_SROM_READ 0x4800 +#define CR9_SRCS 0x1 +#define CR9_SRCLK 0x2 +#define CR9_CRDOUT 0x8 +#define SROM_DATA_0 0x0 +#define SROM_DATA_1 0x4 +#define PHY_DATA_1 0x20000 +#define PHY_DATA_0 0x00000 +#define MDCLKH 0x10000 + +#define PHY_POWER_DOWN 0x800 + +#define SROM_V41_CODE 0x14 + +#define SROM_CLK_WRITE(data, ioaddr) outl(data|CR9_SROM_READ|CR9_SRCS,ioaddr);udelay(5);outl(data|CR9_SROM_READ|CR9_SRCS|CR9_SRCLK,ioaddr);udelay(5);outl(data|CR9_SROM_READ|CR9_SRCS,ioaddr);udelay(5); + +/* Sten Check */ +#define DEVICE net_device + +/* Structure/enum declaration ------------------------------- */ +struct tx_desc { + u32 tdes0, tdes1, tdes2, tdes3; /* Data for the card */ + char *tx_buf_ptr; /* Data for us */ + struct tx_desc *next_tx_desc; +} __attribute__(( aligned(32) )); + +struct rx_desc { + u32 rdes0, rdes1, rdes2, rdes3; /* Data for the card */ + struct sk_buff *rx_skb_ptr; /* Data for us */ + struct rx_desc *next_rx_desc; +} __attribute__(( aligned(32) )); + +struct uli526x_board_info { + u32 chip_id; /* 
Chip vendor/Device ID */ + struct DEVICE *next_dev; /* next device */ + struct pci_dev *pdev; /* PCI device */ + spinlock_t lock; + + long ioaddr; /* I/O base address */ + u32 cr0_data; + u32 cr5_data; + u32 cr6_data; + u32 cr7_data; + u32 cr15_data; + + /* pointer for memory physical address */ + dma_addr_t buf_pool_dma_ptr; /* Tx buffer pool memory */ + dma_addr_t buf_pool_dma_start; /* Tx buffer pool align dword */ + dma_addr_t desc_pool_dma_ptr; /* descriptor pool memory */ + dma_addr_t first_tx_desc_dma; + dma_addr_t first_rx_desc_dma; + + /* descriptor pointer */ + unsigned char *buf_pool_ptr; /* Tx buffer pool memory */ + unsigned char *buf_pool_start; /* Tx buffer pool align dword */ + unsigned char *desc_pool_ptr; /* descriptor pool memory */ + struct tx_desc *first_tx_desc; + struct tx_desc *tx_insert_ptr; + struct tx_desc *tx_remove_ptr; + struct rx_desc *first_rx_desc; + struct rx_desc *rx_insert_ptr; + struct rx_desc *rx_ready_ptr; /* packet come pointer */ + unsigned long tx_packet_cnt; /* transmitted packet count */ + unsigned long rx_avail_cnt; /* available rx descriptor count */ + unsigned long interval_rx_cnt; /* rx packet count a callback time */ + + u16 dbug_cnt; + u16 NIC_capability; /* NIC media capability */ + u16 PHY_reg4; /* Saved Phyxcer register 4 value */ + + u8 media_mode; /* user specify media mode */ + u8 op_mode; /* real work media mode */ + u8 phy_addr; + u8 link_failed; /* Ever link failed */ + u8 wait_reset; /* Hardware failed, need to reset */ + struct timer_list timer; + + /* System defined statistic counter */ + struct net_device_stats stats; + + /* Driver defined statistic counter */ + unsigned long tx_fifo_underrun; + unsigned long tx_loss_carrier; + unsigned long tx_no_carrier; + unsigned long tx_late_collision; + unsigned long tx_excessive_collision; + unsigned long tx_jabber_timeout; + unsigned long reset_count; + unsigned long reset_cr8; + unsigned long reset_fatal; + unsigned long reset_TXtimeout; + + /* NIC SROM data */ 
+ unsigned char srom[128]; + u8 init; +}; + +enum uli526x_offsets { + DCR0 = 0x00, DCR1 = 0x08, DCR2 = 0x10, DCR3 = 0x18, DCR4 = 0x20, + DCR5 = 0x28, DCR6 = 0x30, DCR7 = 0x38, DCR8 = 0x40, DCR9 = 0x48, + DCR10 = 0x50, DCR11 = 0x58, DCR12 = 0x60, DCR13 = 0x68, DCR14 = 0x70, + DCR15 = 0x78 +}; + +enum uli526x_CR6_bits { + CR6_RXSC = 0x2, CR6_PBF = 0x8, CR6_PM = 0x40, CR6_PAM = 0x80, + CR6_FDM = 0x200, CR6_TXSC = 0x2000, CR6_STI = 0x100000, + CR6_SFT = 0x200000, CR6_RXA = 0x40000000, CR6_NO_PURGE = 0x20000000 +}; + +/* Global variable declaration ----------------------------- */ +static int __devinitdata printed_version; +static char version[] __devinitdata = + KERN_INFO DRV_NAME ": ULi M5261/M5263 net driver, version " + DRV_VERSION " (" DRV_RELDATE ")\n"; + +static int uli526x_debug; +static unsigned char uli526x_media_mode = ULI526X_AUTO; +static u32 uli526x_cr6_user_set; + +/* For module input parameter */ +static int debug; +static u32 cr6set; +static u32 m526x_id; +static unsigned char mode = 8; + +/* function declaration ------------------------------------- */ +static int uli526x_open(struct DEVICE *); +static int uli526x_start_xmit(struct sk_buff *, struct DEVICE *); +static int uli526x_stop(struct DEVICE *); +static struct net_device_stats * uli526x_get_stats(struct DEVICE *); +static void uli526x_set_filter_mode(struct DEVICE *); +static struct ethtool_ops netdev_ethtool_ops; +static u16 read_srom_word(long ,int); +static irqreturn_t uli526x_interrupt(int , void *, struct pt_regs *); +static void uli526x_descriptor_init(struct uli526x_board_info *, unsigned long); +static void allocate_rx_buffer(struct uli526x_board_info *); +static void update_cr6(u32, unsigned long); +static void send_filter_frame(struct DEVICE * ,int); +static u16 phy_read(unsigned long, u8, u8, u32); +static u16 phy_readby_cr10(unsigned long, u8, u8); +static void phy_write(unsigned long, u8, u8, u16, u32); +static void phy_writeby_cr10(unsigned long, u8, u8, u16); +static void 
phy_write_1bit(unsigned long, u32, u32);
+static u16 phy_read_1bit(unsigned long, u32);
+static u8 uli526x_sense_speed(struct uli526x_board_info *);
+static void uli526x_process_mode(struct uli526x_board_info *);
+static void uli526x_timer(unsigned long);
+static void uli526x_rx_packet(struct DEVICE *, struct uli526x_board_info *);
+static void uli526x_free_tx_pkt(struct DEVICE *, struct uli526x_board_info *);
+static void uli526x_reuse_skb(struct uli526x_board_info *, struct sk_buff *);
+static void uli526x_dynamic_reset(struct DEVICE *);
+static void uli526x_free_rxbuffer(struct uli526x_board_info *);
+static void uli526x_init(struct DEVICE *);
+static void uli526x_set_phyxcer(struct uli526x_board_info *);
+
+/* ULI526X network board routine ---------------------------- */
+
+/*
+ *	Search ULI526X board, allocate space and register it.
+ *
+ *	Probe callback: allocates the net_device, validates and claims the
+ *	I/O region in BAR 0, allocates the descriptor pool and the Tx buffer
+ *	pool, reads the station address (from the SROM, or from the chip's
+ *	ID table when SROM word 0 reads 0xffff) and registers the netdev.
+ *
+ *	Returns 0 on success or a negative errno.  Every failure path
+ *	releases exactly what had been acquired up to that point.
+ */
+
+static int __devinit uli526x_init_one (struct pci_dev *pdev,
+				      const struct pci_device_id *ent)
+{
+	struct uli526x_board_info *db;	/* board information structure */
+	struct net_device *dev;
+	int i, err;
+	u32 configval;
+
+	ULI526X_DBUG(0, "uli526x_init_one()", 0);
+
+	if (!printed_version++)
+		printk(version);
+
+	/* Init network device */
+	dev = alloc_etherdev(sizeof(*db));
+	if (dev == NULL)
+		return -ENOMEM;
+	db = netdev_priv(dev);
+	SET_MODULE_OWNER(dev);
+	SET_NETDEV_DEV(dev, &pdev->dev);
+
+	if (pci_set_dma_mask(pdev, 0xffffffff)) {
+		printk(KERN_WARNING DRV_NAME ": 32-bit PCI DMA not available.\n");
+		err = -ENODEV;
+		goto err_out_free;
+	}
+
+	/* Enable Master/IO access, Disable memory access */
+	err = pci_enable_device(pdev);
+	if (err)
+		goto err_out_free;
+
+	if (!pci_resource_start(pdev, 0)) {
+		printk(KERN_ERR DRV_NAME ": I/O base is zero\n");
+		err = -ENODEV;
+		goto err_out_disable;
+	}
+
+	if (pci_resource_len(pdev, 0) < (ULI526X_IO_SIZE) ) {
+		printk(KERN_ERR DRV_NAME ": Allocated I/O size too small\n");
+		err = -ENODEV;
+		goto err_out_disable;
+	}
+
+	if (pci_request_regions(pdev, DRV_NAME)) {
+		printk(KERN_ERR DRV_NAME ": Failed to request PCI regions\n");
+		err = -ENODEV;
+		goto err_out_disable;
+	}
+
+	/*
+	 * Remember the vendor/device dword in the module-wide m526x_id.
+	 * For the M5263, bits 15:8 of config dword 0x0c are forced to 0x80.
+	 * NOTE(review): in standard PCI config space that byte is the
+	 * latency timer -- confirm against the chip documentation.
+	 */
+	pci_read_config_dword(pdev,0x0,&configval);
+	m526x_id = configval;
+	if(configval == 0x526310b9)
+	{
+		pci_read_config_dword(pdev,0x0c,&configval);
+		configval = ((configval & 0xffff00ff) | 0x8000);
+		pci_write_config_dword(pdev,0x0c,configval);
+	}
+
+	/*
+	 * Allocate Tx/Rx descriptor memory and the Tx buffer pool.  Both
+	 * allocations can fail; the pointers are dereferenced later, so
+	 * bail out here instead of oopsing on a NULL pool.
+	 */
+	db->desc_pool_ptr = pci_alloc_consistent(pdev, sizeof(struct tx_desc) * DESC_ALL_CNT + 0x20, &db->desc_pool_dma_ptr);
+	if (db->desc_pool_ptr == NULL) {
+		err = -ENOMEM;
+		goto err_out_res;
+	}
+	db->buf_pool_ptr = pci_alloc_consistent(pdev, TX_BUF_ALLOC * TX_DESC_CNT + 4, &db->buf_pool_dma_ptr);
+	if (db->buf_pool_ptr == NULL) {
+		err = -ENOMEM;
+		goto err_out_free_desc;
+	}
+
+	db->first_tx_desc = (struct tx_desc *) db->desc_pool_ptr;
+	db->first_tx_desc_dma = db->desc_pool_dma_ptr;
+	db->buf_pool_start = db->buf_pool_ptr;
+	db->buf_pool_dma_start = db->buf_pool_dma_ptr;
+
+	db->chip_id = ent->driver_data;
+	db->ioaddr = pci_resource_start(pdev, 0);
+
+	db->pdev = pdev;
+	db->init = 1;
+
+	dev->base_addr = db->ioaddr;
+	dev->irq = pdev->irq;
+	pci_set_drvdata(pdev, dev);
+
+	/* Register some necessary functions */
+	dev->open = &uli526x_open;
+	dev->hard_start_xmit = &uli526x_start_xmit;
+	dev->stop = &uli526x_stop;
+	dev->get_stats = &uli526x_get_stats;
+	dev->set_multicast_list = &uli526x_set_filter_mode;
+	dev->ethtool_ops = &netdev_ethtool_ops;
+	spin_lock_init(&db->lock);
+
+
+	/* read 64 word srom data */
+	for (i = 0; i < 64; i++)
+		((u16 *) db->srom)[i] = cpu_to_le16(read_srom_word(db->ioaddr, i));
+
+	/* Set Node address */
+	if(((u16 *) db->srom)[0] == 0xffff)	/* SROM absent, so read MAC address from ID Table */
+	{
+		outl(0x10000, db->ioaddr + DCR0);	/* Diagnosis mode */
+		outl(0x1c0, db->ioaddr + DCR13);	/* Reset diagnostic pointer port */
+		outl(0, db->ioaddr + DCR14);		/* Clear reset port */
+		outl(0x10, db->ioaddr + DCR14);		/* Reset ID Table pointer */
+		outl(0, db->ioaddr + DCR14);		/* Clear reset port */
+		outl(0, db->ioaddr + DCR13);		/* Clear CR13 */
+		outl(0x1b0, db->ioaddr + DCR13);	/* Select ID Table access port */
+		/* Read MAC address from CR14, one byte per read */
+		for (i = 0; i < 6; i++)
+			dev->dev_addr[i] = inl(db->ioaddr + DCR14);
+		/* Read end */
+		outl(0, db->ioaddr + DCR13);		/* Clear CR13 */
+		outl(0, db->ioaddr + DCR0);		/* Clear CR0 */
+		udelay(10);
+	}
+	else		/* SROM present: address is stored at byte offset 20 */
+	{
+		for (i = 0; i < 6; i++)
+			dev->dev_addr[i] = db->srom[20 + i];
+	}
+	err = register_netdev (dev);
+	if (err)
+		goto err_out_free_buf;
+
+	printk(KERN_INFO "%s: ULi M%04lx at pci%s,",dev->name,ent->driver_data >> 16,pci_name(pdev));
+
+	for (i = 0; i < 6; i++)
+		printk("%c%02x", i ? ':' : ' ', dev->dev_addr[i]);
+	printk(", irq %d.\n", dev->irq);
+
+	pci_set_master(pdev);
+
+	return 0;
+
+err_out_free_buf:
+	pci_free_consistent(pdev, TX_BUF_ALLOC * TX_DESC_CNT + 4,
+			    db->buf_pool_ptr, db->buf_pool_dma_ptr);
+err_out_free_desc:
+	pci_free_consistent(pdev, sizeof(struct tx_desc) * DESC_ALL_CNT + 0x20,
+			    db->desc_pool_ptr, db->desc_pool_dma_ptr);
+err_out_res:
+	pci_release_regions(pdev);
+err_out_disable:
+	pci_disable_device(pdev);
+err_out_free:
+	pci_set_drvdata(pdev, NULL);
+	free_netdev(dev);
+
+	return err;
+}
+
+
+/*
+ *	Remove callback: undo everything uli526x_init_one() set up.
+ *
+ *	The netdev is unregistered first so that the interface is closed
+ *	before the DMA pools it uses are handed back.
+ */
+static void __devexit uli526x_remove_one (struct pci_dev *pdev)
+{
+	struct net_device *dev = pci_get_drvdata(pdev);
+	struct uli526x_board_info *db;
+
+	ULI526X_DBUG(0, "uli526x_remove_one()", 0);
+
+	if (dev) {
+		db = netdev_priv(dev);
+
+		unregister_netdev(dev);
+		pci_free_consistent(db->pdev, sizeof(struct tx_desc) *
+					DESC_ALL_CNT + 0x20, db->desc_pool_ptr,
+					db->desc_pool_dma_ptr);
+		pci_free_consistent(db->pdev, TX_BUF_ALLOC * TX_DESC_CNT + 4,
+					db->buf_pool_ptr, db->buf_pool_dma_ptr);
+		pci_release_regions(pdev);
+		free_netdev(dev);	/* free board information */
+		pci_set_drvdata(pdev, NULL);
+		/* balance the pci_enable_device() done at probe time */
+		pci_disable_device(pdev);
+	}
+
+	ULI526X_DBUG(0, "uli526x_remove_one() exit", 0);
+}
+
+
+/*
+ *	Open the interface.
+ *	The interface is opened whenever "ifconfig" activates it.
+ */
+
+/*
+ * Requests the (shared) IRQ, resets the driver's software state to its
+ * defaults, programs the chip through uli526x_init(), starts the transmit
+ * queue and arms the periodic timer.
+ *
+ * Returns 0, or the error from request_irq().
+ */
+static int uli526x_open(struct DEVICE *dev)
+{
+	int ret;
+	struct uli526x_board_info *db = netdev_priv(dev);
+
+	ULI526X_DBUG(0, "uli526x_open", 0);
+
+	ret = request_irq(dev->irq, &uli526x_interrupt, SA_SHIRQ, dev->name, dev);
+	if (ret)
+		return ret;
+
+	/* system variable init */
+	db->cr6_data = CR6_DEFAULT | uli526x_cr6_user_set;
+	/*
+	 * The M5263 uses a different CR6 default.
+	 * NOTE(review): m526x_id is a module-wide variable written by the
+	 * most recent probe, not a per-device field -- confirm this is
+	 * intended when several boards are present.
+	 */
+	if(m526x_id == 0x526310b9)
+	{
+		db->cr6_data = CR6_DEFAULT_A | uli526x_cr6_user_set;
+	}
+	db->tx_packet_cnt = 0;
+	db->rx_avail_cnt = 0;
+	db->link_failed = 1;
+	netif_carrier_off(dev);
+	db->wait_reset = 0;
+
+	db->NIC_capability = 0xf;	/* All capability*/
+	db->PHY_reg4 = 0x1e0;
+
+	/* CR6 operation mode decision */
+	db->cr6_data |= ULI526X_TXTH_256;
+	db->cr0_data = CR0_DEFAULT;
+
+	/* Initialize ULI526X board */
+	uli526x_init(dev);
+
+	/* Activate the system interface */
+	netif_wake_queue(dev);
+
+	/* set up and start the periodic timer */
+	init_timer(&db->timer);
+	db->timer.expires = ULI526X_TIMER_WUT + HZ * 2;
+	db->timer.data = (unsigned long)dev;
+	db->timer.function = &uli526x_timer;
+	add_timer(&db->timer);
+
+	return 0;
+}
+
+
+/*	Initialize ULI526X board
+ *	Reset ULI526X board
+ *	Initialize TX/Rx descriptor chain structure
+ *	Send the set-up frame
+ *	Enable Tx/Rx machine
+ */
+
+static void uli526x_init(struct DEVICE *dev)
+{
+	struct uli526x_board_info *db = netdev_priv(dev);
+	unsigned long ioaddr = db->ioaddr;
+	u8	phy_tmp;
+	u16	phy_value;
+	u16 phy_reg_reset;
+
+	ULI526X_DBUG(0, "uli526x_init()", 0);
+
+	/* Reset M526x MAC controller */
+	outl(ULI526X_RESET, ioaddr + DCR0);	/* RESET MAC */
+	udelay(100);
+	outl(db->cr0_data, ioaddr + DCR0);
+	udelay(5);
+
+	/*
+	 * Phy addr : In some boards,M5261/M5263 phy address != 1.
+	 * Scan all 32 addresses and take the first one whose register 3
+	 * reads neither 0xffff nor 0; fall back to address 1.
+	 */
+	db->phy_addr = 1;
+	for(phy_tmp=0;phy_tmp<32;phy_tmp++)
+	{
+		phy_value=phy_read(db->ioaddr,phy_tmp,3,db->chip_id);//peer add
+		if(phy_value != 0xffff&&phy_value!=0)
+		{
+			db->phy_addr = phy_tmp;
+			break;
+		}
+	}
+	if(phy_tmp == 32)
+		printk(KERN_WARNING "Can not find the phy address!!!\n");
+	/* Parser SROM and media mode */
+	db->media_mode = uli526x_media_mode;
+
+	//add by clearzhang 2004/7/8
+	/* RESET Phyxcer Chip by GPR port bit 7 */
+	//outl(0x180, ioaddr + DCR12);		/* Let bit 7 output port */
+	//outl(0x0, ioaddr + DCR12);		/* Clear RESET signal */
+
+	/* Phyxcer capability setting: set bit 15 of PHY register 0 */
+	phy_reg_reset = phy_read(db->ioaddr, db->phy_addr, 0, db->chip_id);
+	phy_reg_reset = (phy_reg_reset | 0x8000);
+	phy_write(db->ioaddr, db->phy_addr, 0, phy_reg_reset, db->chip_id);
+	udelay(500);
+
+	/* Process Phyxcer Media Mode */
+	uli526x_set_phyxcer(db);
+
+	/* Media Mode Process */
+	if ( !(db->media_mode & ULI526X_AUTO) )
+		db->op_mode = db->media_mode; 	/* Force Mode */
+
+	/* Initialize Transmit/Receive descriptor and CR3/4 */
+	uli526x_descriptor_init(db, ioaddr);
+
+	/* Init CR6 to program M526X operation */
+	update_cr6(db->cr6_data, ioaddr);
+
+	/* Send setup frame */
+	send_filter_frame(dev, dev->mc_count);	/* M5261/M5263 */
+
+	/* Init CR7, interrupt active bit */
+	db->cr7_data = CR7_DEFAULT;
+	outl(db->cr7_data, ioaddr + DCR7);
+
+	/*
+	 * Init CR15, Tx jabber and Rx watchdog timer.
+	 * NOTE(review): no visible code assigns db->cr15_data (CR15_DEFAULT
+	 * is defined but not used here) -- confirm the intended value.
+	 */
+	outl(db->cr15_data, ioaddr + DCR15);
+
+	/* Enable ULI526X Tx/Rx function */
+	db->cr6_data |= CR6_RXSC | CR6_TXSC;
+	update_cr6(db->cr6_data, ioaddr);
+}
+
+
+/*
+ *	Hardware start transmission.
+ *	Send a packet to media from the upper layer.
+ */
+
+/*
+ * Copies the skb into the driver-owned buffer of the next free Tx
+ * descriptor, hands the descriptor to the chip and frees the skb.
+ *
+ * Returns 0 when the skb has been consumed (sent or dropped) and 1 when
+ * no descriptor is free; in the latter case the queue is left stopped
+ * and the skb is not freed.
+ */
+static int uli526x_start_xmit(struct sk_buff *skb, struct DEVICE *dev)
+{
+	struct uli526x_board_info *db = netdev_priv(dev);
+	struct tx_desc *txptr;
+	unsigned long flags;
+
+	ULI526X_DBUG(0, "uli526x_start_xmit", 0);
+
+	/*
+	 * Too large packet check.  Done before the queue is stopped: the
+	 * packet is simply dropped, and returning with the queue stopped
+	 * would leave nothing to wake it again when the Tx ring is idle.
+	 */
+	if (skb->len > MAX_PACKET_SIZE) {
+		printk(KERN_ERR DRV_NAME ": big packet = %d\n", (u16)skb->len);
+		dev_kfree_skb(skb);
+		return 0;
+	}
+
+	/* Resource flag check */
+	netif_stop_queue(dev);
+
+	spin_lock_irqsave(&db->lock, flags);
+
+	/* No Tx resource check, it never happens normally */
+	if (db->tx_packet_cnt >= TX_FREE_DESC_CNT) {
+		spin_unlock_irqrestore(&db->lock, flags);
+		printk(KERN_ERR DRV_NAME ": No Tx resource %ld\n", db->tx_packet_cnt);
+		return 1;
+	}
+
+	/* Disable NIC interrupt */
+	outl(0, dev->base_addr + DCR7);
+
+	/* transmit this packet */
+	txptr = db->tx_insert_ptr;
+	memcpy(txptr->tx_buf_ptr, skb->data, skb->len);
+	txptr->tdes1 = cpu_to_le32(0xe1000000 | skb->len);
+
+	/* Point to next transmit free descriptor */
+	db->tx_insert_ptr = txptr->next_tx_desc;
+
+	/* Transmit Packet Process */
+	if ( (db->tx_packet_cnt < TX_DESC_CNT) ) {
+		txptr->tdes0 = cpu_to_le32(0x80000000);	/* Set owner bit */
+		db->tx_packet_cnt++;			/* Ready to send */
+		outl(0x1, dev->base_addr + DCR1);	/* Issue Tx polling */
+		dev->trans_start = jiffies;		/* saved time stamp */
+	}
+
+	/* Tx resource check: reopen the queue while descriptors remain */
+	if ( db->tx_packet_cnt < TX_FREE_DESC_CNT )
+		netif_wake_queue(dev);
+
+	/* Restore CR7 to enable interrupt */
+	spin_unlock_irqrestore(&db->lock, flags);
+	outl(db->cr7_data, dev->base_addr + DCR7);
+
+	/* free this SKB: its data was copied into the Tx buffer above */
+	dev_kfree_skb(skb);
+
+	return 0;
+}
+
+
+/*
+ *	Stop the interface.
+ *	The interface is stopped when it is brought down.
+ */ + +static int uli526x_stop(struct DEVICE *dev) +{ + struct uli526x_board_info *db = netdev_priv(dev); + unsigned long ioaddr = dev->base_addr; + + ULI526X_DBUG(0, "uli526x_stop", 0); + + /* disable system */ + netif_stop_queue(dev); + + /* deleted timer */ + del_timer_sync(&db->timer); + + /* Reset & stop ULI526X board */ + outl(ULI526X_RESET, ioaddr + DCR0); + udelay(5); + phy_write(db->ioaddr, db->phy_addr, 0, 0x8000, db->chip_id); + + /* free interrupt */ + free_irq(dev->irq, dev); + + /* free allocated rx buffer */ + uli526x_free_rxbuffer(db); + +#if 0 + /* show statistic counter */ + printk(DRV_NAME ": FU:%lx EC:%lx LC:%lx NC:%lx LOC:%lx TXJT:%lx RESET:%lx RCR8:%lx FAL:%lx TT:%lx\n", + db->tx_fifo_underrun, db->tx_excessive_collision, + db->tx_late_collision, db->tx_no_carrier, db->tx_loss_carrier, + db->tx_jabber_timeout, db->reset_count, db->reset_cr8, + db->reset_fatal, db->reset_TXtimeout); +#endif + + return 0; +} + + +/* + * M5261/M5263 insterrupt handler + * receive the packet to upper layer, free the transmitted packet + */ + +static irqreturn_t uli526x_interrupt(int irq, void *dev_id, struct pt_regs *regs) +{ + struct DEVICE *dev = dev_id; + struct uli526x_board_info *db = netdev_priv(dev); + unsigned long ioaddr = dev->base_addr; + unsigned long flags; + + //ULI526X_DBUG(0, "uli526x_interrupt()", 0); + + if (!dev) { + ULI526X_DBUG(1, "uli526x_interrupt() without DEVICE arg", 0); + return IRQ_NONE; + } + + //outl(0, ioaddr + DCR7); + spin_lock_irqsave(&db->lock, flags); + outl(0, ioaddr + DCR7); + + /* Got ULI526X status */ + db->cr5_data = inl(ioaddr + DCR5); + outl(db->cr5_data, ioaddr + DCR5); + if ( !(db->cr5_data & 0x180c1) ) { + spin_unlock_irqrestore(&db->lock, flags); + outl(db->cr7_data, ioaddr + DCR7); + return IRQ_HANDLED; + } + + /* Disable all interrupt in CR7 to solve the interrupt edge problem */ + //outl(0, ioaddr + DCR7); + + /* Check system status */ + if (db->cr5_data & 0x2000) { + /* system bus error happen */ + 
ULI526X_DBUG(1, "System bus error happen. CR5=", db->cr5_data); + db->reset_fatal++; + db->wait_reset = 1; /* Need to RESET */ + spin_unlock_irqrestore(&db->lock, flags); + return IRQ_HANDLED; + } + + /* Received the coming packet */ + if ( (db->cr5_data & 0x40) && db->rx_avail_cnt ) + uli526x_rx_packet(dev, db); + + /* reallocate rx descriptor buffer */ + if (db->rx_avail_cntcr5_data & 0x01) + uli526x_free_tx_pkt(dev, db); + + /* Restore CR7 to enable interrupt mask */ + outl(db->cr7_data, ioaddr + DCR7); + + spin_unlock_irqrestore(&db->lock, flags); + return IRQ_HANDLED; +} + + +/* + * Free TX resource after TX complete + */ + +static void uli526x_free_tx_pkt(struct DEVICE *dev, struct uli526x_board_info * db) +{ + struct tx_desc *txptr; +// unsigned long ioaddr = dev->base_addr; + u32 tdes0; + + txptr = db->tx_remove_ptr; + while(db->tx_packet_cnt) { + tdes0 = le32_to_cpu(txptr->tdes0); + /* printk(DRV_NAME ": tdes0=%x\n", tdes0); */ + if (tdes0 & 0x80000000) + break; + + /* A packet sent completed */ + db->tx_packet_cnt--; + db->stats.tx_packets++; + + /* Transmit statistic counter */ + if ( tdes0 != 0x7fffffff ) { + /* printk(DRV_NAME ": tdes0=%x\n", tdes0); */ + db->stats.collisions += (tdes0 >> 3) & 0xf; + db->stats.tx_bytes += le32_to_cpu(txptr->tdes1) & 0x7ff; + if (tdes0 & TDES0_ERR_MASK) { + db->stats.tx_errors++; + if (tdes0 & 0x0002) { /* UnderRun */ + db->tx_fifo_underrun++; + if ( !(db->cr6_data & CR6_SFT) ) { + db->cr6_data = db->cr6_data | CR6_SFT; + update_cr6(db->cr6_data, db->ioaddr); + } + } + if (tdes0 & 0x0100) + db->tx_excessive_collision++; + if (tdes0 & 0x0200) + db->tx_late_collision++; + if (tdes0 & 0x0400) + db->tx_no_carrier++; + if (tdes0 & 0x0800) + db->tx_loss_carrier++; + if (tdes0 & 0x4000) + db->tx_jabber_timeout++; + } + } + + txptr = txptr->next_tx_desc; + }/* End of while */ + + /* Update TX remove pointer to next */ + db->tx_remove_ptr = txptr; + + /* Resource available check */ + if ( db->tx_packet_cnt < TX_WAKE_DESC_CNT ) + 
netif_wake_queue(dev); /* Active upper layer, send again */ +} + + +/* + * Receive the come packet and pass to upper layer + */ + +static void uli526x_rx_packet(struct DEVICE *dev, struct uli526x_board_info * db) +{ + struct rx_desc *rxptr; + struct sk_buff *skb; + int rxlen; + u32 rdes0; + + rxptr = db->rx_ready_ptr; + + while(db->rx_avail_cnt) { + rdes0 = le32_to_cpu(rxptr->rdes0); + if (rdes0 & 0x80000000) /* packet owner check */ + { + break; + } + + db->rx_avail_cnt--; + db->interval_rx_cnt++; + + pci_unmap_single(db->pdev, le32_to_cpu(rxptr->rdes2), RX_ALLOC_SIZE, PCI_DMA_FROMDEVICE); + if ( (rdes0 & 0x300) != 0x300) { + /* A packet without First/Last flag */ + /* reuse this SKB */ + ULI526X_DBUG(0, "Reuse SK buffer, rdes0", rdes0); + uli526x_reuse_skb(db, rxptr->rx_skb_ptr); + } else { + /* A packet with First/Last flag */ + rxlen = ( (rdes0 >> 16) & 0x3fff) - 4; + + /* error summary bit check */ + if (rdes0 & 0x8000) { + /* This is a error packet */ + //printk(DRV_NAME ": rdes0: %lx\n", rdes0); + db->stats.rx_errors++; + if (rdes0 & 1) + db->stats.rx_fifo_errors++; + if (rdes0 & 2) + db->stats.rx_crc_errors++; + if (rdes0 & 0x80) + db->stats.rx_length_errors++; + } + + if ( !(rdes0 & 0x8000) || + ((db->cr6_data & CR6_PM) && (rxlen>6)) ) { + skb = rxptr->rx_skb_ptr; + + /* Good packet, send to upper layer */ + /* Shorst packet used new SKB */ + if ( (rxlen < RX_COPY_SIZE) && + ( (skb = dev_alloc_skb(rxlen + 2) ) + != NULL) ) { + /* size less than COPY_SIZE, allocate a rxlen SKB */ + skb->dev = dev; + skb_reserve(skb, 2); /* 16byte align */ + memcpy(skb_put(skb, rxlen), rxptr->rx_skb_ptr->tail, rxlen); + uli526x_reuse_skb(db, rxptr->rx_skb_ptr); + } else { + skb->dev = dev; + skb_put(skb, rxlen); + } + skb->protocol = eth_type_trans(skb, dev); + netif_rx(skb); + dev->last_rx = jiffies; + db->stats.rx_packets++; + db->stats.rx_bytes += rxlen; + + } else { + /* Reuse SKB buffer when the packet is error */ + ULI526X_DBUG(0, "Reuse SK buffer, rdes0", rdes0); + 
uli526x_reuse_skb(db, rxptr->rx_skb_ptr); + } + } + + rxptr = rxptr->next_rx_desc; + } + + db->rx_ready_ptr = rxptr; +} + + +/* + * Get statistics from driver. + */ + +static struct net_device_stats * uli526x_get_stats(struct DEVICE *dev) +{ + struct uli526x_board_info *db = netdev_priv(dev); + + ULI526X_DBUG(0, "uli526x_get_stats", 0); + return &db->stats; +} + + +/* + * Set the ULI526X receive filter from dev->flags and the multicast list: + * promiscuous, pass-all-multicast, or a setup frame listing the addresses. + * Runs with db->lock held and interrupts disabled. + */ + +static void uli526x_set_filter_mode(struct DEVICE * dev) +{ + struct uli526x_board_info *db = netdev_priv(dev); + unsigned long flags; + + ULI526X_DBUG(0, "uli526x_set_filter_mode()", 0); + spin_lock_irqsave(&db->lock, flags); + + if (dev->flags & IFF_PROMISC) { + ULI526X_DBUG(0, "Enable PROM Mode", 0); + db->cr6_data |= CR6_PM | CR6_PBF; + update_cr6(db->cr6_data, db->ioaddr); + spin_unlock_irqrestore(&db->lock, flags); + return; + } + + if (dev->flags & IFF_ALLMULTI || dev->mc_count > ULI5261_MAX_MULTICAST) { + ULI526X_DBUG(0, "Pass all multicast address", dev->mc_count); + db->cr6_data &= ~(CR6_PM | CR6_PBF); + db->cr6_data |= CR6_PAM; + /* cr6_data is only the driver's copy; write it to CR6 as the promiscuous branch does */ + update_cr6(db->cr6_data, db->ioaddr); + spin_unlock_irqrestore(&db->lock, flags); + return; + } + + ULI526X_DBUG(0, "Set multicast address", dev->mc_count); + send_filter_frame(dev, dev->mc_count); /* M5261/M5263 */ + spin_unlock_irqrestore(&db->lock, flags); +} + +static void +ULi_ethtool_gset(struct uli526x_board_info *db, struct ethtool_cmd *ecmd) +{ + //struct e1000_hw *hw = &adapter->hw; + + { + + ecmd->supported = (SUPPORTED_10baseT_Half | + SUPPORTED_10baseT_Full | + SUPPORTED_100baseT_Half | + SUPPORTED_100baseT_Full | + SUPPORTED_Autoneg | + SUPPORTED_MII); + + ecmd->advertising = (ADVERTISED_10baseT_Half | + ADVERTISED_10baseT_Full | + ADVERTISED_100baseT_Half | + ADVERTISED_100baseT_Full | + ADVERTISED_Autoneg | + ADVERTISED_MII); + + + ecmd->port = PORT_MII; + ecmd->phy_address = db->phy_addr; + + ecmd->transceiver = XCVR_EXTERNAL; + + + } + + + ecmd->speed = 10; + ecmd->duplex = DUPLEX_HALF; + + if(db->op_mode==ULI526X_100MHF || db->op_mode==ULI526X_100MFD)
+ { + ecmd->speed = 100; + } + if(db->op_mode==ULI526X_10MFD || db->op_mode==ULI526X_100MFD) + { + ecmd->duplex = DUPLEX_FULL; + } + if(db->link_failed) + { + ecmd->speed = -1; + ecmd->duplex = -1; + } + + if (db->media_mode & ULI526X_AUTO) + { + ecmd->autoneg = AUTONEG_ENABLE; + } + + +} + +static void netdev_get_drvinfo(struct net_device *dev, + struct ethtool_drvinfo *info) +{ + struct uli526x_board_info *np = netdev_priv(dev); + + strcpy(info->driver, DRV_NAME); + strcpy(info->version, DRV_VERSION); + if (np->pdev) + strcpy(info->bus_info, pci_name(np->pdev)); + else + sprintf(info->bus_info, "EISA 0x%lx %d", + dev->base_addr, dev->irq); +} + +static int netdev_get_settings(struct net_device *dev, struct ethtool_cmd *cmd) { + struct uli526x_board_info *np = netdev_priv(dev); + + ULi_ethtool_gset(np, cmd); + + return 0; +} + +static u32 netdev_get_link(struct net_device *dev) { + struct uli526x_board_info *np = netdev_priv(dev); + + if(np->link_failed) + return 0; + else + return 1; +} + +static void uli526x_get_wol(struct net_device *dev, struct ethtool_wolinfo *wol) +{ + wol->supported = WAKE_PHY | WAKE_MAGIC; + wol->wolopts = 0; +} + +static struct ethtool_ops netdev_ethtool_ops = { + .get_drvinfo = netdev_get_drvinfo, + .get_settings = netdev_get_settings, + .get_link = netdev_get_link, + .get_wol = uli526x_get_wol, +}; + +/* + * A periodic timer routine + * Dynamic media sense, allocate Rx buffer... 
+ */ + +static void uli526x_timer(unsigned long data) +{ + u32 tmp_cr8; + unsigned char tmp_cr12=0; + struct DEVICE *dev = (struct DEVICE *) data; + struct uli526x_board_info *db = netdev_priv(dev); + unsigned long flags; + u8 TmpSpeed=10; + + //ULI526X_DBUG(0, "uli526x_timer()", 0); + spin_lock_irqsave(&db->lock, flags); + + + /* Dynamic reset ULI526X : system error or transmit time-out */ + tmp_cr8 = inl(db->ioaddr + DCR8); + if ( (db->interval_rx_cnt==0) && (tmp_cr8) ) { + db->reset_cr8++; + db->wait_reset = 1; + } + db->interval_rx_cnt = 0; + + /* TX polling kick monitor */ + if ( db->tx_packet_cnt && + time_after(jiffies, dev->trans_start + ULI526X_TX_KICK) ) { + outl(0x1, dev->base_addr + DCR1); // Tx polling again + + // TX Timeout + if ( time_after(jiffies, dev->trans_start + ULI526X_TX_TIMEOUT) ) { + db->reset_TXtimeout++; + db->wait_reset = 1; + printk( "%s: Tx timeout - resetting\n", + dev->name); + } + } + + if (db->wait_reset) { + ULI526X_DBUG(0, "Dynamic Reset device", db->tx_packet_cnt); + db->reset_count++; + uli526x_dynamic_reset(dev); + db->timer.expires = ULI526X_TIMER_WUT; + add_timer(&db->timer); + spin_unlock_irqrestore(&db->lock, flags); + return; + } + + /* Link status check, Dynamic media type change */ + if((phy_read(db->ioaddr, db->phy_addr, 5, db->chip_id) & 0x01e0)!=0) + tmp_cr12 = 3; + + if ( !(tmp_cr12 & 0x3) && !db->link_failed ) { + /* Link Failed */ + ULI526X_DBUG(0, "Link Failed", tmp_cr12); + netif_carrier_off(dev); + printk(KERN_INFO "uli526x: %s NIC Link is Down\n",dev->name); + db->link_failed = 1; + + /* For Force 10/100M Half/Full mode: Enable Auto-Nego mode */ + /* AUTO don't need */ + if ( !(db->media_mode & 0x8) ) + phy_write(db->ioaddr, db->phy_addr, 0, 0x1000, db->chip_id); + + /* AUTO mode, if INT phyxcer link failed, select EXT device */ + if (db->media_mode & ULI526X_AUTO) { + db->cr6_data&=~0x00000200; /* bit9=0, HD mode */ + update_cr6(db->cr6_data, db->ioaddr); + } + } else + if ((tmp_cr12 & 0x3) && 
db->link_failed) { + ULI526X_DBUG(0, "Link link OK", tmp_cr12); + db->link_failed = 0; + + /* Auto Sense Speed */ + if ( (db->media_mode & ULI526X_AUTO) && + uli526x_sense_speed(db) ) + db->link_failed = 1; + uli526x_process_mode(db); + + if(db->link_failed==0) + { + if(db->op_mode==ULI526X_100MHF || db->op_mode==ULI526X_100MFD) + { + TmpSpeed = 100; + } + if(db->op_mode==ULI526X_10MFD || db->op_mode==ULI526X_100MFD) + { + printk(KERN_INFO "uli526x: %s NIC Link is Up %d Mbps Full duplex\n",dev->name,TmpSpeed); + } + else + { + printk(KERN_INFO "uli526x: %s NIC Link is Up %d Mbps Half duplex\n",dev->name,TmpSpeed); + } + netif_carrier_on(dev); + } + /* SHOW_MEDIA_TYPE(db->op_mode); */ + } + else if(!(tmp_cr12 & 0x3) && db->link_failed) + { + if(db->init==1) + { + printk(KERN_INFO "uli526x: %s NIC Link is Down\n",dev->name); + netif_carrier_off(dev); + } + } + db->init=0; + + /* Timer active again */ + db->timer.expires = ULI526X_TIMER_WUT; + add_timer(&db->timer); + spin_unlock_irqrestore(&db->lock, flags); +} + + +/* + * Dynamic reset the ULI526X board + * Stop ULI526X board + * Free Tx/Rx allocated memory + * Reset ULI526X board + * Re-initilize ULI526X board + */ + +static void uli526x_dynamic_reset(struct DEVICE *dev) +{ + struct uli526x_board_info *db = netdev_priv(dev); + + ULI526X_DBUG(0, "uli526x_dynamic_reset()", 0); + + /* Sopt MAC controller */ + db->cr6_data &= ~(CR6_RXSC | CR6_TXSC); /* Disable Tx/Rx */ + update_cr6(db->cr6_data, dev->base_addr); + outl(0, dev->base_addr + DCR7); /* Disable Interrupt */ + outl(inl(dev->base_addr + DCR5), dev->base_addr + DCR5); + + /* Disable upper layer interface */ + netif_stop_queue(dev); + + /* Free Rx Allocate buffer */ + uli526x_free_rxbuffer(db); + + /* system variable init */ + db->tx_packet_cnt = 0; + db->rx_avail_cnt = 0; + db->link_failed = 1; + db->init=1; + db->wait_reset = 0; + + /* Re-initilize ULI526X board */ + uli526x_init(dev); + + /* Restart upper layer interface */ + netif_wake_queue(dev); +} + + 
+/* + * free all allocated rx buffer + */ + +static void uli526x_free_rxbuffer(struct uli526x_board_info * db) +{ + ULI526X_DBUG(0, "uli526x_free_rxbuffer()", 0); + + /* free allocated rx buffer */ + while (db->rx_avail_cnt) { + dev_kfree_skb(db->rx_ready_ptr->rx_skb_ptr); + db->rx_ready_ptr = db->rx_ready_ptr->next_rx_desc; + db->rx_avail_cnt--; + } +} + + +/* + * Reuse the SK buffer + */ + +static void uli526x_reuse_skb(struct uli526x_board_info *db, struct sk_buff * skb) +{ + struct rx_desc *rxptr = db->rx_insert_ptr; + + if (!(rxptr->rdes0 & cpu_to_le32(0x80000000))) { + rxptr->rx_skb_ptr = skb; + rxptr->rdes2 = cpu_to_le32( pci_map_single(db->pdev, skb->tail, RX_ALLOC_SIZE, PCI_DMA_FROMDEVICE) ); + wmb(); + rxptr->rdes0 = cpu_to_le32(0x80000000); + db->rx_avail_cnt++; + db->rx_insert_ptr = rxptr->next_rx_desc; + } else + ULI526X_DBUG(0, "SK Buffer reuse method error", db->rx_avail_cnt); +} + + +/* + * Initialize transmit/Receive descriptor + * Using Chain structure, and allocate Tx/Rx buffer + */ + +static void uli526x_descriptor_init(struct uli526x_board_info *db, unsigned long ioaddr) +{ + struct tx_desc *tmp_tx; + struct rx_desc *tmp_rx; + unsigned char *tmp_buf; + dma_addr_t tmp_tx_dma, tmp_rx_dma; + dma_addr_t tmp_buf_dma; + int i; + + ULI526X_DBUG(0, "uli526x_descriptor_init()", 0); + + /* tx descriptor start pointer */ + db->tx_insert_ptr = db->first_tx_desc; + db->tx_remove_ptr = db->first_tx_desc; + outl(db->first_tx_desc_dma, ioaddr + DCR4); /* TX DESC address */ + + /* rx descriptor start pointer */ + db->first_rx_desc = (void *)db->first_tx_desc + sizeof(struct tx_desc) * TX_DESC_CNT; + db->first_rx_desc_dma = db->first_tx_desc_dma + sizeof(struct tx_desc) * TX_DESC_CNT; + db->rx_insert_ptr = db->first_rx_desc; + db->rx_ready_ptr = db->first_rx_desc; + outl(db->first_rx_desc_dma, ioaddr + DCR3); /* RX DESC address */ + + /* Init Transmit chain */ + tmp_buf = db->buf_pool_start; + tmp_buf_dma = db->buf_pool_dma_start; + tmp_tx_dma = 
db->first_tx_desc_dma; + for (tmp_tx = db->first_tx_desc, i = 0; i < TX_DESC_CNT; i++, tmp_tx++) { + tmp_tx->tx_buf_ptr = tmp_buf; + tmp_tx->tdes0 = cpu_to_le32(0); + tmp_tx->tdes1 = cpu_to_le32(0x81000000); /* IC, chain */ + tmp_tx->tdes2 = cpu_to_le32(tmp_buf_dma); + tmp_tx_dma += sizeof(struct tx_desc); + tmp_tx->tdes3 = cpu_to_le32(tmp_tx_dma); + tmp_tx->next_tx_desc = tmp_tx + 1; + tmp_buf = tmp_buf + TX_BUF_ALLOC; + tmp_buf_dma = tmp_buf_dma + TX_BUF_ALLOC; + } + (--tmp_tx)->tdes3 = cpu_to_le32(db->first_tx_desc_dma); + tmp_tx->next_tx_desc = db->first_tx_desc; + + /* Init Receive descriptor chain */ + tmp_rx_dma=db->first_rx_desc_dma; + for (tmp_rx = db->first_rx_desc, i = 0; i < RX_DESC_CNT; i++, tmp_rx++) { + tmp_rx->rdes0 = cpu_to_le32(0); + tmp_rx->rdes1 = cpu_to_le32(0x01000600); + tmp_rx_dma += sizeof(struct rx_desc); + tmp_rx->rdes3 = cpu_to_le32(tmp_rx_dma); + tmp_rx->next_rx_desc = tmp_rx + 1; + } + (--tmp_rx)->rdes3 = cpu_to_le32(db->first_rx_desc_dma); + tmp_rx->next_rx_desc = db->first_rx_desc; + + /* pre-allocate Rx buffer */ + allocate_rx_buffer(db); +} + + +/* + * Update CR6 value + * Firstly stop ULI526X , then written value and start + */ + +static void update_cr6(u32 cr6_data, unsigned long ioaddr) +{ + + outl(cr6_data, ioaddr + DCR6); + udelay(5); +} + + +/* + * Send a setup frame for M5261/M5263 + * This setup frame initilize ULI526X address filter mode + */ + +static void send_filter_frame(struct DEVICE *dev, int mc_cnt) +{ + struct uli526x_board_info *db = netdev_priv(dev); + struct dev_mc_list *mcptr; + struct tx_desc *txptr; + u16 * addrptr; + u32 * suptr; + int i; + + ULI526X_DBUG(0, "send_filter_frame()", 0); + + txptr = db->tx_insert_ptr; + suptr = (u32 *) txptr->tx_buf_ptr; + + /* Node address */ + addrptr = (u16 *) dev->dev_addr; + *suptr++ = addrptr[0]; + *suptr++ = addrptr[1]; + *suptr++ = addrptr[2]; + + /* broadcast address */ + *suptr++ = 0xffff; + *suptr++ = 0xffff; + *suptr++ = 0xffff; + + /* fit the multicast address */ + 
for (mcptr = dev->mc_list, i = 0; i < mc_cnt; i++, mcptr = mcptr->next) { + addrptr = (u16 *) mcptr->dmi_addr; + *suptr++ = addrptr[0]; + *suptr++ = addrptr[1]; + *suptr++ = addrptr[2]; + } + + for (; i<14; i++) { + *suptr++ = 0xffff; + *suptr++ = 0xffff; + *suptr++ = 0xffff; + } + + /* prepare the setup frame */ + db->tx_insert_ptr = txptr->next_tx_desc; + txptr->tdes1 = cpu_to_le32(0x890000c0); + + /* Resource Check and Send the setup packet */ + if (db->tx_packet_cnt < TX_DESC_CNT) { + /* Resource Empty */ + db->tx_packet_cnt++; + txptr->tdes0 = cpu_to_le32(0x80000000); + update_cr6(db->cr6_data | 0x2000, dev->base_addr); + outl(0x1, dev->base_addr + DCR1); /* Issue Tx polling */ + update_cr6(db->cr6_data, dev->base_addr); + dev->trans_start = jiffies; + } else + printk(KERN_ERR DRV_NAME ": No Tx resource - Send_filter_frame!\n"); +} + + +/* + * Allocate rx buffer, + * As possible as allocate maxiumn Rx buffer + */ + +static void allocate_rx_buffer(struct uli526x_board_info *db) +{ + struct rx_desc *rxptr; + struct sk_buff *skb; + + rxptr = db->rx_insert_ptr; + + while(db->rx_avail_cnt < RX_DESC_CNT) { + if ( ( skb = dev_alloc_skb(RX_ALLOC_SIZE) ) == NULL ) + break; + rxptr->rx_skb_ptr = skb; /* FIXME (?) */ + rxptr->rdes2 = cpu_to_le32( pci_map_single(db->pdev, skb->tail, RX_ALLOC_SIZE, PCI_DMA_FROMDEVICE) ); + wmb(); + rxptr->rdes0 = cpu_to_le32(0x80000000); + rxptr = rxptr->next_rx_desc; + db->rx_avail_cnt++; + } + + db->rx_insert_ptr = rxptr; +} + + +/* + * Read one word data from the serial ROM + */ + +static u16 read_srom_word(long ioaddr, int offset) +{ + int i; + u16 srom_data = 0; + long cr9_ioaddr = ioaddr + DCR9; + + outl(CR9_SROM_READ, cr9_ioaddr); + outl(CR9_SROM_READ | CR9_SRCS, cr9_ioaddr); + + /* Send the Read Command 110b */ + SROM_CLK_WRITE(SROM_DATA_1, cr9_ioaddr); + SROM_CLK_WRITE(SROM_DATA_1, cr9_ioaddr); + SROM_CLK_WRITE(SROM_DATA_0, cr9_ioaddr); + + /* Send the offset */ + for (i = 5; i >= 0; i--) { + srom_data = (offset & (1 << i)) ? 
SROM_DATA_1 : SROM_DATA_0; + SROM_CLK_WRITE(srom_data, cr9_ioaddr); + } + + outl(CR9_SROM_READ | CR9_SRCS, cr9_ioaddr); + + for (i = 16; i > 0; i--) { + outl(CR9_SROM_READ | CR9_SRCS | CR9_SRCLK, cr9_ioaddr); + udelay(5); + srom_data = (srom_data << 1) | ((inl(cr9_ioaddr) & CR9_CRDOUT) ? 1 : 0); + outl(CR9_SROM_READ | CR9_SRCS, cr9_ioaddr); + udelay(5); + } + + outl(CR9_SROM_READ, cr9_ioaddr); + return srom_data; +} + + +/* + * Auto sense the media mode + */ + +static u8 uli526x_sense_speed(struct uli526x_board_info * db) +{ + u8 ErrFlag = 0; + u16 phy_mode; + + phy_mode = phy_read(db->ioaddr, db->phy_addr, 1, db->chip_id); + phy_mode = phy_read(db->ioaddr, db->phy_addr, 1, db->chip_id); + + if ( (phy_mode & 0x24) == 0x24 ) { + + phy_mode = ((phy_read(db->ioaddr, db->phy_addr, 5, db->chip_id) & 0x01e0)<<7); + if(phy_mode&0x8000) + phy_mode = 0x8000; + else if(phy_mode&0x4000) + phy_mode = 0x4000; + else if(phy_mode&0x2000) + phy_mode = 0x2000; + else + phy_mode = 0x1000; + + /* printk(DRV_NAME ": Phy_mode %x ",phy_mode); */ + switch (phy_mode) { + case 0x1000: db->op_mode = ULI526X_10MHF; break; + case 0x2000: db->op_mode = ULI526X_10MFD; break; + case 0x4000: db->op_mode = ULI526X_100MHF; break; + case 0x8000: db->op_mode = ULI526X_100MFD; break; + default: db->op_mode = ULI526X_10MHF; ErrFlag = 1; break; + } + } else { + db->op_mode = ULI526X_10MHF; + ULI526X_DBUG(0, "Link Failed :", phy_mode); + ErrFlag = 1; + } + + return ErrFlag; +} + + +/* + * Set 10/100 phyxcer capability + * AUTO mode : phyxcer register4 is NIC capability + * Force mode: phyxcer register4 is the force media + */ + +static void uli526x_set_phyxcer(struct uli526x_board_info *db) +{ + u16 phy_reg; + + /* Phyxcer capability setting */ + phy_reg = phy_read(db->ioaddr, db->phy_addr, 4, db->chip_id) & ~0x01e0; + + if (db->media_mode & ULI526X_AUTO) { + /* AUTO Mode */ + phy_reg |= db->PHY_reg4; + } else { + /* Force Mode */ + switch(db->media_mode) { + case ULI526X_10MHF: phy_reg |= 0x20; break; + 
case ULI526X_10MFD: phy_reg |= 0x40; break; + case ULI526X_100MHF: phy_reg |= 0x80; break; + case ULI526X_100MFD: phy_reg |= 0x100; break; + } + + } + + /* Write new capability to Phyxcer Reg4 */ + if ( !(phy_reg & 0x01e0)) { + phy_reg|=db->PHY_reg4; + db->media_mode|=ULI526X_AUTO; + } + phy_write(db->ioaddr, db->phy_addr, 4, phy_reg, db->chip_id); + + /* Restart Auto-Negotiation */ + phy_write(db->ioaddr, db->phy_addr, 0, 0x1200, db->chip_id); + udelay(50); +} + + +/* + * Process op-mode + AUTO mode : PHY controller in Auto-negotiation Mode + * Force mode: PHY controller in force mode with HUB + * N-way force capability with SWITCH + */ + +static void uli526x_process_mode(struct uli526x_board_info *db) +{ + u16 phy_reg; + + /* Full Duplex Mode Check */ + if (db->op_mode & 0x4) + db->cr6_data |= CR6_FDM; /* Set Full Duplex Bit */ + else + db->cr6_data &= ~CR6_FDM; /* Clear Full Duplex Bit */ + + update_cr6(db->cr6_data, db->ioaddr); + + /* 10/100M phyxcer force mode need */ + if ( !(db->media_mode & 0x8)) { + /* Forece Mode */ + phy_reg = phy_read(db->ioaddr, db->phy_addr, 6, db->chip_id); + if ( !(phy_reg & 0x1) ) { + /* parter without N-Way capability */ + phy_reg = 0x0; + switch(db->op_mode) { + case ULI526X_10MHF: phy_reg = 0x0; break; + case ULI526X_10MFD: phy_reg = 0x100; break; + case ULI526X_100MHF: phy_reg = 0x2000; break; + case ULI526X_100MFD: phy_reg = 0x2100; break; + } + phy_write(db->ioaddr, db->phy_addr, 0, phy_reg, db->chip_id); + phy_write(db->ioaddr, db->phy_addr, 0, phy_reg, db->chip_id); + } + } +} + + +/* + * Write a word to Phy register + */ + +static void phy_write(unsigned long iobase, u8 phy_addr, u8 offset, u16 phy_data, u32 chip_id) +{ + u16 i; + unsigned long ioaddr; + + if(chip_id == PCI_ULI5263_ID) + { + phy_writeby_cr10(iobase, phy_addr, offset, phy_data); + return; + } + /* M5261/M5263 Chip */ + ioaddr = iobase + DCR9; + + /* Send 33 synchronization clock to Phy controller */ + for (i = 0; i < 35; i++) + phy_write_1bit(ioaddr, 
PHY_DATA_1, chip_id); + + /* Send start command(01) to Phy */ + phy_write_1bit(ioaddr, PHY_DATA_0, chip_id); + phy_write_1bit(ioaddr, PHY_DATA_1, chip_id); + + /* Send write command(01) to Phy */ + phy_write_1bit(ioaddr, PHY_DATA_0, chip_id); + phy_write_1bit(ioaddr, PHY_DATA_1, chip_id); + + /* Send Phy address */ + for (i = 0x10; i > 0; i = i >> 1) + phy_write_1bit(ioaddr, phy_addr & i ? PHY_DATA_1 : PHY_DATA_0, chip_id); + + /* Send register address */ + for (i = 0x10; i > 0; i = i >> 1) + phy_write_1bit(ioaddr, offset & i ? PHY_DATA_1 : PHY_DATA_0, chip_id); + + /* written trasnition */ + phy_write_1bit(ioaddr, PHY_DATA_1, chip_id); + phy_write_1bit(ioaddr, PHY_DATA_0, chip_id); + + /* Write a word data to PHY controller */ + for ( i = 0x8000; i > 0; i >>= 1) + phy_write_1bit(ioaddr, phy_data & i ? PHY_DATA_1 : PHY_DATA_0, chip_id); + +} + + +/* + * Read a word data from phy register + */ + +static u16 phy_read(unsigned long iobase, u8 phy_addr, u8 offset, u32 chip_id) +{ + int i; + u16 phy_data; + unsigned long ioaddr; + + if(chip_id == PCI_ULI5263_ID) + return phy_readby_cr10(iobase, phy_addr, offset); + /* M5261/M5263 Chip */ + ioaddr = iobase + DCR9; + + /* Send 33 synchronization clock to Phy controller */ + for (i = 0; i < 35; i++) + phy_write_1bit(ioaddr, PHY_DATA_1, chip_id); + + /* Send start command(01) to Phy */ + phy_write_1bit(ioaddr, PHY_DATA_0, chip_id); + phy_write_1bit(ioaddr, PHY_DATA_1, chip_id); + + /* Send read command(10) to Phy */ + phy_write_1bit(ioaddr, PHY_DATA_1, chip_id); + phy_write_1bit(ioaddr, PHY_DATA_0, chip_id); + + /* Send Phy address */ + for (i = 0x10; i > 0; i = i >> 1) + phy_write_1bit(ioaddr, phy_addr & i ? PHY_DATA_1 : PHY_DATA_0, chip_id); + + /* Send register address */ + for (i = 0x10; i > 0; i = i >> 1) + phy_write_1bit(ioaddr, offset & i ? 
PHY_DATA_1 : PHY_DATA_0, chip_id); + + /* Skip transition state */ + phy_read_1bit(ioaddr, chip_id); + + /* read 16bit data */ + for (phy_data = 0, i = 0; i < 16; i++) { + phy_data <<= 1; + phy_data |= phy_read_1bit(ioaddr, chip_id); + } + + return phy_data; +} + +static u16 phy_readby_cr10(unsigned long iobase, u8 phy_addr, u8 offset) +{ + unsigned long ioaddr,cr10_value; + + ioaddr = iobase + DCR10; + cr10_value = phy_addr; + cr10_value = (cr10_value<<5) + offset; + cr10_value = (cr10_value<<16) + 0x08000000; + outl(cr10_value,ioaddr); + udelay(1); + while(1) + { + cr10_value = inl(ioaddr); + if(cr10_value&0x10000000) + break; + } + return (cr10_value&0x0ffff); +} + +static void phy_writeby_cr10(unsigned long iobase, u8 phy_addr, u8 offset, u16 phy_data) +{ + unsigned long ioaddr,cr10_value; + + ioaddr = iobase + DCR10; + cr10_value = phy_addr; + cr10_value = (cr10_value<<5) + offset; + cr10_value = (cr10_value<<16) + 0x04000000 + phy_data; + outl(cr10_value,ioaddr); + udelay(1); +} +/* + * Write one bit data to Phy Controller + */ + +static void phy_write_1bit(unsigned long ioaddr, u32 phy_data, u32 chip_id) +{ + outl(phy_data , ioaddr); /* MII Clock Low */ + udelay(1); + outl(phy_data | MDCLKH, ioaddr); /* MII Clock High */ + udelay(1); + outl(phy_data , ioaddr); /* MII Clock Low */ + udelay(1); +} + + +/* + * Read one bit phy data from PHY controller + */ + +static u16 phy_read_1bit(unsigned long ioaddr, u32 chip_id) +{ + u16 phy_data; + + outl(0x50000 , ioaddr); + udelay(1); + phy_data = ( inl(ioaddr) >> 19 ) & 0x1; + outl(0x40000 , ioaddr); + udelay(1); + + return phy_data; +} + + +static struct pci_device_id uli526x_pci_tbl[] = { + { 0x10B9, 0x5261, PCI_ANY_ID, PCI_ANY_ID, 0, 0, PCI_ULI5261_ID }, + { 0x10B9, 0x5263, PCI_ANY_ID, PCI_ANY_ID, 0, 0, PCI_ULI5263_ID }, + { 0, } +}; +MODULE_DEVICE_TABLE(pci, uli526x_pci_tbl); + + +static struct pci_driver uli526x_driver = { + .name = "uli526x", + .id_table = uli526x_pci_tbl, + .probe = uli526x_init_one, + .remove 
= __devexit_p(uli526x_remove_one), +}; + +MODULE_AUTHOR("Peer Chen, peer.chen@uli.com.tw"); +MODULE_DESCRIPTION("ULi M5261/M5263 fast ethernet driver"); +MODULE_LICENSE("GPL"); + +MODULE_PARM(debug, "i"); +MODULE_PARM(mode, "i"); +MODULE_PARM(cr6set, "i"); +MODULE_PARM_DESC(debug, "ULi M5261/M5263 enable debugging (0-1)"); +MODULE_PARM_DESC(mode, "ULi M5261/M5263: Bit 0: 10/100Mbps, bit 2: duplex, bit 8: HomePNA"); + +/* Description: + * when user used insmod to add module, system invoked init_module() + * to initilize and register. + */ + +static int __init uli526x_init_module(void) +{ + int rc; + + printk(version); + printed_version = 1; + + ULI526X_DBUG(0, "init_module() ", debug); + + if (debug) + uli526x_debug = debug; /* set debug flag */ + if (cr6set) + uli526x_cr6_user_set = cr6set; + + switch(mode) { + case ULI526X_10MHF: + case ULI526X_100MHF: + case ULI526X_10MFD: + case ULI526X_100MFD: + uli526x_media_mode = mode; + break; + default:uli526x_media_mode = ULI526X_AUTO; + break; + } + + rc = pci_module_init(&uli526x_driver); + if (rc < 0) + return rc; + + return 0; +} + + +/* + * Description: + * when user used rmmod to delete module, system invoked clean_module() + * to un-register all registered services. + */ + +static void __exit uli526x_cleanup_module(void) +{ + ULI526X_DBUG(0, "uli526x_clean_module() ", debug); + pci_unregister_driver(&uli526x_driver); +} + +module_init(uli526x_init_module); +module_exit(uli526x_cleanup_module); From 890e8d0a3ded0b1dee9020bbef7e9908e2228ffb Mon Sep 17 00:00:00 2001 From: Francois Romieu Date: Sat, 30 Jul 2005 13:08:43 +0200 Subject: [PATCH 024/584] [PATCH] sis190: resurrection Raise the sis190 driver from the dead The driver handles the integrated network device found on SiS 965L chipset. It follows the classical (non-napi) interrupt-driven model and provides minimal ethtool support. The code comes from a heavy cleanup/rewrite of the original code which was removed from the kernel on 14/04/2004. 
Since the r8169 driver does not work too bad and there will probably be (at least) a few months of improvements/testing/fixing, I made the code as close as possible to the r8169 one. Pascal Chapperon deserves some special credit for testing and bug-catching. Many thanks to Lars Vahlenberg as well. Signed-off-by: Francois Romieu Signed-off-by: Jeff Garzik --- drivers/net/Kconfig | 10 + drivers/net/Makefile | 1 + drivers/net/sis190.c | 1359 ++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 1370 insertions(+) create mode 100644 drivers/net/sis190.c diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig index 8a835eb58808..765fbb29d386 100644 --- a/drivers/net/Kconfig +++ b/drivers/net/Kconfig @@ -1921,6 +1921,16 @@ config R8169_VLAN If in doubt, say Y. +config SIS190 + tristate "SiS190 gigabit ethernet support" + depends on PCI + select CRC32 + ---help--- + Say Y here if you have a SiS 190 PCI Gigabit Ethernet adapter. + + To compile this driver as a module, choose M here: the module + will be called sis190. This is recommended. + config SKGE tristate "New SysKonnect GigaEthernet support (EXPERIMENTAL)" depends on PCI && EXPERIMENTAL diff --git a/drivers/net/Makefile b/drivers/net/Makefile index 63c6d1e6d4d9..67b280af425e 100644 --- a/drivers/net/Makefile +++ b/drivers/net/Makefile @@ -42,6 +42,7 @@ obj-$(CONFIG_EEPRO100) += eepro100.o obj-$(CONFIG_E100) += e100.o obj-$(CONFIG_TLAN) += tlan.o obj-$(CONFIG_EPIC100) += epic100.o +obj-$(CONFIG_SIS190) += sis190.o obj-$(CONFIG_SIS900) += sis900.o obj-$(CONFIG_YELLOWFIN) += yellowfin.o obj-$(CONFIG_ACENIC) += acenic.o diff --git a/drivers/net/sis190.c b/drivers/net/sis190.c new file mode 100644 index 000000000000..fd303e7408ac --- /dev/null +++ b/drivers/net/sis190.c @@ -0,0 +1,1359 @@ +/* + sis190.c: Silicon Integrated Systems SiS190 ethernet driver + + Copyright (c) 2003 K.M. 
Liu + Copyright (c) 2003, 2004 Jeff Garzik + Copyright (c) 2003, 2004, 2005 Francois Romieu + + Based on r8169.c, tg3.c, 8139cp.c, skge.c and probably even epic100.c. + + This software may be used and distributed according to the terms of + the GNU General Public License (GPL), incorporated herein by reference. + Drivers based on or derived from this code fall under the GPL and must + retain the authorship, copyright and license notice. This file is not + a complete program and may only be used when the entire operating + system is licensed under the GPL. + + See the file COPYING in this distribution for more information. + + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define net_drv(p, arg...) if (netif_msg_drv(p)) \ + printk(arg) +#define net_probe(p, arg...) if (netif_msg_probe(p)) \ + printk(arg) +#define net_link(p, arg...) if (netif_msg_link(p)) \ + printk(arg) +#define net_intr(p, arg...) if (netif_msg_intr(p)) \ + printk(arg) +#define net_tx_err(p, arg...) 
if (netif_msg_tx_err(p)) \ + printk(arg) + +#ifdef CONFIG_SIS190_NAPI +#define NAPI_SUFFIX "-NAPI" +#else +#define NAPI_SUFFIX "" +#endif + +#define DRV_VERSION "1.2" NAPI_SUFFIX +#define DRV_NAME "sis190" +#define SIS190_DRIVER_NAME DRV_NAME " Gigabit Ethernet driver " DRV_VERSION +#define PFX DRV_NAME ": " + +#ifdef CONFIG_SIS190_NAPI +#define sis190_rx_skb netif_receive_skb +#define sis190_rx_quota(count, quota) min(count, quota) +#else +#define sis190_rx_skb netif_rx +#define sis190_rx_quota(count, quota) count +#endif + +#define MAC_ADDR_LEN 6 + +#define NUM_TX_DESC 64 +#define NUM_RX_DESC 64 +#define TX_RING_BYTES (NUM_TX_DESC * sizeof(struct TxDesc)) +#define RX_RING_BYTES (NUM_RX_DESC * sizeof(struct RxDesc)) +#define RX_BUF_SIZE 1536 + +#define SIS190_REGS_SIZE 0x80 +#define SIS190_TX_TIMEOUT (6*HZ) +#define SIS190_PHY_TIMEOUT (10*HZ) +#define SIS190_MSG_DEFAULT (NETIF_MSG_DRV | NETIF_MSG_PROBE | \ + NETIF_MSG_LINK | NETIF_MSG_IFUP | \ + NETIF_MSG_IFDOWN) + +/* Enhanced PHY access register bit definitions */ +#define EhnMIIread 0x0000 +#define EhnMIIwrite 0x0020 +#define EhnMIIdataShift 16 +#define EhnMIIpmdShift 6 /* 7016 only */ +#define EhnMIIregShift 11 +#define EhnMIIreq 0x0010 +#define EhnMIInotDone 0x0010 + +/* Write/read MMIO register */ +#define SIS_W8(reg, val) writeb ((val), ioaddr + (reg)) +#define SIS_W16(reg, val) writew ((val), ioaddr + (reg)) +#define SIS_W32(reg, val) writel ((val), ioaddr + (reg)) +#define SIS_R8(reg) readb (ioaddr + (reg)) +#define SIS_R16(reg) readw (ioaddr + (reg)) +#define SIS_R32(reg) readl (ioaddr + (reg)) + +#define SIS_PCI_COMMIT() SIS_R32(IntrControl) + +enum sis190_registers { + TxControl = 0x00, + TxDescStartAddr = 0x04, + TxNextDescAddr = 0x0c, // unused + RxControl = 0x10, + RxDescStartAddr = 0x14, + RxNextDescAddr = 0x1c, // unused + IntrStatus = 0x20, + IntrMask = 0x24, + IntrControl = 0x28, + IntrTimer = 0x2c, // unused + PMControl = 0x30, // unused + ROMControl = 0x38, + ROMInterface = 0x3c, + 
StationControl = 0x40, + GMIIControl = 0x44, + TxMacControl = 0x50, + RxMacControl = 0x60, + RxMacAddr = 0x62, + RxHashTable = 0x68, + // Undocumented = 0x6c, + RxWakeOnLan = 0x70, + // Undocumented = 0x74, + RxMPSControl = 0x78, // unused +}; + +enum sis190_register_content { + /* IntrStatus */ + SoftInt = 0x40000000, // unused + Timeup = 0x20000000, // unused + PauseFrame = 0x00080000, // unused + MagicPacket = 0x00040000, // unused + WakeupFrame = 0x00020000, // unused + LinkChange = 0x00010000, + RxQEmpty = 0x00000080, + RxQInt = 0x00000040, + TxQ1Empty = 0x00000020, // unused + TxQ1Int = 0x00000010, + TxQ0Empty = 0x00000008, // unused + TxQ0Int = 0x00000004, + RxHalt = 0x00000002, + TxHalt = 0x00000001, + + /* RxStatusDesc */ + RxRES = 0x00200000, // unused + RxCRC = 0x00080000, + RxRUNT = 0x00100000, // unused + RxRWT = 0x00400000, // unused + + /* {Rx/Tx}CmdBits */ + CmdReset = 0x10, + CmdRxEnb = 0x08, // unused + CmdTxEnb = 0x01, + RxBufEmpty = 0x01, // unused + + /* Cfg9346Bits */ + Cfg9346_Lock = 0x00, // unused + Cfg9346_Unlock = 0xc0, // unused + + /* RxMacControl */ + AcceptErr = 0x20, // unused + AcceptRunt = 0x10, // unused + AcceptBroadcast = 0x0800, + AcceptMulticast = 0x0400, + AcceptMyPhys = 0x0200, + AcceptAllPhys = 0x0100, + + /* RxConfigBits */ + RxCfgFIFOShift = 13, + RxCfgDMAShift = 8, // 0x1a in RxControl ? 
+ + /* TxConfigBits */ + TxInterFrameGapShift = 24, + TxDMAShift = 8, /* DMA burst value (0-7) is shift this many bits */ + + /* StationControl */ + _1000bpsF = 0x1c00, + _1000bpsH = 0x0c00, + _100bpsF = 0x1800, + _100bpsH = 0x0800, + _10bpsF = 0x1400, + _10bpsH = 0x0400, + + LinkStatus = 0x02, // unused + FullDup = 0x01, // unused + + /* TBICSRBit */ + TBILinkOK = 0x02000000, // unused +}; + +struct TxDesc { + u32 PSize; + u32 status; + u32 addr; + u32 size; +}; + +struct RxDesc { + u32 PSize; + u32 status; + u32 addr; + u32 size; +}; + +enum _DescStatusBit { + /* _Desc.status */ + OWNbit = 0x80000000, + INTbit = 0x40000000, + DEFbit = 0x00200000, + CRCbit = 0x00020000, + PADbit = 0x00010000, + /* _Desc.size */ + RingEnd = (1 << 31), + /* _Desc.PSize */ + RxSizeMask = 0x0000ffff +}; + +struct sis190_private { + void __iomem *mmio_addr; + struct pci_dev *pci_dev; + struct net_device_stats stats; + spinlock_t lock; + u32 rx_buf_sz; + u32 cur_rx; + u32 cur_tx; + u32 dirty_rx; + u32 dirty_tx; + dma_addr_t rx_dma; + dma_addr_t tx_dma; + struct RxDesc *RxDescRing; + struct TxDesc *TxDescRing; + struct sk_buff *Rx_skbuff[NUM_RX_DESC]; + struct sk_buff *Tx_skbuff[NUM_TX_DESC]; + struct work_struct phy_task; + struct timer_list timer; + u32 msg_enable; +}; + +const static struct { + const char *name; + u8 version; /* depend on docs */ + u32 RxConfigMask; /* clear the bits supported by this chip */ +} sis_chip_info[] = { + { DRV_NAME, 0x00, 0xff7e1880, }, +}; + +static struct pci_device_id sis190_pci_tbl[] __devinitdata = { + { PCI_DEVICE(PCI_VENDOR_ID_SI, 0x0190), 0, 0, 0 }, + { 0, }, +}; + +MODULE_DEVICE_TABLE(pci, sis190_pci_tbl); + +static int rx_copybreak = 200; + +static struct { + u32 msg_enable; +} debug = { -1 }; + +MODULE_DESCRIPTION("SiS sis190 Gigabit Ethernet driver"); +module_param(rx_copybreak, int, 0); +MODULE_PARM_DESC(rx_copybreak, "Copy breakpoint for copy-only-tiny-frames"); +module_param_named(debug, debug.msg_enable, int, 0); +MODULE_PARM_DESC(debug, 
"Debug verbosity level (0=none, ..., 16=all)"); +MODULE_AUTHOR("K.M. Liu , Ueimor "); +MODULE_VERSION(DRV_VERSION); +MODULE_LICENSE("GPL"); + +static const u32 sis190_intr_mask = + RxQEmpty | RxQInt | TxQ1Int | TxQ0Int | RxHalt | TxHalt; + +/* + * Maximum number of multicast addresses to filter (vs. Rx-all-multicast). + * The chips use a 64 element hash table based on the Ethernet CRC. + */ +static int multicast_filter_limit = 32; + +static void __mdio_cmd(void __iomem *ioaddr, u32 ctl) +{ + unsigned int i; + + SIS_W32(GMIIControl, ctl); + + msleep(1); + + for (i = 0; i < 100; i++) { + if (!(SIS_R32(GMIIControl) & EhnMIInotDone)) + break; + msleep(1); + } + + if (i > 999) + printk(KERN_ERR PFX "PHY command failed !\n"); +} + +static void mdio_write(void __iomem *ioaddr, int reg, int val) +{ + u32 pmd = 1; + + __mdio_cmd(ioaddr, EhnMIIreq | EhnMIIwrite | + (((u32) reg) << EhnMIIregShift) | (pmd << EhnMIIpmdShift) | + (((u32) val) << EhnMIIdataShift)); +} + +static int mdio_read(void __iomem *ioaddr, int reg) +{ + u32 pmd = 1; + + __mdio_cmd(ioaddr, EhnMIIreq | EhnMIIread | + (((u32) reg) << EhnMIIregShift) | (pmd << EhnMIIpmdShift)); + + return (u16) (SIS_R32(GMIIControl) >> EhnMIIdataShift); +} + +static int sis190_read_eeprom(void __iomem *ioaddr, u32 reg) +{ + unsigned int i; + u16 data; + u32 val; + + if (!(SIS_R32(ROMControl) & 0x0002)) + return 0; + + val = (0x0080 | (0x2 << 8) | (reg << 10)); + + SIS_W32(ROMInterface, val); + + for (i = 0; i < 200; i++) { + if (!(SIS_R32(ROMInterface) & 0x0080)) + break; + msleep(1); + } + + data = (u16) ((SIS_R32(ROMInterface) & 0xffff0000) >> 16); + + return data; +} + +static void sis190_irq_mask_and_ack(void __iomem *ioaddr) +{ + SIS_W32(IntrMask, 0x00); + SIS_W32(IntrStatus, 0xffffffff); + SIS_PCI_COMMIT(); +} + +static void sis190_asic_down(void __iomem *ioaddr) +{ + /* Stop the chip's Tx and Rx DMA processes. 
*/ + + SIS_W32(TxControl, 0x1a00); + SIS_W32(RxControl, 0x1a00); + + sis190_irq_mask_and_ack(ioaddr); +} + +static void sis190_mark_as_last_descriptor(struct RxDesc *desc) +{ + desc->size |= cpu_to_le32(RingEnd); +} + +static inline void sis190_give_to_asic(struct RxDesc *desc, u32 rx_buf_sz) +{ + u32 eor = le32_to_cpu(desc->size) & RingEnd; + + desc->PSize = 0x0; + desc->size = cpu_to_le32(rx_buf_sz | eor); + wmb(); + desc->status = cpu_to_le32(OWNbit | INTbit); +} + +static inline void sis190_map_to_asic(struct RxDesc *desc, dma_addr_t mapping, + u32 rx_buf_sz) +{ + desc->addr = cpu_to_le32(mapping); + sis190_give_to_asic(desc, rx_buf_sz); +} + +static inline void sis190_make_unusable_by_asic(struct RxDesc *desc) +{ + desc->PSize = 0x0; + desc->addr = 0xdeadbeef; + desc->size &= cpu_to_le32(RingEnd); + wmb(); + desc->status = 0x0; +} + +static int sis190_alloc_rx_skb(struct pci_dev *pdev, struct sk_buff **sk_buff, + struct RxDesc *desc, u32 rx_buf_sz) +{ + struct sk_buff *skb; + dma_addr_t mapping; + int ret = 0; + + skb = dev_alloc_skb(rx_buf_sz); + if (!skb) + goto err_out; + + *sk_buff = skb; + + mapping = pci_map_single(pdev, skb->data, rx_buf_sz, + PCI_DMA_FROMDEVICE); + + sis190_map_to_asic(desc, mapping, rx_buf_sz); +out: + return ret; + +err_out: + ret = -ENOMEM; + sis190_make_unusable_by_asic(desc); + goto out; +} + +static u32 sis190_rx_fill(struct sis190_private *tp, struct net_device *dev, + u32 start, u32 end) +{ + u32 cur; + + for (cur = start; cur < end; cur++) { + int ret, i = cur % NUM_RX_DESC; + + if (tp->Rx_skbuff[i]) + continue; + + ret = sis190_alloc_rx_skb(tp->pci_dev, tp->Rx_skbuff + i, + tp->RxDescRing + i, tp->rx_buf_sz); + if (ret < 0) + break; + } + return cur - start; +} + +static inline int sis190_try_rx_copy(struct sk_buff **sk_buff, int pkt_size, + struct RxDesc *desc, int rx_buf_sz) +{ + int ret = -1; + + if (pkt_size < rx_copybreak) { + struct sk_buff *skb; + + skb = dev_alloc_skb(pkt_size + NET_IP_ALIGN); + if (skb) { + 
skb_reserve(skb, NET_IP_ALIGN); + eth_copy_and_sum(skb, sk_buff[0]->data, pkt_size, 0); + *sk_buff = skb; + sis190_give_to_asic(desc, rx_buf_sz); + ret = 0; + } + } + return ret; +} + +static int sis190_rx_interrupt(struct net_device *dev, + struct sis190_private *tp, void __iomem *ioaddr) +{ + struct net_device_stats *stats = &tp->stats; + u32 rx_left, cur_rx = tp->cur_rx; + u32 delta, count; + + rx_left = NUM_RX_DESC + tp->dirty_rx - cur_rx; + rx_left = sis190_rx_quota(rx_left, (u32) dev->quota); + + for (; rx_left > 0; rx_left--, cur_rx++) { + unsigned int entry = cur_rx % NUM_RX_DESC; + struct RxDesc *desc = tp->RxDescRing + entry; + u32 status; + + if (le32_to_cpu(desc->status) & OWNbit) /* LE word: chip still owns it */ + break; + + status = le32_to_cpu(desc->PSize); + + // net_intr(tp, KERN_INFO "%s: Rx PSize = %08x.\n", dev->name, + // status); + + if (status & RxCRC) { + net_intr(tp, KERN_INFO "%s: bad crc. status = %08x.\n", + dev->name, status); + stats->rx_errors++; + stats->rx_crc_errors++; + sis190_give_to_asic(desc, tp->rx_buf_sz); + } else if (!(status & PADbit)) { + net_intr(tp, KERN_INFO "%s: bad pad. 
status = %08x.\n", + dev->name, status); + stats->rx_errors++; + stats->rx_length_errors++; + sis190_give_to_asic(desc, tp->rx_buf_sz); + } else { + struct sk_buff *skb = tp->Rx_skbuff[entry]; + int pkt_size = (status & RxSizeMask) - 4; + void (*pci_action)(struct pci_dev *, dma_addr_t, + size_t, int) = pci_dma_sync_single_for_device; + + if (unlikely(pkt_size > tp->rx_buf_sz)) { + net_intr(tp, KERN_INFO + "%s: (frag) status = %08x.\n", + dev->name, status); + stats->rx_dropped++; + stats->rx_length_errors++; + sis190_give_to_asic(desc, tp->rx_buf_sz); + continue; + } + + pci_dma_sync_single_for_cpu(tp->pci_dev, + le32_to_cpu(desc->addr), tp->rx_buf_sz, + PCI_DMA_FROMDEVICE); + + if (sis190_try_rx_copy(&skb, pkt_size, desc, + tp->rx_buf_sz)) { + pci_action = pci_unmap_single; + tp->Rx_skbuff[entry] = NULL; + sis190_make_unusable_by_asic(desc); + } + + pci_action(tp->pci_dev, le32_to_cpu(desc->addr), + tp->rx_buf_sz, PCI_DMA_FROMDEVICE); + + skb->dev = dev; + skb_put(skb, pkt_size); + skb->protocol = eth_type_trans(skb, dev); + + sis190_rx_skb(skb); + + dev->last_rx = jiffies; + stats->rx_bytes += pkt_size; + stats->rx_packets++; + } + } + count = cur_rx - tp->cur_rx; + tp->cur_rx = cur_rx; + + delta = sis190_rx_fill(tp, dev, tp->dirty_rx, tp->cur_rx); + if (!delta && count && netif_msg_intr(tp)) + printk(KERN_INFO "%s: no Rx buffer allocated.\n", dev->name); + tp->dirty_rx += delta; + + if (((tp->dirty_rx + NUM_RX_DESC) == tp->cur_rx) && netif_msg_intr(tp)) + printk(KERN_EMERG "%s: Rx buffers exhausted.\n", dev->name); + + return count; +} + +static void sis190_unmap_tx_skb(struct pci_dev *pdev, struct sk_buff *skb, + struct TxDesc *desc) +{ + unsigned int len; + + len = skb->len < ETH_ZLEN ? 
ETH_ZLEN : skb->len; + + pci_unmap_single(pdev, le32_to_cpu(desc->addr), len, PCI_DMA_TODEVICE); + + memset(desc, 0x00, sizeof(*desc)); +} + +static void sis190_tx_interrupt(struct net_device *dev, + struct sis190_private *tp, void __iomem *ioaddr) +{ + u32 pending, dirty_tx = tp->dirty_tx; + /* + * It would not be needed if queueing was allowed to be enabled + * again too early (hint: think preempt and unclocked smp systems). + */ + unsigned int queue_stopped; + + smp_rmb(); + pending = tp->cur_tx - dirty_tx; + queue_stopped = (pending == NUM_TX_DESC); + + for (; pending; pending--, dirty_tx++) { + unsigned int entry = dirty_tx % NUM_TX_DESC; + struct TxDesc *txd = tp->TxDescRing + entry; + struct sk_buff *skb; + + if (le32_to_cpu(txd->status) & OWNbit) + break; + + skb = tp->Tx_skbuff[entry]; + + tp->stats.tx_packets++; + tp->stats.tx_bytes += skb->len; + + sis190_unmap_tx_skb(tp->pci_dev, skb, txd); + tp->Tx_skbuff[entry] = NULL; + dev_kfree_skb_irq(skb); + } + + if (tp->dirty_tx != dirty_tx) { + tp->dirty_tx = dirty_tx; + smp_wmb(); + if (queue_stopped) + netif_wake_queue(dev); + } +} + +/* + * The interrupt handler does all of the Rx thread work and cleans up after + * the Tx thread. 
+ */ +static irqreturn_t sis190_interrupt(int irq, void *__dev, struct pt_regs *regs) +{ + struct net_device *dev = __dev; + struct sis190_private *tp = netdev_priv(dev); + void __iomem *ioaddr = tp->mmio_addr; + unsigned int handled = 0; + u32 status; + + status = SIS_R32(IntrStatus); + + if ((status == 0xffffffff) || !status) + goto out; + + handled = 1; + + if (unlikely(!netif_running(dev))) { + sis190_asic_down(ioaddr); + goto out; + } + + SIS_W32(IntrStatus, status); + + // net_intr(tp, KERN_INFO "%s: status = %08x.\n", dev->name, status); + + if (status & LinkChange) { + net_intr(tp, KERN_INFO "%s: link change.\n", dev->name); + schedule_work(&tp->phy_task); + } + + if (status & RxQInt) + sis190_rx_interrupt(dev, tp, ioaddr); + + if (status & TxQ0Int) + sis190_tx_interrupt(dev, tp, ioaddr); +out: + return IRQ_RETVAL(handled); +} + +static void sis190_free_rx_skb(struct sis190_private *tp, + struct sk_buff **sk_buff, struct RxDesc *desc) +{ + struct pci_dev *pdev = tp->pci_dev; + + pci_unmap_single(pdev, le32_to_cpu(desc->addr), tp->rx_buf_sz, + PCI_DMA_FROMDEVICE); + dev_kfree_skb(*sk_buff); + *sk_buff = NULL; + sis190_make_unusable_by_asic(desc); +} + +static void sis190_rx_clear(struct sis190_private *tp) +{ + unsigned int i; + + for (i = 0; i < NUM_RX_DESC; i++) { + if (!tp->Rx_skbuff[i]) + continue; + sis190_free_rx_skb(tp, tp->Rx_skbuff + i, tp->RxDescRing + i); + } +} + +static void sis190_init_ring_indexes(struct sis190_private *tp) +{ + tp->dirty_tx = tp->dirty_rx = tp->cur_tx = tp->cur_rx = 0; +} + +static int sis190_init_ring(struct net_device *dev) +{ + struct sis190_private *tp = netdev_priv(dev); + + sis190_init_ring_indexes(tp); + + memset(tp->Tx_skbuff, 0x0, NUM_TX_DESC * sizeof(struct sk_buff *)); + memset(tp->Rx_skbuff, 0x0, NUM_RX_DESC * sizeof(struct sk_buff *)); + + if (sis190_rx_fill(tp, dev, 0, NUM_RX_DESC) != NUM_RX_DESC) + goto err_rx_clear; + + sis190_mark_as_last_descriptor(tp->RxDescRing + NUM_RX_DESC - 1); + + return 0; + 
+err_rx_clear: + sis190_rx_clear(tp); + return -ENOMEM; +} + +static void sis190_set_rx_mode(struct net_device *dev) +{ + struct sis190_private *tp = netdev_priv(dev); + void __iomem *ioaddr = tp->mmio_addr; + unsigned long flags; + u32 mc_filter[2]; /* Multicast hash filter */ + u16 rx_mode; + + if (dev->flags & IFF_PROMISC) { + /* Unconditionally log net taps. */ + net_drv(tp, KERN_NOTICE "%s: Promiscuous mode enabled.\n", + dev->name); + rx_mode = + AcceptBroadcast | AcceptMulticast | AcceptMyPhys | + AcceptAllPhys; + mc_filter[1] = mc_filter[0] = 0xffffffff; + } else if ((dev->mc_count > multicast_filter_limit) || + (dev->flags & IFF_ALLMULTI)) { + /* Too many to filter perfectly -- accept all multicasts. */ + rx_mode = AcceptBroadcast | AcceptMulticast | AcceptMyPhys; + mc_filter[1] = mc_filter[0] = 0xffffffff; + } else { + struct dev_mc_list *mclist; + unsigned int i; + + rx_mode = AcceptBroadcast | AcceptMyPhys; + mc_filter[1] = mc_filter[0] = 0; + for (i = 0, mclist = dev->mc_list; mclist && i < dev->mc_count; + i++, mclist = mclist->next) { + int bit_nr = + ether_crc(ETH_ALEN, mclist->dmi_addr) >> 26; + mc_filter[bit_nr >> 5] |= 1 << (bit_nr & 31); + rx_mode |= AcceptMulticast; + } + } + + spin_lock_irqsave(&tp->lock, flags); + + SIS_W16(RxMacControl, rx_mode | 0x2); + SIS_W32(RxHashTable, mc_filter[0]); + SIS_W32(RxHashTable + 4, mc_filter[1]); + + spin_unlock_irqrestore(&tp->lock, flags); +} + +static void sis190_soft_reset(void __iomem *ioaddr) +{ + SIS_W32(IntrControl, 0x8000); + SIS_PCI_COMMIT(); + msleep(1); + SIS_W32(IntrControl, 0x0); + sis190_asic_down(ioaddr); + msleep(1); +} + +static void sis190_hw_start(struct net_device *dev) +{ + struct sis190_private *tp = netdev_priv(dev); + void __iomem *ioaddr = tp->mmio_addr; + + sis190_soft_reset(ioaddr); + + SIS_W32(TxDescStartAddr, tp->tx_dma); + SIS_W32(RxDescStartAddr, tp->rx_dma); + + SIS_W32(IntrStatus, 0xffffffff); + SIS_W32(IntrMask, 0x0); + /* + * Default is 100Mbps. 
+ * A bit strange: 100Mbps is 0x1801 elsewhere -- FR 2005/06/09 + */ + SIS_W16(StationControl, 0x1901); + SIS_W32(GMIIControl, 0x0); + SIS_W32(TxMacControl, 0x60); + SIS_W16(RxMacControl, 0x02); + SIS_W32(RxHashTable, 0x0); + SIS_W32(0x6c, 0x0); + SIS_W32(RxWakeOnLan, 0x0); + SIS_W32(0x74, 0x0); + + SIS_PCI_COMMIT(); + + sis190_set_rx_mode(dev); + + /* Enable all known interrupts by setting the interrupt mask. */ + SIS_W32(IntrMask, sis190_intr_mask); + + SIS_W32(TxControl, 0x1a00 | CmdTxEnb); + SIS_W32(RxControl, 0x1a1d); + + netif_start_queue(dev); +} + +static void sis190_phy_task(void * data) +{ + struct net_device *dev = data; + struct sis190_private *tp = netdev_priv(dev); + void __iomem *ioaddr = tp->mmio_addr; + u16 val; + + val = mdio_read(ioaddr, MII_BMCR); + if (val & BMCR_RESET) { + // FIXME: needlessly high ? -- FR 02/07/2005 + mod_timer(&tp->timer, jiffies + HZ/10); + } else if (!(mdio_read(ioaddr, MII_BMSR) & BMSR_ANEGCOMPLETE)) { + net_link(tp, KERN_WARNING "%s: PHY reset until link up.\n", + dev->name); + mdio_write(ioaddr, MII_BMCR, val | BMCR_RESET); + mod_timer(&tp->timer, jiffies + SIS190_PHY_TIMEOUT); + } else { + /* Rejoice ! 
*/ + struct { + int val; + const char *msg; + u16 ctl; + } reg31[] = { + { LPA_1000XFULL | LPA_SLCT, + "1000 Mbps Full Duplex", + 0x01 | _1000bpsF }, + { LPA_1000XHALF | LPA_SLCT, + "1000 Mbps Half Duplex", + 0x01 | _1000bpsH }, + { LPA_100FULL, + "100 Mbps Full Duplex", + 0x01 | _100bpsF }, + { LPA_100HALF, + "100 Mbps Half Duplex", + 0x01 | _100bpsH }, + { LPA_10FULL, + "10 Mbps Full Duplex", + 0x01 | _10bpsF }, + { LPA_10HALF, + "10 Mbps Half Duplex", + 0x01 | _10bpsH }, + { 0, "unknown", 0x0000 } + }, *p; + + val = mdio_read(ioaddr, 0x1f); + net_link(tp, KERN_INFO "%s: mii ext = %04x.\n", dev->name, val); + + val = mdio_read(ioaddr, MII_LPA); + net_link(tp, KERN_INFO "%s: mii lpa = %04x.\n", dev->name, val); + + for (p = reg31; p->ctl; p++) { + if ((val & p->val) == p->val) + break; + } + if (p->ctl) + SIS_W16(StationControl, p->ctl); + net_link(tp, KERN_INFO "%s: link on %s mode.\n", dev->name, + p->msg); + netif_carrier_on(dev); + } +} + +static void sis190_phy_timer(unsigned long __opaque) +{ + struct net_device *dev = (struct net_device *)__opaque; + struct sis190_private *tp = netdev_priv(dev); + + if (likely(netif_running(dev))) + schedule_work(&tp->phy_task); +} + +static inline void sis190_delete_timer(struct net_device *dev) +{ + struct sis190_private *tp = netdev_priv(dev); + + del_timer_sync(&tp->timer); +} + +static inline void sis190_request_timer(struct net_device *dev) +{ + struct sis190_private *tp = netdev_priv(dev); + struct timer_list *timer = &tp->timer; + + init_timer(timer); + timer->expires = jiffies + SIS190_PHY_TIMEOUT; + timer->data = (unsigned long)dev; + timer->function = sis190_phy_timer; + add_timer(timer); +} + +static void sis190_set_rxbufsize(struct sis190_private *tp, + struct net_device *dev) +{ + unsigned int mtu = dev->mtu; + + tp->rx_buf_sz = (mtu > RX_BUF_SIZE) ? 
mtu + ETH_HLEN + 8 : RX_BUF_SIZE; +} + +static int sis190_open(struct net_device *dev) +{ + struct sis190_private *tp = netdev_priv(dev); + struct pci_dev *pdev = tp->pci_dev; + int rc = -ENOMEM; + + sis190_set_rxbufsize(tp, dev); + + /* + * Rx and Tx descriptors need 256 bytes alignment. + * pci_alloc_consistent() guarantees a stronger alignment. + */ + tp->TxDescRing = pci_alloc_consistent(pdev, TX_RING_BYTES, &tp->tx_dma); + if (!tp->TxDescRing) + goto out; + + tp->RxDescRing = pci_alloc_consistent(pdev, RX_RING_BYTES, &tp->rx_dma); + if (!tp->RxDescRing) + goto err_free_tx_0; + + rc = sis190_init_ring(dev); + if (rc < 0) + goto err_free_rx_1; + + INIT_WORK(&tp->phy_task, sis190_phy_task, dev); + + sis190_request_timer(dev); + + rc = request_irq(dev->irq, sis190_interrupt, SA_SHIRQ, dev->name, dev); + if (rc < 0) + goto err_release_timer_2; + + sis190_hw_start(dev); +out: + return rc; + +err_release_timer_2: + sis190_delete_timer(dev); + sis190_rx_clear(tp); +err_free_rx_1: + pci_free_consistent(tp->pci_dev, RX_RING_BYTES, tp->RxDescRing, + tp->rx_dma); +err_free_tx_0: + pci_free_consistent(tp->pci_dev, TX_RING_BYTES, tp->TxDescRing, + tp->tx_dma); + goto out; +} + +static void sis190_tx_clear(struct sis190_private *tp) +{ + unsigned int i; + + for (i = 0; i < NUM_TX_DESC; i++) { + struct sk_buff *skb = tp->Tx_skbuff[i]; + + if (!skb) + continue; + + sis190_unmap_tx_skb(tp->pci_dev, skb, tp->TxDescRing + i); + tp->Tx_skbuff[i] = NULL; + dev_kfree_skb(skb); + + tp->stats.tx_dropped++; + } + tp->cur_tx = tp->dirty_tx = 0; +} + +static void sis190_down(struct net_device *dev) +{ + struct sis190_private *tp = netdev_priv(dev); + void __iomem *ioaddr = tp->mmio_addr; + unsigned int poll_locked = 0; + + sis190_delete_timer(dev); + + netif_stop_queue(dev); + + flush_scheduled_work(); + + do { + spin_lock_irq(&tp->lock); + + sis190_asic_down(ioaddr); + + spin_unlock_irq(&tp->lock); + + synchronize_irq(dev->irq); + + if (!poll_locked) { + netif_poll_disable(dev); + 
poll_locked++; + } + + synchronize_sched(); + + } while (SIS_R32(IntrMask)); + + sis190_tx_clear(tp); + sis190_rx_clear(tp); +} + +static int sis190_close(struct net_device *dev) +{ + struct sis190_private *tp = netdev_priv(dev); + struct pci_dev *pdev = tp->pci_dev; + + sis190_down(dev); + + free_irq(dev->irq, dev); + + netif_poll_enable(dev); + + pci_free_consistent(pdev, TX_RING_BYTES, tp->TxDescRing, tp->tx_dma); + pci_free_consistent(pdev, RX_RING_BYTES, tp->RxDescRing, tp->rx_dma); + + tp->TxDescRing = NULL; + tp->RxDescRing = NULL; + + return 0; +} + +static int sis190_start_xmit(struct sk_buff *skb, struct net_device *dev) +{ + struct sis190_private *tp = netdev_priv(dev); + void __iomem *ioaddr = tp->mmio_addr; + u32 len, entry, dirty_tx; + struct TxDesc *desc; + dma_addr_t mapping; + + if (unlikely(skb->len < ETH_ZLEN)) { + skb = skb_padto(skb, ETH_ZLEN); + if (!skb) { + tp->stats.tx_dropped++; + goto out; + } + len = ETH_ZLEN; + } else { + len = skb->len; + } + + entry = tp->cur_tx % NUM_TX_DESC; + desc = tp->TxDescRing + entry; + + if (unlikely(le32_to_cpu(desc->status) & OWNbit)) { + netif_stop_queue(dev); + net_tx_err(tp, KERN_ERR PFX + "%s: BUG! 
Tx Ring full when queue awake!\n", + dev->name); + return NETDEV_TX_BUSY; + } + + mapping = pci_map_single(tp->pci_dev, skb->data, len, PCI_DMA_TODEVICE); + + tp->Tx_skbuff[entry] = skb; + + desc->PSize = cpu_to_le32(len); + desc->addr = cpu_to_le32(mapping); + + desc->size = cpu_to_le32(len); + if (entry == (NUM_TX_DESC - 1)) + desc->size |= cpu_to_le32(RingEnd); + + wmb(); + + desc->status = cpu_to_le32(OWNbit | INTbit | DEFbit | CRCbit | PADbit); + + tp->cur_tx++; + + smp_wmb(); + + SIS_W32(TxControl, 0x1a00 | CmdReset | CmdTxEnb); + + dev->trans_start = jiffies; + + dirty_tx = tp->dirty_tx; + if ((tp->cur_tx - NUM_TX_DESC) == dirty_tx) { + netif_stop_queue(dev); + smp_rmb(); + if (dirty_tx != tp->dirty_tx) + netif_wake_queue(dev); + } +out: + return NETDEV_TX_OK; +} + +static struct net_device_stats *sis190_get_stats(struct net_device *dev) +{ + struct sis190_private *tp = netdev_priv(dev); + + return &tp->stats; +} + +static void sis190_release_board(struct pci_dev *pdev) +{ + struct net_device *dev = pci_get_drvdata(pdev); + struct sis190_private *tp = netdev_priv(dev); + + iounmap(tp->mmio_addr); + pci_release_regions(pdev); + pci_disable_device(pdev); + free_netdev(dev); +} + +static struct net_device * __devinit sis190_init_board(struct pci_dev *pdev) +{ + struct sis190_private *tp; + struct net_device *dev; + void __iomem *ioaddr; + int rc; + + dev = alloc_etherdev(sizeof(*tp)); + if (!dev) { + net_drv(&debug, KERN_ERR PFX "unable to alloc new ethernet\n"); + rc = -ENOMEM; + goto err_out_0; + } + + SET_MODULE_OWNER(dev); + SET_NETDEV_DEV(dev, &pdev->dev); + + tp = netdev_priv(dev); + tp->msg_enable = netif_msg_init(debug.msg_enable, SIS190_MSG_DEFAULT); + + rc = pci_enable_device(pdev); + if (rc < 0) { + net_probe(tp, KERN_ERR "%s: enable failure\n", pci_name(pdev)); + goto err_free_dev_1; + } + + rc = -ENODEV; + + if (!(pci_resource_flags(pdev, 0) & IORESOURCE_MEM)) { + net_probe(tp, KERN_ERR "%s: region #0 is no MMIO resource.\n", + pci_name(pdev)); + 
goto err_pci_disable_2; + } + if (pci_resource_len(pdev, 0) < SIS190_REGS_SIZE) { + net_probe(tp, KERN_ERR "%s: invalid PCI region size(s).\n", + pci_name(pdev)); + goto err_pci_disable_2; + } + + rc = pci_request_regions(pdev, DRV_NAME); + if (rc < 0) { + net_probe(tp, KERN_ERR PFX "%s: could not request regions.\n", + pci_name(pdev)); + goto err_pci_disable_2; + } + + rc = pci_set_dma_mask(pdev, DMA_32BIT_MASK); + if (rc < 0) { + net_probe(tp, KERN_ERR "%s: DMA configuration failed.\n", + pci_name(pdev)); + goto err_free_res_3; + } + + pci_set_master(pdev); + + ioaddr = ioremap(pci_resource_start(pdev, 0), SIS190_REGS_SIZE); + if (!ioaddr) { + net_probe(tp, KERN_ERR "%s: cannot remap MMIO, aborting\n", + pci_name(pdev)); + rc = -EIO; + goto err_free_res_3; + } + + tp->pci_dev = pdev; + tp->mmio_addr = ioaddr; + + sis190_irq_mask_and_ack(ioaddr); + + sis190_soft_reset(ioaddr); +out: + return dev; + +err_free_res_3: + pci_release_regions(pdev); +err_pci_disable_2: + pci_disable_device(pdev); +err_free_dev_1: + free_netdev(dev); +err_out_0: + dev = ERR_PTR(rc); + goto out; +} + +static void sis190_tx_timeout(struct net_device *dev) +{ + struct sis190_private *tp = netdev_priv(dev); + void __iomem *ioaddr = tp->mmio_addr; + u8 tmp8; + + /* Disable Tx, if not already */ + tmp8 = SIS_R8(TxControl); + if (tmp8 & CmdTxEnb) + SIS_W8(TxControl, tmp8 & ~CmdTxEnb); + + /* Disable interrupts by clearing the interrupt mask. */ + SIS_W32(IntrMask, 0x0000); + + /* Stop a shared interrupt from scavenging while we are. */ + spin_lock_irq(&tp->lock); + sis190_tx_clear(tp); + spin_unlock_irq(&tp->lock); + + /* ...and finally, reset everything. 
*/ + sis190_hw_start(dev); + + netif_wake_queue(dev); +} + +static void sis190_set_speed_auto(struct net_device *dev) +{ + struct sis190_private *tp = netdev_priv(dev); + void __iomem *ioaddr = tp->mmio_addr; + int val; + + net_link(tp, KERN_INFO "%s: Enabling Auto-negotiation.\n", dev->name); + + val = mdio_read(ioaddr, MII_ADVERTISE); + + // Enable 10/100 Full/Half Mode, leave MII_ADVERTISE bit4:0 + // unchanged. + mdio_write(ioaddr, MII_ADVERTISE, (val & ADVERTISE_SLCT) | + ADVERTISE_100FULL | ADVERTISE_10FULL | + ADVERTISE_100HALF | ADVERTISE_10HALF); + + // Enable 1000 Full Mode. + mdio_write(ioaddr, MII_CTRL1000, ADVERTISE_1000FULL); + + // Enable auto-negotiation and restart auto-negotiation. + mdio_write(ioaddr, MII_BMCR, + BMCR_ANENABLE | BMCR_ANRESTART | BMCR_RESET); +} + +static void sis190_get_drvinfo(struct net_device *dev, + struct ethtool_drvinfo *info) +{ + struct sis190_private *tp = netdev_priv(dev); + + strcpy(info->driver, DRV_NAME); + strcpy(info->version, DRV_VERSION); + strcpy(info->bus_info, pci_name(tp->pci_dev)); +} + +static int sis190_get_regs_len(struct net_device *dev) +{ + return SIS190_REGS_SIZE; +} + +static void sis190_get_regs(struct net_device *dev, struct ethtool_regs *regs, + void *p) +{ + struct sis190_private *tp = netdev_priv(dev); + unsigned long flags; + + if (regs->len > SIS190_REGS_SIZE) + regs->len = SIS190_REGS_SIZE; + + spin_lock_irqsave(&tp->lock, flags); + memcpy_fromio(p, tp->mmio_addr, regs->len); + spin_unlock_irqrestore(&tp->lock, flags); +} + +static u32 sis190_get_msglevel(struct net_device *dev) +{ + struct sis190_private *tp = netdev_priv(dev); + + return tp->msg_enable; +} + +static void sis190_set_msglevel(struct net_device *dev, u32 value) +{ + struct sis190_private *tp = netdev_priv(dev); + + tp->msg_enable = value; +} + +static struct ethtool_ops sis190_ethtool_ops = { + .get_drvinfo = sis190_get_drvinfo, + .get_regs_len = sis190_get_regs_len, + .get_regs = sis190_get_regs, + .get_link = 
ethtool_op_get_link, + .get_msglevel = sis190_get_msglevel, + .set_msglevel = sis190_set_msglevel, +}; + +static int __devinit sis190_init_one(struct pci_dev *pdev, + const struct pci_device_id *ent) +{ + static int printed_version = 0; + struct sis190_private *tp; + struct net_device *dev; + void __iomem *ioaddr; + int i, rc; + + if (!printed_version) { + net_drv(&debug, KERN_INFO SIS190_DRIVER_NAME " loaded.\n"); + printed_version = 1; + } + + dev = sis190_init_board(pdev); + if (IS_ERR(dev)) { + rc = PTR_ERR(dev); + goto out; + } + + tp = netdev_priv(dev); + ioaddr = tp->mmio_addr; + + /* Get MAC address */ + /* Read node address from the EEPROM */ + + if (SIS_R32(ROMControl) & 0x4) { + for (i = 0; i < 3; i++) { + SIS_W16(RxMacAddr + 2*i, + sis190_read_eeprom(ioaddr, 3 + i)); + } + } + + for (i = 0; i < MAC_ADDR_LEN; i++) + dev->dev_addr[i] = SIS_R8(RxMacAddr + i); + + INIT_WORK(&tp->phy_task, sis190_phy_task, dev); + + dev->open = sis190_open; + dev->stop = sis190_close; + dev->get_stats = sis190_get_stats; + dev->tx_timeout = sis190_tx_timeout; + dev->watchdog_timeo = SIS190_TX_TIMEOUT; + dev->hard_start_xmit = sis190_start_xmit; + dev->set_multicast_list = sis190_set_rx_mode; + SET_ETHTOOL_OPS(dev, &sis190_ethtool_ops); + dev->irq = pdev->irq; + dev->base_addr = (unsigned long) 0xdead; + + spin_lock_init(&tp->lock); + pci_set_drvdata(pdev, dev); /* needed by sis190_release_board() */ + + rc = register_netdev(dev); + if (rc < 0) { + sis190_release_board(pdev); + goto out; + } + + net_probe(tp, KERN_INFO "%s: %s at %p (IRQ: %d), " + "%2.2x:%2.2x:%2.2x:%2.2x:%2.2x:%2.2x\n", + pci_name(pdev), sis_chip_info[ent->driver_data].name, + ioaddr, dev->irq, + dev->dev_addr[0], dev->dev_addr[1], + dev->dev_addr[2], dev->dev_addr[3], + dev->dev_addr[4], dev->dev_addr[5]); + + netif_carrier_off(dev); + + sis190_set_speed_auto(dev); +out: + return rc; +} + +static void __devexit sis190_remove_one(struct pci_dev *pdev) +{ + struct net_device *dev = pci_get_drvdata(pdev); + + unregister_netdev(dev); + 
sis190_release_board(pdev); + pci_set_drvdata(pdev, NULL); +} + +static struct pci_driver sis190_pci_driver = { + .name = DRV_NAME, + .id_table = sis190_pci_tbl, + .probe = sis190_init_one, + .remove = __devexit_p(sis190_remove_one), +}; + +static int __init sis190_init_module(void) +{ + return pci_module_init(&sis190_pci_driver); +} + +static void __exit sis190_cleanup_module(void) +{ + pci_unregister_driver(&sis190_pci_driver); +} + +module_init(sis190_init_module); +module_exit(sis190_cleanup_module); From 4405d3b5ef0a870e8d70ee4a3d050c89fcc40a86 Mon Sep 17 00:00:00 2001 From: Francois Romieu Date: Sat, 30 Jul 2005 13:09:20 +0200 Subject: [PATCH 025/584] [PATCH] sis190: netconsole support. netconsole support. This stuff should be factored out of every driver. Signed-off-by: Francois Romieu Signed-off-by: Jeff Garzik --- drivers/net/sis190.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/drivers/net/sis190.c b/drivers/net/sis190.c index fd303e7408ac..e374cf43fed0 100644 --- a/drivers/net/sis190.c +++ b/drivers/net/sis190.c @@ -629,6 +629,18 @@ out: return IRQ_RETVAL(handled); } +#ifdef CONFIG_NET_POLL_CONTROLLER +static void sis190_netpoll(struct net_device *dev) +{ + struct sis190_private *tp = netdev_priv(dev); + struct pci_dev *pdev = tp->pci_dev; + + disable_irq(pdev->irq); + sis190_interrupt(pdev->irq, dev, NULL); + enable_irq(pdev->irq); +} +#endif + static void sis190_free_rx_skb(struct sis190_private *tp, struct sk_buff **sk_buff, struct RxDesc *desc) { @@ -1300,6 +1312,9 @@ static int __devinit sis190_init_one(struct pci_dev *pdev, dev->tx_timeout = sis190_tx_timeout; dev->watchdog_timeo = SIS190_TX_TIMEOUT; dev->hard_start_xmit = sis190_start_xmit; +#ifdef CONFIG_NET_POLL_CONTROLLER + dev->poll_controller = sis190_netpoll; +#endif dev->set_multicast_list = sis190_set_rx_mode; SET_ETHTOOL_OPS(dev, &sis190_ethtool_ops); dev->irq = pdev->irq; From 43afb949a955a7d88f4baf43d5c676bf4c31ff6c Mon Sep 17 00:00:00 2001 From: Francois Romieu 
Date: Sat, 30 Jul 2005 13:10:21 +0200 Subject: [PATCH 026/584] [PATCH] sis190: ethtool/mii support. ethtool/mii support Bug: disabling autonegotiation and setting the link parameters at the same time does not provide the expected result. More investigation is needed. Note: past the initial probe/open time, the link is managed from user-space or accessed through sis190_phy_task, i.e. in a usermode context. Whence the very limited locking needs. Signed-off-by: Francois Romieu Signed-off-by: Jeff Garzik --- drivers/net/sis190.c | 60 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 60 insertions(+) diff --git a/drivers/net/sis190.c b/drivers/net/sis190.c index e374cf43fed0..ff4f24e5f59c 100644 --- a/drivers/net/sis190.c +++ b/drivers/net/sis190.c @@ -21,6 +21,7 @@ #include #include #include +#include #include #include #include @@ -230,6 +231,7 @@ struct sis190_private { struct work_struct phy_task; struct timer_list timer; u32 msg_enable; + struct mii_if_info mii_if; }; const static struct { @@ -308,6 +310,20 @@ static int mdio_read(void __iomem *ioaddr, int reg) return (u16) (SIS_R32(GMIIControl) >> EhnMIIdataShift); } +static void __mdio_write(struct net_device *dev, int phy_id, int reg, int val) +{ + struct sis190_private *tp = netdev_priv(dev); + + mdio_write(tp->mmio_addr, reg, val); +} + +static int __mdio_read(struct net_device *dev, int phy_id, int reg) +{ + struct sis190_private *tp = netdev_priv(dev); + + return mdio_read(tp->mmio_addr, reg); +} + static int sis190_read_eeprom(void __iomem *ioaddr, u32 reg) { unsigned int i; @@ -790,6 +806,8 @@ static void sis190_phy_task(void * data) void __iomem *ioaddr = tp->mmio_addr; u16 val; + rtnl_lock(); + val = mdio_read(ioaddr, MII_BMCR); if (val & BMCR_RESET) { // FIXME: needlessly high ? 
-- FR 02/07/2005 @@ -843,6 +861,8 @@ static void sis190_phy_task(void * data) p->msg); netif_carrier_on(dev); } + + rtnl_unlock(); } static void sis190_phy_timer(unsigned long __opaque) @@ -1150,6 +1170,13 @@ static struct net_device * __devinit sis190_init_board(struct pci_dev *pdev) tp->pci_dev = pdev; tp->mmio_addr = ioaddr; + tp->mii_if.dev = dev; + tp->mii_if.mdio_read = __mdio_read; + tp->mii_if.mdio_write = __mdio_write; + // tp->mii_if.phy_id = XXX; + tp->mii_if.phy_id_mask = 0x1f; + tp->mii_if.reg_num_mask = 0x1f; + sis190_irq_mask_and_ack(ioaddr); sis190_soft_reset(ioaddr); @@ -1216,6 +1243,20 @@ static void sis190_set_speed_auto(struct net_device *dev) BMCR_ANENABLE | BMCR_ANRESTART | BMCR_RESET); } +static int sis190_get_settings(struct net_device *dev, struct ethtool_cmd *cmd) +{ + struct sis190_private *tp = netdev_priv(dev); + + return mii_ethtool_gset(&tp->mii_if, cmd); +} + +static int sis190_set_settings(struct net_device *dev, struct ethtool_cmd *cmd) +{ + struct sis190_private *tp = netdev_priv(dev); + + return mii_ethtool_sset(&tp->mii_if, cmd); +} + static void sis190_get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *info) { @@ -1245,6 +1286,13 @@ static void sis190_get_regs(struct net_device *dev, struct ethtool_regs *regs, spin_unlock_irqrestore(&tp->lock, flags); } +static int sis190_nway_reset(struct net_device *dev) +{ + struct sis190_private *tp = netdev_priv(dev); + + return mii_nway_restart(&tp->mii_if); +} + static u32 sis190_get_msglevel(struct net_device *dev) { struct sis190_private *tp = netdev_priv(dev); @@ -1260,14 +1308,25 @@ static void sis190_set_msglevel(struct net_device *dev, u32 value) } static struct ethtool_ops sis190_ethtool_ops = { + .get_settings = sis190_get_settings, + .set_settings = sis190_set_settings, .get_drvinfo = sis190_get_drvinfo, .get_regs_len = sis190_get_regs_len, .get_regs = sis190_get_regs, .get_link = ethtool_op_get_link, .get_msglevel = sis190_get_msglevel, .set_msglevel = 
sis190_set_msglevel, + .nway_reset = sis190_nway_reset, }; +static int sis190_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) +{ + struct sis190_private *tp = netdev_priv(dev); + + return !netif_running(dev) ? -EINVAL : + generic_mii_ioctl(&tp->mii_if, if_mii(ifr), cmd, NULL); +} + static int __devinit sis190_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) { @@ -1308,6 +1367,7 @@ static int __devinit sis190_init_one(struct pci_dev *pdev, dev->open = sis190_open; dev->stop = sis190_close; + dev->do_ioctl = sis190_ioctl; dev->get_stats = sis190_get_stats; dev->tx_timeout = sis190_tx_timeout; dev->watchdog_timeo = SIS190_TX_TIMEOUT; From 92aab3c08eca4770228715d26c8234cca4fae9af Mon Sep 17 00:00:00 2001 From: Francois Romieu Date: Sat, 30 Jul 2005 13:11:18 +0200 Subject: [PATCH 027/584] [PATCH] sis190: add MAINTAINER entry. add MAINTAINER entry Signed-off-by: Francois Romieu Signed-off-by: Jeff Garzik --- MAINTAINERS | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index ec8433c39dee..b39faa31d76e 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -2080,6 +2080,12 @@ M: support@simtec.co.uk W: http://www.simtec.co.uk/products/EB2410ITX/ S: Supported +SIS 190 ETHERNET DRIVER +P: Francois Romieu +M: romieu@fr.zoreil.com +L: netdev@vger.kernel.org +S: Maintained + SIS 5513 IDE CONTROLLER DRIVER P: Lionel Bouton M: Lionel.Bouton@inet6.fr From 188f23ba94a618b12cc205306f02b4f5036c4fa7 Mon Sep 17 00:00:00 2001 From: Francois Romieu Date: Sat, 30 Jul 2005 13:11:43 +0200 Subject: [PATCH 028/584] [PATCH] sis190: merge some register related information from SiS driver. Merge some register related information from SiS driver. 
Signed-off-by: Francois Romieu Signed-off-by: Jeff Garzik --- drivers/net/sis190.c | 31 ++++++++++++++++++++++--------- 1 file changed, 22 insertions(+), 9 deletions(-) diff --git a/drivers/net/sis190.c b/drivers/net/sis190.c index ff4f24e5f59c..e67a5753882e 100644 --- a/drivers/net/sis190.c +++ b/drivers/net/sis190.c @@ -98,27 +98,36 @@ enum sis190_registers { TxControl = 0x00, TxDescStartAddr = 0x04, - TxNextDescAddr = 0x0c, // unused + rsv0 = 0x08, // reserved + TxSts = 0x0c, // unused (Control/Status) RxControl = 0x10, RxDescStartAddr = 0x14, - RxNextDescAddr = 0x1c, // unused + rsv1 = 0x18, // reserved + RxSts = 0x1c, // unused IntrStatus = 0x20, IntrMask = 0x24, IntrControl = 0x28, - IntrTimer = 0x2c, // unused - PMControl = 0x30, // unused + IntrTimer = 0x2c, // unused (Interupt Timer) + PMControl = 0x30, // unused (Power Mgmt Control/Status) + rsv2 = 0x34, // reserved ROMControl = 0x38, ROMInterface = 0x3c, StationControl = 0x40, GMIIControl = 0x44, + GIoCR = 0x48, // unused (GMAC IO Compensation) + GIoCtrl = 0x4c, // unused (GMAC IO Control) TxMacControl = 0x50, + TxLimit = 0x54, // unused (Tx MAC Timer/TryLimit) + RGDelay = 0x58, // unused (RGMII Tx Internal Delay) + rsv3 = 0x5c, // reserved RxMacControl = 0x60, RxMacAddr = 0x62, RxHashTable = 0x68, // Undocumented = 0x6c, - RxWakeOnLan = 0x70, - // Undocumented = 0x74, - RxMPSControl = 0x78, // unused + RxWolCtrl = 0x70, + RxWolData = 0x74, // unused (Rx WOL Data Access) + RxMPSControl = 0x78, // unused (Rx MPS Control) + rsv4 = 0x7c, // reserved }; enum sis190_register_content { @@ -783,8 +792,8 @@ static void sis190_hw_start(struct net_device *dev) SIS_W16(RxMacControl, 0x02); SIS_W32(RxHashTable, 0x0); SIS_W32(0x6c, 0x0); - SIS_W32(RxWakeOnLan, 0x0); - SIS_W32(0x74, 0x0); + SIS_W32(RxWolCtrl, 0x0); + SIS_W32(RxWolData, 0x0); SIS_PCI_COMMIT(); @@ -1205,6 +1214,10 @@ static void sis190_tx_timeout(struct net_device *dev) if (tmp8 & CmdTxEnb) SIS_W8(TxControl, tmp8 & ~CmdTxEnb); + + net_tx_err(tp, 
KERN_INFO "%s: Transmit timeout, status %08x %08x.\n", + dev->name, SIS_R32(TxControl), SIS_R32(TxSts)); + /* Disable interrupts by clearing the interrupt mask. */ SIS_W32(IntrMask, 0x0000); From 40292fb0f041362bca2f6ad975acedce4f6e3f3e Mon Sep 17 00:00:00 2001 From: Francois Romieu Date: Sat, 30 Jul 2005 13:12:06 +0200 Subject: [PATCH 029/584] [PATCH] sis190: remove hardcoded constants. Replace hardcoded constants by enumerated values in sis190_read_eeprom The names of the enumerated values have been extracted from SiS'official driver (v1.00.00 published on 2005/07/11). Signed-off-by: Francois Romieu Signed-off-by: Jeff Garzik --- drivers/net/sis190.c | 28 ++++++++++++++++++---------- 1 file changed, 18 insertions(+), 10 deletions(-) diff --git a/drivers/net/sis190.c b/drivers/net/sis190.c index e67a5753882e..d915507e90eb 100644 --- a/drivers/net/sis190.c +++ b/drivers/net/sis190.c @@ -5,7 +5,8 @@ Copyright (c) 2003, 2004 Jeff Garzik Copyright (c) 2003, 2004, 2005 Francois Romieu - Based on r8169.c, tg3.c, 8139cp.c, skge.c and probably even epic100.c. + Based on r8169.c, tg3.c, 8139cp.c, skge.c, epic100.c and SiS 190/191 + genuine driver. This software may be used and distributed according to the terms of the GNU General Public License (GPL), incorporated herein by reference. 
@@ -221,6 +222,16 @@ enum _DescStatusBit { RxSizeMask = 0x0000ffff }; +enum sis190_eeprom_access_register_bits { + EECS = 0x00000001, // unused + EECLK = 0x00000002, // unused + EEDO = 0x00000008, // unused + EEDI = 0x00000004, // unused + EEREQ = 0x00000080, + EEROP = 0x00000200, + EEWOP = 0x00000100 // unused +}; + struct sis190_private { void __iomem *mmio_addr; struct pci_dev *pci_dev; @@ -333,27 +344,24 @@ static int __mdio_read(struct net_device *dev, int phy_id, int reg) return mdio_read(tp->mmio_addr, reg); } -static int sis190_read_eeprom(void __iomem *ioaddr, u32 reg) +static u16 __devinit sis190_read_eeprom(void __iomem *ioaddr, u32 reg) { + u16 data = 0xffff; unsigned int i; - u16 data; - u32 val; if (!(SIS_R32(ROMControl) & 0x0002)) return 0; - val = (0x0080 | (0x2 << 8) | (reg << 10)); - - SIS_W32(ROMInterface, val); + SIS_W32(ROMInterface, EEREQ | EEROP | (reg << 10)); for (i = 0; i < 200; i++) { - if (!(SIS_R32(ROMInterface) & 0x0080)) + if (!(SIS_R32(ROMInterface) & EEREQ)) { + data = (SIS_R32(ROMInterface) & 0xffff0000) >> 16; break; + } msleep(1); } - data = (u16) ((SIS_R32(ROMInterface) & 0xffff0000) >> 16); - return data; } From 830fb7d23217ae748df0b16d4d419110810036b7 Mon Sep 17 00:00:00 2001 From: Francois Romieu Date: Sat, 30 Jul 2005 13:12:37 +0200 Subject: [PATCH 030/584] [PATCH] sis190: initialisation of MAC address. Extract some mac addr code from SiS's driver. Some magic may hide beyond the isa bridge. The Rx mac control register is now set without condition. Note: good or bad, this part of the code is quite close to sis900.c. 
Signed-off-by: Francois Romieu Signed-off-by: Jeff Garzik --- drivers/net/sis190.c | 154 ++++++++++++++++++++++++++++++++++++++----- 1 file changed, 138 insertions(+), 16 deletions(-) diff --git a/drivers/net/sis190.c b/drivers/net/sis190.c index d915507e90eb..1e8e7111c261 100644 --- a/drivers/net/sis190.c +++ b/drivers/net/sis190.c @@ -232,6 +232,14 @@ enum sis190_eeprom_access_register_bits { EEWOP = 0x00000100 // unused }; +/* EEPROM Addresses */ +enum sis190_eeprom_address { + EEPROMSignature = 0x00, + EEPROMCLK = 0x01, // unused + EEPROMInfo = 0x02, + EEPROMMACAddr = 0x03 +}; + struct sis190_private { void __iomem *mmio_addr; struct pci_dev *pci_dev; @@ -1240,6 +1248,125 @@ static void sis190_tx_timeout(struct net_device *dev) netif_wake_queue(dev); } +static int __devinit sis190_get_mac_addr_from_eeprom(struct pci_dev *pdev, + struct net_device *dev) +{ + struct sis190_private *tp = netdev_priv(dev); + void __iomem *ioaddr = tp->mmio_addr; + u16 sig; + int i; + + net_probe(tp, KERN_INFO "%s: Read MAC address from EEPROM\n", + pci_name(pdev)); + + /* Check to see if there is a sane EEPROM */ + sig = (u16) sis190_read_eeprom(ioaddr, EEPROMSignature); + + if ((sig == 0xffff) || (sig == 0x0000)) { + net_probe(tp, KERN_INFO "%s: Error EEPROM read %x.\n", + pci_name(pdev), sig); + return -EIO; + } + + /* Get MAC address from EEPROM */ + for (i = 0; i < MAC_ADDR_LEN / 2; i++) { + u16 w = sis190_read_eeprom(ioaddr, EEPROMMACAddr + i); + + ((u16 *)dev->dev_addr)[0] = le16_to_cpu(w); + } + + return 0; +} + +/** + * sis190_get_mac_addr_from_apc - Get MAC address for SiS965 model + * @pdev: PCI device + * @dev: network device to get address for + * + * SiS965 model, use APC CMOS RAM to store MAC address. + * APC CMOS RAM is accessed through ISA bridge. + * MAC address is read into @net_dev->dev_addr. 
+ */ +static int __devinit sis190_get_mac_addr_from_apc(struct pci_dev *pdev, + struct net_device *dev) +{ + struct sis190_private *tp = netdev_priv(dev); + struct pci_dev *isa_bridge; + u8 reg, tmp8; + int i; + + net_probe(tp, KERN_INFO "%s: Read MAC address from APC.\n", + pci_name(pdev)); + + isa_bridge = pci_get_device(PCI_VENDOR_ID_SI, 0x0965, NULL); + if (!isa_bridge) { + net_probe(tp, KERN_INFO "%s: Can not find ISA bridge.\n", + pci_name(pdev)); + return -EIO; + } + + /* Enable port 78h & 79h to access APC Registers. */ + pci_read_config_byte(isa_bridge, 0x48, &tmp8); + reg = (tmp8 & ~0x02); + pci_write_config_byte(isa_bridge, 0x48, reg); + udelay(50); + pci_read_config_byte(isa_bridge, 0x48, &reg); + + for (i = 0; i < MAC_ADDR_LEN; i++) { + outb(0x9 + i, 0x78); + dev->dev_addr[i] = inb(0x79); + } + + outb(0x12, 0x78); + reg = inb(0x79); + + /* Restore the value to ISA Bridge */ + pci_write_config_byte(isa_bridge, 0x48, tmp8); + pci_dev_put(isa_bridge); + + return 0; +} + +/** + * sis190_init_rxfilter - Initialize the Rx filter + * @dev: network device to initialize + * + * Set receive filter address to our MAC address + * and enable packet filtering. + */ +static inline void sis190_init_rxfilter(struct net_device *dev) +{ + struct sis190_private *tp = netdev_priv(dev); + void __iomem *ioaddr = tp->mmio_addr; + u16 ctl; + int i; + + ctl = SIS_R16(RxMacControl); + /* + * Disable packet filtering before setting filter. + * Note: SiS's driver writes 32 bits but RxMacControl is 16 bits + * only and followed by RxMacAddr (6 bytes). Strange. -- FR + */ + SIS_W16(RxMacControl, ctl & ~0x0f00); + + for (i = 0; i < MAC_ADDR_LEN; i++) + SIS_W8(RxMacAddr + i, dev->dev_addr[i]); + + SIS_W16(RxMacControl, ctl); + SIS_PCI_COMMIT(); +} + +static int sis190_get_mac_addr(struct pci_dev *pdev, struct net_device *dev) +{ + u8 from; + + pci_read_config_byte(pdev, 0x73, &from); + + return (from & 0x00000001) ? 
+ sis190_get_mac_addr_from_apc(pdev, dev) : + sis190_get_mac_addr_from_eeprom(pdev, dev); +} + static void sis190_set_speed_auto(struct net_device *dev) { struct sis190_private *tp = netdev_priv(dev); @@ -1355,7 +1482,7 @@ static int __devinit sis190_init_one(struct pci_dev *pdev, struct sis190_private *tp; struct net_device *dev; void __iomem *ioaddr; - int i, rc; + int rc; if (!printed_version) { net_drv(&debug, KERN_INFO SIS190_DRIVER_NAME " loaded.\n"); @@ -1371,18 +1498,11 @@ static int __devinit sis190_init_one(struct pci_dev *pdev, tp = netdev_priv(dev); ioaddr = tp->mmio_addr; - /* Get MAC address */ - /* Read node address from the EEPROM */ + rc = sis190_get_mac_addr(pdev, dev); + if (rc < 0) + goto err_release_board; - if (SIS_R32(ROMControl) & 0x4) { - for (i = 0; i < 3; i++) { - SIS_W16(RxMacAddr + 2*i, - sis190_read_eeprom(ioaddr, 3 + i)); - } - } - - for (i = 0; i < MAC_ADDR_LEN; i++) - dev->dev_addr[i] = SIS_R8(RxMacAddr + i); + sis190_init_rxfilter(dev); INIT_WORK(&tp->phy_task, sis190_phy_task, dev); @@ -1403,10 +1523,8 @@ static int __devinit sis190_init_one(struct pci_dev *pdev, spin_lock_init(&tp->lock); rc = register_netdev(dev); - if (rc < 0) { - sis190_release_board(pdev); - goto out; - } + if (rc < 0) + goto err_release_board; pci_set_drvdata(pdev, dev); @@ -1423,6 +1541,10 @@ static int __devinit sis190_init_one(struct pci_dev *pdev, sis190_set_speed_auto(dev); out: return rc; + +err_release_board: + sis190_release_board(pdev); + goto out; } static void __devexit sis190_remove_one(struct pci_dev *pdev) From 8b5641d4f1f7376257783b79f121a19ccd86b56b Mon Sep 17 00:00:00 2001 From: Francois Romieu Date: Sat, 30 Jul 2005 13:13:03 +0200 Subject: [PATCH 031/584] [PATCH] sis190: the size of the Rx buffer is constrained Add a restriction to the size of the Rx buffer. SiS driver forces the size of any Rx buffer to be a multiple of 64 bits. I would not be surprised that it goes along with some alignment issues which have been experienced before. 
So far it does not make much of a difference (both drivers use 1536 bytes buffer). Signed-off-by: Francois Romieu Signed-off-by: Jeff Garzik --- drivers/net/sis190.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/net/sis190.c b/drivers/net/sis190.c index 1e8e7111c261..2229698debbd 100644 --- a/drivers/net/sis190.c +++ b/drivers/net/sis190.c @@ -69,6 +69,7 @@ #define TX_RING_BYTES (NUM_TX_DESC * sizeof(struct TxDesc)) #define RX_RING_BYTES (NUM_RX_DESC * sizeof(struct RxDesc)) #define RX_BUF_SIZE 1536 +#define RX_BUF_MASK 0xfff8 #define SIS190_REGS_SIZE 0x80 #define SIS190_TX_TIMEOUT (6*HZ) @@ -400,7 +401,7 @@ static inline void sis190_give_to_asic(struct RxDesc *desc, u32 rx_buf_sz) u32 eor = le32_to_cpu(desc->size) & RingEnd; desc->PSize = 0x0; - desc->size = cpu_to_le32(rx_buf_sz | eor); + desc->size = cpu_to_le32((rx_buf_sz & RX_BUF_MASK) | eor); wmb(); desc->status = cpu_to_le32(OWNbit | INTbit); } @@ -924,6 +925,11 @@ static void sis190_set_rxbufsize(struct sis190_private *tp, unsigned int mtu = dev->mtu; tp->rx_buf_sz = (mtu > RX_BUF_SIZE) ? mtu + ETH_HLEN + 8 : RX_BUF_SIZE; + /* RxDesc->size has a licence to kill the lower bits */ + if (tp->rx_buf_sz & 0x07) { + tp->rx_buf_sz += 8; + tp->rx_buf_sz &= RX_BUF_MASK; + } } static int sis190_open(struct net_device *dev) From bcad5e537840ef6fa28b2f1e126fefb4c39a7248 Mon Sep 17 00:00:00 2001 From: Francois Romieu Date: Sat, 30 Jul 2005 13:13:47 +0200 Subject: [PATCH 032/584] [PATCH] sis190: extract bits definition from SiS driver. extract bits definition from SiS driver - fix the Rx stats; - minor pieces of documentation. 
Signed-off-by: Francois Romieu Signed-off-by: Jeff Garzik --- drivers/net/sis190.c | 102 +++++++++++++++++++++++++++++++------------ 1 file changed, 74 insertions(+), 28 deletions(-) diff --git a/drivers/net/sis190.c b/drivers/net/sis190.c index 2229698debbd..3c33b2d14852 100644 --- a/drivers/net/sis190.c +++ b/drivers/net/sis190.c @@ -64,8 +64,8 @@ #define MAC_ADDR_LEN 6 -#define NUM_TX_DESC 64 -#define NUM_RX_DESC 64 +#define NUM_TX_DESC 64 /* [8..1024] */ +#define NUM_RX_DESC 64 /* [8..8192] */ #define TX_RING_BYTES (NUM_TX_DESC * sizeof(struct TxDesc)) #define RX_RING_BYTES (NUM_RX_DESC * sizeof(struct RxDesc)) #define RX_BUF_SIZE 1536 @@ -149,12 +149,6 @@ enum sis190_register_content { RxHalt = 0x00000002, TxHalt = 0x00000001, - /* RxStatusDesc */ - RxRES = 0x00200000, // unused - RxCRC = 0x00080000, - RxRUNT = 0x00100000, // unused - RxRWT = 0x00400000, // unused - /* {Rx/Tx}CmdBits */ CmdReset = 0x10, CmdRxEnb = 0x08, // unused @@ -212,15 +206,55 @@ struct RxDesc { enum _DescStatusBit { /* _Desc.status */ - OWNbit = 0x80000000, - INTbit = 0x40000000, - DEFbit = 0x00200000, - CRCbit = 0x00020000, - PADbit = 0x00010000, + OWNbit = 0x80000000, // RXOWN/TXOWN + INTbit = 0x40000000, // RXINT/TXINT + CRCbit = 0x00020000, // CRCOFF/CRCEN + PADbit = 0x00010000, // PREADD/PADEN /* _Desc.size */ - RingEnd = (1 << 31), - /* _Desc.PSize */ + RingEnd = 0x80000000, + /* TxDesc.status */ + LSEN = 0x08000000, // TSO ? 
-- FR + IPCS = 0x04000000, + TCPCS = 0x02000000, + UDPCS = 0x01000000, + BSTEN = 0x00800000, + EXTEN = 0x00400000, + DEFEN = 0x00200000, + BKFEN = 0x00100000, + CRSEN = 0x00080000, + COLEN = 0x00040000, + THOL3 = 0x30000000, + THOL2 = 0x20000000, + THOL1 = 0x10000000, + THOL0 = 0x00000000, + /* RxDesc.status */ + IPON = 0x20000000, + TCPON = 0x10000000, + UDPON = 0x08000000, + Wakup = 0x00400000, + Magic = 0x00200000, + Pause = 0x00100000, + DEFbit = 0x00200000, + BCAST = 0x000c0000, + MCAST = 0x00080000, + UCAST = 0x00040000, + /* RxDesc.PSize */ + TAGON = 0x80000000, + RxDescCountMask = 0x7f000000, // multi-desc pkt when > 1 ? -- FR + ABORT = 0x00800000, + SHORT = 0x00400000, + LIMIT = 0x00200000, + MIIER = 0x00100000, + OVRUN = 0x00080000, + NIBON = 0x00040000, + COLON = 0x00020000, + CRCOK = 0x00010000, RxSizeMask = 0x0000ffff + /* + * The asic could apparently do vlan, TSO, jumbo (sis191 only) and + * provide two (unused with Linux) Tx queues. No publically + * available documentation alas. + */ }; enum sis190_eeprom_access_register_bits { @@ -487,6 +521,26 @@ static inline int sis190_try_rx_copy(struct sk_buff **sk_buff, int pkt_size, return ret; } +static inline int sis190_rx_pkt_err(u32 status, struct net_device_stats *stats) +{ +#define ErrMask (OVRUN | SHORT | LIMIT | MIIER | NIBON | COLON | ABORT) + + if ((status & CRCOK) && !(status & ErrMask)) + return 0; + + if (!(status & CRCOK)) + stats->rx_crc_errors++; + else if (status & OVRUN) + stats->rx_over_errors++; + else if (status & (SHORT | LIMIT)) + stats->rx_length_errors++; + else if (status & (MIIER | NIBON | COLON)) + stats->rx_frame_errors++; + + stats->rx_errors++; + return -1; +} + static int sis190_rx_interrupt(struct net_device *dev, struct sis190_private *tp, void __iomem *ioaddr) { @@ -510,19 +564,9 @@ static int sis190_rx_interrupt(struct net_device *dev, // net_intr(tp, KERN_INFO "%s: Rx PSize = %08x.\n", dev->name, // status); - if (status & RxCRC) { - net_intr(tp, KERN_INFO "%s: bad crc. 
status = %08x.\n", - dev->name, status); - stats->rx_errors++; - stats->rx_crc_errors++; + if (sis190_rx_pkt_err(status, stats) < 0) sis190_give_to_asic(desc, tp->rx_buf_sz); - } else if (!(status & PADbit)) { - net_intr(tp, KERN_INFO "%s: bad pad. status = %08x.\n", - dev->name, status); - stats->rx_errors++; - stats->rx_length_errors++; - sis190_give_to_asic(desc, tp->rx_buf_sz); - } else { + else { struct sk_buff *skb = tp->Rx_skbuff[entry]; int pkt_size = (status & RxSizeMask) - 4; void (*pci_action)(struct pci_dev *, dma_addr_t, @@ -559,8 +603,10 @@ static int sis190_rx_interrupt(struct net_device *dev, sis190_rx_skb(skb); dev->last_rx = jiffies; - stats->rx_bytes += pkt_size; stats->rx_packets++; + stats->rx_bytes += pkt_size; + if ((status & BCAST) == MCAST) + stats->multicast++; } } count = cur_rx - tp->cur_rx; From 3cec93c7124c3037dbff826d6c08e9758a301cd7 Mon Sep 17 00:00:00 2001 From: Francois Romieu Date: Sat, 30 Jul 2005 13:14:18 +0200 Subject: [PATCH 033/584] [PATCH] sis190: add endian annotations. Add endian annotations. 
Signed-off-by: Alexey Dobriyan Signed-off-by: Francois Romieu Signed-off-by: Jeff Garzik --- drivers/net/sis190.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/drivers/net/sis190.c b/drivers/net/sis190.c index 3c33b2d14852..2387d4844037 100644 --- a/drivers/net/sis190.c +++ b/drivers/net/sis190.c @@ -191,17 +191,17 @@ enum sis190_register_content { }; struct TxDesc { - u32 PSize; - u32 status; - u32 addr; - u32 size; + __le32 PSize; + __le32 status; + __le32 addr; + __le32 size; }; struct RxDesc { - u32 PSize; - u32 status; - u32 addr; - u32 size; + __le32 PSize; + __le32 status; + __le32 addr; + __le32 size; }; enum _DescStatusBit { @@ -1322,7 +1322,7 @@ static int __devinit sis190_get_mac_addr_from_eeprom(struct pci_dev *pdev, /* Get MAC address from EEPROM */ for (i = 0; i < MAC_ADDR_LEN / 2; i++) { - u16 w = sis190_read_eeprom(ioaddr, EEPROMMACAddr + i); + __le16 w = sis190_read_eeprom(ioaddr, EEPROMMACAddr + i); ((u16 *)dev->dev_addr)[0] = le16_to_cpu(w); } From 9ede109bbe93d5bbe4271e346106847fbfea95a1 Mon Sep 17 00:00:00 2001 From: Francois Romieu Date: Sat, 30 Jul 2005 13:14:38 +0200 Subject: [PATCH 034/584] [PATCH] sis190: allow a non-hardcoded ID for the PHY. Allow a non-hardcoded ID for the PHY This is the first step before the driver probes for the PHY address. 
Signed-off-by: Francois Romieu Signed-off-by: Jeff Garzik --- drivers/net/sis190.c | 38 ++++++++++++++++++-------------------- 1 file changed, 18 insertions(+), 20 deletions(-) diff --git a/drivers/net/sis190.c b/drivers/net/sis190.c index 2387d4844037..f78799c4235e 100644 --- a/drivers/net/sis190.c +++ b/drivers/net/sis190.c @@ -354,21 +354,17 @@ static void __mdio_cmd(void __iomem *ioaddr, u32 ctl) printk(KERN_ERR PFX "PHY command failed !\n"); } -static void mdio_write(void __iomem *ioaddr, int reg, int val) +static void mdio_write(void __iomem *ioaddr, int phy_id, int reg, int val) { - u32 pmd = 1; - __mdio_cmd(ioaddr, EhnMIIreq | EhnMIIwrite | - (((u32) reg) << EhnMIIregShift) | (pmd << EhnMIIpmdShift) | + (((u32) reg) << EhnMIIregShift) | (phy_id << EhnMIIpmdShift) | (((u32) val) << EhnMIIdataShift)); } -static int mdio_read(void __iomem *ioaddr, int reg) +static int mdio_read(void __iomem *ioaddr, int phy_id, int reg) { - u32 pmd = 1; - __mdio_cmd(ioaddr, EhnMIIreq | EhnMIIread | - (((u32) reg) << EhnMIIregShift) | (pmd << EhnMIIpmdShift)); + (((u32) reg) << EhnMIIregShift) | (phy_id << EhnMIIpmdShift)); return (u16) (SIS_R32(GMIIControl) >> EhnMIIdataShift); } @@ -377,14 +373,14 @@ static void __mdio_write(struct net_device *dev, int phy_id, int reg, int val) { struct sis190_private *tp = netdev_priv(dev); - mdio_write(tp->mmio_addr, reg, val); + mdio_write(tp->mmio_addr, phy_id, reg, val); } static int __mdio_read(struct net_device *dev, int phy_id, int reg) { struct sis190_private *tp = netdev_priv(dev); - return mdio_read(tp->mmio_addr, reg); + return mdio_read(tp->mmio_addr, phy_id, reg); } static u16 __devinit sis190_read_eeprom(void __iomem *ioaddr, u32 reg) @@ -876,18 +872,19 @@ static void sis190_phy_task(void * data) struct net_device *dev = data; struct sis190_private *tp = netdev_priv(dev); void __iomem *ioaddr = tp->mmio_addr; + int phy_id = tp->mii_if.phy_id; u16 val; rtnl_lock(); - val = mdio_read(ioaddr, MII_BMCR); + val = mdio_read(ioaddr, 
phy_id, MII_BMCR); if (val & BMCR_RESET) { // FIXME: needlessly high ? -- FR 02/07/2005 mod_timer(&tp->timer, jiffies + HZ/10); - } else if (!(mdio_read(ioaddr, MII_BMSR) & BMSR_ANEGCOMPLETE)) { + } else if (!(mdio_read(ioaddr, phy_id, MII_BMSR) & BMSR_ANEGCOMPLETE)) { net_link(tp, KERN_WARNING "%s: PHY reset until link up.\n", dev->name); - mdio_write(ioaddr, MII_BMCR, val | BMCR_RESET); + mdio_write(ioaddr, phy_id, MII_BMCR, val | BMCR_RESET); mod_timer(&tp->timer, jiffies + SIS190_PHY_TIMEOUT); } else { /* Rejoice ! */ @@ -917,10 +914,10 @@ static void sis190_phy_task(void * data) { 0, "unknown", 0x0000 } }, *p; - val = mdio_read(ioaddr, 0x1f); + val = mdio_read(ioaddr, phy_id, 0x1f); net_link(tp, KERN_INFO "%s: mii ext = %04x.\n", dev->name, val); - val = mdio_read(ioaddr, MII_LPA); + val = mdio_read(ioaddr, phy_id, MII_LPA); net_link(tp, KERN_INFO "%s: mii lpa = %04x.\n", dev->name, val); for (p = reg31; p->ctl; p++) { @@ -1250,7 +1247,7 @@ static struct net_device * __devinit sis190_init_board(struct pci_dev *pdev) tp->mii_if.dev = dev; tp->mii_if.mdio_read = __mdio_read; tp->mii_if.mdio_write = __mdio_write; - // tp->mii_if.phy_id = XXX; + tp->mii_if.phy_id = 1; tp->mii_if.phy_id_mask = 0x1f; tp->mii_if.reg_num_mask = 0x1f; @@ -1423,23 +1420,24 @@ static void sis190_set_speed_auto(struct net_device *dev) { struct sis190_private *tp = netdev_priv(dev); void __iomem *ioaddr = tp->mmio_addr; + int phy_id = tp->mii_if.phy_id; int val; net_link(tp, KERN_INFO "%s: Enabling Auto-negotiation.\n", dev->name); - val = mdio_read(ioaddr, MII_ADVERTISE); + val = mdio_read(ioaddr, phy_id, MII_ADVERTISE); // Enable 10/100 Full/Half Mode, leave MII_ADVERTISE bit4:0 // unchanged. - mdio_write(ioaddr, MII_ADVERTISE, (val & ADVERTISE_SLCT) | + mdio_write(ioaddr, phy_id, MII_ADVERTISE, (val & ADVERTISE_SLCT) | ADVERTISE_100FULL | ADVERTISE_10FULL | ADVERTISE_100HALF | ADVERTISE_10HALF); // Enable 1000 Full Mode. 
- mdio_write(ioaddr, MII_CTRL1000, ADVERTISE_1000FULL); + mdio_write(ioaddr, phy_id, MII_CTRL1000, ADVERTISE_1000FULL); // Enable auto-negotiation and restart auto-negotiation. - mdio_write(ioaddr, MII_BMCR, + mdio_write(ioaddr, phy_id, MII_BMCR, BMCR_ANENABLE | BMCR_ANRESTART | BMCR_RESET); } From fc10c39d7920b1db9ad2d80fa845896e529355dc Mon Sep 17 00:00:00 2001 From: Francois Romieu Date: Sat, 30 Jul 2005 13:15:01 +0200 Subject: [PATCH 035/584] [PATCH] sis190: dummy read is required by the status register Add a dummy read before accessing the status register SiS driver suggests it. Signed-off-by: Francois Romieu Signed-off-by: Jeff Garzik --- drivers/net/sis190.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/net/sis190.c b/drivers/net/sis190.c index f78799c4235e..392110ede7b7 100644 --- a/drivers/net/sis190.c +++ b/drivers/net/sis190.c @@ -383,6 +383,12 @@ static int __mdio_read(struct net_device *dev, int phy_id, int reg) return mdio_read(tp->mmio_addr, phy_id, reg); } +static u16 mdio_read_latched(void __iomem *ioaddr, int phy_id, int reg) +{ + mdio_read(ioaddr, phy_id, reg); + return mdio_read(ioaddr, phy_id, reg); +} + static u16 __devinit sis190_read_eeprom(void __iomem *ioaddr, u32 reg) { u16 data = 0xffff; @@ -881,7 +887,8 @@ static void sis190_phy_task(void * data) if (val & BMCR_RESET) { // FIXME: needlessly high ? -- FR 02/07/2005 mod_timer(&tp->timer, jiffies + HZ/10); - } else if (!(mdio_read(ioaddr, phy_id, MII_BMSR) & BMSR_ANEGCOMPLETE)) { + } else if (!(mdio_read_latched(ioaddr, phy_id, MII_BMSR) & + BMSR_ANEGCOMPLETE)) { net_link(tp, KERN_WARNING "%s: PHY reset until link up.\n", dev->name); mdio_write(ioaddr, phy_id, MII_BMCR, val | BMCR_RESET); From fcb9821d3dd62ede360e7991734ac22b79e9a4f0 Mon Sep 17 00:00:00 2001 From: Francois Romieu Date: Sat, 30 Jul 2005 13:15:22 +0200 Subject: [PATCH 036/584] [PATCH] sis190: new PHY detection code. New PHY detection code. 
Signed-off-by: Francois Romieu Signed-off-by: Jeff Garzik --- drivers/net/sis190.c | 216 +++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 209 insertions(+), 7 deletions(-) diff --git a/drivers/net/sis190.c b/drivers/net/sis190.c index 392110ede7b7..fe2ab6fd1384 100644 --- a/drivers/net/sis190.c +++ b/drivers/net/sis190.c @@ -43,6 +43,10 @@ #define net_tx_err(p, arg...) if (netif_msg_tx_err(p)) \ printk(arg) +#define PHY_MAX_ADDR 32 +#define PHY_ID_ANY 0x1f +#define MII_REG_ANY 0x1f + #ifdef CONFIG_SIS190_NAPI #define NAPI_SUFFIX "-NAPI" #else @@ -295,6 +299,33 @@ struct sis190_private { struct timer_list timer; u32 msg_enable; struct mii_if_info mii_if; + struct list_head first_phy; +}; + +struct sis190_phy { + struct list_head list; + int phy_id; + u16 id[2]; + u16 status; + u8 type; +}; + +enum sis190_phy_type { + UNKNOWN = 0x00, + HOME = 0x01, + LAN = 0x02, + MIX = 0x03 +}; + +static struct mii_chip_info { + const char *name; + u16 id[2]; + unsigned int type; +} mii_chip_table[] = { + { "Broadcom PHY BCM5461", { 0x0020, 0x60c0 }, LAN }, + { "Agere PHY ET1101B", { 0x0282, 0xf010 }, LAN }, + { "Marvell PHY 88E1111", { 0x0141, 0x0cc0 }, LAN }, + { NULL, } }; const static struct { @@ -1174,6 +1205,177 @@ static struct net_device_stats *sis190_get_stats(struct net_device *dev) return &tp->stats; } +static void sis190_free_phy(struct list_head *first_phy) +{ + struct sis190_phy *cur, *next; + + list_for_each_entry_safe(cur, next, first_phy, list) { + kfree(cur); + } +} + +/** + * sis190_default_phy - Select default PHY for sis190 mac. + * @dev: the net device to probe for + * + * Select first detected PHY with link as default. + * If no one is link on, select PHY whose types is HOME as default. + * If HOME doesn't exist, select LAN. 
+ */ +static u16 sis190_default_phy(struct net_device *dev) +{ + struct sis190_phy *phy, *phy_home, *phy_default, *phy_lan; + struct sis190_private *tp = netdev_priv(dev); + struct mii_if_info *mii_if = &tp->mii_if; + void __iomem *ioaddr = tp->mmio_addr; + u16 status; + + phy_home = phy_default = phy_lan = NULL; + + list_for_each_entry(phy, &tp->first_phy, list) { + status = mdio_read_latched(ioaddr, phy->phy_id, MII_BMSR); + + // Link ON & Not select default PHY & not ghost PHY. + if ((status & BMSR_LSTATUS) && + !phy_default && + (phy->type != UNKNOWN)) { + phy_default = phy; + } else { + status = mdio_read(ioaddr, phy->phy_id, MII_BMCR); + mdio_write(ioaddr, phy->phy_id, MII_BMCR, + status | BMCR_ANENABLE | BMCR_ISOLATE); + if (phy->type == HOME) + phy_home = phy; + else if (phy->type == LAN) + phy_lan = phy; + } + } + + if (!phy_default) { + if (phy_home) + phy_default = phy_home; + else if (phy_lan) + phy_default = phy_lan; + else + phy_default = list_entry(&tp->first_phy, + struct sis190_phy, list); + } + + if (mii_if->phy_id != phy_default->phy_id) { + mii_if->phy_id = phy_default->phy_id; + net_probe(tp, KERN_INFO + "%s: Using transceiver at address %d as default.\n", + dev->name, mii_if->phy_id); + } + + status = mdio_read(ioaddr, mii_if->phy_id, MII_BMCR); + status &= (~BMCR_ISOLATE); + + mdio_write(ioaddr, mii_if->phy_id, MII_BMCR, status); + status = mdio_read_latched(ioaddr, mii_if->phy_id, MII_BMSR); + + return status; +} + +static void sis190_init_phy(struct net_device *dev, struct sis190_private *tp, + struct sis190_phy *phy, unsigned int phy_id, + u16 mii_status) +{ + void __iomem *ioaddr = tp->mmio_addr; + struct mii_chip_info *p; + + INIT_LIST_HEAD(&phy->list); + phy->status = mii_status; + phy->phy_id = phy_id; + + phy->id[0] = mdio_read(ioaddr, phy_id, MII_PHYSID1); + phy->id[1] = mdio_read(ioaddr, phy_id, MII_PHYSID2); + + for (p = mii_chip_table; p->type; p++) { + if ((p->id[0] == phy->id[0]) && + (p->id[1] == (phy->id[1] & 0xfff0))) { + 
break; + } + } + + if (p->id[1]) { + phy->type = (p->type == MIX) ? + ((mii_status & (BMSR_100FULL | BMSR_100HALF)) ? + LAN : HOME) : p->type; + } else + phy->type = UNKNOWN; + + net_probe(tp, KERN_INFO "%s: %s transceiver at address %d.\n", + dev->name, (phy->type == UNKNOWN) ? "Unknown PHY" : p->name, + phy_id); +} + +/** + * sis190_mii_probe - Probe MII PHY for sis190 + * @dev: the net device to probe for + * + * Search for total of 32 possible mii phy addresses. + * Identify and set current phy if found one, + * return error if it failed to found. + */ +static int __devinit sis190_mii_probe(struct net_device *dev) +{ + struct sis190_private *tp = netdev_priv(dev); + struct mii_if_info *mii_if = &tp->mii_if; + void __iomem *ioaddr = tp->mmio_addr; + int phy_id; + int rc = 0; + + INIT_LIST_HEAD(&tp->first_phy); + + for (phy_id = 0; phy_id < PHY_MAX_ADDR; phy_id++) { + struct sis190_phy *phy; + u16 status; + + status = mdio_read_latched(ioaddr, phy_id, MII_BMSR); + + // Try next mii if the current one is not accessible. 
+ if (status == 0xffff || status == 0x0000) + continue; + + phy = kmalloc(sizeof(*phy), GFP_KERNEL); + if (!phy) { + sis190_free_phy(&tp->first_phy); + rc = -ENOMEM; + goto out; + } + + sis190_init_phy(dev, tp, phy, phy_id, status); + + list_add(&tp->first_phy, &phy->list); + } + + if (list_empty(&tp->first_phy)) { + net_probe(tp, KERN_INFO "%s: No MII transceivers found!\n", + dev->name); + rc = -EIO; + goto out; + } + + /* Select default PHY for mac */ + sis190_default_phy(dev); + + mii_if->dev = dev; + mii_if->mdio_read = __mdio_read; + mii_if->mdio_write = __mdio_write; + mii_if->phy_id_mask = PHY_ID_ANY; + mii_if->reg_num_mask = MII_REG_ANY; +out: + return rc; +} + +static void __devexit sis190_mii_remove(struct net_device *dev) +{ + struct sis190_private *tp = netdev_priv(dev); + + sis190_free_phy(&tp->first_phy); +} + static void sis190_release_board(struct pci_dev *pdev) { struct net_device *dev = pci_get_drvdata(pdev); @@ -1251,13 +1453,6 @@ static struct net_device * __devinit sis190_init_board(struct pci_dev *pdev) tp->pci_dev = pdev; tp->mmio_addr = ioaddr; - tp->mii_if.dev = dev; - tp->mii_if.mdio_read = __mdio_read; - tp->mii_if.mdio_write = __mdio_write; - tp->mii_if.phy_id = 1; - tp->mii_if.phy_id_mask = 0x1f; - tp->mii_if.reg_num_mask = 0x1f; - sis190_irq_mask_and_ack(ioaddr); sis190_soft_reset(ioaddr); @@ -1585,6 +1780,10 @@ static int __devinit sis190_init_one(struct pci_dev *pdev, pci_set_drvdata(pdev, dev); + rc = sis190_mii_probe(dev); + if (rc < 0) + goto err_unregister_dev; + net_probe(tp, KERN_INFO "%s: %s at %p (IRQ: %d), " "%2.2x:%2.2x:%2.2x:%2.2x:%2.2x:%2.2x\n", pci_name(pdev), sis_chip_info[ent->driver_data].name, @@ -1599,6 +1798,8 @@ static int __devinit sis190_init_one(struct pci_dev *pdev, out: return rc; +err_unregister_dev: + unregister_netdev(dev); err_release_board: sis190_release_board(pdev); goto out; @@ -1608,6 +1809,7 @@ static void __devexit sis190_remove_one(struct pci_dev *pdev) { struct net_device *dev = 
pci_get_drvdata(pdev); + sis190_mii_remove(dev); unregister_netdev(dev); sis190_release_board(pdev); pci_set_drvdata(pdev, NULL); From 560d3d521decc6d05dc0d6e007f1d2e1d3048102 Mon Sep 17 00:00:00 2001 From: Francois Romieu Date: Sat, 30 Jul 2005 13:15:51 +0200 Subject: [PATCH 037/584] [PATCH] sis190: PHY identifier for the K8S-MX motherboard. Added PHY identifier for the Asus K8S-MX motherboard Note: the same ID appears in the sis900 driver. Signed-off-by: Lars Vahlenberg Signed-off-by: Francois Romieu Signed-off-by: Jeff Garzik --- drivers/net/sis190.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/sis190.c b/drivers/net/sis190.c index fe2ab6fd1384..84bc2299f93b 100644 --- a/drivers/net/sis190.c +++ b/drivers/net/sis190.c @@ -325,6 +325,7 @@ static struct mii_chip_info { { "Broadcom PHY BCM5461", { 0x0020, 0x60c0 }, LAN }, { "Agere PHY ET1101B", { 0x0282, 0xf010 }, LAN }, { "Marvell PHY 88E1111", { 0x0141, 0x0cc0 }, LAN }, + { "Realtek PHY RTL8201", { 0x0000, 0x8200 }, LAN }, { NULL, } }; From 8348b4db5f56d2c0d3849db06055225ec15b255a Mon Sep 17 00:00:00 2001 From: Francois Romieu Date: Sat, 30 Jul 2005 13:16:14 +0200 Subject: [PATCH 038/584] [PATCH] sis190: compare the lpa to the local advertisement The station control register must depend on both the advertisement and the lpa. The link partner ability had better be intersected with the current advertised value before it is fed to the station control register. 
Sight-catched-by: Lars Vahlenberg Signed-off-by: Francois Romieu Signed-off-by: Jeff Garzik --- drivers/net/sis190.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/net/sis190.c b/drivers/net/sis190.c index 84bc2299f93b..915ff009c295 100644 --- a/drivers/net/sis190.c +++ b/drivers/net/sis190.c @@ -952,12 +952,17 @@ static void sis190_phy_task(void * data) 0x01 | _10bpsH }, { 0, "unknown", 0x0000 } }, *p; + u16 adv; val = mdio_read(ioaddr, phy_id, 0x1f); net_link(tp, KERN_INFO "%s: mii ext = %04x.\n", dev->name, val); val = mdio_read(ioaddr, phy_id, MII_LPA); - net_link(tp, KERN_INFO "%s: mii lpa = %04x.\n", dev->name, val); + adv = mdio_read(ioaddr, phy_id, MII_ADVERTISE); + net_link(tp, KERN_INFO "%s: mii lpa = %04x adv = %04x.\n", + dev->name, val, adv); + + val &= adv; for (p = reg31; p->ctl; p++) { if ((val & p->val) == p->val) From 00db8189d984d6c51226dafbbe4a667ce9b7d5da Mon Sep 17 00:00:00 2001 From: Andy Fleming Date: Sat, 30 Jul 2005 19:31:23 -0400 Subject: [PATCH 039/584] This patch adds a PHY Abstraction Layer to the Linux Kernel, enabling ethernet drivers to remain as ignorant as is reasonable of the connected PHY's design and operation details. 
Signed-off-by: Andy Fleming Signed-off-by: Jeff Garzik --- Documentation/networking/phy.txt | 288 +++++++++++ drivers/net/Kconfig | 2 + drivers/net/Makefile | 1 + drivers/net/phy/Kconfig | 57 ++ drivers/net/phy/Makefile | 9 + drivers/net/phy/cicada.c | 134 +++++ drivers/net/phy/davicom.c | 195 +++++++ drivers/net/phy/lxt.c | 179 +++++++ drivers/net/phy/marvell.c | 140 +++++ drivers/net/phy/mdio_bus.c | 173 +++++++ drivers/net/phy/phy.c | 862 +++++++++++++++++++++++++++++++ drivers/net/phy/phy.c.orig | 860 ++++++++++++++++++++++++++++++ drivers/net/phy/phy_device.c | 682 ++++++++++++++++++++++++ drivers/net/phy/qsemi.c | 143 +++++ include/linux/ethtool.h | 4 + include/linux/mii.h | 9 +- include/linux/phy.h | 378 ++++++++++++++ 17 files changed, 4115 insertions(+), 1 deletion(-) create mode 100644 Documentation/networking/phy.txt create mode 100644 drivers/net/phy/Kconfig create mode 100644 drivers/net/phy/Makefile create mode 100644 drivers/net/phy/cicada.c create mode 100644 drivers/net/phy/davicom.c create mode 100644 drivers/net/phy/lxt.c create mode 100644 drivers/net/phy/marvell.c create mode 100644 drivers/net/phy/mdio_bus.c create mode 100644 drivers/net/phy/phy.c create mode 100644 drivers/net/phy/phy.c.orig create mode 100644 drivers/net/phy/phy_device.c create mode 100644 drivers/net/phy/qsemi.c create mode 100644 include/linux/phy.h diff --git a/Documentation/networking/phy.txt b/Documentation/networking/phy.txt new file mode 100644 index 000000000000..29ccae409031 --- /dev/null +++ b/Documentation/networking/phy.txt @@ -0,0 +1,288 @@ + +------- +PHY Abstraction Layer +(Updated 2005-07-21) + +Purpose + + Most network devices consist of a set of registers which provide an interface + to a MAC layer, which communicates with the physical connection through a + 
The PHY concerns itself with negotiating link parameters with the link + partner on the other side of the network connection (typically, an ethernet + cable), and provides a register interface to allow drivers to determine what + settings were chosen, and to configure what settings are allowed. + + While these devices are distinct from the network devices, and conform to a + standard layout for the registers, it has been common practice to integrate + the PHY management code with the network driver. This has resulted in large + amounts of redundant code. Also, on embedded systems with multiple (and + sometimes quite different) ethernet controllers connected to the same + management bus, it is difficult to ensure safe use of the bus. + + Since the PHYs are devices, and the management busses through which they are + accessed are, in fact, busses, the PHY Abstraction Layer treats them as such. + In doing so, it has these goals: + + 1) Increase code-reuse + 2) Increase overall code-maintainability + 3) Speed development time for new network drivers, and for new systems + + Basically, this layer is meant to provide an interface to PHY devices which + allows network driver writers to write as little code as possible, while + still providing a full feature set. + +The MDIO bus + + Most network devices are connected to a PHY by means of a management bus. + Different devices use different busses (though some share common interfaces). + In order to take advantage of the PAL, each bus interface needs to be + registered as a distinct device. + + 1) read and write functions must be implemented. Their prototypes are: + + int write(struct mii_bus *bus, int mii_id, int regnum, u16 value); + int read(struct mii_bus *bus, int mii_id, int regnum); + + mii_id is the address on the bus for the PHY, and regnum is the register + number. 
These functions are guaranteed not to be called from interrupt + time, so it is safe for them to block, waiting for an interrupt to signal + the operation is complete. + + 2) A reset function is necessary. This is used to return the bus to an + initialized state. + + 3) A probe function is needed. This function should set up anything the bus + driver needs, set up the mii_bus structure, and register with the PAL using + mdiobus_register. Similarly, there's a remove function to undo all of + that (use mdiobus_unregister). + + 4) Like any driver, the device_driver structure must be configured, and init + and exit functions are used to register the driver. + + 5) The bus must also be declared somewhere as a device, and registered. + + As an example for how one driver implemented an mdio bus driver, see + drivers/net/gianfar_mii.c and arch/ppc/syslib/mpc85xx_devices.c + +Connecting to a PHY + + Sometime during startup, the network driver needs to establish a connection + between the PHY device, and the network device. At this time, the PHY's bus + and drivers need to all have been loaded, so it is ready for the connection. + At this point, there are several ways to connect to the PHY: + + 1) The PAL handles everything, and only calls the network driver when + the link state changes, so it can react. + + 2) The PAL handles everything except interrupts (usually because the + controller has the interrupt registers). + + 3) The PAL handles everything, but checks in with the driver every second, + allowing the network driver to react first to any changes before the PAL + does.
+ + 4) The PAL serves only as a library of functions, with the network device + manually calling functions to update status, and configure the PHY + + +Letting the PHY Abstraction Layer do Everything + + If you choose option 1 (The hope is that every driver can, but to still be + useful to drivers that can't), connecting to the PHY is simple: + + First, you need a function to react to changes in the link state. This + function follows this protocol: + + static void adjust_link(struct net_device *dev); + + Next, you need to know the device name of the PHY connected to this device. + The name will look something like, "phy0:0", where the first number is the + bus id, and the second is the PHY's address on that bus. + + Now, to connect, just call this function: + + phydev = phy_connect(dev, phy_name, &adjust_link, flags); + + phydev is a pointer to the phy_device structure which represents the PHY. If + phy_connect is successful, it will return the pointer. dev, here, is the + pointer to your net_device. Once done, this function will have started the + PHY's software state machine, and registered for the PHY's interrupt, if it + has one. The phydev structure will be populated with information about the + current state, though the PHY will not yet be truly operational at this + point. + + flags is a u32 which can optionally contain phy-specific flags. + This is useful if the system has put hardware restrictions on + the PHY/controller, of which the PHY needs to be aware. + + Now just make sure that phydev->supported and phydev->advertising have any + values pruned from them which don't make sense for your controller (a 10/100 + controller may be connected to a gigabit capable PHY, so you would need to + mask off SUPPORTED_1000baseT*). See include/linux/ethtool.h for definitions + for these bitfields. Note that you should not SET any bits, or the PHY may + get put into an unsupported state. 
+ + Lastly, once the controller is ready to handle network traffic, you call + phy_start(phydev). This tells the PAL that you are ready, and configures the + PHY to connect to the network. If you want to handle your own interrupts, + just set phydev->irq to PHY_IGNORE_INTERRUPT before you call phy_start. + Similarly, if you don't want to use interrupts, set phydev->irq to PHY_POLL. + + When you want to disconnect from the network (even if just briefly), you call + phy_stop(phydev). + +Keeping Close Tabs on the PAL + + It is possible that the PAL's built-in state machine needs a little help to + keep your network device and the PHY properly in sync. If so, you can + register a helper function when connecting to the PHY, which will be called + every second before the state machine reacts to any changes. To do this, you + need to manually call phy_attach() and phy_prepare_link(), and then call + phy_start_machine() with the second argument set to point to your special + handler. + + Currently there are no examples of how to use this functionality, and testing + on it has been limited because the author does not have any drivers which use + it (they all use option 1). So Caveat Emptor. + +Doing it all yourself + + There's a remote chance that the PAL's built-in state machine cannot track + the complex interactions between the PHY and your network device. If this is + so, you can simply call phy_attach(), and not call phy_start_machine or + phy_prepare_link(). This will mean that phydev->state is entirely yours to + handle (phy_start and phy_stop toggle between some of the states, so you + might need to avoid them). + + An effort has been made to make sure that useful functionality can be + accessed without the state-machine running, and most of these functions are + descended from functions which did not interact with a complex state-machine. + However, again, no effort has been made so far to test running without the + state machine, so tryer beware. 
+ + Here is a brief rundown of the functions: + + int phy_read(struct phy_device *phydev, u16 regnum); + int phy_write(struct phy_device *phydev, u16 regnum, u16 val); + + Simple read/write primitives. They invoke the bus's read/write function + pointers. + + void phy_print_status(struct phy_device *phydev); + + A convenience function to print out the PHY status neatly. + + int phy_clear_interrupt(struct phy_device *phydev); + int phy_config_interrupt(struct phy_device *phydev, u32 interrupts); + + Clear the PHY's interrupt, and configure which ones are allowed, + respectively. Currently only supports all on, or all off. + + int phy_enable_interrupts(struct phy_device *phydev); + int phy_disable_interrupts(struct phy_device *phydev); + + Functions which enable/disable PHY interrupts, clearing them + before and after, respectively. + + int phy_start_interrupts(struct phy_device *phydev); + int phy_stop_interrupts(struct phy_device *phydev); + + Requests the IRQ for the PHY interrupts, then enables them for + start, or disables then frees them for stop. + + struct phy_device * phy_attach(struct net_device *dev, const char *phy_id, + u32 flags); + + Attaches a network device to a particular PHY, binding the PHY to a generic + driver if none was found during bus initialization. Passes in + any phy-specific flags as needed. + + int phy_start_aneg(struct phy_device *phydev); + + Using variables inside the phydev structure, either configures advertising + and resets autonegotiation, or disables autonegotiation, and configures + forced settings. + + static inline int phy_read_status(struct phy_device *phydev); + + Fills the phydev structure with up-to-date information about the current + settings in the PHY. + + void phy_sanitize_settings(struct phy_device *phydev) + + Resolves differences between currently desired settings, and + supported settings for the given PHY device. Does not make + the changes in the hardware, though. 
+ + int phy_ethtool_sset(struct phy_device *phydev, struct ethtool_cmd *cmd); + int phy_ethtool_gset(struct phy_device *phydev, struct ethtool_cmd *cmd); + + Ethtool convenience functions. + + int phy_mii_ioctl(struct phy_device *phydev, + struct mii_ioctl_data *mii_data, int cmd); + + The MII ioctl. Note that this function will completely screw up the state + machine if you write registers like BMCR, BMSR, ADVERTISE, etc. Best to + use this only to write registers which are not standard, and don't set off + a renegotiation. + + +PHY Device Drivers + + With the PHY Abstraction Layer, adding support for new PHYs is + quite easy. In some cases, no work is required at all! However, + many PHYs require a little hand-holding to get up-and-running. + +Generic PHY driver + + If the desired PHY doesn't have any errata, quirks, or special + features you want to support, then it may be best to not add + support, and let the PHY Abstraction Layer's Generic PHY Driver + do all of the work. + +Writing a PHY driver + + If you do need to write a PHY driver, the first thing to do is + make sure it can be matched with an appropriate PHY device. + This is done during bus initialization by reading the device's + UID (stored in registers 2 and 3), then comparing it to each + driver's phy_id field by ANDing it with each driver's + phy_id_mask field. Also, it needs a name. Here's an example: + + static struct phy_driver dm9161_driver = { + .phy_id = 0x0181b880, + .name = "Davicom DM9161E", + .phy_id_mask = 0x0ffffff0, + ... + } + + Next, you need to specify what features (speed, duplex, autoneg, + etc) your PHY device and driver support. Most PHYs support + PHY_BASIC_FEATURES, but you can look in include/linux/mii.h for other + features. + + Each driver consists of a number of function pointers: + + config_init: configures PHY into a sane state after a reset. + For instance, a Davicom PHY requires descrambling disabled.
+ probe: Does any setup needed by the driver + suspend/resume: power management + config_aneg: Changes the speed/duplex/negotiation settings + read_status: Reads the current speed/duplex/negotiation settings + ack_interrupt: Clear a pending interrupt + config_intr: Enable or disable interrupts + remove: Does any driver take-down + + Of these, only config_aneg and read_status are required to be + assigned by the driver code. The rest are optional. Also, it is + preferred to use the generic phy driver's versions of these two + functions if at all possible: genphy_read_status and + genphy_config_aneg. If this is not possible, it is likely that + you only need to perform some actions before and after invoking + these functions, and so your functions will wrap the generic + ones. + + Feel free to look at the Marvell, Cicada, and Davicom drivers in + drivers/net/phy/ for examples (the lxt and qsemi drivers have + not been tested as of this writing) diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig index 8a835eb58808..1e50b8e32add 100644 --- a/drivers/net/Kconfig +++ b/drivers/net/Kconfig @@ -131,6 +131,8 @@ config NET_SB1000 source "drivers/net/arcnet/Kconfig" +source "drivers/net/phy/Kconfig" + # # Ethernet # diff --git a/drivers/net/Makefile b/drivers/net/Makefile index 63c6d1e6d4d9..a369ae284a9a 100644 --- a/drivers/net/Makefile +++ b/drivers/net/Makefile @@ -65,6 +65,7 @@ obj-$(CONFIG_ADAPTEC_STARFIRE) += starfire.o # obj-$(CONFIG_MII) += mii.o +obj-$(CONFIG_PHYLIB) += phy/ obj-$(CONFIG_SUNDANCE) += sundance.o obj-$(CONFIG_HAMACHI) += hamachi.o diff --git a/drivers/net/phy/Kconfig b/drivers/net/phy/Kconfig new file mode 100644 index 000000000000..8b5db2343cc3 --- /dev/null +++ b/drivers/net/phy/Kconfig @@ -0,0 +1,57 @@ +# +# PHY Layer Configuration +# + +menu "PHY device support" + +config PHYLIB + bool "PHY Device support and infrastructure" + depends on NET_ETHERNET + help + Ethernet controllers are usually attached to PHY + devices. 
This option provides infrastructure for + managing PHY devices. + +config PHYCONTROL + bool "Support for automatically handling PHY state changes" + depends on PHYLIB + help + Adds code to perform all the work for keeping PHY link + state (speed/duplex/etc) up-to-date. Also handles + interrupts. + +comment "MII PHY device drivers" + depends on PHYLIB + +config MARVELL_PHY + bool "Drivers for Marvell PHYs" + depends on PHYLIB + ---help--- + Currently has a driver for the 88E1011S + +config DAVICOM_PHY + bool "Drivers for Davicom PHYs" + depends on PHYLIB + ---help--- + Currently supports dm9161e and dm9131 + +config QSEMI_PHY + bool "Drivers for Quality Semiconductor PHYs" + depends on PHYLIB + ---help--- + Currently supports the qs6612 + +config LXT_PHY + bool "Drivers for the Intel LXT PHYs" + depends on PHYLIB + ---help--- + Currently supports the lxt970, lxt971 + +config CICADA_PHY + bool "Drivers for the Cicada PHYs" + depends on PHYLIB + ---help--- + Currently supports the cis8204 + +endmenu + diff --git a/drivers/net/phy/Makefile b/drivers/net/phy/Makefile new file mode 100644 index 000000000000..1af05de6ced0 --- /dev/null +++ b/drivers/net/phy/Makefile @@ -0,0 +1,9 @@ +# Makefile for Linux PHY drivers + +obj-$(CONFIG_PHYLIB) += phy.o phy_device.o mdio_bus.o + +obj-$(CONFIG_MARVELL_PHY) += marvell.o +obj-$(CONFIG_DAVICOM_PHY) += davicom.o +obj-$(CONFIG_CICADA_PHY) += cicada.o +obj-$(CONFIG_LXT_PHY) += lxt.o +obj-$(CONFIG_QSEMI_PHY) += qsemi.o diff --git a/drivers/net/phy/cicada.c b/drivers/net/phy/cicada.c new file mode 100644 index 000000000000..c47fb2ecd147 --- /dev/null +++ b/drivers/net/phy/cicada.c @@ -0,0 +1,134 @@ +/* + * drivers/net/phy/cicada.c + * + * Driver for Cicada PHYs + * + * Author: Andy Fleming + * + * Copyright (c) 2004 Freescale Semiconductor, Inc. 
+ * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + * + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +/* Cicada Extended Control Register 1 */ +#define MII_CIS8201_EXT_CON1 0x17 +#define MII_CIS8201_EXTCON1_INIT 0x0000 + +/* Cicada Interrupt Mask Register */ +#define MII_CIS8201_IMASK 0x19 +#define MII_CIS8201_IMASK_IEN 0x8000 +#define MII_CIS8201_IMASK_SPEED 0x4000 +#define MII_CIS8201_IMASK_LINK 0x2000 +#define MII_CIS8201_IMASK_DUPLEX 0x1000 +#define MII_CIS8201_IMASK_MASK 0xf000 + +/* Cicada Interrupt Status Register */ +#define MII_CIS8201_ISTAT 0x1a +#define MII_CIS8201_ISTAT_STATUS 0x8000 +#define MII_CIS8201_ISTAT_SPEED 0x4000 +#define MII_CIS8201_ISTAT_LINK 0x2000 +#define MII_CIS8201_ISTAT_DUPLEX 0x1000 + +/* Cicada Auxiliary Control/Status Register */ +#define MII_CIS8201_AUX_CONSTAT 0x1c +#define MII_CIS8201_AUXCONSTAT_INIT 0x0004 +#define MII_CIS8201_AUXCONSTAT_DUPLEX 0x0020 +#define MII_CIS8201_AUXCONSTAT_SPEED 0x0018 +#define MII_CIS8201_AUXCONSTAT_GBIT 0x0010 +#define MII_CIS8201_AUXCONSTAT_100 0x0008 + +MODULE_DESCRIPTION("Cicadia PHY driver"); +MODULE_AUTHOR("Andy Fleming"); +MODULE_LICENSE("GPL"); + +static int cis820x_config_init(struct phy_device *phydev) +{ + int err; + + err = phy_write(phydev, MII_CIS8201_AUX_CONSTAT, + MII_CIS8201_AUXCONSTAT_INIT); + + if (err < 0) + return err; + + err = phy_write(phydev, MII_CIS8201_EXT_CON1, + MII_CIS8201_EXTCON1_INIT); + + return err; +} + +static int cis820x_ack_interrupt(struct phy_device *phydev) +{ + int err = phy_read(phydev, MII_CIS8201_ISTAT); + + return (err < 0) ? 
err : 0; +} + +static int cis820x_config_intr(struct phy_device *phydev) +{ + int err; + + if(phydev->interrupts == PHY_INTERRUPT_ENABLED) + err = phy_write(phydev, MII_CIS8201_IMASK, + MII_CIS8201_IMASK_MASK); + else + err = phy_write(phydev, MII_CIS8201_IMASK, 0); + + return err; +} + +/* Cicada 820x */ +static struct phy_driver cis8204_driver = { + .phy_id = 0x000fc440, + .name = "Cicada Cis8204", + .phy_id_mask = 0x000fffc0, + .features = PHY_GBIT_FEATURES, + .flags = PHY_HAS_INTERRUPT, + .config_init = &cis820x_config_init, + .config_aneg = &genphy_config_aneg, + .read_status = &genphy_read_status, + .ack_interrupt = &cis820x_ack_interrupt, + .config_intr = &cis820x_config_intr, + .driver = { .owner = THIS_MODULE,}, +}; + +static int __init cis8204_init(void) +{ + return phy_driver_register(&cis8204_driver); +} + +static void __exit cis8204_exit(void) +{ + phy_driver_unregister(&cis8204_driver); +} + +module_init(cis8204_init); +module_exit(cis8204_exit); diff --git a/drivers/net/phy/davicom.c b/drivers/net/phy/davicom.c new file mode 100644 index 000000000000..6caf499fae32 --- /dev/null +++ b/drivers/net/phy/davicom.c @@ -0,0 +1,195 @@ +/* + * drivers/net/phy/davicom.c + * + * Driver for Davicom PHYs + * + * Author: Andy Fleming + * + * Copyright (c) 2004 Freescale Semiconductor, Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. 
+ * + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#define MII_DM9161_SCR 0x10 +#define MII_DM9161_SCR_INIT 0x0610 + +/* DM9161 Interrupt Register */ +#define MII_DM9161_INTR 0x15 +#define MII_DM9161_INTR_PEND 0x8000 +#define MII_DM9161_INTR_DPLX_MASK 0x0800 +#define MII_DM9161_INTR_SPD_MASK 0x0400 +#define MII_DM9161_INTR_LINK_MASK 0x0200 +#define MII_DM9161_INTR_MASK 0x0100 +#define MII_DM9161_INTR_DPLX_CHANGE 0x0010 +#define MII_DM9161_INTR_SPD_CHANGE 0x0008 +#define MII_DM9161_INTR_LINK_CHANGE 0x0004 +#define MII_DM9161_INTR_INIT 0x0000 +#define MII_DM9161_INTR_STOP \ +(MII_DM9161_INTR_DPLX_MASK | MII_DM9161_INTR_SPD_MASK \ + | MII_DM9161_INTR_LINK_MASK | MII_DM9161_INTR_MASK) + +/* DM9161 10BT Configuration/Status */ +#define MII_DM9161_10BTCSR 0x12 +#define MII_DM9161_10BTCSR_INIT 0x7800 + +MODULE_DESCRIPTION("Davicom PHY driver"); +MODULE_AUTHOR("Andy Fleming"); +MODULE_LICENSE("GPL"); + + +#define DM9161_DELAY 1 +static int dm9161_config_intr(struct phy_device *phydev) +{ + int temp; + + temp = phy_read(phydev, MII_DM9161_INTR); + + if (temp < 0) + return temp; + + if(PHY_INTERRUPT_ENABLED == phydev->interrupts ) + temp &= ~(MII_DM9161_INTR_STOP); + else + temp |= MII_DM9161_INTR_STOP; + + temp = phy_write(phydev, MII_DM9161_INTR, temp); + + return temp; +} + +static int dm9161_config_aneg(struct phy_device *phydev) +{ + int err; + + /* Isolate the PHY */ + err = phy_write(phydev, MII_BMCR, BMCR_ISOLATE); + + if (err < 0) + return err; + + /* Configure the new settings */ + err = genphy_config_aneg(phydev); + + if (err < 0) + return err; + + return 0; +} + +static int dm9161_config_init(struct phy_device *phydev) +{ + int err; + + /* Isolate the PHY */ + err = phy_write(phydev, MII_BMCR, BMCR_ISOLATE); + + if (err < 0) + return err; + + /* Do not bypass the 
scrambler/descrambler */ + err = phy_write(phydev, MII_DM9161_SCR, MII_DM9161_SCR_INIT); + + if (err < 0) + return err; + + /* Clear 10BTCSR to default */ + err = phy_write(phydev, MII_DM9161_10BTCSR, MII_DM9161_10BTCSR_INIT); + + if (err < 0) + return err; + + /* Reconnect the PHY, and enable Autonegotiation */ + err = phy_write(phydev, MII_BMCR, BMCR_ANENABLE); + + if (err < 0) + return err; + + return 0; +} + +static int dm9161_ack_interrupt(struct phy_device *phydev) +{ + int err = phy_read(phydev, MII_DM9161_INTR); + + return (err < 0) ? err : 0; +} + +static struct phy_driver dm9161_driver = { + .phy_id = 0x0181b880, + .name = "Davicom DM9161E", + .phy_id_mask = 0x0ffffff0, + .features = PHY_BASIC_FEATURES, + .config_init = dm9161_config_init, + .config_aneg = dm9161_config_aneg, + .read_status = genphy_read_status, + .driver = { .owner = THIS_MODULE,}, +}; + +static struct phy_driver dm9131_driver = { + .phy_id = 0x00181b80, + .name = "Davicom DM9131", + .phy_id_mask = 0x0ffffff0, + .features = PHY_BASIC_FEATURES, + .flags = PHY_HAS_INTERRUPT, + .config_aneg = genphy_config_aneg, + .read_status = genphy_read_status, + .ack_interrupt = dm9161_ack_interrupt, + .config_intr = dm9161_config_intr, + .driver = { .owner = THIS_MODULE,}, +}; + +static int __init davicom_init(void) +{ + int ret; + + ret = phy_driver_register(&dm9161_driver); + if (ret) + goto err1; + + ret = phy_driver_register(&dm9131_driver); + if (ret) + goto err2; + return 0; + + err2: + phy_driver_unregister(&dm9161_driver); + err1: + return ret; +} + +static void __exit davicom_exit(void) +{ + phy_driver_unregister(&dm9161_driver); + phy_driver_unregister(&dm9131_driver); +} + +module_init(davicom_init); +module_exit(davicom_exit); diff --git a/drivers/net/phy/lxt.c b/drivers/net/phy/lxt.c new file mode 100644 index 000000000000..4c840448ec86 --- /dev/null +++ b/drivers/net/phy/lxt.c @@ -0,0 +1,179 @@ +/* + * drivers/net/phy/lxt.c + * + * Driver for Intel LXT PHYs + * + * Author: Andy Fleming + 
* + * Copyright (c) 2004 Freescale Semiconductor, Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + * + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +/* The Level one LXT970 is used by many boards */ + +#define MII_LXT970_IER 17 /* Interrupt Enable Register */ + +#define MII_LXT970_IER_IEN 0x0002 + +#define MII_LXT970_ISR 18 /* Interrupt Status Register */ + +#define MII_LXT970_CONFIG 19 /* Configuration Register */ + +/* ------------------------------------------------------------------------- */ +/* The Level one LXT971 is used on some of my custom boards */ + +/* register definitions for the 971 */ +#define MII_LXT971_IER 18 /* Interrupt Enable Register */ +#define MII_LXT971_IER_IEN 0x00f2 + +#define MII_LXT971_ISR 19 /* Interrupt Status Register */ + + +MODULE_DESCRIPTION("Intel LXT PHY driver"); +MODULE_AUTHOR("Andy Fleming"); +MODULE_LICENSE("GPL"); + +static int lxt970_ack_interrupt(struct phy_device *phydev) +{ + int err; + + err = phy_read(phydev, MII_BMSR); + + if (err < 0) + return err; + + err = phy_read(phydev, MII_LXT970_ISR); + + if (err < 0) + return err; + + return 0; +} + +static int lxt970_config_intr(struct phy_device *phydev) +{ + int err; + + if(phydev->interrupts == PHY_INTERRUPT_ENABLED) + err = phy_write(phydev, MII_LXT970_IER, MII_LXT970_IER_IEN); + else + err = phy_write(phydev, MII_LXT970_IER, 0); + + return err; +} + +static int lxt970_config_init(struct phy_device *phydev) +{ + int err; + + err = phy_write(phydev, MII_LXT970_CONFIG, 0); + + return err; +} + + +static int lxt971_ack_interrupt(struct phy_device *phydev) +{ + int err = 
phy_read(phydev, MII_LXT971_ISR); + + if (err < 0) + return err; + + return 0; +} + +static int lxt971_config_intr(struct phy_device *phydev) +{ + int err; + + if(phydev->interrupts == PHY_INTERRUPT_ENABLED) + err = phy_write(phydev, MII_LXT971_IER, MII_LXT971_IER_IEN); + else + err = phy_write(phydev, MII_LXT971_IER, 0); + + return err; +} + +static struct phy_driver lxt970_driver = { + .phy_id = 0x07810000, + .name = "LXT970", + .phy_id_mask = 0x0fffffff, + .features = PHY_BASIC_FEATURES, + .flags = PHY_HAS_INTERRUPT, + .config_init = lxt970_config_init, + .config_aneg = genphy_config_aneg, + .read_status = genphy_read_status, + .ack_interrupt = lxt970_ack_interrupt, + .config_intr = lxt970_config_intr, + .driver = { .owner = THIS_MODULE,}, +}; + +static struct phy_driver lxt971_driver = { + .phy_id = 0x0001378e, + .name = "LXT971", + .phy_id_mask = 0x0fffffff, + .features = PHY_BASIC_FEATURES, + .flags = PHY_HAS_INTERRUPT, + .config_aneg = genphy_config_aneg, + .read_status = genphy_read_status, + .ack_interrupt = lxt971_ack_interrupt, + .config_intr = lxt971_config_intr, + .driver = { .owner = THIS_MODULE,}, +}; + +static int __init lxt_init(void) +{ + int ret; + + ret = phy_driver_register(&lxt970_driver); + if (ret) + goto err1; + + ret = phy_driver_register(&lxt971_driver); + if (ret) + goto err2; + return 0; + + err2: + phy_driver_unregister(&lxt970_driver); + err1: + return ret; +} + +static void __exit lxt_exit(void) +{ + phy_driver_unregister(&lxt970_driver); + phy_driver_unregister(&lxt971_driver); +} + +module_init(lxt_init); +module_exit(lxt_exit); diff --git a/drivers/net/phy/marvell.c b/drivers/net/phy/marvell.c new file mode 100644 index 000000000000..4a72b025006b --- /dev/null +++ b/drivers/net/phy/marvell.c @@ -0,0 +1,140 @@ +/* + * drivers/net/phy/marvell.c + * + * Driver for Marvell PHYs + * + * Author: Andy Fleming + * + * Copyright (c) 2004 Freescale Semiconductor, Inc. 
+ * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + * + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#define MII_M1011_IEVENT 0x13 +#define MII_M1011_IEVENT_CLEAR 0x0000 + +#define MII_M1011_IMASK 0x12 +#define MII_M1011_IMASK_INIT 0x6400 +#define MII_M1011_IMASK_CLEAR 0x0000 + +MODULE_DESCRIPTION("Marvell PHY driver"); +MODULE_AUTHOR("Andy Fleming"); +MODULE_LICENSE("GPL"); + +static int marvell_ack_interrupt(struct phy_device *phydev) +{ + int err; + + /* Clear the interrupts by reading the reg */ + err = phy_read(phydev, MII_M1011_IEVENT); + + if (err < 0) + return err; + + return 0; +} + +static int marvell_config_intr(struct phy_device *phydev) +{ + int err; + + if(phydev->interrupts == PHY_INTERRUPT_ENABLED) + err = phy_write(phydev, MII_M1011_IMASK, MII_M1011_IMASK_INIT); + else + err = phy_write(phydev, MII_M1011_IMASK, MII_M1011_IMASK_CLEAR); + + return err; +} + +static int marvell_config_aneg(struct phy_device *phydev) +{ + int err; + + /* The Marvell PHY has an errata which requires + * that certain registers get written in order + * to restart autonegotiation */ + err = phy_write(phydev, MII_BMCR, BMCR_RESET); + + if (err < 0) + return err; + + err = phy_write(phydev, 0x1d, 0x1f); + if (err < 0) + return err; + + err = phy_write(phydev, 0x1e, 0x200c); + if (err < 0) + return err; + + err = phy_write(phydev, 0x1d, 0x5); + if (err < 0) + return err; + + err = phy_write(phydev, 0x1e, 0); + if (err < 0) + return err; + + err = phy_write(phydev, 0x1e, 0x100); + if (err < 0) + return err; + + + err = genphy_config_aneg(phydev); + + return err; +} + + +static 
struct phy_driver m88e1101_driver = { + .phy_id = 0x01410c00, + .phy_id_mask = 0xffffff00, + .name = "Marvell 88E1101", + .features = PHY_GBIT_FEATURES, + .flags = PHY_HAS_INTERRUPT, + .config_aneg = &marvell_config_aneg, + .read_status = &genphy_read_status, + .ack_interrupt = &marvell_ack_interrupt, + .config_intr = &marvell_config_intr, + .driver = { .owner = THIS_MODULE,}, +}; + +static int __init marvell_init(void) +{ + return phy_driver_register(&m88e1101_driver); +} + +static void __exit marvell_exit(void) +{ + phy_driver_unregister(&m88e1101_driver); +} + +module_init(marvell_init); +module_exit(marvell_exit); diff --git a/drivers/net/phy/mdio_bus.c b/drivers/net/phy/mdio_bus.c new file mode 100644 index 000000000000..e75103ba6f86 --- /dev/null +++ b/drivers/net/phy/mdio_bus.c @@ -0,0 +1,173 @@ +/* + * drivers/net/phy/mdio_bus.c + * + * MDIO Bus interface + * + * Author: Andy Fleming + * + * Copyright (c) 2004 Freescale Semiconductor, Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. 
+ * + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +/* mdiobus_register + * + * description: Called by a bus driver to bring up all the PHYs + * on a given bus, and attach them to the bus + */ +int mdiobus_register(struct mii_bus *bus) +{ + int i; + int err = 0; + + if (NULL == bus || NULL == bus->name || + NULL == bus->read || + NULL == bus->write) + return -EINVAL; + + spin_lock_init(&bus->mdio_lock); /* only once bus is known to be non-NULL */ + + if (bus->reset) + bus->reset(bus); + + for (i = 0; i < PHY_MAX_ADDR; i++) { + struct phy_device *phydev; + + phydev = get_phy_device(bus, i); + + if (IS_ERR(phydev)) + return PTR_ERR(phydev); + + /* There's a PHY at this address + * We need to set: + * 1) IRQ + * 2) bus_id + * 3) parent + * 4) bus + * 5) mii_bus + * And, we need to register it */ + if (phydev) { + phydev->irq = bus->irq[i]; + + phydev->dev.parent = bus->dev; + phydev->dev.bus = &mdio_bus_type; + sprintf(phydev->dev.bus_id, "phy%d:%d", bus->id, i); + + phydev->bus = bus; + + err = device_register(&phydev->dev); + + if (err) + printk(KERN_ERR "phy %d failed to register\n", + i); + } + + bus->phy_map[i] = phydev; + } + + pr_info("%s: probed\n", bus->name); + + return err; +} +EXPORT_SYMBOL(mdiobus_register); + +void mdiobus_unregister(struct mii_bus *bus) +{ + int i; + + for (i = 0; i < PHY_MAX_ADDR; i++) { + if (bus->phy_map[i]) { + device_unregister(&bus->phy_map[i]->dev); + kfree(bus->phy_map[i]); + } + } +} +EXPORT_SYMBOL(mdiobus_unregister); + +/* mdio_bus_match + * + * description: Given a PHY device, and a PHY driver, return 1 if + * the driver supports the device.
Otherwise, return 0 + */ +static int mdio_bus_match(struct device *dev, struct device_driver *drv) +{ + struct phy_device *phydev = to_phy_device(dev); + struct phy_driver *phydrv = to_phy_driver(drv); + + return (phydrv->phy_id == (phydev->phy_id & phydrv->phy_id_mask)); +} + +/* Suspend and resume. Copied from platform_suspend and + * platform_resume + */ +static int mdio_bus_suspend(struct device * dev, u32 state) +{ + int ret = 0; + struct device_driver *drv = dev->driver; + + if (drv && drv->suspend) { + ret = drv->suspend(dev, state, SUSPEND_DISABLE); + if (ret == 0) + ret = drv->suspend(dev, state, SUSPEND_SAVE_STATE); + if (ret == 0) + ret = drv->suspend(dev, state, SUSPEND_POWER_DOWN); + } + return ret; +} + +static int mdio_bus_resume(struct device * dev) +{ + int ret = 0; + struct device_driver *drv = dev->driver; + + if (drv && drv->resume) { + ret = drv->resume(dev, RESUME_POWER_ON); + if (ret == 0) + ret = drv->resume(dev, RESUME_RESTORE_STATE); + if (ret == 0) + ret = drv->resume(dev, RESUME_ENABLE); + } + return ret; +} + +struct bus_type mdio_bus_type = { + .name = "mdio_bus", + .match = mdio_bus_match, + .suspend = mdio_bus_suspend, + .resume = mdio_bus_resume, +}; + +static int __init mdio_bus_init(void) +{ + return bus_register(&mdio_bus_type); +} + +subsys_initcall(mdio_bus_init); diff --git a/drivers/net/phy/phy.c b/drivers/net/phy/phy.c new file mode 100644 index 000000000000..e2c6896b92d2 --- /dev/null +++ b/drivers/net/phy/phy.c @@ -0,0 +1,862 @@ +/* + * drivers/net/phy/phy.c + * + * Framework for configuring and reading PHY devices + * Based on code in sungem_phy.c and gianfar_phy.c + * + * Author: Andy Fleming + * + * Copyright (c) 2004 Freescale Semiconductor, Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. 
+ * + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +static void phy_change(void *data); +static void phy_timer(unsigned long data); + +/* Convenience function to print out the current phy status + */ +void phy_print_status(struct phy_device *phydev) +{ + pr_info("%s: Link is %s", phydev->dev.bus_id, + phydev->link ? "Up" : "Down"); + if (phydev->link) + printk(" - %d/%s", phydev->speed, + DUPLEX_FULL == phydev->duplex ? + "Full" : "Half"); + + printk("\n"); +} +EXPORT_SYMBOL(phy_print_status); + + +/* Convenience functions for reading/writing a given PHY + * register. They MUST NOT be called from interrupt context, + * because the bus read/write functions may wait for an interrupt + * to conclude the operation. */ +int phy_read(struct phy_device *phydev, u16 regnum) +{ + int retval; + struct mii_bus *bus = phydev->bus; + + spin_lock_bh(&bus->mdio_lock); + retval = bus->read(bus, phydev->addr, regnum); + spin_unlock_bh(&bus->mdio_lock); + + return retval; +} +EXPORT_SYMBOL(phy_read); + +int phy_write(struct phy_device *phydev, u16 regnum, u16 val) +{ + int err; + struct mii_bus *bus = phydev->bus; + + spin_lock_bh(&bus->mdio_lock); + err = bus->write(bus, phydev->addr, regnum, val); + spin_unlock_bh(&bus->mdio_lock); + + return err; +} +EXPORT_SYMBOL(phy_write); + + +int phy_clear_interrupt(struct phy_device *phydev) +{ + int err = 0; + + if (phydev->drv->ack_interrupt) + err = phydev->drv->ack_interrupt(phydev); + + return err; +} + + +int phy_config_interrupt(struct phy_device *phydev, u32 interrupts) +{ + int err = 0; + + phydev->interrupts = interrupts; + if (phydev->drv->config_intr) + err = phydev->drv->config_intr(phydev); + + return err; +} + + +/* phy_aneg_done + * + * description: Reads the status register and returns 0 either if + * auto-negotiation is incomplete, or 
if there was an error. + * Returns BMSR_ANEGCOMPLETE if auto-negotiation is done. + */ +static inline int phy_aneg_done(struct phy_device *phydev) +{ + int retval; + + retval = phy_read(phydev, MII_BMSR); + + return (retval < 0) ? retval : (retval & BMSR_ANEGCOMPLETE); +} + +/* phy_start_aneg + * + * description: Calls the PHY driver's config_aneg, and then + * sets the PHY state to PHY_AN if auto-negotiation is enabled, + * and to PHY_FORCING if auto-negotiation is disabled. Unless + * the PHY is currently HALTED. + */ +int phy_start_aneg(struct phy_device *phydev) +{ + int err; + + spin_lock(&phydev->lock); + + if (AUTONEG_DISABLE == phydev->autoneg) + phy_sanitize_settings(phydev); + + err = phydev->drv->config_aneg(phydev); + + if (err < 0) + goto out_unlock; + + if (phydev->state != PHY_HALTED) { + if (AUTONEG_ENABLE == phydev->autoneg) { + phydev->state = PHY_AN; + phydev->link_timeout = PHY_AN_TIMEOUT; + } else { + phydev->state = PHY_FORCING; + phydev->link_timeout = PHY_FORCE_TIMEOUT; + } + } + +out_unlock: + spin_unlock(&phydev->lock); + return err; +} +EXPORT_SYMBOL(phy_start_aneg); + + +/* A structure for mapping a particular speed and duplex + * combination to a particular SUPPORTED and ADVERTISED value */ +struct phy_setting { + int speed; + int duplex; + u32 setting; +}; + +/* A mapping of all SUPPORTED settings to speed/duplex */ +static struct phy_setting settings[] = { + { + .speed = 10000, + .duplex = DUPLEX_FULL, + .setting = SUPPORTED_10000baseT_Full, + }, + { + .speed = SPEED_1000, + .duplex = DUPLEX_FULL, + .setting = SUPPORTED_1000baseT_Full, + }, + { + .speed = SPEED_1000, + .duplex = DUPLEX_HALF, + .setting = SUPPORTED_1000baseT_Half, + }, + { + .speed = SPEED_100, + .duplex = DUPLEX_FULL, + .setting = SUPPORTED_100baseT_Full, + }, + { + .speed = SPEED_100, + .duplex = DUPLEX_HALF, + .setting = SUPPORTED_100baseT_Half, + }, + { + .speed = SPEED_10, + .duplex = DUPLEX_FULL, + .setting = SUPPORTED_10baseT_Full, + }, + { + .speed = SPEED_10, 
+ .duplex = DUPLEX_HALF, + .setting = SUPPORTED_10baseT_Half, + }, +}; + +#define MAX_NUM_SETTINGS (sizeof(settings)/sizeof(struct phy_setting)) + +/* phy_find_setting + * + * description: Searches the settings array for the setting which + * matches the desired speed and duplex, and returns the index + * of that setting. Returns the index of the last setting if + * none of the others match. + */ +static inline int phy_find_setting(int speed, int duplex) +{ + int idx = 0; + + while (idx < ARRAY_SIZE(settings) && + (settings[idx].speed != speed || + settings[idx].duplex != duplex)) + idx++; + + return idx < MAX_NUM_SETTINGS ? idx : MAX_NUM_SETTINGS - 1; +} + +/* phy_find_valid + * idx: The first index in settings[] to search + * features: A mask of the valid settings + * + * description: Returns the index of the first valid setting less + * than or equal to the one pointed to by idx, as determined by + * the mask in features. Returns the index of the last setting + * if nothing else matches. + */ +static inline int phy_find_valid(int idx, u32 features) +{ + while (idx < MAX_NUM_SETTINGS && !(settings[idx].setting & features)) + idx++; + + return idx < MAX_NUM_SETTINGS ? idx : MAX_NUM_SETTINGS - 1; +} + +/* phy_sanitize_settings + * + * description: Make sure the PHY is set to supported speeds and + * duplexes. Drop down by one in this order: 1000/FULL, + * 1000/HALF, 100/FULL, 100/HALF, 10/FULL, 10/HALF + */ +void phy_sanitize_settings(struct phy_device *phydev) +{ + u32 features = phydev->supported; + int idx; + + /* Sanitize settings based on PHY capabilities */ + if ((features & SUPPORTED_Autoneg) == 0) + phydev->autoneg = 0; + + idx = phy_find_valid(phy_find_setting(phydev->speed, phydev->duplex), + features); + + phydev->speed = settings[idx].speed; + phydev->duplex = settings[idx].duplex; +} +EXPORT_SYMBOL(phy_sanitize_settings); + +/* phy_force_reduction + * + * description: Reduces the speed/duplex settings by + * one notch. 
The order is so: + * 1000/FULL, 1000/HALF, 100/FULL, 100/HALF, + * 10/FULL, 10/HALF. The function bottoms out at 10/HALF. + */ +static void phy_force_reduction(struct phy_device *phydev) +{ + int idx; + + idx = phy_find_setting(phydev->speed, phydev->duplex); + /* Step past the current entry; phy_find_valid() then picks the + * next supported entry, or the last one if none is left */ + idx++; + + idx = phy_find_valid(idx, phydev->supported); + + phydev->speed = settings[idx].speed; + phydev->duplex = settings[idx].duplex; + + pr_info("Trying %d/%s\n", phydev->speed, + DUPLEX_FULL == phydev->duplex ? + "FULL" : "HALF"); +} + +/* phy_ethtool_sset: + * A generic ethtool sset function. Handles all the details + * + * A few notes about parameter checking: + * - We don't set port or transceiver, so we don't care what they + * were set to. + * - phy_start_aneg() will make sure forced settings are sane, and + * choose the next best ones from the ones selected, so we don't + * care if ethtool tries to give us bad values + * + * Returns -EINVAL when cmd fails the checks below and 0 + * otherwise; the result of phy_start_aneg() is not propagated. + * cmd->advertising is masked in place with phydev->supported. + */ +int phy_ethtool_sset(struct phy_device *phydev, struct ethtool_cmd *cmd) +{ + if (cmd->phy_address != phydev->addr) + return -EINVAL; + + /* We make sure that we don't pass unsupported + * values in to the PHY */ + cmd->advertising &= phydev->supported; + + /* Verify the settings we care about. 
*/ + if (cmd->autoneg != AUTONEG_ENABLE && cmd->autoneg != AUTONEG_DISABLE) + return -EINVAL; + + if (cmd->autoneg == AUTONEG_ENABLE && cmd->advertising == 0) + return -EINVAL; + + if (cmd->autoneg == AUTONEG_DISABLE + && ((cmd->speed != SPEED_1000 + && cmd->speed != SPEED_100 + && cmd->speed != SPEED_10) + || (cmd->duplex != DUPLEX_HALF + && cmd->duplex != DUPLEX_FULL))) + return -EINVAL; + + phydev->autoneg = cmd->autoneg; + + phydev->speed = cmd->speed; + + phydev->advertising = cmd->advertising; + + if (AUTONEG_ENABLE == cmd->autoneg) + phydev->advertising |= ADVERTISED_Autoneg; + else + phydev->advertising &= ~ADVERTISED_Autoneg; + + phydev->duplex = cmd->duplex; + + /* Restart the PHY */ + phy_start_aneg(phydev); + + return 0; +} +/* phy_ethtool_gset: + * A generic ethtool gset function. Copies the cached supported, + * advertising, speed, duplex, address and autoneg values from + * phydev into cmd, and reports the port as PORT_MII and the + * transceiver as XCVR_EXTERNAL. Always returns 0. + */ +int phy_ethtool_gset(struct phy_device *phydev, struct ethtool_cmd *cmd) +{ + cmd->supported = phydev->supported; + + cmd->advertising = phydev->advertising; + + cmd->speed = phydev->speed; + cmd->duplex = phydev->duplex; + cmd->port = PORT_MII; + cmd->phy_address = phydev->addr; + cmd->transceiver = XCVR_EXTERNAL; + cmd->autoneg = phydev->autoneg; + + return 0; +} + + +/* Note that this function is currently incompatible with the + * PHYCONTROL layer. It changes registers without regard to + * current state. 
Use at own risk + */ +int phy_mii_ioctl(struct phy_device *phydev, + struct mii_ioctl_data *mii_data, int cmd) +{ + u16 val = mii_data->val_in; + + switch (cmd) { + case SIOCGMIIPHY: + mii_data->phy_id = phydev->addr; + break; + case SIOCGMIIREG: + mii_data->val_out = phy_read(phydev, mii_data->reg_num); + break; + + case SIOCSMIIREG: + if (!capable(CAP_NET_ADMIN)) + return -EPERM; + + if (mii_data->phy_id == phydev->addr) { + switch(mii_data->reg_num) { + case MII_BMCR: + if (val & (BMCR_RESET|BMCR_ANENABLE)) + phydev->autoneg = AUTONEG_DISABLE; + else + phydev->autoneg = AUTONEG_ENABLE; + if ((!phydev->autoneg) && (val & BMCR_FULLDPLX)) + phydev->duplex = DUPLEX_FULL; + else + phydev->duplex = DUPLEX_HALF; + break; + case MII_ADVERTISE: + phydev->advertising = val; + break; + default: + /* do nothing */ + break; + } + } + + phy_write(phydev, mii_data->reg_num, val); + + if (mii_data->reg_num == MII_BMCR + && val & BMCR_RESET + && phydev->drv->config_init) + phydev->drv->config_init(phydev); + break; + } + + return 0; +} + +/* phy_start_machine: + * + * description: The PHY infrastructure can run a state machine + * which tracks whether the PHY is starting up, negotiating, + * etc. This function starts the timer which tracks the state + * of the PHY. If you want to be notified when the state + * changes, pass in the callback, otherwise, pass NULL. If you + * want to maintain your own state machine, do not call this + * function. */ +void phy_start_machine(struct phy_device *phydev, + void (*handler)(struct net_device *)) +{ + phydev->adjust_state = handler; + + init_timer(&phydev->phy_timer); + phydev->phy_timer.function = &phy_timer; + phydev->phy_timer.data = (unsigned long) phydev; + mod_timer(&phydev->phy_timer, jiffies + HZ); +} + +/* phy_stop_machine + * + * description: Stops the state machine timer, sets the state to + * UP (unless it wasn't up yet), and then frees the interrupt, + * if it is in use. This function must be called BEFORE + * phy_detach. 
+ */ +void phy_stop_machine(struct phy_device *phydev) +{ + del_timer_sync(&phydev->phy_timer); + /* Any state beyond PHY_UP drops back to PHY_UP */ + spin_lock(&phydev->lock); + if (phydev->state > PHY_UP) + phydev->state = PHY_UP; + spin_unlock(&phydev->lock); + + if (phydev->irq != PHY_POLL) + phy_stop_interrupts(phydev); + + phydev->adjust_state = NULL; +} + +#ifdef CONFIG_PHYCONTROL +/* phy_error: + * + * Moves the PHY to the HALTED state in response to a read + * or write error. Only phydev->state is changed here, under + * phydev->lock. + * Must not be called from interrupt context, or while the + * phydev->lock is held. + */ +void phy_error(struct phy_device *phydev) +{ + spin_lock(&phydev->lock); + phydev->state = PHY_HALTED; + spin_unlock(&phydev->lock); +} + +/* phy_interrupt + * + * description: When a PHY interrupt occurs, the handler disables + * interrupts, and schedules a work task to clear the interrupt. + * Always returns IRQ_HANDLED. + */ +static irqreturn_t phy_interrupt(int irq, void *phy_dat, struct pt_regs *regs) +{ + struct phy_device *phydev = phy_dat; + + /* The MDIO bus is not allowed to be written in interrupt + * context, so we need to disable the irq here. A work + * queue will write the PHY to disable and clear the + * interrupt, and then reenable the irq line. 
*/ + disable_irq_nosync(irq); + + schedule_work(&phydev->phy_queue); + + return IRQ_HANDLED; +} + +/* Enable the interrupts from the PHY side: clear any pending + * interrupt first, then configure the PHY with + * PHY_INTERRUPT_ENABLED. Returns the negative error from the + * clear step, otherwise the result of the configure step. */ +int phy_enable_interrupts(struct phy_device *phydev) +{ + int err; + + err = phy_clear_interrupt(phydev); + + if (err < 0) + return err; + + err = phy_config_interrupt(phydev, PHY_INTERRUPT_ENABLED); + + return err; +} +EXPORT_SYMBOL(phy_enable_interrupts); + +/* Disable the PHY interrupts from the PHY side, then clear any + * pending interrupt. If either step returns non-zero, + * phy_error() is called and that value is returned; otherwise + * returns 0. */ +int phy_disable_interrupts(struct phy_device *phydev) +{ + int err; + + /* Disable PHY interrupts */ + err = phy_config_interrupt(phydev, PHY_INTERRUPT_DISABLED); + + if (err) + goto phy_err; + + /* Clear the interrupt */ + err = phy_clear_interrupt(phydev); + + if (err) + goto phy_err; + + return 0; + +phy_err: + phy_error(phydev); + + return err; +} +EXPORT_SYMBOL(phy_disable_interrupts); + +/* phy_start_interrupts + * + * description: Request the interrupt for the given PHY. If + * this fails, then we set irq to PHY_POLL. + * Otherwise, we enable the interrupts in the PHY. + * Returns 0 on success. + * This should only be called with a valid IRQ number. 
+ */ +int phy_start_interrupts(struct phy_device *phydev) +{ + int err = 0; + + INIT_WORK(&phydev->phy_queue, phy_change, phydev); + + if (request_irq(phydev->irq, phy_interrupt, + SA_SHIRQ, + "phy_interrupt", + phydev) < 0) { + printk(KERN_WARNING "%s: Can't get IRQ %d (PHY)\n", + phydev->bus->name, + phydev->irq); + phydev->irq = PHY_POLL; + return 0; + } + + err = phy_enable_interrupts(phydev); + + return err; +} +EXPORT_SYMBOL(phy_start_interrupts); +/* phy_stop_interrupts + * + * description: Disables the PHY's interrupts and frees the irq. + * The irq is freed even when disabling fails; the result of + * phy_disable_interrupts() is returned. + */ +int phy_stop_interrupts(struct phy_device *phydev) +{ + int err; + + err = phy_disable_interrupts(phydev); + + if (err) + phy_error(phydev); + + free_irq(phydev->irq, phydev); + + return err; +} +EXPORT_SYMBOL(phy_stop_interrupts); + + +/* Scheduled by the phy_interrupt/timer to handle PHY changes. + * Disables and clears the PHY's interrupt, moves a RUNNING or + * NOLINK PHY to CHANGELINK, re-enables the irq line and then + * the PHY's interrupts. If that last step fails the irq line is + * disabled again; any failure ends in phy_error(). */ +static void phy_change(void *data) +{ + int err; + struct phy_device *phydev = data; + + err = phy_disable_interrupts(phydev); + + if (err) + goto phy_err; + + spin_lock(&phydev->lock); + if ((PHY_RUNNING == phydev->state) || (PHY_NOLINK == phydev->state)) + phydev->state = PHY_CHANGELINK; + spin_unlock(&phydev->lock); + + enable_irq(phydev->irq); + + /* Reenable interrupts */ + err = phy_config_interrupt(phydev, PHY_INTERRUPT_ENABLED); + + if (err) + goto irq_enable_err; + + return; + +irq_enable_err: + disable_irq(phydev->irq); +phy_err: + phy_error(phydev); +} + +/* Bring down the PHY link, and stop checking the status. + * Does nothing if the PHY is already HALTED. Otherwise, when + * interrupts are in use, the PHY's interrupt is cleared and + * disabled (results are not checked), and the state becomes + * PHY_HALTED. All of this runs under phydev->lock. */ +void phy_stop(struct phy_device *phydev) +{ + spin_lock(&phydev->lock); + + if (PHY_HALTED == phydev->state) + goto out_unlock; + + if (phydev->irq != PHY_POLL) { + /* Clear any pending interrupts */ + phy_clear_interrupt(phydev); + + /* Disable PHY Interrupts */ + phy_config_interrupt(phydev, PHY_INTERRUPT_DISABLED); + } + + phydev->state = PHY_HALTED; + +out_unlock: + spin_unlock(&phydev->lock); +} + + +/* phy_start + * + * description: Indicates the attached device's readiness to + * handle PHY-related work. 
Used during startup to start the + * PHY, and after a call to phy_stop() to resume operation. + * Also used to indicate the MDIO bus has cleared an error + * condition. + */ +void phy_start(struct phy_device *phydev) +{ + spin_lock(&phydev->lock); + + switch (phydev->state) { + case PHY_STARTING: + phydev->state = PHY_PENDING; + break; + case PHY_READY: + phydev->state = PHY_UP; + break; + case PHY_HALTED: + phydev->state = PHY_RESUMING; /* falls into the empty default */ + default: + break; + } + spin_unlock(&phydev->lock); +} +EXPORT_SYMBOL(phy_stop); +EXPORT_SYMBOL(phy_start); + +/* PHY timer which handles the state machine. + * + * Each tick calls the optional adjust_state callback and + * advances phydev->state, all under phydev->lock. After the + * lock is dropped, auto-negotiation is (re)started if the tick + * asked for it, a negative error halts the PHY via phy_error(), + * and the timer re-arms itself PHY_STATE_TIME * HZ later. */ +static void phy_timer(unsigned long data) +{ + struct phy_device *phydev = (struct phy_device *)data; + int needs_aneg = 0; + int err = 0; + + spin_lock(&phydev->lock); + + if (phydev->adjust_state) + phydev->adjust_state(phydev->attached_dev); + + switch(phydev->state) { + case PHY_DOWN: + case PHY_STARTING: + case PHY_READY: + case PHY_PENDING: + break; + case PHY_UP: + needs_aneg = 1; + /* aneg itself is started below, once the lock is dropped */ + phydev->link_timeout = PHY_AN_TIMEOUT; + + break; + case PHY_AN: + /* Check if negotiation is done. 
Break + * if there's an error */ + err = phy_aneg_done(phydev); + if (err < 0) + break; + + /* If auto-negotiation is done, we change to + * either RUNNING, or NOLINK */ + if (err > 0) { + err = phy_read_status(phydev); + + if (err) + break; + + if (phydev->link) { + phydev->state = PHY_RUNNING; + netif_carrier_on(phydev->attached_dev); + } else { + phydev->state = PHY_NOLINK; + netif_carrier_off(phydev->attached_dev); + } + + phydev->adjust_link(phydev->attached_dev); + + } else if (0 == phydev->link_timeout--) { + /* The counter expired, so either we + * switch to forced mode, or the + * magic_aneg bit exists, and we try aneg + * again */ + if (!(phydev->drv->flags & PHY_HAS_MAGICANEG)) { + int idx; + + /* We'll start from the + * fastest speed, and work + * our way down */ + idx = phy_find_valid(0, + phydev->supported); + + phydev->speed = settings[idx].speed; + phydev->duplex = settings[idx].duplex; + + phydev->autoneg = AUTONEG_DISABLE; + phydev->state = PHY_FORCING; + phydev->link_timeout = + PHY_FORCE_TIMEOUT; + + pr_info("Trying %d/%s\n", + phydev->speed, + DUPLEX_FULL == + phydev->duplex ? 
+ "FULL" : "HALF"); + } + + needs_aneg = 1; + } + break; + case PHY_NOLINK: + err = phy_read_status(phydev); + + if (err) + break; + + if (phydev->link) { + phydev->state = PHY_RUNNING; + netif_carrier_on(phydev->attached_dev); + phydev->adjust_link(phydev->attached_dev); + } + break; + case PHY_FORCING: + err = phy_read_status(phydev); + + if (err) + break; + + if (phydev->link) { + phydev->state = PHY_RUNNING; + netif_carrier_on(phydev->attached_dev); + } else { + if (0 == phydev->link_timeout--) { + phy_force_reduction(phydev); + needs_aneg = 1; + } + } + + phydev->adjust_link(phydev->attached_dev); + break; + case PHY_RUNNING: + /* Only register a CHANGE if we are + * polling */ + if (PHY_POLL == phydev->irq) + phydev->state = PHY_CHANGELINK; + break; + case PHY_CHANGELINK: + err = phy_read_status(phydev); + + if (err) + break; + + if (phydev->link) { + phydev->state = PHY_RUNNING; + netif_carrier_on(phydev->attached_dev); + } else { + phydev->state = PHY_NOLINK; + netif_carrier_off(phydev->attached_dev); + } + + phydev->adjust_link(phydev->attached_dev); + + if (PHY_POLL != phydev->irq) + err = phy_config_interrupt(phydev, + PHY_INTERRUPT_ENABLED); + break; + case PHY_HALTED: + if (phydev->link) { + phydev->link = 0; + netif_carrier_off(phydev->attached_dev); + phydev->adjust_link(phydev->attached_dev); + } + break; + case PHY_RESUMING: + /* Clear and re-enable the PHY's interrupts before resuming */ + err = phy_clear_interrupt(phydev); + + if (err) + break; + + err = phy_config_interrupt(phydev, + PHY_INTERRUPT_ENABLED); + + if (err) + break; + + if (AUTONEG_ENABLE == phydev->autoneg) { + err = phy_aneg_done(phydev); + if (err < 0) + break; + + /* err > 0 if AN is done. 
+ * Otherwise, it's 0, and we're + * still waiting for AN */ + if (err > 0) { + phydev->state = PHY_RUNNING; + } else { + phydev->state = PHY_AN; + phydev->link_timeout = PHY_AN_TIMEOUT; + } + } else + phydev->state = PHY_RUNNING; + break; + } + + spin_unlock(&phydev->lock); + /* phy_start_aneg() and phy_error() take phydev->lock themselves */ + if (needs_aneg) + err = phy_start_aneg(phydev); + + if (err < 0) + phy_error(phydev); + /* Re-arm for the next tick of the state machine */ + mod_timer(&phydev->phy_timer, jiffies + PHY_STATE_TIME * HZ); +} + +#endif /* CONFIG_PHYCONTROL */ diff --git a/drivers/net/phy/phy.c.orig b/drivers/net/phy/phy.c.orig new file mode 100644 index 000000000000..6af17cec9ace --- /dev/null +++ b/drivers/net/phy/phy.c.orig @@ -0,0 +1,860 @@ +/* + * drivers/net/phy/phy.c + * + * Framework for configuring and reading PHY devices + * Based on code in sungem_phy.c and gianfar_phy.c + * + * Author: Andy Fleming + * + * Copyright (c) 2004 Freescale Semiconductor, Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + * + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +static void phy_change(void *data); +static void phy_timer(unsigned long data); + +/* Convenience function to print out the current phy status: + * the device's bus_id and whether the link is Up or Down, + * followed by the speed and duplex when the link is up. + */ +void phy_print_status(struct phy_device *phydev) +{ + pr_info("%s: Link is %s", phydev->dev.bus_id, + phydev->link ? "Up" : "Down"); + if (phydev->link) + printk(" - %d/%s", phydev->speed, + DUPLEX_FULL == phydev->duplex ? + "Full" : "Half"); + + printk("\n"); +} +EXPORT_SYMBOL(phy_print_status); + + +/* Convenience functions for reading/writing a given PHY + * register. 
They MUST NOT be called from interrupt context, + * because the bus read/write functions may wait for an interrupt + * to conclude the operation. */ +int phy_read(struct phy_device *phydev, u16 regnum) +{ + int retval; + struct mii_bus *bus = phydev->bus; + + spin_lock_bh(&bus->mdio_lock); + retval = bus->read(bus, phydev->addr, regnum); + spin_unlock_bh(&bus->mdio_lock); + + return retval; +} +EXPORT_SYMBOL(phy_read); + +int phy_write(struct phy_device *phydev, u16 regnum, u16 val) +{ + int err; + struct mii_bus *bus = phydev->bus; + + spin_lock_bh(&bus->mdio_lock); + err = bus->write(bus, phydev->addr, regnum, val); + spin_unlock_bh(&bus->mdio_lock); + + return err; +} +EXPORT_SYMBOL(phy_write); + +/* phy_clear_interrupt + * + * description: Calls the PHY driver's ack_interrupt hook, if the + * driver provides one, and returns its result. Returns 0 when + * there is no hook. + */ +int phy_clear_interrupt(struct phy_device *phydev) +{ + int err = 0; + + if (phydev->drv->ack_interrupt) + err = phydev->drv->ack_interrupt(phydev); + + return err; +} + +/* phy_config_interrupt + * + * description: Records the requested setting in + * phydev->interrupts, then calls the PHY driver's config_intr + * hook, if the driver provides one. Returns the hook's result, + * or 0 when there is no hook. + */ +int phy_config_interrupt(struct phy_device *phydev, u32 interrupts) +{ + int err = 0; + + phydev->interrupts = interrupts; + if (phydev->drv->config_intr) + err = phydev->drv->config_intr(phydev); + + return err; +} + + +/* phy_aneg_done + * + * description: Reads the status register and returns 0 if + * auto-negotiation is incomplete, or the negative error code + * from phy_read() if the read failed. + * Returns BMSR_ANEGCOMPLETE if auto-negotiation is done. + */ +static inline int phy_aneg_done(struct phy_device *phydev) +{ + int retval; + + retval = phy_read(phydev, MII_BMSR); + + return (retval < 0) ? retval : (retval & BMSR_ANEGCOMPLETE); +} + +/* phy_start_aneg + * + * description: Calls the PHY driver's config_aneg, and then + * sets the PHY state to PHY_AN if auto-negotiation is enabled, + * and to PHY_FORCING if auto-negotiation is disabled. Unless + * the PHY is currently HALTED. 
+ */ +int phy_start_aneg(struct phy_device *phydev) +{ + int err; + + spin_lock(&phydev->lock); + /* Forced mode: clamp speed/duplex to a supported setting first */ + if (AUTONEG_DISABLE == phydev->autoneg) + phy_sanitize_settings(phydev); + + err = phydev->drv->config_aneg(phydev); + + if (err < 0) + goto out_unlock; + /* A HALTED PHY is configured but keeps its state */ + if (phydev->state != PHY_HALTED) { + if (AUTONEG_ENABLE == phydev->autoneg) { + phydev->state = PHY_AN; + phydev->link_timeout = PHY_AN_TIMEOUT; + } else { + phydev->state = PHY_FORCING; + phydev->link_timeout = PHY_FORCE_TIMEOUT; + } + } + +out_unlock: + spin_unlock(&phydev->lock); + return err; +} +EXPORT_SYMBOL(phy_start_aneg); + + +/* A structure for mapping a particular speed and duplex + * combination to a particular SUPPORTED and ADVERTISED value */ +struct phy_setting { + int speed; + int duplex; + u32 setting; +}; + +/* A mapping of all SUPPORTED settings to speed/duplex. + * Entries are listed from fastest to slowest, with FULL before + * HALF at each speed. */ +static struct phy_setting settings[] = { + { + .speed = 10000, + .duplex = DUPLEX_FULL, + .setting = SUPPORTED_10000baseT_Full, + }, + { + .speed = SPEED_1000, + .duplex = DUPLEX_FULL, + .setting = SUPPORTED_1000baseT_Full, + }, + { + .speed = SPEED_1000, + .duplex = DUPLEX_HALF, + .setting = SUPPORTED_1000baseT_Half, + }, + { + .speed = SPEED_100, + .duplex = DUPLEX_FULL, + .setting = SUPPORTED_100baseT_Full, + }, + { + .speed = SPEED_100, + .duplex = DUPLEX_HALF, + .setting = SUPPORTED_100baseT_Half, + }, + { + .speed = SPEED_10, + .duplex = DUPLEX_FULL, + .setting = SUPPORTED_10baseT_Full, + }, + { + .speed = SPEED_10, + .duplex = DUPLEX_HALF, + .setting = SUPPORTED_10baseT_Half, + }, +}; + +#define MAX_NUM_SETTINGS (sizeof(settings)/sizeof(struct phy_setting)) + +/* phy_find_setting + * + * description: Searches the settings array for the setting which + * matches the desired speed and duplex, and returns the index + * of that setting. Returns the index of the last setting if + * none of the others match. 
+ */ +static inline int phy_find_setting(int speed, int duplex) +{ + int idx = 0; + + while (idx < MAX_NUM_SETTINGS && + (settings[idx].speed != speed || + settings[idx].duplex != duplex)) + idx++; + + return idx < MAX_NUM_SETTINGS ? idx : MAX_NUM_SETTINGS - 1; +} + +/* phy_find_valid + * idx: The first index in settings[] to search + * features: A mask of the valid settings + * + * description: Returns the index of the first setting at or + * after idx (i.e. the same speed/duplex or a slower one) whose + * bit is set in features. Returns the index of the last setting + * if nothing else matches, whether or not that one is in + * features. + */ +static inline int phy_find_valid(int idx, u32 features) +{ + while (idx < MAX_NUM_SETTINGS && !(settings[idx].setting & features)) + idx++; + + return idx < MAX_NUM_SETTINGS ? idx : MAX_NUM_SETTINGS - 1; +} + +/* phy_sanitize_settings + * + * description: Make sure the PHY is set to supported speeds and + * duplexes. Drop down by one in this order: 10000/FULL, + * 1000/FULL, 1000/HALF, 100/FULL, 100/HALF, 10/FULL, 10/HALF. + * Also forces autoneg off when the PHY does not support it. + */ +void phy_sanitize_settings(struct phy_device *phydev) +{ + u32 features = phydev->supported; + int idx; + + /* Sanitize settings based on PHY capabilities */ + if ((features & SUPPORTED_Autoneg) == 0) + phydev->autoneg = AUTONEG_DISABLE; + + idx = phy_find_valid(phy_find_setting(phydev->speed, phydev->duplex), + features); + + phydev->speed = settings[idx].speed; + phydev->duplex = settings[idx].duplex; +} +EXPORT_SYMBOL(phy_sanitize_settings); + +/* phy_force_reduction + * + * description: Reduces the speed/duplex settings by + * one notch. The order is so: + * 1000/FULL, 1000/HALF, 100/FULL, 100/HALF, + * 10/FULL, 10/HALF. The function bottoms out at 10/HALF. 
+ */ +static void phy_force_reduction(struct phy_device *phydev) +{ + int idx; + + idx = phy_find_setting(phydev->speed, phydev->duplex); + /* Step past the current entry; phy_find_valid() then picks the + * next supported entry, or the last one if none is left */ + idx++; + + idx = phy_find_valid(idx, phydev->supported); + + phydev->speed = settings[idx].speed; + phydev->duplex = settings[idx].duplex; + + pr_info("Trying %d/%s\n", phydev->speed, + DUPLEX_FULL == phydev->duplex ? + "FULL" : "HALF"); +} + +/* phy_ethtool_sset: + * A generic ethtool sset function. Handles all the details + * + * A few notes about parameter checking: + * - We don't set port or transceiver, so we don't care what they + * were set to. + * - phy_start_aneg() will make sure forced settings are sane, and + * choose the next best ones from the ones selected, so we don't + * care if ethtool tries to give us bad values + * + * Returns -EINVAL when cmd fails the checks below and 0 + * otherwise; the result of phy_start_aneg() is not propagated. + * cmd->advertising is masked in place with phydev->supported. + */ +int phy_ethtool_sset(struct phy_device *phydev, struct ethtool_cmd *cmd) +{ + if (cmd->phy_address != phydev->addr) + return -EINVAL; + + /* We make sure that we don't pass unsupported + * values in to the PHY */ + cmd->advertising &= phydev->supported; + + /* Verify the settings we care about. 
*/ + if (cmd->autoneg != AUTONEG_ENABLE && cmd->autoneg != AUTONEG_DISABLE) + return -EINVAL; + + if (cmd->autoneg == AUTONEG_ENABLE && cmd->advertising == 0) + return -EINVAL; + + if (cmd->autoneg == AUTONEG_DISABLE + && ((cmd->speed != SPEED_1000 + && cmd->speed != SPEED_100 + && cmd->speed != SPEED_10) + || (cmd->duplex != DUPLEX_HALF + && cmd->duplex != DUPLEX_FULL))) + return -EINVAL; + + phydev->autoneg = cmd->autoneg; + + phydev->speed = cmd->speed; + + phydev->advertising = cmd->advertising; + + if (AUTONEG_ENABLE == cmd->autoneg) + phydev->advertising |= ADVERTISED_Autoneg; + else + phydev->advertising &= ~ADVERTISED_Autoneg; + + phydev->duplex = cmd->duplex; + + /* Restart the PHY */ + phy_start_aneg(phydev); + + return 0; +} +/* phy_ethtool_gset: + * A generic ethtool gset function. Copies the cached supported, + * advertising, speed, duplex, address and autoneg values from + * phydev into cmd, and reports the port as PORT_MII and the + * transceiver as XCVR_EXTERNAL. Always returns 0. + */ +int phy_ethtool_gset(struct phy_device *phydev, struct ethtool_cmd *cmd) +{ + cmd->supported = phydev->supported; + + cmd->advertising = phydev->advertising; + + cmd->speed = phydev->speed; + cmd->duplex = phydev->duplex; + cmd->port = PORT_MII; + cmd->phy_address = phydev->addr; + cmd->transceiver = XCVR_EXTERNAL; + cmd->autoneg = phydev->autoneg; + + return 0; +} + + +/* Note that this function is currently incompatible with the + * PHYCONTROL layer. It changes registers without regard to + * current state. 
Use at own risk + */ +int phy_mii_ioctl(struct phy_device *phydev, + struct mii_ioctl_data *mii_data, int cmd) +{ + u16 val = mii_data->val_in; + + switch (cmd) { + case SIOCGMIIPHY: + mii_data->phy_id = phydev->addr; + break; + case SIOCGMIIREG: + mii_data->val_out = phy_read(phydev, mii_data->reg_num); + break; + + case SIOCSMIIREG: + if (!capable(CAP_NET_ADMIN)) + return -EPERM; + + if (mii_data->phy_id == phydev->addr) { + switch(mii_data->reg_num) { + case MII_BMCR: + if (val & (BMCR_RESET|BMCR_ANENABLE)) + phydev->autoneg = AUTONEG_DISABLE; + else + phydev->autoneg = AUTONEG_ENABLE; + if ((!phydev->autoneg) && (val & BMCR_FULLDPLX)) + phydev->duplex = DUPLEX_FULL; + else + phydev->duplex = DUPLEX_HALF; + break; + case MII_ADVERTISE: + phydev->advertising = val; + break; + default: + /* do nothing */ + break; + } + } + + phy_write(phydev, mii_data->reg_num, val); + + if (mii_data->reg_num == MII_BMCR + && val & BMCR_RESET + && phydev->drv->config_init) + phydev->drv->config_init(phydev); + break; + } + + return 0; +} + +/* phy_start_machine: + * + * description: The PHY infrastructure can run a state machine + * which tracks whether the PHY is starting up, negotiating, + * etc. This function starts the timer which tracks the state + * of the PHY. If you want to be notified when the state + * changes, pass in the callback, otherwise, pass NULL. If you + * want to maintain your own state machine, do not call this + * function. */ +void phy_start_machine(struct phy_device *phydev, + void (*handler)(struct net_device *)) +{ + phydev->adjust_state = handler; + + init_timer(&phydev->phy_timer); + phydev->phy_timer.function = &phy_timer; + phydev->phy_timer.data = (unsigned long) phydev; + mod_timer(&phydev->phy_timer, jiffies + HZ); +} + +/* phy_stop_machine + * + * description: Stops the state machine timer, sets the state to + * UP (unless it wasn't up yet), and then frees the interrupt, + * if it is in use. This function must be called BEFORE + * phy_detach. 
+ */ +void phy_stop_machine(struct phy_device *phydev) +{ + del_timer_sync(&phydev->phy_timer); + /* Any state beyond PHY_UP drops back to PHY_UP */ + spin_lock(&phydev->lock); + if (phydev->state > PHY_UP) + phydev->state = PHY_UP; + spin_unlock(&phydev->lock); + + if (phydev->irq != PHY_POLL) + phy_stop_interrupts(phydev); + + phydev->adjust_state = NULL; +} + +#ifdef CONFIG_PHYCONTROL +/* phy_error: + * + * Moves the PHY to the HALTED state in response to a read + * or write error. Only phydev->state is changed here, under + * phydev->lock. + * Must not be called from interrupt context, or while the + * phydev->lock is held. + */ +void phy_error(struct phy_device *phydev) +{ + spin_lock(&phydev->lock); + phydev->state = PHY_HALTED; + spin_unlock(&phydev->lock); +} + +/* phy_interrupt + * + * description: When a PHY interrupt occurs, the handler disables + * interrupts, and schedules a work task to clear the interrupt. + * Always returns IRQ_HANDLED. + */ +static irqreturn_t phy_interrupt(int irq, void *phy_dat, struct pt_regs *regs) +{ + struct phy_device *phydev = phy_dat; + + /* The MDIO bus is not allowed to be written in interrupt + * context, so we need to disable the irq here. A work + * queue will write the PHY to disable and clear the + * interrupt, and then reenable the irq line. 
*/ + disable_irq_nosync(irq); + + schedule_work(&phydev->phy_queue); + + return IRQ_HANDLED; +} + +/* Enable the interrupts from the PHY side: clear any pending + * interrupt first, then configure the PHY with + * PHY_INTERRUPT_ENABLED. Returns the negative error from the + * clear step, otherwise the result of the configure step. */ +int phy_enable_interrupts(struct phy_device *phydev) +{ + int err; + + err = phy_clear_interrupt(phydev); + + if (err < 0) + return err; + + err = phy_config_interrupt(phydev, PHY_INTERRUPT_ENABLED); + + return err; +} + +/* Disable the PHY interrupts from the PHY side, then clear any + * pending interrupt. If either step returns non-zero, + * phy_error() is called and that value is returned; otherwise + * returns 0. */ +int phy_disable_interrupts(struct phy_device *phydev) +{ + int err; + + /* Disable PHY interrupts */ + err = phy_config_interrupt(phydev, PHY_INTERRUPT_DISABLED); + + if (err) + goto phy_err; + + /* Clear the interrupt */ + err = phy_clear_interrupt(phydev); + + if (err) + goto phy_err; + + return 0; + +phy_err: + phy_error(phydev); + + return err; +} + +/* phy_start_interrupts + * + * description: Request the interrupt for the given PHY. If + * this fails, then we set irq to PHY_POLL. + * Otherwise, we enable the interrupts in the PHY. + * Returns 0 on success. + * This should only be called with a valid IRQ number. 
+ */ +int phy_start_interrupts(struct phy_device *phydev) +{ + int err = 0; + + INIT_WORK(&phydev->phy_queue, phy_change, phydev); + + if (request_irq(phydev->irq, phy_interrupt, + SA_SHIRQ, + "phy_interrupt", + phydev) < 0) { + printk(KERN_WARNING "%s: Can't get IRQ %d (PHY)\n", + phydev->bus->name, + phydev->irq); + phydev->irq = PHY_POLL; + return 0; + } + + err = phy_enable_interrupts(phydev); + + return err; +} +EXPORT_SYMBOL(phy_start_interrupts); +/* phy_stop_interrupts + * + * description: Disables the PHY's interrupts and frees the irq. + * The irq is freed even when disabling fails; the result of + * phy_disable_interrupts() is returned. + */ +int phy_stop_interrupts(struct phy_device *phydev) +{ + int err; + + err = phy_disable_interrupts(phydev); + + if (err) + phy_error(phydev); + + free_irq(phydev->irq, phydev); + + return err; +} +EXPORT_SYMBOL(phy_stop_interrupts); + + +/* Scheduled by the phy_interrupt/timer to handle PHY changes. + * Disables and clears the PHY's interrupt, moves a RUNNING or + * NOLINK PHY to CHANGELINK, re-enables the irq line and then + * the PHY's interrupts. If that last step fails the irq line is + * disabled again; any failure ends in phy_error(). */ +static void phy_change(void *data) +{ + int err; + struct phy_device *phydev = data; + + err = phy_disable_interrupts(phydev); + + if (err) + goto phy_err; + + spin_lock(&phydev->lock); + if ((PHY_RUNNING == phydev->state) || (PHY_NOLINK == phydev->state)) + phydev->state = PHY_CHANGELINK; + spin_unlock(&phydev->lock); + + enable_irq(phydev->irq); + + /* Reenable interrupts */ + err = phy_config_interrupt(phydev, PHY_INTERRUPT_ENABLED); + + if (err) + goto irq_enable_err; + + return; + +irq_enable_err: + disable_irq(phydev->irq); +phy_err: + phy_error(phydev); +} + +/* Bring down the PHY link, and stop checking the status. + * Does nothing if the PHY is already HALTED. Otherwise, when + * interrupts are in use, the PHY's interrupt is cleared and + * disabled (results are not checked), and the state becomes + * PHY_HALTED. All of this runs under phydev->lock. */ +void phy_stop(struct phy_device *phydev) +{ + spin_lock(&phydev->lock); + + if (PHY_HALTED == phydev->state) + goto out_unlock; + + if (phydev->irq != PHY_POLL) { + /* Clear any pending interrupts */ + phy_clear_interrupt(phydev); + + /* Disable PHY Interrupts */ + phy_config_interrupt(phydev, PHY_INTERRUPT_DISABLED); + } + + phydev->state = PHY_HALTED; + +out_unlock: + spin_unlock(&phydev->lock); +} + + +/* phy_start + * + * description: Indicates the attached device's readiness to + * handle PHY-related work. 
Used during startup to start the + * PHY, and after a call to phy_stop() to resume operation. + * Also used to indicate the MDIO bus has cleared an error + * condition. + */ +void phy_start(struct phy_device *phydev) +{ + spin_lock(&phydev->lock); + + switch (phydev->state) { + case PHY_STARTING: + phydev->state = PHY_PENDING; + break; + case PHY_READY: + phydev->state = PHY_UP; + break; + case PHY_HALTED: + phydev->state = PHY_RESUMING; + default: + break; + } + spin_unlock(&phydev->lock); +} +EXPORT_SYMBOL(phy_stop); +EXPORT_SYMBOL(phy_start); + +/* PHY timer which handles the state machine */ +static void phy_timer(unsigned long data) +{ + struct phy_device *phydev = (struct phy_device *)data; + int needs_aneg = 0; + int err = 0; + + spin_lock(&phydev->lock); + + if (phydev->adjust_state) + phydev->adjust_state(phydev->attached_dev); + + switch(phydev->state) { + case PHY_DOWN: + case PHY_STARTING: + case PHY_READY: + case PHY_PENDING: + break; + case PHY_UP: + needs_aneg = 1; + + phydev->link_timeout = PHY_AN_TIMEOUT; + + break; + case PHY_AN: + /* Check if negotiation is done. 
Break + * if there's an error */ + err = phy_aneg_done(phydev); + if (err < 0) + break; + + /* If auto-negotiation is done, we change to + * either RUNNING, or NOLINK */ + if (err > 0) { + err = phy_read_status(phydev); + + if (err) + break; + + if (phydev->link) { + phydev->state = PHY_RUNNING; + netif_carrier_on(phydev->attached_dev); + } else { + phydev->state = PHY_NOLINK; + netif_carrier_off(phydev->attached_dev); + } + + phydev->adjust_link(phydev->attached_dev); + + } else if (0 == phydev->link_timeout--) { + /* The counter expired, so either we + * switch to forced mode, or the + * magic_aneg bit exists, and we try aneg + * again */ + if (!(phydev->drv->flags & PHY_HAS_MAGICANEG)) { + int idx; + + /* We'll start from the + * fastest speed, and work + * our way down */ + idx = phy_find_valid(0, + phydev->supported); + + phydev->speed = settings[idx].speed; + phydev->duplex = settings[idx].duplex; + + phydev->autoneg = AUTONEG_DISABLE; + phydev->state = PHY_FORCING; + phydev->link_timeout = + PHY_FORCE_TIMEOUT; + + pr_info("Trying %d/%s\n", + phydev->speed, + DUPLEX_FULL == + phydev->duplex ? 
+ "FULL" : "HALF"); + } + + needs_aneg = 1; + } + break; + case PHY_NOLINK: + err = phy_read_status(phydev); + + if (err) + break; + + if (phydev->link) { + phydev->state = PHY_RUNNING; + netif_carrier_on(phydev->attached_dev); + phydev->adjust_link(phydev->attached_dev); + } + break; + case PHY_FORCING: + err = phy_read_status(phydev); + + if (err) + break; + + if (phydev->link) { + phydev->state = PHY_RUNNING; + netif_carrier_on(phydev->attached_dev); + } else { + if (0 == phydev->link_timeout--) { + phy_force_reduction(phydev); + needs_aneg = 1; + } + } + + phydev->adjust_link(phydev->attached_dev); + break; + case PHY_RUNNING: + /* Only register a CHANGE if we are + * polling */ + if (PHY_POLL == phydev->irq) + phydev->state = PHY_CHANGELINK; + break; + case PHY_CHANGELINK: + err = phy_read_status(phydev); + + if (err) + break; + + if (phydev->link) { + phydev->state = PHY_RUNNING; + netif_carrier_on(phydev->attached_dev); + } else { + phydev->state = PHY_NOLINK; + netif_carrier_off(phydev->attached_dev); + } + + phydev->adjust_link(phydev->attached_dev); + + if (PHY_POLL != phydev->irq) + err = phy_config_interrupt(phydev, + PHY_INTERRUPT_ENABLED); + break; + case PHY_HALTED: + if (phydev->link) { + phydev->link = 0; + netif_carrier_off(phydev->attached_dev); + phydev->adjust_link(phydev->attached_dev); + } + break; + case PHY_RESUMING: + + err = phy_clear_interrupt(phydev); + + if (err) + break; + + err = phy_config_interrupt(phydev, + PHY_INTERRUPT_ENABLED); + + if (err) + break; + + if (AUTONEG_ENABLE == phydev->autoneg) { + err = phy_aneg_done(phydev); + if (err < 0) + break; + + /* err > 0 if AN is done. 
+ * Otherwise, it's 0, and we're + * still waiting for AN */ + if (err > 0) { + phydev->state = PHY_RUNNING; + } else { + phydev->state = PHY_AN; + phydev->link_timeout = PHY_AN_TIMEOUT; + } + } else + phydev->state = PHY_RUNNING; + break; + } + + spin_unlock(&phydev->lock); + + if (needs_aneg) + err = phy_start_aneg(phydev); + + if (err < 0) + phy_error(phydev); + + mod_timer(&phydev->phy_timer, jiffies + PHY_STATE_TIME * HZ); +} + +#endif /* CONFIG_PHYCONTROL */ diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c new file mode 100644 index 000000000000..f0595af4c837 --- /dev/null +++ b/drivers/net/phy/phy_device.c @@ -0,0 +1,682 @@ +/* + * drivers/net/phy/phy_device.c + * + * Framework for finding and configuring PHYs. + * Also contains generic PHY driver + * + * Author: Andy Fleming + * + * Copyright (c) 2004 Freescale Semiconductor, Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + * + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +/* get_phy_device + * + * description: Reads the ID registers of the PHY at addr on the + * bus, then allocates and returns the phy_device to + * represent it. 
+ */ +struct phy_device * get_phy_device(struct mii_bus *bus, int addr) +{ + int phy_reg; + u32 phy_id; + struct phy_device *dev = NULL; + + /* Grab the bits from PHYIR1, and put them + * in the upper half */ + phy_reg = bus->read(bus, addr, MII_PHYSID1); + + if (phy_reg < 0) + return ERR_PTR(phy_reg); + + phy_id = (phy_reg & 0xffff) << 16; + + /* Grab the bits from PHYIR2, and put them in the lower half */ + phy_reg = bus->read(bus, addr, MII_PHYSID2); + + if (phy_reg < 0) + return ERR_PTR(phy_reg); + + phy_id |= (phy_reg & 0xffff); + + /* If the phy_id is all Fs, there is no device there */ + if (0xffffffff == phy_id) + return NULL; + + /* Otherwise, we allocate the device, and initialize the + * default values */ + dev = kcalloc(1, sizeof(*dev), GFP_KERNEL); + + if (NULL == dev) + return ERR_PTR(-ENOMEM); + + dev->speed = 0; + dev->duplex = -1; + dev->pause = dev->asym_pause = 0; + dev->link = 1; + + dev->autoneg = AUTONEG_ENABLE; + + dev->addr = addr; + dev->phy_id = phy_id; + dev->bus = bus; + + dev->state = PHY_DOWN; + + spin_lock_init(&dev->lock); + + return dev; +} + +/* phy_prepare_link: + * + * description: Tells the PHY infrastructure to handle the + * gory details on monitoring link status (whether through + * polling or an interrupt), and to call back to the + * connected device driver when the link status changes. + * If you want to monitor your own link state, don't call + * this function */ +void phy_prepare_link(struct phy_device *phydev, + void (*handler)(struct net_device *)) +{ + phydev->adjust_link = handler; +} + +#ifdef CONFIG_PHYCONTROL +/* phy_connect: + * + * description: Convenience function for connecting ethernet + * devices to PHY devices. The default behavior is for + * the PHY infrastructure to handle everything, and only notify + * the connected driver when the link status changes. 
If you + * don't want, or can't use the provided functionality, you may + * choose to call only the subset of functions which provide + * the desired functionality. + */ +struct phy_device * phy_connect(struct net_device *dev, const char *phy_id, + void (*handler)(struct net_device *), u32 flags) +{ + struct phy_device *phydev; + + phydev = phy_attach(dev, phy_id, flags); + + if (IS_ERR(phydev)) + return phydev; + + phy_prepare_link(phydev, handler); + + phy_start_machine(phydev, NULL); + + if (phydev->irq > 0) + phy_start_interrupts(phydev); + + return phydev; +} +EXPORT_SYMBOL(phy_connect); + +void phy_disconnect(struct phy_device *phydev) +{ + if (phydev->irq > 0) + phy_stop_interrupts(phydev); + + phy_stop_machine(phydev); + + phydev->adjust_link = NULL; + + phy_detach(phydev); +} +EXPORT_SYMBOL(phy_disconnect); + +#endif /* CONFIG_PHYCONTROL */ + +/* phy_attach: + * + * description: Called by drivers to attach to a particular PHY + * device. The phy_device is found, and properly hooked up + * to the phy_driver. If no driver is attached, then the + * genphy_driver is used. The phy_device is given a ptr to + * the attaching device, and given a callback for link status + * change. The phy_device is returned to the attaching + * driver. + */ +static int phy_compare_id(struct device *dev, void *data) +{ + return strcmp((char *)data, dev->bus_id) ? 0 : 1; +} + +struct phy_device *phy_attach(struct net_device *dev, + const char *phy_id, u32 flags) +{ + struct bus_type *bus = &mdio_bus_type; + struct phy_device *phydev; + struct device *d; + + /* Search the list of PHY devices on the mdio bus for the + * PHY with the requested name */ + d = bus_find_device(bus, NULL, (void *)phy_id, phy_compare_id); + + if (d) { + phydev = to_phy_device(d); + } else { + printk(KERN_ERR "%s not found\n", phy_id); + return ERR_PTR(-ENODEV); + } + + /* Assume that if there is no driver, that it doesn't + * exist, and we should use the genphy driver. 
*/ + if (NULL == d->driver) { + int err; + down_write(&d->bus->subsys.rwsem); + d->driver = &genphy_driver.driver; + + err = d->driver->probe(d); + + if (err < 0) + return ERR_PTR(err); + + device_bind_driver(d); + up_write(&d->bus->subsys.rwsem); + } + + if (phydev->attached_dev) { + printk(KERN_ERR "%s: %s already attached\n", + dev->name, phy_id); + return ERR_PTR(-EBUSY); + } + + phydev->attached_dev = dev; + + phydev->dev_flags = flags; + + return phydev; +} +EXPORT_SYMBOL(phy_attach); + +void phy_detach(struct phy_device *phydev) +{ + phydev->attached_dev = NULL; + + /* If the device had no specific driver before (i.e. - it + * was using the generic driver), we unbind the device + * from the generic driver so that there's a chance a + * real driver could be loaded */ + if (phydev->dev.driver == &genphy_driver.driver) { + down_write(&phydev->dev.bus->subsys.rwsem); + device_release_driver(&phydev->dev); + up_write(&phydev->dev.bus->subsys.rwsem); + } +} +EXPORT_SYMBOL(phy_detach); + + +/* Generic PHY support and helper functions */ + +/* genphy_config_advert + * + * description: Writes MII_ADVERTISE with the appropriate values, + * after sanitizing the values to make sure we only advertise + * what is supported + */ +int genphy_config_advert(struct phy_device *phydev) +{ + u32 advertise; + int adv; + int err; + + /* Only allow advertising what + * this PHY supports */ + phydev->advertising &= phydev->supported; + advertise = phydev->advertising; + + /* Setup standard advertisement */ + adv = phy_read(phydev, MII_ADVERTISE); + + if (adv < 0) + return adv; + + adv &= ~(ADVERTISE_ALL | ADVERTISE_100BASE4 | ADVERTISE_PAUSE_CAP | + ADVERTISE_PAUSE_ASYM); + if (advertise & ADVERTISED_10baseT_Half) + adv |= ADVERTISE_10HALF; + if (advertise & ADVERTISED_10baseT_Full) + adv |= ADVERTISE_10FULL; + if (advertise & ADVERTISED_100baseT_Half) + adv |= ADVERTISE_100HALF; + if (advertise & ADVERTISED_100baseT_Full) + adv |= ADVERTISE_100FULL; + if (advertise & 
ADVERTISED_Pause) + adv |= ADVERTISE_PAUSE_CAP; + if (advertise & ADVERTISED_Asym_Pause) + adv |= ADVERTISE_PAUSE_ASYM; + + err = phy_write(phydev, MII_ADVERTISE, adv); + + if (err < 0) + return err; + + /* Configure gigabit if it's supported */ + if (phydev->supported & (SUPPORTED_1000baseT_Half | + SUPPORTED_1000baseT_Full)) { + adv = phy_read(phydev, MII_CTRL1000); + + if (adv < 0) + return adv; + + adv &= ~(ADVERTISE_1000FULL | ADVERTISE_1000HALF); + /* advertise holds ADVERTISED_* bits, so test it against + * the ADVERTISED_* names like the 10/100 modes above */ + if (advertise & ADVERTISED_1000baseT_Half) + adv |= ADVERTISE_1000HALF; + if (advertise & ADVERTISED_1000baseT_Full) + adv |= ADVERTISE_1000FULL; + err = phy_write(phydev, MII_CTRL1000, adv); + + if (err < 0) + return err; + } + + /* Success: report 0 rather than leaking the last register + * value written; errors were returned as negatives above */ + return 0; +} +EXPORT_SYMBOL(genphy_config_advert); + +/* genphy_setup_forced + * + * description: Configures MII_BMCR to force speed/duplex + * to the values in phydev. Assumes that the values are valid. + * Please see phy_sanitize_settings() */ +int genphy_setup_forced(struct phy_device *phydev) +{ + int ctl = BMCR_RESET; + + phydev->pause = phydev->asym_pause = 0; + + if (SPEED_1000 == phydev->speed) + ctl |= BMCR_SPEED1000; + else if (SPEED_100 == phydev->speed) + ctl |= BMCR_SPEED100; + + if (DUPLEX_FULL == phydev->duplex) + ctl |= BMCR_FULLDPLX; + + ctl = phy_write(phydev, MII_BMCR, ctl); + + if (ctl < 0) + return ctl; + + /* We just reset the device, so we'd better configure any + * settings the PHY requires to operate */ + if (phydev->drv->config_init) + ctl = phydev->drv->config_init(phydev); + + return ctl; +} + + +/* Enable and Restart Autonegotiation */ +int genphy_restart_aneg(struct phy_device *phydev) +{ + int ctl; + + ctl = phy_read(phydev, MII_BMCR); + + if (ctl < 0) + return ctl; + + ctl |= (BMCR_ANENABLE | BMCR_ANRESTART); + + /* Don't isolate the PHY if we're negotiating */ + ctl &= ~(BMCR_ISOLATE); + + ctl = phy_write(phydev, MII_BMCR, ctl); + + return ctl; +} + + +/* genphy_config_aneg + * + * description: If auto-negotiation is enabled, we configure the + *
advertising, and then restart auto-negotiation. If it is not + * enabled, then we write the BMCR + */ +int genphy_config_aneg(struct phy_device *phydev) +{ + int err = 0; + + if (AUTONEG_ENABLE == phydev->autoneg) { + err = genphy_config_advert(phydev); + + if (err < 0) + return err; + + err = genphy_restart_aneg(phydev); + } else + err = genphy_setup_forced(phydev); + + return err; +} +EXPORT_SYMBOL(genphy_config_aneg); + +/* genphy_update_link + * + * description: Update the value in phydev->link to reflect the + * current link value. In order to do this, we need to read + * the status register twice, keeping the second value + */ +int genphy_update_link(struct phy_device *phydev) +{ + int status; + + /* Do a fake read */ + status = phy_read(phydev, MII_BMSR); + + if (status < 0) + return status; + + /* Read link and autonegotiation status */ + status = phy_read(phydev, MII_BMSR); + + if (status < 0) + return status; + + if ((status & BMSR_LSTATUS) == 0) + phydev->link = 0; + else + phydev->link = 1; + + return 0; +} + +/* genphy_read_status + * + * description: Check the link, then figure out the current state + * by comparing what we advertise with what the link partner + * advertises. Start by checking the gigabit possibilities, + * then move on to 10/100. 
+ */ +int genphy_read_status(struct phy_device *phydev) +{ + int adv; + int err; + int lpa; + int lpagb = 0; + + /* Update the link, but return if there + * was an error */ + err = genphy_update_link(phydev); + if (err) + return err; + + if (AUTONEG_ENABLE == phydev->autoneg) { + if (phydev->supported & (SUPPORTED_1000baseT_Half + | SUPPORTED_1000baseT_Full)) { + lpagb = phy_read(phydev, MII_STAT1000); + + if (lpagb < 0) + return lpagb; + + adv = phy_read(phydev, MII_CTRL1000); + + if (adv < 0) + return adv; + + lpagb &= adv << 2; + } + + lpa = phy_read(phydev, MII_LPA); + + if (lpa < 0) + return lpa; + + adv = phy_read(phydev, MII_ADVERTISE); + + if (adv < 0) + return adv; + + lpa &= adv; + + phydev->speed = SPEED_10; + phydev->duplex = DUPLEX_HALF; + phydev->pause = phydev->asym_pause = 0; + + if (lpagb & (LPA_1000FULL | LPA_1000HALF)) { + phydev->speed = SPEED_1000; + + if (lpagb & LPA_1000FULL) + phydev->duplex = DUPLEX_FULL; + } else if (lpa & (LPA_100FULL | LPA_100HALF)) { + phydev->speed = SPEED_100; + + if (lpa & LPA_100FULL) + phydev->duplex = DUPLEX_FULL; + } else + if (lpa & LPA_10FULL) + phydev->duplex = DUPLEX_FULL; + + if (phydev->duplex == DUPLEX_FULL){ + phydev->pause = lpa & LPA_PAUSE_CAP ? 1 : 0; + phydev->asym_pause = lpa & LPA_PAUSE_ASYM ? 
1 : 0; + } + } else { + int bmcr = phy_read(phydev, MII_BMCR); + if (bmcr < 0) + return bmcr; + + if (bmcr & BMCR_FULLDPLX) + phydev->duplex = DUPLEX_FULL; + else + phydev->duplex = DUPLEX_HALF; + + if (bmcr & BMCR_SPEED1000) + phydev->speed = SPEED_1000; + else if (bmcr & BMCR_SPEED100) + phydev->speed = SPEED_100; + else + phydev->speed = SPEED_10; + + phydev->pause = phydev->asym_pause = 0; + } + + return 0; +} +EXPORT_SYMBOL(genphy_read_status); + +static int genphy_config_init(struct phy_device *phydev) +{ + u32 val; + u32 features; + + /* For now, I'll claim that the generic driver supports + * all possible port types */ + features = (SUPPORTED_TP | SUPPORTED_MII + | SUPPORTED_AUI | SUPPORTED_FIBRE | + SUPPORTED_BNC); + + /* Do we support autonegotiation? */ + val = phy_read(phydev, MII_BMSR); + + if (val < 0) + return val; + + if (val & BMSR_ANEGCAPABLE) + features |= SUPPORTED_Autoneg; + + if (val & BMSR_100FULL) + features |= SUPPORTED_100baseT_Full; + if (val & BMSR_100HALF) + features |= SUPPORTED_100baseT_Half; + if (val & BMSR_10FULL) + features |= SUPPORTED_10baseT_Full; + if (val & BMSR_10HALF) + features |= SUPPORTED_10baseT_Half; + + if (val & BMSR_ESTATEN) { + val = phy_read(phydev, MII_ESTATUS); + + if (val < 0) + return val; + + if (val & ESTATUS_1000_TFULL) + features |= SUPPORTED_1000baseT_Full; + if (val & ESTATUS_1000_THALF) + features |= SUPPORTED_1000baseT_Half; + } + + phydev->supported = features; + phydev->advertising = features; + + return 0; +} + + +/* phy_probe + * + * description: Take care of setting up the phy_device structure, + * set the state to READY (the driver's init function should + * set it to STARTING if needed). + */ +static int phy_probe(struct device *dev) +{ + struct phy_device *phydev; + struct phy_driver *phydrv; + struct device_driver *drv; + int err = 0; + + phydev = to_phy_device(dev); + + /* Make sure the driver is held. + * XXX -- Is this correct? 
*/ + drv = get_driver(phydev->dev.driver); + phydrv = to_phy_driver(drv); + phydev->drv = phydrv; + + /* Disable the interrupt if the PHY doesn't support it */ + if (!(phydrv->flags & PHY_HAS_INTERRUPT)) + phydev->irq = PHY_POLL; + + spin_lock(&phydev->lock); + + /* Start out supporting everything. Eventually, + * a controller will attach, and may modify one + * or both of these values */ + phydev->supported = phydrv->features; + phydev->advertising = phydrv->features; + + /* Set the state to READY by default */ + phydev->state = PHY_READY; + + if (phydev->drv->probe) + err = phydev->drv->probe(phydev); + + spin_unlock(&phydev->lock); + + if (err < 0) + return err; + + if (phydev->drv->config_init) + err = phydev->drv->config_init(phydev); + + return err; +} + +static int phy_remove(struct device *dev) +{ + struct phy_device *phydev; + + phydev = to_phy_device(dev); + + spin_lock(&phydev->lock); + phydev->state = PHY_DOWN; + spin_unlock(&phydev->lock); + + if (phydev->drv->remove) + phydev->drv->remove(phydev); + + put_driver(dev->driver); + phydev->drv = NULL; + + return 0; +} + +int phy_driver_register(struct phy_driver *new_driver) +{ + int retval; + + memset(&new_driver->driver, 0, sizeof(new_driver->driver)); + new_driver->driver.name = new_driver->name; + new_driver->driver.bus = &mdio_bus_type; + new_driver->driver.probe = phy_probe; + new_driver->driver.remove = phy_remove; + + retval = driver_register(&new_driver->driver); + + if (retval) { + printk(KERN_ERR "%s: Error %d in registering driver\n", + new_driver->name, retval); + + return retval; + } + + pr_info("%s: Registered new driver\n", new_driver->name); + + return 0; +} +EXPORT_SYMBOL(phy_driver_register); + +void phy_driver_unregister(struct phy_driver *drv) +{ + driver_unregister(&drv->driver); +} +EXPORT_SYMBOL(phy_driver_unregister); + +static struct phy_driver genphy_driver = { + .phy_id = 0xffffffff, + .phy_id_mask = 0xffffffff, + .name = "Generic PHY", + .config_init = genphy_config_init, + 
.features = 0, + .config_aneg = genphy_config_aneg, + .read_status = genphy_read_status, + .driver = {.owner = THIS_MODULE, }, +}; + +static int __init genphy_init(void) +{ + return phy_driver_register(&genphy_driver); + +} + +static void __exit genphy_exit(void) +{ + phy_driver_unregister(&genphy_driver); +} + +module_init(genphy_init); +module_exit(genphy_exit); diff --git a/drivers/net/phy/qsemi.c b/drivers/net/phy/qsemi.c new file mode 100644 index 000000000000..d461ba457631 --- /dev/null +++ b/drivers/net/phy/qsemi.c @@ -0,0 +1,143 @@ +/* + * drivers/net/phy/qsemi.c + * + * Driver for Quality Semiconductor PHYs + * + * Author: Andy Fleming + * + * Copyright (c) 2004 Freescale Semiconductor, Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + * + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +/* ------------------------------------------------------------------------- */ +/* The Quality Semiconductor QS6612 is used on the RPX CLLF */ + +/* register definitions */ + +#define MII_QS6612_MCR 17 /* Mode Control Register */ +#define MII_QS6612_FTR 27 /* Factory Test Register */ +#define MII_QS6612_MCO 28 /* Misc. Control Register */ +#define MII_QS6612_ISR 29 /* Interrupt Source Register */ +#define MII_QS6612_IMR 30 /* Interrupt Mask Register */ +#define MII_QS6612_IMR_INIT 0x003a +#define MII_QS6612_PCR 31 /* 100BaseTx PHY Control Reg. 
*/ + +#define QS6612_PCR_AN_COMPLETE 0x1000 +#define QS6612_PCR_RLBEN 0x0200 +#define QS6612_PCR_DCREN 0x0100 +#define QS6612_PCR_4B5BEN 0x0040 +#define QS6612_PCR_TX_ISOLATE 0x0020 +#define QS6612_PCR_MLT3_DIS 0x0002 +#define QS6612_PCR_SCRM_DESCRM 0x0001 + +MODULE_DESCRIPTION("Quality Semiconductor PHY driver"); +MODULE_AUTHOR("Andy Fleming"); +MODULE_LICENSE("GPL"); + +/* Returns 0, unless there's a write error */ +static int qs6612_config_init(struct phy_device *phydev) +{ + /* The PHY powers up isolated on the RPX, + * so send a command to allow operation. + * XXX - My docs indicate this should be 0x0940 + * ...or something. The current value sets three + * reserved bits, bit 11, which specifies it should be + * set to one, bit 10, which specifies it should be set + * to 0, and bit 7, which doesn't specify. However, my + * docs are preliminary, and I will leave it like this + * until someone more knowledgeable corrects me or it. + * -- Andy Fleming + */ + return phy_write(phydev, MII_QS6612_PCR, 0x0dc0); +} + +/* Reads the ISR, BMSR and EXPANSION registers in turn (presumably + * read-to-clear; confirm against the QS6612 datasheet). Returns 0, + * or the negative error from the first failed read */ +static int qs6612_ack_interrupt(struct phy_device *phydev) +{ + int err; + + err = phy_read(phydev, MII_QS6612_ISR); + + if (err < 0) + return err; + + err = phy_read(phydev, MII_BMSR); + + if (err < 0) + return err; + + err = phy_read(phydev, MII_EXPANSION); + + if (err < 0) + return err; + + return 0; +} + +/* Writes MII_QS6612_IMR_INIT to the interrupt mask register when + * phydev->interrupts is PHY_INTERRUPT_ENABLED, otherwise writes 0. + * Returns the result of phy_write() */ +static int qs6612_config_intr(struct phy_device *phydev) +{ + int err; + if (phydev->interrupts == PHY_INTERRUPT_ENABLED) + err = phy_write(phydev, MII_QS6612_IMR, + MII_QS6612_IMR_INIT); + else + err = phy_write(phydev, MII_QS6612_IMR, 0); + + return err; + +} + +static struct phy_driver qs6612_driver = { + .phy_id = 0x00181440, + .name = "QS6612", + .phy_id_mask = 0xfffffff0, + .features = PHY_BASIC_FEATURES, + .flags = PHY_HAS_INTERRUPT, + .config_init = qs6612_config_init, + .config_aneg = genphy_config_aneg, + .read_status = genphy_read_status, + .ack_interrupt = qs6612_ack_interrupt, + .config_intr = qs6612_config_intr, + .driver = { .owner =
THIS_MODULE,}, +}; + +static int __init qs6612_init(void) +{ + return phy_driver_register(&qs6612_driver); +} + +static void __exit qs6612_exit(void) +{ + phy_driver_unregister(&qs6612_driver); +} + +module_init(qs6612_init); +module_exit(qs6612_exit); diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index a0ab26aab450..d7021c391b2b 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -408,6 +408,8 @@ struct ethtool_ops { #define SUPPORTED_FIBRE (1 << 10) #define SUPPORTED_BNC (1 << 11) #define SUPPORTED_10000baseT_Full (1 << 12) +#define SUPPORTED_Pause (1 << 13) +#define SUPPORTED_Asym_Pause (1 << 14) /* Indicates what features are advertised by the interface. */ #define ADVERTISED_10baseT_Half (1 << 0) @@ -423,6 +425,8 @@ struct ethtool_ops { #define ADVERTISED_FIBRE (1 << 10) #define ADVERTISED_BNC (1 << 11) #define ADVERTISED_10000baseT_Full (1 << 12) +#define ADVERTISED_Pause (1 << 13) +#define ADVERTISED_Asym_Pause (1 << 14) /* The following are all involved in forcing a particular link * mode for the device for setting things. When getting the diff --git a/include/linux/mii.h b/include/linux/mii.h index 374b615ea9ea..9b8d0476988a 100644 --- a/include/linux/mii.h +++ b/include/linux/mii.h @@ -22,6 +22,7 @@ #define MII_EXPANSION 0x06 /* Expansion register */ #define MII_CTRL1000 0x09 /* 1000BASE-T control */ #define MII_STAT1000 0x0a /* 1000BASE-T status */ +#define MII_ESTATUS 0x0f /* Extended Status */ #define MII_DCOUNTER 0x12 /* Disconnect counter */ #define MII_FCSCOUNTER 0x13 /* False carrier counter */ #define MII_NWAYTEST 0x14 /* N-way auto-neg test reg */ @@ -54,7 +55,10 @@ #define BMSR_ANEGCAPABLE 0x0008 /* Able to do auto-negotiation */ #define BMSR_RFAULT 0x0010 /* Remote fault detected */ #define BMSR_ANEGCOMPLETE 0x0020 /* Auto-negotiation complete */ -#define BMSR_RESV 0x07c0 /* Unused... */ +#define BMSR_RESV 0x00c0 /* Unused... 
*/ +#define BMSR_ESTATEN 0x0100 /* Extended Status in R15 */ +#define BMSR_100HALF2 0x0200 /* Can do 100BASE-T2 HDX */ +#define BMSR_100FULL2 0x0400 /* Can do 100BASE-T2 FDX */ #define BMSR_10HALF 0x0800 /* Can do 10mbps, half-duplex */ #define BMSR_10FULL 0x1000 /* Can do 10mbps, full-duplex */ #define BMSR_100HALF 0x2000 /* Can do 100mbps, half-duplex */ @@ -114,6 +118,9 @@ #define EXPANSION_MFAULTS 0x0010 /* Multiple faults detected */ #define EXPANSION_RESV 0xffe0 /* Unused... */ +#define ESTATUS_1000_TFULL 0x2000 /* Can do 1000BT Full */ +#define ESTATUS_1000_THALF 0x1000 /* Can do 1000BT Half */ + /* N-way test register. */ #define NWAYTEST_RESV1 0x00ff /* Unused... */ #define NWAYTEST_LOOPBACK 0x0100 /* Enable loopback for N-way */ diff --git a/include/linux/phy.h b/include/linux/phy.h new file mode 100644 index 000000000000..3404804dc22d --- /dev/null +++ b/include/linux/phy.h @@ -0,0 +1,378 @@ +/* + * include/linux/phy.h + * + * Framework and drivers for configuring and reading different PHYs + * Based on code in sungem_phy.c and gianfar_phy.c + * + * Author: Andy Fleming + * + * Copyright (c) 2004 Freescale Semiconductor, Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + * + */ + +#ifndef __PHY_H +#define __PHY_H + +#include +#include + +#define PHY_BASIC_FEATURES (SUPPORTED_10baseT_Half | \ + SUPPORTED_10baseT_Full | \ + SUPPORTED_100baseT_Half | \ + SUPPORTED_100baseT_Full | \ + SUPPORTED_Autoneg | \ + SUPPORTED_TP | \ + SUPPORTED_MII) + +#define PHY_GBIT_FEATURES (PHY_BASIC_FEATURES | \ + SUPPORTED_1000baseT_Half | \ + SUPPORTED_1000baseT_Full) + +/* Set phydev->irq to PHY_POLL if interrupts are not supported, + * or not desired for this PHY.
Set to PHY_IGNORE_INTERRUPT if + * the attached driver handles the interrupt + */ +#define PHY_POLL -1 +#define PHY_IGNORE_INTERRUPT -2 + +#define PHY_HAS_INTERRUPT 0x00000001 +#define PHY_HAS_MAGICANEG 0x00000002 + +#define MII_BUS_MAX 4 + + +#define PHY_INIT_TIMEOUT 100000 +#define PHY_STATE_TIME 1 +#define PHY_FORCE_TIMEOUT 10 +#define PHY_AN_TIMEOUT 10 + +#define PHY_MAX_ADDR 32 + +/* The Bus class for PHYs. Devices which provide access to + * PHYs should register using this structure */ +struct mii_bus { + const char *name; + int id; + void *priv; + int (*read)(struct mii_bus *bus, int phy_id, int regnum); + int (*write)(struct mii_bus *bus, int phy_id, int regnum, u16 val); + int (*reset)(struct mii_bus *bus); + + /* A lock to ensure that only one thing can read/write + * the MDIO bus at a time */ + spinlock_t mdio_lock; + + struct device *dev; + + /* list of all PHYs on bus */ + struct phy_device *phy_map[PHY_MAX_ADDR]; + + /* Pointer to an array of interrupts, each PHY's + * interrupt at the index matching its address */ + int *irq; +}; + +#define PHY_INTERRUPT_DISABLED 0x0 +#define PHY_INTERRUPT_ENABLED 0x80000000 + +/* PHY state machine states: + * + * DOWN: PHY device and driver are not ready for anything. probe + * should be called if and only if the PHY is in this state, + * given that the PHY device exists. + * - PHY driver probe function will, depending on the PHY, set + * the state to STARTING or READY + * + * STARTING: PHY device is coming up, and the ethernet driver is + * not ready. PHY drivers may set this in the probe function. + * If they do, they are responsible for making sure the state is + * eventually set to indicate whether the PHY is UP or READY, + * depending on the state when the PHY is done starting up. + * - PHY driver will set the state to READY + * - start will set the state to PENDING + * + * READY: PHY is ready to send and receive packets, but the + * controller is not. 
By default, PHYs which do not implement + * probe will be set to this state by phy_probe(). If the PHY + * driver knows the PHY is ready, and the PHY state is STARTING, + * then it sets this STATE. + * - start will set the state to UP + * + * PENDING: PHY device is coming up, but the ethernet driver is + * ready. phy_start will set this state if the PHY state is + * STARTING. + * - PHY driver will set the state to UP when the PHY is ready + * + * UP: The PHY and attached device are ready to do work. + * Interrupts should be started here. + * - timer moves to AN + * + * AN: The PHY is currently negotiating the link state. Link is + * therefore down for now. phy_timer will set this state when it + * detects the state is UP. config_aneg will set this state + * whenever called with phydev->autoneg set to AUTONEG_ENABLE. + * - If autonegotiation finishes, but there's no link, it sets + * the state to NOLINK. + * - If aneg finishes with link, it sets the state to RUNNING, + * and calls adjust_link + * - If autonegotiation did not finish after an arbitrary amount + * of time, autonegotiation should be tried again if the PHY + * supports "magic" autonegotiation (back to AN) + * - If it didn't finish, and no magic_aneg, move to FORCING. + * + * NOLINK: PHY is up, but not currently plugged in. 
+ * - If the timer notes that the link comes back, we move to RUNNING + * - config_aneg moves to AN + * - phy_stop moves to HALTED + * + * FORCING: PHY is being configured with forced settings + * - if link is up, move to RUNNING + * - If link is down, we drop to the next highest setting, and + * retry (FORCING) after a timeout + * - phy_stop moves to HALTED + * + * RUNNING: PHY is currently up, running, and possibly sending + * and/or receiving packets + * - timer will set CHANGELINK if we're polling (this ensures the + * link state is polled every other cycle of this state machine, + * which makes it every other second) + * - irq will set CHANGELINK + * - config_aneg will set AN + * - phy_stop moves to HALTED + * + * CHANGELINK: PHY experienced a change in link state + * - timer moves to RUNNING if link + * - timer moves to NOLINK if the link is down + * - phy_stop moves to HALTED + * + * HALTED: PHY is up, but no polling or interrupts are done. Or + * PHY is in an error state. + * + * - phy_start moves to RESUMING + * + * RESUMING: PHY was halted, but now wants to run again. + * - If we are forcing, or aneg is done, timer moves to RUNNING + * - If aneg is not done, timer moves to AN + * - phy_stop moves to HALTED + */ +enum phy_state { + PHY_DOWN=0, + PHY_STARTING, + PHY_READY, + PHY_PENDING, + PHY_UP, + PHY_AN, + PHY_RUNNING, + PHY_NOLINK, + PHY_FORCING, + PHY_CHANGELINK, + PHY_HALTED, + PHY_RESUMING +}; + +/* phy_device: An instance of a PHY + * + * drv: Pointer to the driver for this PHY instance + * bus: Pointer to the bus this PHY is on + * dev: driver model device structure for this PHY + * phy_id: UID for this device found during discovery + * state: state of the PHY for management purposes + * dev_flags: Device-specific flags used by the PHY driver. 
+ * addr: Bus address of PHY + * link_timeout: The number of timer firings to wait before the + * giving up on the current attempt at acquiring a link + * irq: IRQ number of the PHY's interrupt (-1 if none) + * phy_timer: The timer for handling the state machine + * phy_queue: A work_queue for the interrupt + * attached_dev: The attached enet driver's device instance ptr + * adjust_link: Callback for the enet controller to respond to + * changes in the link state. + * adjust_state: Callback for the enet driver to respond to + * changes in the state machine. + * + * speed, duplex, pause, supported, advertising, and + * autoneg are used like in mii_if_info + * + * interrupts currently only supports enabled or disabled, + * but could be changed in the future to support enabling + * and disabling specific interrupts + * + * Contains some infrastructure for polling and interrupt + * handling, as well as handling shifts in PHY hardware state + */ +struct phy_device { + /* Information about the PHY type */ + /* And management functions */ + struct phy_driver *drv; + + struct mii_bus *bus; + + struct device dev; + + u32 phy_id; + + enum phy_state state; + + u32 dev_flags; + + /* Bus address of the PHY (0-31, i.e. below PHY_MAX_ADDR) */ + int addr; + + /* forced speed & duplex (no autoneg) + * partner speed & duplex & pause (autoneg) + */ + int speed; + int duplex; + int pause; + int asym_pause; + + /* The most recently read link state */ + int link; + + /* Enabled Interrupts */ + u32 interrupts; + + /* Union of PHY and Attached devices' supported modes */ + /* See mii.h for more info */ + u32 supported; + u32 advertising; + + int autoneg; + + int link_timeout; + + /* Interrupt number for this PHY + * -1 means no interrupt */ + int irq; + + /* private data pointer */ + /* For use by PHYs to maintain extra state */ + void *priv; + + /* Interrupt and Polling infrastructure */ + struct work_struct phy_queue; + struct timer_list phy_timer; + + spinlock_t lock; + + struct net_device *attached_dev; + +
void (*adjust_link)(struct net_device *dev); + + void (*adjust_state)(struct net_device *dev); +}; +#define to_phy_device(d) container_of(d, struct phy_device, dev) + +/* struct phy_driver: Driver structure for a particular PHY type + * + * phy_id: The result of reading the UID registers of this PHY + * type, and ANDing them with the phy_id_mask. This driver + * only works for PHYs with IDs which match this field + * name: The friendly name of this PHY type + * phy_id_mask: Defines the important bits of the phy_id + * features: A list of features (speed, duplex, etc) supported + * by this PHY + * flags: A bitfield defining certain other features this PHY + * supports (like interrupts) + * + * The drivers must implement config_aneg and read_status. All + * other functions are optional. Note that none of these + * functions should be called from interrupt time. The goal is + * for the bus read/write functions to be able to block when the + * bus transaction is happening, and be freed up by an interrupt + * (The MPC85xx has this ability, though it is not currently + * supported in the driver). + */ +struct phy_driver { + u32 phy_id; + char *name; + unsigned int phy_id_mask; + u32 features; + u32 flags; + + /* Called to initialize the PHY, + * including after a reset */ + int (*config_init)(struct phy_device *phydev); + + /* Called during discovery. 
Used to set + * up device-specific structures, if any */ + int (*probe)(struct phy_device *phydev); + + /* PHY Power Management */ + int (*suspend)(struct phy_device *phydev); + int (*resume)(struct phy_device *phydev); + + /* Configures the advertisement and resets + * autonegotiation if phydev->autoneg is on, + * forces the speed to the current settings in phydev + * if phydev->autoneg is off */ + int (*config_aneg)(struct phy_device *phydev); + + /* Determines the negotiated speed and duplex */ + int (*read_status)(struct phy_device *phydev); + + /* Clears any pending interrupts */ + int (*ack_interrupt)(struct phy_device *phydev); + + /* Enables or disables interrupts */ + int (*config_intr)(struct phy_device *phydev); + + /* Clears up any memory if needed */ + void (*remove)(struct phy_device *phydev); + + struct device_driver driver; +}; +#define to_phy_driver(d) container_of(d, struct phy_driver, driver) + +int phy_read(struct phy_device *phydev, u16 regnum); +int phy_write(struct phy_device *phydev, u16 regnum, u16 val); +struct phy_device* get_phy_device(struct mii_bus *bus, int addr); +int phy_clear_interrupt(struct phy_device *phydev); +int phy_config_interrupt(struct phy_device *phydev, u32 interrupts); +struct phy_device * phy_attach(struct net_device *dev, + const char *phy_id, u32 flags); +struct phy_device * phy_connect(struct net_device *dev, const char *phy_id, + void (*handler)(struct net_device *), u32 flags); +void phy_disconnect(struct phy_device *phydev); +void phy_detach(struct phy_device *phydev); +void phy_start(struct phy_device *phydev); +void phy_stop(struct phy_device *phydev); +int phy_start_aneg(struct phy_device *phydev); + +int mdiobus_register(struct mii_bus *bus); +void mdiobus_unregister(struct mii_bus *bus); +void phy_sanitize_settings(struct phy_device *phydev); +int phy_stop_interrupts(struct phy_device *phydev); + +static inline int phy_read_status(struct phy_device *phydev) { + return phydev->drv->read_status(phydev); +} + 
+int genphy_config_advert(struct phy_device *phydev); +int genphy_setup_forced(struct phy_device *phydev); +int genphy_restart_aneg(struct phy_device *phydev); +int genphy_config_aneg(struct phy_device *phydev); +int genphy_update_link(struct phy_device *phydev); +int genphy_read_status(struct phy_device *phydev); +void phy_driver_unregister(struct phy_driver *drv); +int phy_driver_register(struct phy_driver *new_driver); +void phy_prepare_link(struct phy_device *phydev, + void (*adjust_link)(struct net_device *)); +void phy_start_machine(struct phy_device *phydev, + void (*handler)(struct net_device *)); +void phy_stop_machine(struct phy_device *phydev); +int phy_ethtool_sset(struct phy_device *phydev, struct ethtool_cmd *cmd); +int phy_ethtool_gset(struct phy_device *phydev, struct ethtool_cmd *cmd); +int phy_mii_ioctl(struct phy_device *phydev, + struct mii_ioctl_data *mii_data, int cmd); +int phy_start_interrupts(struct phy_device *phydev); +void phy_print_status(struct phy_device *phydev); + +extern struct bus_type mdio_bus_type; +extern struct phy_driver genphy_driver; +#endif /* __PHY_H */ From 6b38aefe924daf2e4fdd73b384f21c913f31b668 Mon Sep 17 00:00:00 2001 From: "John W. Linville" Date: Thu, 28 Jul 2005 15:00:15 -0400 Subject: [PATCH 040/584] [PATCH] bonding: ALB -- allow slave to use bond's MAC address if its own MAC address conflicts In ALB mode, allow new slave to use bond's MAC address if the new slave's MAC address is being used within the bond and no other slave is using the bond's MAC address. Signed-off-by: John W. 
Linville Signed-off-by: Jeff Garzik --- drivers/net/bonding/bond_alb.c | 17 ++++++----------- 1 file changed, 6 insertions(+), 11 deletions(-) diff --git a/drivers/net/bonding/bond_alb.c b/drivers/net/bonding/bond_alb.c index 5ce606d9dc03..19e829b567d0 100644 --- a/drivers/net/bonding/bond_alb.c +++ b/drivers/net/bonding/bond_alb.c @@ -1106,18 +1106,13 @@ static int alb_handle_addr_collision_on_attach(struct bonding *bond, struct slav } } - if (found) { - /* a slave was found that is using the mac address - * of the new slave - */ - printk(KERN_ERR DRV_NAME - ": Error: the hw address of slave %s is not " - "unique - cannot enslave it!", - slave->dev->name); - return -EINVAL; - } + if (!found) + return 0; - return 0; + /* Try setting slave mac to bond address and fall-through + to code handling that situation below... */ + alb_set_slave_mac_addr(slave, bond->dev->dev_addr, + bond->alb_info.rlb_enabled); } /* The slave's address is equal to the address of the bond. From 504ff16cecf2a788181eddc9d6e47d94ce50a9f6 Mon Sep 17 00:00:00 2001 From: Jochen Friedrich Date: Wed, 27 Jul 2005 01:14:50 -0700 Subject: [PATCH 041/584] [PATCH] tms380tr: move to DMA API This patch makes tms380tr use the new DMA API. Now that on Alpha, this API also supports bus master DMA for ISA (platform) devices, I changed the driver to use this new API. This also works around a bug in the firmware loader: The example provided in Documentation/firmware_class no longer works, as the firmware loader now calls get_kobj_path_length() and the kernel promptly oopses, as the home-grown device doesn't have a parent. Of course, this doesn't happen with a "real" device which has its bus (or pseudo bus in the case of platform) as parent. 
Converted tms380tr to use new DMA API: - proteon.c, skisa.c: use platform pseudo bus to create a struct device - Space.c: delete init hooks - abyss.c, tmspci.c: pass struct device to tms380tr.c - tms380tr.c, tms380tr.h: new DMA API, use real device for firmware loader Signed-off-by: Jochen Friedrich Cc: Jeff Garzik Signed-off-by: Andrew Morton Signed-off-by: Jeff Garzik --- drivers/net/Space.c | 6 -- drivers/net/tokenring/abyss.c | 2 +- drivers/net/tokenring/proteon.c | 104 +++++++++++++++---------------- drivers/net/tokenring/skisa.c | 104 +++++++++++++++---------------- drivers/net/tokenring/tms380tr.c | 37 +++++------ drivers/net/tokenring/tms380tr.h | 8 +-- drivers/net/tokenring/tmspci.c | 4 +- 7 files changed, 122 insertions(+), 143 deletions(-) diff --git a/drivers/net/Space.c b/drivers/net/Space.c index 3707df6b0cfa..11c44becc08f 100644 --- a/drivers/net/Space.c +++ b/drivers/net/Space.c @@ -323,12 +323,6 @@ extern struct net_device *proteon_probe(int unit); extern struct net_device *smctr_probe(int unit); static struct devprobe2 tr_probes2[] __initdata = { -#ifdef CONFIG_SKISA - {sk_isa_probe, 0}, -#endif -#ifdef CONFIG_PROTEON - {proteon_probe, 0}, -#endif #ifdef CONFIG_SMCTR {smctr_probe, 0}, #endif diff --git a/drivers/net/tokenring/abyss.c b/drivers/net/tokenring/abyss.c index 87103c400999..f1e4ef1188e4 100644 --- a/drivers/net/tokenring/abyss.c +++ b/drivers/net/tokenring/abyss.c @@ -139,7 +139,7 @@ static int __devinit abyss_attach(struct pci_dev *pdev, const struct pci_device_ */ dev->base_addr += 0x10; - ret = tmsdev_init(dev, PCI_MAX_ADDRESS, pdev); + ret = tmsdev_init(dev, PCI_MAX_ADDRESS, &pdev->dev); if (ret) { printk("%s: unable to get memory for dev->priv.\n", dev->name); diff --git a/drivers/net/tokenring/proteon.c b/drivers/net/tokenring/proteon.c index 40ad0fde28af..0a9597738d6c 100644 --- a/drivers/net/tokenring/proteon.c +++ b/drivers/net/tokenring/proteon.c @@ -62,8 +62,7 @@ static int dmalist[] __initdata = { }; static char cardname[] = 
"Proteon 1392\0"; - -struct net_device *proteon_probe(int unit); +static u64 dma_mask = ISA_MAX_ADDRESS; static int proteon_open(struct net_device *dev); static void proteon_read_eeprom(struct net_device *dev); static unsigned short proteon_setnselout_pins(struct net_device *dev); @@ -116,7 +115,7 @@ nodev: return -ENODEV; } -static int __init setup_card(struct net_device *dev) +static int __init setup_card(struct net_device *dev, struct device *pdev) { struct net_local *tp; static int versionprinted; @@ -137,7 +136,7 @@ static int __init setup_card(struct net_device *dev) } } if (err) - goto out4; + goto out5; /* At this point we have found a valid card. */ @@ -145,14 +144,15 @@ static int __init setup_card(struct net_device *dev) printk(KERN_DEBUG "%s", version); err = -EIO; - if (tmsdev_init(dev, ISA_MAX_ADDRESS, NULL)) + pdev->dma_mask = &dma_mask; + if (tmsdev_init(dev, ISA_MAX_ADDRESS, pdev)) goto out4; dev->base_addr &= ~3; proteon_read_eeprom(dev); - printk(KERN_DEBUG "%s: Ring Station Address: ", dev->name); + printk(KERN_DEBUG "proteon.c: Ring Station Address: "); printk("%2.2x", dev->dev_addr[0]); for (j = 1; j < 6; j++) printk(":%2.2x", dev->dev_addr[j]); @@ -185,7 +185,7 @@ static int __init setup_card(struct net_device *dev) if(irqlist[j] == 0) { - printk(KERN_INFO "%s: AutoSelect no IRQ available\n", dev->name); + printk(KERN_INFO "proteon.c: AutoSelect no IRQ available\n"); goto out3; } } @@ -196,15 +196,15 @@ static int __init setup_card(struct net_device *dev) break; if (irqlist[j] == 0) { - printk(KERN_INFO "%s: Illegal IRQ %d specified\n", - dev->name, dev->irq); + printk(KERN_INFO "proteon.c: Illegal IRQ %d specified\n", + dev->irq); goto out3; } if (request_irq(dev->irq, tms380tr_interrupt, 0, cardname, dev)) { - printk(KERN_INFO "%s: Selected IRQ %d not available\n", - dev->name, dev->irq); + printk(KERN_INFO "proteon.c: Selected IRQ %d not available\n", + dev->irq); goto out3; } } @@ -220,7 +220,7 @@ static int __init setup_card(struct 
net_device *dev) if(dmalist[j] == 0) { - printk(KERN_INFO "%s: AutoSelect no DMA available\n", dev->name); + printk(KERN_INFO "proteon.c: AutoSelect no DMA available\n"); goto out2; } } @@ -231,25 +231,25 @@ static int __init setup_card(struct net_device *dev) break; if (dmalist[j] == 0) { - printk(KERN_INFO "%s: Illegal DMA %d specified\n", - dev->name, dev->dma); + printk(KERN_INFO "proteon.c: Illegal DMA %d specified\n", + dev->dma); goto out2; } if (request_dma(dev->dma, cardname)) { - printk(KERN_INFO "%s: Selected DMA %d not available\n", - dev->name, dev->dma); + printk(KERN_INFO "proteon.c: Selected DMA %d not available\n", + dev->dma); goto out2; } } - printk(KERN_DEBUG "%s: IO: %#4lx IRQ: %d DMA: %d\n", - dev->name, dev->base_addr, dev->irq, dev->dma); - err = register_netdev(dev); if (err) goto out; + printk(KERN_DEBUG "%s: IO: %#4lx IRQ: %d DMA: %d\n", + dev->name, dev->base_addr, dev->irq, dev->dma); + return 0; out: free_dma(dev->dma); @@ -258,34 +258,11 @@ out2: out3: tmsdev_term(dev); out4: - release_region(dev->base_addr, PROTEON_IO_EXTENT); + release_region(dev->base_addr, PROTEON_IO_EXTENT); +out5: return err; } -struct net_device * __init proteon_probe(int unit) -{ - struct net_device *dev = alloc_trdev(sizeof(struct net_local)); - int err = 0; - - if (!dev) - return ERR_PTR(-ENOMEM); - - if (unit >= 0) { - sprintf(dev->name, "tr%d", unit); - netdev_boot_setup_check(dev); - } - - err = setup_card(dev); - if (err) - goto out; - - return dev; - -out: - free_netdev(dev); - return ERR_PTR(err); -} - /* * Reads MAC address from adapter RAM, which should've read it from * the onboard ROM. 
@@ -352,8 +329,6 @@ static int proteon_open(struct net_device *dev) return tms380tr_open(dev); } -#ifdef MODULE - #define ISATR_MAX_ADAPTERS 3 static int io[ISATR_MAX_ADAPTERS]; @@ -366,13 +341,23 @@ module_param_array(io, int, NULL, 0); module_param_array(irq, int, NULL, 0); module_param_array(dma, int, NULL, 0); -static struct net_device *proteon_dev[ISATR_MAX_ADAPTERS]; +static struct platform_device *proteon_dev[ISATR_MAX_ADAPTERS]; -int init_module(void) +static struct device_driver proteon_driver = { + .name = "proteon", + .bus = &platform_bus_type, +}; + +static int __init proteon_init(void) { struct net_device *dev; + struct platform_device *pdev; int i, num = 0, err = 0; + err = driver_register(&proteon_driver); + if (err) + return err; + for (i = 0; i < ISATR_MAX_ADAPTERS ; i++) { dev = alloc_trdev(sizeof(struct net_local)); if (!dev) @@ -381,11 +366,15 @@ int init_module(void) dev->base_addr = io[i]; dev->irq = irq[i]; dev->dma = dma[i]; - err = setup_card(dev); + pdev = platform_device_register_simple("proteon", + i, NULL, 0); + err = setup_card(dev, &pdev->dev); if (!err) { - proteon_dev[i] = dev; + proteon_dev[i] = pdev; + dev_set_drvdata(&pdev->dev, dev); ++num; } else { + platform_device_unregister(pdev); free_netdev(dev); } } @@ -399,23 +388,28 @@ int init_module(void) return (0); } -void cleanup_module(void) +static void __exit proteon_cleanup(void) { + struct net_device *dev; int i; for (i = 0; i < ISATR_MAX_ADAPTERS ; i++) { - struct net_device *dev = proteon_dev[i]; + struct platform_device *pdev = proteon_dev[i]; - if (!dev) + if (!pdev) continue; - + dev = dev_get_drvdata(&pdev->dev); unregister_netdev(dev); release_region(dev->base_addr, PROTEON_IO_EXTENT); free_irq(dev->irq, dev); free_dma(dev->dma); tmsdev_term(dev); free_netdev(dev); + dev_set_drvdata(&pdev->dev, NULL); + platform_device_unregister(pdev); } + driver_unregister(&proteon_driver); } -#endif /* MODULE */ +module_init(proteon_init); +module_exit(proteon_cleanup); diff --git 
a/drivers/net/tokenring/skisa.c b/drivers/net/tokenring/skisa.c index f26796e2d0e5..03f061941d77 100644 --- a/drivers/net/tokenring/skisa.c +++ b/drivers/net/tokenring/skisa.c @@ -68,8 +68,7 @@ static int dmalist[] __initdata = { }; static char isa_cardname[] = "SK NET TR 4/16 ISA\0"; - -struct net_device *sk_isa_probe(int unit); +static u64 dma_mask = ISA_MAX_ADDRESS; static int sk_isa_open(struct net_device *dev); static void sk_isa_read_eeprom(struct net_device *dev); static unsigned short sk_isa_setnselout_pins(struct net_device *dev); @@ -133,7 +132,7 @@ static int __init sk_isa_probe1(struct net_device *dev, int ioaddr) return 0; } -static int __init setup_card(struct net_device *dev) +static int __init setup_card(struct net_device *dev, struct device *pdev) { struct net_local *tp; static int versionprinted; @@ -154,7 +153,7 @@ static int __init setup_card(struct net_device *dev) } } if (err) - goto out4; + goto out5; /* At this point we have found a valid card. */ @@ -162,14 +161,15 @@ static int __init setup_card(struct net_device *dev) printk(KERN_DEBUG "%s", version); err = -EIO; - if (tmsdev_init(dev, ISA_MAX_ADDRESS, NULL)) + pdev->dma_mask = &dma_mask; + if (tmsdev_init(dev, ISA_MAX_ADDRESS, pdev)) goto out4; dev->base_addr &= ~3; sk_isa_read_eeprom(dev); - printk(KERN_DEBUG "%s: Ring Station Address: ", dev->name); + printk(KERN_DEBUG "skisa.c: Ring Station Address: "); printk("%2.2x", dev->dev_addr[0]); for (j = 1; j < 6; j++) printk(":%2.2x", dev->dev_addr[j]); @@ -202,7 +202,7 @@ static int __init setup_card(struct net_device *dev) if(irqlist[j] == 0) { - printk(KERN_INFO "%s: AutoSelect no IRQ available\n", dev->name); + printk(KERN_INFO "skisa.c: AutoSelect no IRQ available\n"); goto out3; } } @@ -213,15 +213,15 @@ static int __init setup_card(struct net_device *dev) break; if (irqlist[j] == 0) { - printk(KERN_INFO "%s: Illegal IRQ %d specified\n", - dev->name, dev->irq); + printk(KERN_INFO "skisa.c: Illegal IRQ %d specified\n", + dev->irq); goto 
out3; } if (request_irq(dev->irq, tms380tr_interrupt, 0, isa_cardname, dev)) { - printk(KERN_INFO "%s: Selected IRQ %d not available\n", - dev->name, dev->irq); + printk(KERN_INFO "skisa.c: Selected IRQ %d not available\n", + dev->irq); goto out3; } } @@ -237,7 +237,7 @@ static int __init setup_card(struct net_device *dev) if(dmalist[j] == 0) { - printk(KERN_INFO "%s: AutoSelect no DMA available\n", dev->name); + printk(KERN_INFO "skisa.c: AutoSelect no DMA available\n"); goto out2; } } @@ -248,25 +248,25 @@ static int __init setup_card(struct net_device *dev) break; if (dmalist[j] == 0) { - printk(KERN_INFO "%s: Illegal DMA %d specified\n", - dev->name, dev->dma); + printk(KERN_INFO "skisa.c: Illegal DMA %d specified\n", + dev->dma); goto out2; } if (request_dma(dev->dma, isa_cardname)) { - printk(KERN_INFO "%s: Selected DMA %d not available\n", - dev->name, dev->dma); + printk(KERN_INFO "skisa.c: Selected DMA %d not available\n", + dev->dma); goto out2; } } - printk(KERN_DEBUG "%s: IO: %#4lx IRQ: %d DMA: %d\n", - dev->name, dev->base_addr, dev->irq, dev->dma); - err = register_netdev(dev); if (err) goto out; + printk(KERN_DEBUG "%s: IO: %#4lx IRQ: %d DMA: %d\n", + dev->name, dev->base_addr, dev->irq, dev->dma); + return 0; out: free_dma(dev->dma); @@ -275,33 +275,11 @@ out2: out3: tmsdev_term(dev); out4: - release_region(dev->base_addr, SK_ISA_IO_EXTENT); + release_region(dev->base_addr, SK_ISA_IO_EXTENT); +out5: return err; } -struct net_device * __init sk_isa_probe(int unit) -{ - struct net_device *dev = alloc_trdev(sizeof(struct net_local)); - int err = 0; - - if (!dev) - return ERR_PTR(-ENOMEM); - - if (unit >= 0) { - sprintf(dev->name, "tr%d", unit); - netdev_boot_setup_check(dev); - } - - err = setup_card(dev); - if (err) - goto out; - - return dev; -out: - free_netdev(dev); - return ERR_PTR(err); -} - /* * Reads MAC address from adapter RAM, which should've read it from * the onboard ROM. 
@@ -361,8 +339,6 @@ static int sk_isa_open(struct net_device *dev) return tms380tr_open(dev); } -#ifdef MODULE - #define ISATR_MAX_ADAPTERS 3 static int io[ISATR_MAX_ADAPTERS]; @@ -375,13 +351,23 @@ module_param_array(io, int, NULL, 0); module_param_array(irq, int, NULL, 0); module_param_array(dma, int, NULL, 0); -static struct net_device *sk_isa_dev[ISATR_MAX_ADAPTERS]; +static struct platform_device *sk_isa_dev[ISATR_MAX_ADAPTERS]; -int init_module(void) +static struct device_driver sk_isa_driver = { + .name = "skisa", + .bus = &platform_bus_type, +}; + +static int __init sk_isa_init(void) { struct net_device *dev; + struct platform_device *pdev; int i, num = 0, err = 0; + err = driver_register(&sk_isa_driver); + if (err) + return err; + for (i = 0; i < ISATR_MAX_ADAPTERS ; i++) { dev = alloc_trdev(sizeof(struct net_local)); if (!dev) @@ -390,12 +376,15 @@ int init_module(void) dev->base_addr = io[i]; dev->irq = irq[i]; dev->dma = dma[i]; - err = setup_card(dev); - + pdev = platform_device_register_simple("skisa", + i, NULL, 0); + err = setup_card(dev, &pdev->dev); if (!err) { - sk_isa_dev[i] = dev; + sk_isa_dev[i] = pdev; + dev_set_drvdata(&sk_isa_dev[i]->dev, dev); ++num; } else { + platform_device_unregister(pdev); free_netdev(dev); } } @@ -409,23 +398,28 @@ int init_module(void) return (0); } -void cleanup_module(void) +static void __exit sk_isa_cleanup(void) { + struct net_device *dev; int i; for (i = 0; i < ISATR_MAX_ADAPTERS ; i++) { - struct net_device *dev = sk_isa_dev[i]; + struct platform_device *pdev = sk_isa_dev[i]; - if (!dev) + if (!pdev) continue; - + dev = dev_get_drvdata(&pdev->dev); unregister_netdev(dev); release_region(dev->base_addr, SK_ISA_IO_EXTENT); free_irq(dev->irq, dev); free_dma(dev->dma); tmsdev_term(dev); free_netdev(dev); + dev_set_drvdata(&pdev->dev, NULL); + platform_device_unregister(pdev); } + driver_unregister(&sk_isa_driver); } -#endif /* MODULE */ +module_init(sk_isa_init); +module_exit(sk_isa_cleanup); diff --git 
a/drivers/net/tokenring/tms380tr.c b/drivers/net/tokenring/tms380tr.c index 5e0b0ce98ed7..9a543fe2d0e6 100644 --- a/drivers/net/tokenring/tms380tr.c +++ b/drivers/net/tokenring/tms380tr.c @@ -62,6 +62,7 @@ * normal operation. * 30-Dec-02 JF Removed incorrect __init from * tms380tr_init_card. + * 22-Jul-05 JF Converted to dma-mapping. * * To do: * 1. Multi/Broadcast packet handling (this may have fixed itself) @@ -89,7 +90,7 @@ static const char version[] = "tms380tr.c: v1.10 30/12/2002 by Christoph Goos, A #include #include #include -#include +#include #include #include #include @@ -114,8 +115,6 @@ static const char version[] = "tms380tr.c: v1.10 30/12/2002 by Christoph Goos, A #endif static unsigned int tms380tr_debug = TMS380TR_DEBUG; -static struct device tms_device; - /* Index to functions, as function prototypes. * Alphabetical by function name. */ @@ -434,7 +433,7 @@ static void tms380tr_init_net_local(struct net_device *dev) skb_put(tp->Rpl[i].Skb, tp->MaxPacketSize); /* data unreachable for DMA ? then use local buffer */ - dmabuf = pci_map_single(tp->pdev, tp->Rpl[i].Skb->data, tp->MaxPacketSize, PCI_DMA_FROMDEVICE); + dmabuf = dma_map_single(tp->pdev, tp->Rpl[i].Skb->data, tp->MaxPacketSize, DMA_FROM_DEVICE); if(tp->dmalimit && (dmabuf + tp->MaxPacketSize > tp->dmalimit)) { tp->Rpl[i].SkbStat = SKB_DATA_COPY; @@ -638,10 +637,10 @@ static int tms380tr_hardware_send_packet(struct sk_buff *skb, struct net_device /* Is buffer reachable for Busmaster-DMA? 
*/ length = skb->len; - dmabuf = pci_map_single(tp->pdev, skb->data, length, PCI_DMA_TODEVICE); + dmabuf = dma_map_single(tp->pdev, skb->data, length, DMA_TO_DEVICE); if(tp->dmalimit && (dmabuf + length > tp->dmalimit)) { /* Copy frame to local buffer */ - pci_unmap_single(tp->pdev, dmabuf, length, PCI_DMA_TODEVICE); + dma_unmap_single(tp->pdev, dmabuf, length, DMA_TO_DEVICE); dmabuf = 0; i = tp->TplFree->TPLIndex; buf = tp->LocalTxBuffers[i]; @@ -1284,9 +1283,7 @@ static int tms380tr_reset_adapter(struct net_device *dev) unsigned short count, c, count2; const struct firmware *fw_entry = NULL; - strncpy(tms_device.bus_id,dev->name, BUS_ID_SIZE); - - if (request_firmware(&fw_entry, "tms380tr.bin", &tms_device) != 0) { + if (request_firmware(&fw_entry, "tms380tr.bin", tp->pdev) != 0) { printk(KERN_ALERT "%s: firmware %s is missing, cannot start.\n", dev->name, "tms380tr.bin"); return (-1); @@ -2021,7 +2018,7 @@ static void tms380tr_cancel_tx_queue(struct net_local* tp) printk(KERN_INFO "Cancel tx (%08lXh).\n", (unsigned long)tpl); if (tpl->DMABuff) - pci_unmap_single(tp->pdev, tpl->DMABuff, tpl->Skb->len, PCI_DMA_TODEVICE); + dma_unmap_single(tp->pdev, tpl->DMABuff, tpl->Skb->len, DMA_TO_DEVICE); dev_kfree_skb_any(tpl->Skb); } @@ -2090,7 +2087,7 @@ static void tms380tr_tx_status_irq(struct net_device *dev) tp->MacStat.tx_packets++; if (tpl->DMABuff) - pci_unmap_single(tp->pdev, tpl->DMABuff, tpl->Skb->len, PCI_DMA_TODEVICE); + dma_unmap_single(tp->pdev, tpl->DMABuff, tpl->Skb->len, DMA_TO_DEVICE); dev_kfree_skb_irq(tpl->Skb); tpl->BusyFlag = 0; /* "free" TPL */ } @@ -2209,7 +2206,7 @@ static void tms380tr_rcv_status_irq(struct net_device *dev) tp->MacStat.rx_errors++; } if (rpl->DMABuff) - pci_unmap_single(tp->pdev, rpl->DMABuff, tp->MaxPacketSize, PCI_DMA_TODEVICE); + dma_unmap_single(tp->pdev, rpl->DMABuff, tp->MaxPacketSize, DMA_TO_DEVICE); rpl->DMABuff = 0; /* Allocate new skb for rpl */ @@ -2227,7 +2224,7 @@ static void tms380tr_rcv_status_irq(struct net_device 
*dev) skb_put(rpl->Skb, tp->MaxPacketSize); /* Data unreachable for DMA ? then use local buffer */ - dmabuf = pci_map_single(tp->pdev, rpl->Skb->data, tp->MaxPacketSize, PCI_DMA_FROMDEVICE); + dmabuf = dma_map_single(tp->pdev, rpl->Skb->data, tp->MaxPacketSize, DMA_FROM_DEVICE); if(tp->dmalimit && (dmabuf + tp->MaxPacketSize > tp->dmalimit)) { rpl->SkbStat = SKB_DATA_COPY; @@ -2332,12 +2329,12 @@ void tmsdev_term(struct net_device *dev) struct net_local *tp; tp = netdev_priv(dev); - pci_unmap_single(tp->pdev, tp->dmabuffer, sizeof(struct net_local), - PCI_DMA_BIDIRECTIONAL); + dma_unmap_single(tp->pdev, tp->dmabuffer, sizeof(struct net_local), + DMA_BIDIRECTIONAL); } int tmsdev_init(struct net_device *dev, unsigned long dmalimit, - struct pci_dev *pdev) + struct device *pdev) { struct net_local *tms_local; @@ -2346,8 +2343,8 @@ int tmsdev_init(struct net_device *dev, unsigned long dmalimit, init_waitqueue_head(&tms_local->wait_for_tok_int); tms_local->dmalimit = dmalimit; tms_local->pdev = pdev; - tms_local->dmabuffer = pci_map_single(pdev, (void *)tms_local, - sizeof(struct net_local), PCI_DMA_BIDIRECTIONAL); + tms_local->dmabuffer = dma_map_single(pdev, (void *)tms_local, + sizeof(struct net_local), DMA_BIDIRECTIONAL); if (tms_local->dmabuffer + sizeof(struct net_local) > dmalimit) { printk(KERN_INFO "%s: Memory not accessible for DMA\n", @@ -2370,8 +2367,6 @@ int tmsdev_init(struct net_device *dev, unsigned long dmalimit, return 0; } -#ifdef MODULE - EXPORT_SYMBOL(tms380tr_open); EXPORT_SYMBOL(tms380tr_close); EXPORT_SYMBOL(tms380tr_interrupt); @@ -2379,6 +2374,8 @@ EXPORT_SYMBOL(tmsdev_init); EXPORT_SYMBOL(tmsdev_term); EXPORT_SYMBOL(tms380tr_wait); +#ifdef MODULE + static struct module *TMS380_module = NULL; int init_module(void) diff --git a/drivers/net/tokenring/tms380tr.h b/drivers/net/tokenring/tms380tr.h index f2c5ba0f37a5..077f568d89d1 100644 --- a/drivers/net/tokenring/tms380tr.h +++ b/drivers/net/tokenring/tms380tr.h @@ -18,7 +18,7 @@ int 
tms380tr_open(struct net_device *dev); int tms380tr_close(struct net_device *dev); irqreturn_t tms380tr_interrupt(int irq, void *dev_id, struct pt_regs *regs); int tmsdev_init(struct net_device *dev, unsigned long dmalimit, - struct pci_dev *pdev); + struct device *pdev); void tmsdev_term(struct net_device *dev); void tms380tr_wait(unsigned long time); @@ -719,7 +719,7 @@ struct s_TPL { /* Transmit Parameter List (align on even word boundaries) */ struct sk_buff *Skb; unsigned char TPLIndex; volatile unsigned char BusyFlag;/* Flag: TPL busy? */ - dma_addr_t DMABuff; /* DMA IO bus address from pci_map */ + dma_addr_t DMABuff; /* DMA IO bus address from dma_map */ }; /* ---------------------Receive Functions-------------------------------* @@ -1060,7 +1060,7 @@ struct s_RPL { /* Receive Parameter List */ struct sk_buff *Skb; SKB_STAT SkbStat; int RPLIndex; - dma_addr_t DMABuff; /* DMA IO bus address from pci_map */ + dma_addr_t DMABuff; /* DMA IO bus address from dma_map */ }; /* Information that need to be kept for each board. 
*/ @@ -1091,7 +1091,7 @@ typedef struct net_local { RPL *RplTail; unsigned char LocalRxBuffers[RPL_NUM][DEFAULT_PACKET_SIZE]; - struct pci_dev *pdev; + struct device *pdev; int DataRate; unsigned char ScbInUse; unsigned short CMDqueue; diff --git a/drivers/net/tokenring/tmspci.c b/drivers/net/tokenring/tmspci.c index 2e18c0a46482..0014aef5c744 100644 --- a/drivers/net/tokenring/tmspci.c +++ b/drivers/net/tokenring/tmspci.c @@ -100,7 +100,7 @@ static int __devinit tms_pci_attach(struct pci_dev *pdev, const struct pci_devic unsigned int pci_irq_line; unsigned long pci_ioaddr; struct card_info *cardinfo = &card_info_table[ent->driver_data]; - + if (versionprinted++ == 0) printk("%s", version); @@ -143,7 +143,7 @@ static int __devinit tms_pci_attach(struct pci_dev *pdev, const struct pci_devic printk(":%2.2x", dev->dev_addr[i]); printk("\n"); - ret = tmsdev_init(dev, PCI_MAX_ADDRESS, pdev); + ret = tmsdev_init(dev, PCI_MAX_ADDRESS, &pdev->dev); if (ret) { printk("%s: unable to get memory for dev->priv.\n", dev->name); goto err_out_irq; From 6b9b97ce70b789014515f808b1b64c8e29e300d1 Mon Sep 17 00:00:00 2001 From: Peter Hagervall Date: Wed, 27 Jul 2005 01:14:46 -0700 Subject: [PATCH 042/584] [PATCH] orinoco: Sparse fixes A few sparse cleanups for orinoco.c Signed-off-by: Peter Hagervall Cc: Jeff Garzik Signed-off-by: Andrew Morton Signed-off-by: Jeff Garzik --- drivers/net/wireless/orinoco.c | 78 +++++++++++++++++----------------- 1 file changed, 39 insertions(+), 39 deletions(-) diff --git a/drivers/net/wireless/orinoco.c b/drivers/net/wireless/orinoco.c index aabcdc2be05e..9c2d07cde010 100644 --- a/drivers/net/wireless/orinoco.c +++ b/drivers/net/wireless/orinoco.c @@ -4322,36 +4322,36 @@ static const struct iw_priv_args orinoco_privtab[] = { */ static const iw_handler orinoco_handler[] = { - [SIOCSIWCOMMIT-SIOCIWFIRST] (iw_handler) orinoco_ioctl_commit, - [SIOCGIWNAME -SIOCIWFIRST] (iw_handler) orinoco_ioctl_getname, - [SIOCSIWFREQ -SIOCIWFIRST] (iw_handler) 
orinoco_ioctl_setfreq, - [SIOCGIWFREQ -SIOCIWFIRST] (iw_handler) orinoco_ioctl_getfreq, - [SIOCSIWMODE -SIOCIWFIRST] (iw_handler) orinoco_ioctl_setmode, - [SIOCGIWMODE -SIOCIWFIRST] (iw_handler) orinoco_ioctl_getmode, - [SIOCSIWSENS -SIOCIWFIRST] (iw_handler) orinoco_ioctl_setsens, - [SIOCGIWSENS -SIOCIWFIRST] (iw_handler) orinoco_ioctl_getsens, - [SIOCGIWRANGE -SIOCIWFIRST] (iw_handler) orinoco_ioctl_getiwrange, - [SIOCSIWSPY -SIOCIWFIRST] (iw_handler) orinoco_ioctl_setspy, - [SIOCGIWSPY -SIOCIWFIRST] (iw_handler) orinoco_ioctl_getspy, - [SIOCSIWAP -SIOCIWFIRST] (iw_handler) orinoco_ioctl_setwap, - [SIOCGIWAP -SIOCIWFIRST] (iw_handler) orinoco_ioctl_getwap, - [SIOCSIWSCAN -SIOCIWFIRST] (iw_handler) orinoco_ioctl_setscan, - [SIOCGIWSCAN -SIOCIWFIRST] (iw_handler) orinoco_ioctl_getscan, - [SIOCSIWESSID -SIOCIWFIRST] (iw_handler) orinoco_ioctl_setessid, - [SIOCGIWESSID -SIOCIWFIRST] (iw_handler) orinoco_ioctl_getessid, - [SIOCSIWNICKN -SIOCIWFIRST] (iw_handler) orinoco_ioctl_setnick, - [SIOCGIWNICKN -SIOCIWFIRST] (iw_handler) orinoco_ioctl_getnick, - [SIOCSIWRATE -SIOCIWFIRST] (iw_handler) orinoco_ioctl_setrate, - [SIOCGIWRATE -SIOCIWFIRST] (iw_handler) orinoco_ioctl_getrate, - [SIOCSIWRTS -SIOCIWFIRST] (iw_handler) orinoco_ioctl_setrts, - [SIOCGIWRTS -SIOCIWFIRST] (iw_handler) orinoco_ioctl_getrts, - [SIOCSIWFRAG -SIOCIWFIRST] (iw_handler) orinoco_ioctl_setfrag, - [SIOCGIWFRAG -SIOCIWFIRST] (iw_handler) orinoco_ioctl_getfrag, - [SIOCGIWRETRY -SIOCIWFIRST] (iw_handler) orinoco_ioctl_getretry, - [SIOCSIWENCODE-SIOCIWFIRST] (iw_handler) orinoco_ioctl_setiwencode, - [SIOCGIWENCODE-SIOCIWFIRST] (iw_handler) orinoco_ioctl_getiwencode, - [SIOCSIWPOWER -SIOCIWFIRST] (iw_handler) orinoco_ioctl_setpower, - [SIOCGIWPOWER -SIOCIWFIRST] (iw_handler) orinoco_ioctl_getpower, + [SIOCSIWCOMMIT-SIOCIWFIRST] = (iw_handler) orinoco_ioctl_commit, + [SIOCGIWNAME -SIOCIWFIRST] = (iw_handler) orinoco_ioctl_getname, + [SIOCSIWFREQ -SIOCIWFIRST] = (iw_handler) orinoco_ioctl_setfreq, + 
[SIOCGIWFREQ -SIOCIWFIRST] = (iw_handler) orinoco_ioctl_getfreq, + [SIOCSIWMODE -SIOCIWFIRST] = (iw_handler) orinoco_ioctl_setmode, + [SIOCGIWMODE -SIOCIWFIRST] = (iw_handler) orinoco_ioctl_getmode, + [SIOCSIWSENS -SIOCIWFIRST] = (iw_handler) orinoco_ioctl_setsens, + [SIOCGIWSENS -SIOCIWFIRST] = (iw_handler) orinoco_ioctl_getsens, + [SIOCGIWRANGE -SIOCIWFIRST] = (iw_handler) orinoco_ioctl_getiwrange, + [SIOCSIWSPY -SIOCIWFIRST] = (iw_handler) orinoco_ioctl_setspy, + [SIOCGIWSPY -SIOCIWFIRST] = (iw_handler) orinoco_ioctl_getspy, + [SIOCSIWAP -SIOCIWFIRST] = (iw_handler) orinoco_ioctl_setwap, + [SIOCGIWAP -SIOCIWFIRST] = (iw_handler) orinoco_ioctl_getwap, + [SIOCSIWSCAN -SIOCIWFIRST] = (iw_handler) orinoco_ioctl_setscan, + [SIOCGIWSCAN -SIOCIWFIRST] = (iw_handler) orinoco_ioctl_getscan, + [SIOCSIWESSID -SIOCIWFIRST] = (iw_handler) orinoco_ioctl_setessid, + [SIOCGIWESSID -SIOCIWFIRST] = (iw_handler) orinoco_ioctl_getessid, + [SIOCSIWNICKN -SIOCIWFIRST] = (iw_handler) orinoco_ioctl_setnick, + [SIOCGIWNICKN -SIOCIWFIRST] = (iw_handler) orinoco_ioctl_getnick, + [SIOCSIWRATE -SIOCIWFIRST] = (iw_handler) orinoco_ioctl_setrate, + [SIOCGIWRATE -SIOCIWFIRST] = (iw_handler) orinoco_ioctl_getrate, + [SIOCSIWRTS -SIOCIWFIRST] = (iw_handler) orinoco_ioctl_setrts, + [SIOCGIWRTS -SIOCIWFIRST] = (iw_handler) orinoco_ioctl_getrts, + [SIOCSIWFRAG -SIOCIWFIRST] = (iw_handler) orinoco_ioctl_setfrag, + [SIOCGIWFRAG -SIOCIWFIRST] = (iw_handler) orinoco_ioctl_getfrag, + [SIOCGIWRETRY -SIOCIWFIRST] = (iw_handler) orinoco_ioctl_getretry, + [SIOCSIWENCODE-SIOCIWFIRST] = (iw_handler) orinoco_ioctl_setiwencode, + [SIOCGIWENCODE-SIOCIWFIRST] = (iw_handler) orinoco_ioctl_getiwencode, + [SIOCSIWPOWER -SIOCIWFIRST] = (iw_handler) orinoco_ioctl_setpower, + [SIOCGIWPOWER -SIOCIWFIRST] = (iw_handler) orinoco_ioctl_getpower, }; @@ -4359,15 +4359,15 @@ static const iw_handler orinoco_handler[] = { Added typecasting since we no longer use iwreq_data -- Moustafa */ static const iw_handler 
orinoco_private_handler[] = { - [0] (iw_handler) orinoco_ioctl_reset, - [1] (iw_handler) orinoco_ioctl_reset, - [2] (iw_handler) orinoco_ioctl_setport3, - [3] (iw_handler) orinoco_ioctl_getport3, - [4] (iw_handler) orinoco_ioctl_setpreamble, - [5] (iw_handler) orinoco_ioctl_getpreamble, - [6] (iw_handler) orinoco_ioctl_setibssport, - [7] (iw_handler) orinoco_ioctl_getibssport, - [9] (iw_handler) orinoco_ioctl_getrid, + [0] = (iw_handler) orinoco_ioctl_reset, + [1] = (iw_handler) orinoco_ioctl_reset, + [2] = (iw_handler) orinoco_ioctl_setport3, + [3] = (iw_handler) orinoco_ioctl_getport3, + [4] = (iw_handler) orinoco_ioctl_setpreamble, + [5] = (iw_handler) orinoco_ioctl_getpreamble, + [6] = (iw_handler) orinoco_ioctl_setibssport, + [7] = (iw_handler) orinoco_ioctl_getibssport, + [9] = (iw_handler) orinoco_ioctl_getrid, }; static const struct iw_handler_def orinoco_handler_def = { From cd8749b4aa6b7502e234d72cb53c00a3bc27ed1b Mon Sep 17 00:00:00 2001 From: Marcelo Feitoza Parisi Date: Fri, 15 Jul 2005 11:16:42 +0100 Subject: [PATCH 043/584] [PATCH] Use time_before in hamradio drivers Use of time_before() macro, defined at linux/jiffies.h, which deal with wrapping correctly and are nicer to read. 
Signed-off-by: Marcelo Feitoza Parisi Signed-off-by: Domen Puncer Signed-off-by: Ralf Baechle DL5RB baycom_epp.c | 3 ++- baycom_par.c | 3 ++- baycom_ser_fdx.c | 3 ++- baycom_ser_hdx.c | 3 ++- mkiss.c | 3 ++- 5 files changed, 10 insertions(+), 5 deletions(-) Signed-off-by: Jeff Garzik --- drivers/net/hamradio/baycom_epp.c | 3 ++- drivers/net/hamradio/baycom_par.c | 3 ++- drivers/net/hamradio/baycom_ser_fdx.c | 3 ++- drivers/net/hamradio/baycom_ser_hdx.c | 3 ++- drivers/net/hamradio/mkiss.c | 3 ++- 5 files changed, 10 insertions(+), 5 deletions(-) diff --git a/drivers/net/hamradio/baycom_epp.c b/drivers/net/hamradio/baycom_epp.c index a7f15d9f13e5..5298096afbdb 100644 --- a/drivers/net/hamradio/baycom_epp.c +++ b/drivers/net/hamradio/baycom_epp.c @@ -54,6 +54,7 @@ #include #include #include +#include <linux/jiffies.h> #if defined(CONFIG_AX25) || defined(CONFIG_AX25_MODULE) /* prototypes for ax25_encapsulate and ax25_rebuild_header */ #include @@ -287,7 +288,7 @@ static inline void baycom_int_freq(struct baycom_state *bc) * measure the interrupt frequency */ bc->debug_vals.cur_intcnt++; - if ((cur_jiffies - bc->debug_vals.last_jiffies) >= HZ) { + if (time_after_eq(cur_jiffies, bc->debug_vals.last_jiffies + HZ)) { bc->debug_vals.last_jiffies = cur_jiffies; bc->debug_vals.last_intcnt = bc->debug_vals.cur_intcnt; bc->debug_vals.cur_intcnt = 0; diff --git a/drivers/net/hamradio/baycom_par.c b/drivers/net/hamradio/baycom_par.c index 612ad452bee0..3b1bef1ee215 100644 --- a/drivers/net/hamradio/baycom_par.c +++ b/drivers/net/hamradio/baycom_par.c @@ -84,6 +84,7 @@ #include #include #include +#include <linux/jiffies.h> #include #include @@ -165,7 +166,7 @@ static void __inline__ baycom_int_freq(struct baycom_state *bc) * measure the interrupt frequency */ bc->debug_vals.cur_intcnt++; - if ((cur_jiffies - bc->debug_vals.last_jiffies) >= HZ) { + if (time_after_eq(cur_jiffies, bc->debug_vals.last_jiffies + HZ)) { bc->debug_vals.last_jiffies = cur_jiffies; bc->debug_vals.last_intcnt = bc->debug_vals.cur_intcnt;
bc->debug_vals.cur_intcnt = 0; diff --git a/drivers/net/hamradio/baycom_ser_fdx.c b/drivers/net/hamradio/baycom_ser_fdx.c index 25f270b05378..232793d2ce6b 100644 --- a/drivers/net/hamradio/baycom_ser_fdx.c +++ b/drivers/net/hamradio/baycom_ser_fdx.c @@ -79,6 +79,7 @@ #include #include #include +#include <linux/jiffies.h> /* --------------------------------------------------------------------- */ @@ -159,7 +160,7 @@ static inline void baycom_int_freq(struct baycom_state *bc) * measure the interrupt frequency */ bc->debug_vals.cur_intcnt++; - if ((cur_jiffies - bc->debug_vals.last_jiffies) >= HZ) { + if (time_after_eq(cur_jiffies, bc->debug_vals.last_jiffies + HZ)) { bc->debug_vals.last_jiffies = cur_jiffies; bc->debug_vals.last_intcnt = bc->debug_vals.cur_intcnt; bc->debug_vals.cur_intcnt = 0; diff --git a/drivers/net/hamradio/baycom_ser_hdx.c b/drivers/net/hamradio/baycom_ser_hdx.c index eead85d00962..be596a3eb3fd 100644 --- a/drivers/net/hamradio/baycom_ser_hdx.c +++ b/drivers/net/hamradio/baycom_ser_hdx.c @@ -69,6 +69,7 @@ #include #include #include +#include <linux/jiffies.h> /* --------------------------------------------------------------------- */ @@ -150,7 +151,7 @@ static inline void baycom_int_freq(struct baycom_state *bc) * measure the interrupt frequency */ bc->debug_vals.cur_intcnt++; - if ((cur_jiffies - bc->debug_vals.last_jiffies) >= HZ) { + if (time_after_eq(cur_jiffies, bc->debug_vals.last_jiffies + HZ)) { bc->debug_vals.last_jiffies = cur_jiffies; bc->debug_vals.last_intcnt = bc->debug_vals.cur_intcnt; bc->debug_vals.cur_intcnt = 0; diff --git a/drivers/net/hamradio/mkiss.c b/drivers/net/hamradio/mkiss.c index 3035422f5ad8..e94952e799fe 100644 --- a/drivers/net/hamradio/mkiss.c +++ b/drivers/net/hamradio/mkiss.c @@ -46,6 +46,7 @@ #include #include #include +#include <linux/jiffies.h> #include @@ -429,7 +430,7 @@ static int ax_xmit(struct sk_buff *skb, struct net_device *dev) * May be we must check transmitter timeout here ? * 14 Oct 1994 Dmitry Gorodchanin.
*/ - if (jiffies - dev->trans_start < 20 * HZ) { + if (time_before(jiffies, dev->trans_start + 20 * HZ)) { /* 20 sec timeout not reached */ return 1; } From 2f761478a2b436efa23659b4d5c826e53b11f91a Mon Sep 17 00:00:00 2001 From: Victor Fusco Date: Fri, 1 Jul 2005 00:03:12 +0200 Subject: [PATCH 044/584] [PATCH] drivers/net/pci-skeleton.c: MODULE_PARM -> module_param Use module_param() instead of the old MODULE_PARM() Signed-off-by: Victor Fusco Signed-off-by: Domen Puncer Signed-off-by: Jeff Garzik --- drivers/net/pci-skeleton.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/pci-skeleton.c b/drivers/net/pci-skeleton.c index 4a391ea0f58a..a1ac4bd1696e 100644 --- a/drivers/net/pci-skeleton.c +++ b/drivers/net/pci-skeleton.c @@ -486,9 +486,9 @@ struct netdrv_private { MODULE_AUTHOR ("Jeff Garzik "); MODULE_DESCRIPTION ("Skeleton for a PCI Fast Ethernet driver"); MODULE_LICENSE("GPL"); -MODULE_PARM (multicast_filter_limit, "i"); -MODULE_PARM (max_interrupt_work, "i"); -MODULE_PARM (media, "1-" __MODULE_STRING(8) "i"); +module_param(multicast_filter_limit, int, 0); +module_param(max_interrupt_work, int, 0); +module_param_array(media, int, NULL, 0); MODULE_PARM_DESC (multicast_filter_limit, "pci-skeleton maximum number of filtered multicast addresses"); MODULE_PARM_DESC (max_interrupt_work, "pci-skeleton maximum events handled per interrupt"); MODULE_PARM_DESC (media, "pci-skeleton: Bits 0-3: media type, bit 17: full duplex"); From 541134cfe7af179f45458b68421ee1da7bab9cba Mon Sep 17 00:00:00 2001 From: Daniel Drake Date: Sun, 3 Jul 2005 13:44:39 +0100 Subject: [PATCH 045/584] [PATCH] sata_nv: Support MCP51/MCP55 device IDs This is a multi-part message in MIME format. 
Signed-off-by: Jeff Garzik --- drivers/scsi/sata_nv.c | 21 +++++++++++++++++++-- include/linux/pci_ids.h | 1 + 2 files changed, 20 insertions(+), 2 deletions(-) diff --git a/drivers/scsi/sata_nv.c b/drivers/scsi/sata_nv.c index b0403ccd8a25..9b9142790bd6 100644 --- a/drivers/scsi/sata_nv.c +++ b/drivers/scsi/sata_nv.c @@ -20,6 +20,12 @@ * If you do not delete the provisions above, a recipient may use your * version of this file under either the OSL or the GPL. * + * 0.08 + * - Added support for MCP51 and MCP55. + * + * 0.07 + * - Added support for RAID class code. + * * 0.06 * - Added generic SATA support by using a pci_device_id that filters on * the IDE storage class code. @@ -48,7 +54,7 @@ #include #define DRV_NAME "sata_nv" -#define DRV_VERSION "0.6" +#define DRV_VERSION "0.8" #define NV_PORTS 2 #define NV_PIO_MASK 0x1f @@ -116,7 +122,9 @@ enum nv_host_type GENERIC, NFORCE2, NFORCE3, - CK804 + CK804, + MCP51, + MCP55 }; static struct pci_device_id nv_pci_tbl[] = { @@ -134,9 +142,18 @@ static struct pci_device_id nv_pci_tbl[] = { PCI_ANY_ID, PCI_ANY_ID, 0, 0, CK804 }, { PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_NFORCE_MCP04_SATA2, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CK804 }, + { PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_NFORCE_MCP51_SATA, + PCI_ANY_ID, PCI_ANY_ID, 0, 0, MCP51 }, + { PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_NFORCE_MCP51_SATA2, + PCI_ANY_ID, PCI_ANY_ID, 0, 0, MCP51 }, + { PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_NFORCE_MCP55_SATA, + PCI_ANY_ID, PCI_ANY_ID, 0, 0, MCP55 }, { PCI_VENDOR_ID_NVIDIA, PCI_ANY_ID, PCI_ANY_ID, PCI_ANY_ID, PCI_CLASS_STORAGE_IDE<<8, 0xffff00, GENERIC }, + { PCI_VENDOR_ID_NVIDIA, PCI_ANY_ID, + PCI_ANY_ID, PCI_ANY_ID, + PCI_CLASS_STORAGE_RAID<<8, 0xffff00, GENERIC }, { 0, } /* terminate list */ }; diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index bc4cc10fabe9..639291fe8ac0 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -1249,6 +1249,7 @@ #define PCI_DEVICE_ID_NVIDIA_NFORCE_MCP51_SATA 
0x0266 #define PCI_DEVICE_ID_NVIDIA_NFORCE_MCP51_SATA2 0x0267 #define PCI_DEVICE_ID_NVIDIA_NFORCE_MCP55_IDE 0x036E +#define PCI_DEVICE_ID_NVIDIA_NFORCE_MCP55_SATA 0x036F #define PCI_DEVICE_ID_NVIDIA_NVENET_12 0x0268 #define PCI_DEVICE_ID_NVIDIA_NVENET_13 0x0269 #define PCI_DEVICE_ID_NVIDIA_MCP51_AUDIO 0x026B From d2ae1d2ff9282ca061b6f5244eee4c28ee2b3ffa Mon Sep 17 00:00:00 2001 From: Chuck Ebbert <76306.1226@compuserve.com> Date: Sat, 2 Jul 2005 21:28:21 -0400 Subject: [PATCH 046/584] [PATCH] loopback: #ifdef the TSO code This patch #ifdefs the TSO code in the loopback driver. Saves ~800 bytes of text on i386 and avoids a conditional in the fast path. Signed-off-by: Chuck Ebbert <76306.1226@compuserve.com> Signed-off-by: Jeff Garzik --- drivers/net/loopback.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/net/loopback.c b/drivers/net/loopback.c index b33111e21313..c1e3cee8ec33 100644 --- a/drivers/net/loopback.c +++ b/drivers/net/loopback.c @@ -68,6 +68,7 @@ static DEFINE_PER_CPU(struct net_device_stats, loopback_stats); * of largesending device modulo TCP checksum, which is ignored for loopback. 
*/ +#ifdef LOOPBACK_TSO static void emulate_large_send_offload(struct sk_buff *skb) { struct iphdr *iph = skb->nh.iph; @@ -119,6 +120,7 @@ static void emulate_large_send_offload(struct sk_buff *skb) dev_kfree_skb(skb); } +#endif /* LOOPBACK_TSO */ /* * The higher levels take care of making this non-reentrant (it's @@ -136,6 +138,7 @@ static int loopback_xmit(struct sk_buff *skb, struct net_device *dev) skb->ip_summed = CHECKSUM_UNNECESSARY; #endif +#ifdef LOOPBACK_TSO if (skb_shinfo(skb)->tso_size) { BUG_ON(skb->protocol != htons(ETH_P_IP)); BUG_ON(skb->nh.iph->protocol != IPPROTO_TCP); @@ -143,7 +146,7 @@ static int loopback_xmit(struct sk_buff *skb, struct net_device *dev) emulate_large_send_offload(skb); return 0; } - +#endif dev->last_rx = jiffies; lb_stats = &per_cpu(loopback_stats, get_cpu()); @@ -209,6 +212,9 @@ struct net_device loopback_dev = { .rebuild_header = eth_rebuild_header, .flags = IFF_LOOPBACK, .features = NETIF_F_SG|NETIF_F_FRAGLIST +#ifdef LOOPBACK_TSO + |NETIF_F_TSO +#endif |NETIF_F_NO_CSUM|NETIF_F_HIGHDMA |NETIF_F_LLTX, .ethtool_ops = &loopback_ethtool_ops, From 18c16c696e8b2323a306af455c686df15c717206 Mon Sep 17 00:00:00 2001 From: Chuck Ebbert <76306.1226@compuserve.com> Date: Sat, 2 Jul 2005 21:28:22 -0400 Subject: [PATCH 047/584] [PATCH] loopback: optimize stats This patch slightly optimizes the loopback driver's stats update. Saves two loads, one add and one increment per packet sent. 
Signed-off-by: Chuck Ebbert <76306.1226@compuserve.com> Signed-off-by: Jeff Garzik --- drivers/net/loopback.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/loopback.c b/drivers/net/loopback.c index c1e3cee8ec33..dba76169e774 100644 --- a/drivers/net/loopback.c +++ b/drivers/net/loopback.c @@ -151,9 +151,9 @@ static int loopback_xmit(struct sk_buff *skb, struct net_device *dev) lb_stats = &per_cpu(loopback_stats, get_cpu()); lb_stats->rx_bytes += skb->len; - lb_stats->tx_bytes += skb->len; + lb_stats->tx_bytes = lb_stats->rx_bytes; lb_stats->rx_packets++; - lb_stats->tx_packets++; + lb_stats->tx_packets = lb_stats->rx_packets; put_cpu(); netif_rx(skb); From 0e920bfb0395fb16909fb98cb6e2782a1c6b73c7 Mon Sep 17 00:00:00 2001 From: Chuck Ebbert <76306.1226@compuserve.com> Date: Sat, 2 Jul 2005 21:28:23 -0400 Subject: [PATCH 048/584] [PATCH] loopback: whitespace cleanup Whitespace cleanup for loopback driver. Hopefully it fixes the last few annoyances. Signed-off-by: Chuck Ebbert <76306.1226@compuserve.com> Signed-off-by: Jeff Garzik --- drivers/net/loopback.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/net/loopback.c b/drivers/net/loopback.c index dba76169e774..2cb6f1c8c6ed 100644 --- a/drivers/net/loopback.c +++ b/drivers/net/loopback.c @@ -132,8 +132,8 @@ static int loopback_xmit(struct sk_buff *skb, struct net_device *dev) skb_orphan(skb); - skb->protocol=eth_type_trans(skb,dev); - skb->dev=dev; + skb->protocol = eth_type_trans(skb,dev); + skb->dev = dev; #ifndef LOOPBACK_MUST_CHECKSUM skb->ip_summed = CHECKSUM_UNNECESSARY; #endif @@ -211,12 +211,12 @@ struct net_device loopback_dev = { .type = ARPHRD_LOOPBACK, /* 0x0001*/ .rebuild_header = eth_rebuild_header, .flags = IFF_LOOPBACK, - .features = NETIF_F_SG|NETIF_F_FRAGLIST + .features = NETIF_F_SG | NETIF_F_FRAGLIST #ifdef LOOPBACK_TSO - |NETIF_F_TSO + | NETIF_F_TSO #endif - |NETIF_F_NO_CSUM|NETIF_F_HIGHDMA - |NETIF_F_LLTX, + | 
NETIF_F_NO_CSUM | NETIF_F_HIGHDMA + | NETIF_F_LLTX, .ethtool_ops = &loopback_ethtool_ops, }; From d81c0983de80c956cf37835b0d35adb3ab4bb03a Mon Sep 17 00:00:00 2001 From: Manfred Spraul Date: Sun, 31 Jul 2005 18:20:30 +0200 Subject: [PATCH 049/584] [PATCH] forcedeth: Jumbo Frame Support This is a multi-part message in MIME format. Signed-off-by: Jeff Garzik --- drivers/net/forcedeth.c | 125 ++++++++++++++++++++++++++++++++++------ 1 file changed, 107 insertions(+), 18 deletions(-) diff --git a/drivers/net/forcedeth.c b/drivers/net/forcedeth.c index 64f0f697c958..91f09e583cea 100644 --- a/drivers/net/forcedeth.c +++ b/drivers/net/forcedeth.c @@ -85,6 +85,7 @@ * 0.33: 16 May 2005: Support for MCP51 added. * 0.34: 18 Jun 2005: Add DEV_NEED_LINKTIMER to all nForce nics. * 0.35: 26 Jun 2005: Support for MCP55 added. + * 0.36: 28 Jul 2005: Add jumbo frame support. * * Known bugs: * We suspect that on some hardware no TX done interrupts are generated. @@ -96,7 +97,7 @@ * DEV_NEED_TIMERIRQ will not harm you on sane hardware, only generating a few * superfluous timer interrupts from the nic. */ -#define FORCEDETH_VERSION "0.35" +#define FORCEDETH_VERSION "0.36" #define DRV_NAME "forcedeth" #include @@ -379,9 +380,13 @@ struct ring_desc { #define TX_LIMIT_START 62 /* rx/tx mac addr + type + vlan + align + slack*/ -#define RX_NIC_BUFSIZE (ETH_DATA_LEN + 64) -/* even more slack */ -#define RX_ALLOC_BUFSIZE (ETH_DATA_LEN + 128) +#define NV_RX_HEADERS (64) +/* even more slack. 
*/ +#define NV_RX_ALLOC_PAD (64) + +/* maximum mtu size */ +#define NV_PKTLIMIT_1 ETH_DATA_LEN /* hard limit not known */ +#define NV_PKTLIMIT_2 9100 /* Actual limit according to NVidia: 9202 */ #define OOM_REFILL (1+HZ/20) #define POLL_WAIT (1+HZ/100) @@ -473,6 +478,7 @@ struct fe_priv { struct sk_buff *rx_skbuff[RX_RING]; dma_addr_t rx_dma[RX_RING]; unsigned int rx_buf_sz; + unsigned int pkt_limit; struct timer_list oom_kick; struct timer_list nic_poll; @@ -792,7 +798,7 @@ static int nv_alloc_rx(struct net_device *dev) nr = refill_rx % RX_RING; if (np->rx_skbuff[nr] == NULL) { - skb = dev_alloc_skb(RX_ALLOC_BUFSIZE); + skb = dev_alloc_skb(np->rx_buf_sz + NV_RX_ALLOC_PAD); if (!skb) break; @@ -805,7 +811,7 @@ static int nv_alloc_rx(struct net_device *dev) PCI_DMA_FROMDEVICE); np->rx_ring[nr].PacketBuffer = cpu_to_le32(np->rx_dma[nr]); wmb(); - np->rx_ring[nr].FlagLen = cpu_to_le32(RX_NIC_BUFSIZE | NV_RX_AVAIL); + np->rx_ring[nr].FlagLen = cpu_to_le32(np->rx_buf_sz | NV_RX_AVAIL); dprintk(KERN_DEBUG "%s: nv_alloc_rx: Packet %d marked as Available\n", dev->name, refill_rx); refill_rx++; @@ -831,7 +837,18 @@ static void nv_do_rx_refill(unsigned long data) enable_irq(dev->irq); } -static int nv_init_ring(struct net_device *dev) +static void nv_init_rx(struct net_device *dev) +{ + struct fe_priv *np = get_nvpriv(dev); + int i; + + np->cur_rx = RX_RING; + np->refill_rx = 0; + for (i = 0; i < RX_RING; i++) + np->rx_ring[i].FlagLen = 0; +} + +static void nv_init_tx(struct net_device *dev) { struct fe_priv *np = get_nvpriv(dev); int i; @@ -839,11 +856,12 @@ static int nv_init_ring(struct net_device *dev) np->next_tx = np->nic_tx = 0; for (i = 0; i < TX_RING; i++) np->tx_ring[i].FlagLen = 0; +} - np->cur_rx = RX_RING; - np->refill_rx = 0; - for (i = 0; i < RX_RING; i++) - np->rx_ring[i].FlagLen = 0; +static int nv_init_ring(struct net_device *dev) +{ + nv_init_tx(dev); + nv_init_rx(dev); return nv_alloc_rx(dev); } @@ -1207,15 +1225,82 @@ next_pkt: } } +static void 
set_bufsize(struct net_device *dev) +{ + struct fe_priv *np = netdev_priv(dev); + + if (dev->mtu <= ETH_DATA_LEN) + np->rx_buf_sz = ETH_DATA_LEN + NV_RX_HEADERS; + else + np->rx_buf_sz = dev->mtu + NV_RX_HEADERS; +} + /* * nv_change_mtu: dev->change_mtu function * Called with dev_base_lock held for read. */ static int nv_change_mtu(struct net_device *dev, int new_mtu) { - if (new_mtu > ETH_DATA_LEN) + struct fe_priv *np = get_nvpriv(dev); + int old_mtu; + + if (new_mtu < 64 || new_mtu > np->pkt_limit) return -EINVAL; + + old_mtu = dev->mtu; dev->mtu = new_mtu; + + /* return early if the buffer sizes will not change */ + if (old_mtu <= ETH_DATA_LEN && new_mtu <= ETH_DATA_LEN) + return 0; + if (old_mtu == new_mtu) + return 0; + + /* synchronized against open : rtnl_lock() held by caller */ + if (netif_running(dev)) { + u8 *base = get_hwbase(dev); + /* + * It seems that the nic preloads valid ring entries into an + * internal buffer. The procedure for flushing everything is + * guessed, there is probably a simpler approach. + * Changing the MTU is a rare event, it shouldn't matter. 
+ */ + disable_irq(dev->irq); + spin_lock_bh(&dev->xmit_lock); + spin_lock(&np->lock); + /* stop engines */ + nv_stop_rx(dev); + nv_stop_tx(dev); + nv_txrx_reset(dev); + /* drain rx queue */ + nv_drain_rx(dev); + nv_drain_tx(dev); + /* reinit driver view of the rx queue */ + nv_init_rx(dev); + nv_init_tx(dev); + /* alloc new rx buffers */ + set_bufsize(dev); + if (nv_alloc_rx(dev)) { + if (!np->in_shutdown) + mod_timer(&np->oom_kick, jiffies + OOM_REFILL); + } + /* reinit nic view of the rx queue */ + writel(np->rx_buf_sz, base + NvRegOffloadConfig); + writel((u32) np->ring_addr, base + NvRegRxRingPhysAddr); + writel((u32) (np->ring_addr + RX_RING*sizeof(struct ring_desc)), base + NvRegTxRingPhysAddr); + writel( ((RX_RING-1) << NVREG_RINGSZ_RXSHIFT) + ((TX_RING-1) << NVREG_RINGSZ_TXSHIFT), + base + NvRegRingSizes); + pci_push(base); + writel(NVREG_TXRXCTL_KICK|np->desc_ver, get_hwbase(dev) + NvRegTxRxControl); + pci_push(base); + + /* restart rx engine */ + nv_start_rx(dev); + nv_start_tx(dev); + spin_unlock(&np->lock); + spin_unlock_bh(&dev->xmit_lock); + enable_irq(dev->irq); + } return 0; } @@ -1792,6 +1877,7 @@ static int nv_open(struct net_device *dev) writel(0, base + NvRegAdapterControl); /* 2) initialize descriptor rings */ + set_bufsize(dev); oom = nv_init_ring(dev); writel(0, base + NvRegLinkSpeed); @@ -1837,7 +1923,7 @@ static int nv_open(struct net_device *dev) writel(NVREG_MISC1_FORCE | NVREG_MISC1_HD, base + NvRegMisc1); writel(readl(base + NvRegTransmitterStatus), base + NvRegTransmitterStatus); writel(NVREG_PFF_ALWAYS, base + NvRegPacketFilterFlags); - writel(NVREG_OFFLOAD_NORMAL, base + NvRegOffloadConfig); + writel(np->rx_buf_sz, base + NvRegOffloadConfig); writel(readl(base + NvRegReceiverStatus), base + NvRegReceiverStatus); get_random_bytes(&i, sizeof(i)); @@ -2007,13 +2093,16 @@ static int __devinit nv_probe(struct pci_dev *pci_dev, const struct pci_device_i /* handle different descriptor versions */ if (pci_dev->device == 
PCI_DEVICE_ID_NVIDIA_NVENET_1 || - pci_dev->device == PCI_DEVICE_ID_NVIDIA_NVENET_2 || - pci_dev->device == PCI_DEVICE_ID_NVIDIA_NVENET_3 || - pci_dev->device == PCI_DEVICE_ID_NVIDIA_NVENET_12 || - pci_dev->device == PCI_DEVICE_ID_NVIDIA_NVENET_13) + pci_dev->device == PCI_DEVICE_ID_NVIDIA_NVENET_2 || + pci_dev->device == PCI_DEVICE_ID_NVIDIA_NVENET_3 || + pci_dev->device == PCI_DEVICE_ID_NVIDIA_NVENET_12 || + pci_dev->device == PCI_DEVICE_ID_NVIDIA_NVENET_13) { np->desc_ver = DESC_VER_1; - else + np->pkt_limit = NV_PKTLIMIT_1; + } else { np->desc_ver = DESC_VER_2; + np->pkt_limit = NV_PKTLIMIT_2; + } err = -ENOMEM; np->base = ioremap(addr, NV_PCI_REGSZ); From dc8216c192795b62f30ca34299fb79e897438372 Mon Sep 17 00:00:00 2001 From: Manfred Spraul Date: Sun, 31 Jul 2005 18:26:05 +0200 Subject: [PATCH 050/584] [PATCH] forcedeth: Improve ethtool support This is a multi-part message in MIME format. Signed-off-by: Jeff Garzik --- drivers/net/forcedeth.c | 175 +++++++++++++++++++++------------------- 1 file changed, 92 insertions(+), 83 deletions(-) diff --git a/drivers/net/forcedeth.c b/drivers/net/forcedeth.c index 91f09e583cea..9c49c5ec89bf 100644 --- a/drivers/net/forcedeth.c +++ b/drivers/net/forcedeth.c @@ -85,7 +85,8 @@ * 0.33: 16 May 2005: Support for MCP51 added. * 0.34: 18 Jun 2005: Add DEV_NEED_LINKTIMER to all nForce nics. * 0.35: 26 Jun 2005: Support for MCP55 added. - * 0.36: 28 Jul 2005: Add jumbo frame support. + * 0.36: 28 Jun 2005: Add jumbo frame support. + * 0.37: 10 Jul 2005: Additional ethtool support, cleanup of pci id list * * Known bugs: * We suspect that on some hardware no TX done interrupts are generated. @@ -97,7 +98,7 @@ * DEV_NEED_TIMERIRQ will not harm you on sane hardware, only generating a few * superfluous timer interrupts from the nic. 
*/ -#define FORCEDETH_VERSION "0.36" +#define FORCEDETH_VERSION "0.37" #define DRV_NAME "forcedeth" #include @@ -137,6 +138,7 @@ #define DEV_IRQMASK_2 0x0004 /* use NVREG_IRQMASK_WANTED_2 for irq mask */ #define DEV_NEED_TIMERIRQ 0x0008 /* set the timer irq flag in the irq mask */ #define DEV_NEED_LINKTIMER 0x0010 /* poll link settings. Relies on the timer irq */ +#define DEV_HAS_LARGEDESC 0x0020 /* device supports jumbo frames and needs packet format 2 */ enum { NvRegIrqStatus = 0x000, @@ -1846,6 +1848,50 @@ static int nv_set_settings(struct net_device *dev, struct ethtool_cmd *ecmd) return 0; } +#define FORCEDETH_REGS_VER 1 +#define FORCEDETH_REGS_SIZE 0x400 /* 256 32-bit registers */ + +static int nv_get_regs_len(struct net_device *dev) +{ + return FORCEDETH_REGS_SIZE; +} + +static void nv_get_regs(struct net_device *dev, struct ethtool_regs *regs, void *buf) +{ + struct fe_priv *np = get_nvpriv(dev); + u8 __iomem *base = get_hwbase(dev); + u32 *rbuf = buf; + int i; + + regs->version = FORCEDETH_REGS_VER; + spin_lock_irq(&np->lock); + for (i=0;i<FORCEDETH_REGS_SIZE/sizeof(u32);i++) + rbuf[i] = readl(base + i*sizeof(u32)); + spin_unlock_irq(&np->lock); +} + +static int nv_nway_reset(struct net_device *dev) +{ + struct fe_priv *np = get_nvpriv(dev); + int ret; + + spin_lock_irq(&np->lock); + if (np->autoneg) { + int bmcr; + + bmcr = mii_rw(dev, np->phyaddr, MII_BMCR, MII_READ); + bmcr |= (BMCR_ANENABLE | BMCR_ANRESTART); + mii_rw(dev, np->phyaddr, MII_BMCR, bmcr); + + ret = 0; + } else { + ret = -EINVAL; + } + spin_unlock_irq(&np->lock); + + return ret; +} + static struct ethtool_ops ops = { .get_drvinfo = nv_get_drvinfo, .get_link = ethtool_op_get_link, @@ -1853,6 +1899,9 @@ static struct ethtool_ops ops = { .set_wol = nv_set_wol, .get_settings = nv_get_settings, .set_settings = nv_set_settings, + .get_regs_len = nv_get_regs_len, + .get_regs = nv_get_regs, + .nway_reset = nv_nway_reset, }; static int nv_open(struct net_device *dev) @@ -2092,18 +2141,13 @@ static int __devinit nv_probe(struct pci_dev *pci_dev, const struct pci_device_i } /* handle different
descriptor versions */ - if (pci_dev->device == PCI_DEVICE_ID_NVIDIA_NVENET_1 || - pci_dev->device == PCI_DEVICE_ID_NVIDIA_NVENET_2 || - pci_dev->device == PCI_DEVICE_ID_NVIDIA_NVENET_3 || - pci_dev->device == PCI_DEVICE_ID_NVIDIA_NVENET_12 || - pci_dev->device == PCI_DEVICE_ID_NVIDIA_NVENET_13) { - np->desc_ver = DESC_VER_1; - np->pkt_limit = NV_PKTLIMIT_1; - } else { + np->desc_ver = DESC_VER_1; + np->pkt_limit = NV_PKTLIMIT_1; + if (id->driver_data & DEV_HAS_LARGEDESC) { np->desc_ver = DESC_VER_2; np->pkt_limit = NV_PKTLIMIT_2; } - + err = -ENOMEM; np->base = ioremap(addr, NV_PCI_REGSZ); if (!np->base) @@ -2284,109 +2328,74 @@ static void __devexit nv_remove(struct pci_dev *pci_dev) static struct pci_device_id pci_tbl[] = { { /* nForce Ethernet Controller */ - .vendor = PCI_VENDOR_ID_NVIDIA, - .device = PCI_DEVICE_ID_NVIDIA_NVENET_1, - .subvendor = PCI_ANY_ID, - .subdevice = PCI_ANY_ID, + PCI_DEVICE(PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_NVENET_1), .driver_data = DEV_IRQMASK_1|DEV_NEED_TIMERIRQ|DEV_NEED_LINKTIMER, }, { /* nForce2 Ethernet Controller */ - .vendor = PCI_VENDOR_ID_NVIDIA, - .device = PCI_DEVICE_ID_NVIDIA_NVENET_2, - .subvendor = PCI_ANY_ID, - .subdevice = PCI_ANY_ID, + PCI_DEVICE(PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_NVENET_2), .driver_data = DEV_NEED_LASTPACKET1|DEV_IRQMASK_2|DEV_NEED_TIMERIRQ|DEV_NEED_LINKTIMER, }, { /* nForce3 Ethernet Controller */ - .vendor = PCI_VENDOR_ID_NVIDIA, - .device = PCI_DEVICE_ID_NVIDIA_NVENET_3, - .subvendor = PCI_ANY_ID, - .subdevice = PCI_ANY_ID, + PCI_DEVICE(PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_NVENET_3), .driver_data = DEV_NEED_LASTPACKET1|DEV_IRQMASK_2|DEV_NEED_TIMERIRQ|DEV_NEED_LINKTIMER, }, { /* nForce3 Ethernet Controller */ - .vendor = PCI_VENDOR_ID_NVIDIA, - .device = PCI_DEVICE_ID_NVIDIA_NVENET_4, - .subvendor = PCI_ANY_ID, - .subdevice = PCI_ANY_ID, - .driver_data = DEV_NEED_LASTPACKET1|DEV_IRQMASK_2|DEV_NEED_TIMERIRQ|DEV_NEED_LINKTIMER, + PCI_DEVICE(PCI_VENDOR_ID_NVIDIA, 
PCI_DEVICE_ID_NVIDIA_NVENET_4), + .driver_data = DEV_NEED_LASTPACKET1|DEV_IRQMASK_2|DEV_NEED_TIMERIRQ| + DEV_NEED_LINKTIMER|DEV_HAS_LARGEDESC, }, { /* nForce3 Ethernet Controller */ - .vendor = PCI_VENDOR_ID_NVIDIA, - .device = PCI_DEVICE_ID_NVIDIA_NVENET_5, - .subvendor = PCI_ANY_ID, - .subdevice = PCI_ANY_ID, - .driver_data = DEV_NEED_LASTPACKET1|DEV_IRQMASK_2|DEV_NEED_TIMERIRQ|DEV_NEED_LINKTIMER, + PCI_DEVICE(PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_NVENET_5), + .driver_data = DEV_NEED_LASTPACKET1|DEV_IRQMASK_2|DEV_NEED_TIMERIRQ| + DEV_NEED_LINKTIMER|DEV_HAS_LARGEDESC, }, { /* nForce3 Ethernet Controller */ - .vendor = PCI_VENDOR_ID_NVIDIA, - .device = PCI_DEVICE_ID_NVIDIA_NVENET_6, - .subvendor = PCI_ANY_ID, - .subdevice = PCI_ANY_ID, - .driver_data = DEV_NEED_LASTPACKET1|DEV_IRQMASK_2|DEV_NEED_TIMERIRQ|DEV_NEED_LINKTIMER, + PCI_DEVICE(PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_NVENET_6), + .driver_data = DEV_NEED_LASTPACKET1|DEV_IRQMASK_2|DEV_NEED_TIMERIRQ| + DEV_NEED_LINKTIMER|DEV_HAS_LARGEDESC, }, { /* nForce3 Ethernet Controller */ - .vendor = PCI_VENDOR_ID_NVIDIA, - .device = PCI_DEVICE_ID_NVIDIA_NVENET_7, - .subvendor = PCI_ANY_ID, - .subdevice = PCI_ANY_ID, - .driver_data = DEV_NEED_LASTPACKET1|DEV_IRQMASK_2|DEV_NEED_TIMERIRQ|DEV_NEED_LINKTIMER, + PCI_DEVICE(PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_NVENET_7), + .driver_data = DEV_NEED_LASTPACKET1|DEV_IRQMASK_2|DEV_NEED_TIMERIRQ| + DEV_NEED_LINKTIMER|DEV_HAS_LARGEDESC, }, { /* CK804 Ethernet Controller */ - .vendor = PCI_VENDOR_ID_NVIDIA, - .device = PCI_DEVICE_ID_NVIDIA_NVENET_8, - .subvendor = PCI_ANY_ID, - .subdevice = PCI_ANY_ID, - .driver_data = DEV_NEED_LASTPACKET1|DEV_IRQMASK_2|DEV_NEED_TIMERIRQ|DEV_NEED_LINKTIMER, + PCI_DEVICE(PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_NVENET_8), + .driver_data = DEV_NEED_LASTPACKET1|DEV_IRQMASK_2|DEV_NEED_TIMERIRQ| + DEV_NEED_LINKTIMER|DEV_HAS_LARGEDESC, }, { /* CK804 Ethernet Controller */ - .vendor = PCI_VENDOR_ID_NVIDIA, - .device = 
PCI_DEVICE_ID_NVIDIA_NVENET_9, - .subvendor = PCI_ANY_ID, - .subdevice = PCI_ANY_ID, - .driver_data = DEV_NEED_LASTPACKET1|DEV_IRQMASK_2|DEV_NEED_TIMERIRQ|DEV_NEED_LINKTIMER, + PCI_DEVICE(PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_NVENET_9), + .driver_data = DEV_NEED_LASTPACKET1|DEV_IRQMASK_2|DEV_NEED_TIMERIRQ| + DEV_NEED_LINKTIMER|DEV_HAS_LARGEDESC, }, { /* MCP04 Ethernet Controller */ - .vendor = PCI_VENDOR_ID_NVIDIA, - .device = PCI_DEVICE_ID_NVIDIA_NVENET_10, - .subvendor = PCI_ANY_ID, - .subdevice = PCI_ANY_ID, - .driver_data = DEV_NEED_LASTPACKET1|DEV_IRQMASK_2|DEV_NEED_TIMERIRQ|DEV_NEED_LINKTIMER, + PCI_DEVICE(PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_NVENET_10), + .driver_data = DEV_NEED_LASTPACKET1|DEV_IRQMASK_2|DEV_NEED_TIMERIRQ| + DEV_NEED_LINKTIMER|DEV_HAS_LARGEDESC, }, { /* MCP04 Ethernet Controller */ - .vendor = PCI_VENDOR_ID_NVIDIA, - .device = PCI_DEVICE_ID_NVIDIA_NVENET_11, - .subvendor = PCI_ANY_ID, - .subdevice = PCI_ANY_ID, + PCI_DEVICE(PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_NVENET_11), + .driver_data = DEV_NEED_LASTPACKET1|DEV_IRQMASK_2|DEV_NEED_TIMERIRQ| + DEV_NEED_LINKTIMER|DEV_HAS_LARGEDESC, + }, + { /* MCP51 Ethernet Controller */ + PCI_DEVICE(PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_NVENET_12), .driver_data = DEV_NEED_LASTPACKET1|DEV_IRQMASK_2|DEV_NEED_TIMERIRQ|DEV_NEED_LINKTIMER, }, { /* MCP51 Ethernet Controller */ - .vendor = PCI_VENDOR_ID_NVIDIA, - .device = PCI_DEVICE_ID_NVIDIA_NVENET_12, - .subvendor = PCI_ANY_ID, - .subdevice = PCI_ANY_ID, - .driver_data = DEV_NEED_LASTPACKET1|DEV_IRQMASK_2|DEV_NEED_TIMERIRQ|DEV_NEED_LINKTIMER, - }, - { /* MCP51 Ethernet Controller */ - .vendor = PCI_VENDOR_ID_NVIDIA, - .device = PCI_DEVICE_ID_NVIDIA_NVENET_13, - .subvendor = PCI_ANY_ID, - .subdevice = PCI_ANY_ID, + PCI_DEVICE(PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_NVENET_13), .driver_data = DEV_NEED_LASTPACKET1|DEV_IRQMASK_2|DEV_NEED_TIMERIRQ|DEV_NEED_LINKTIMER, }, { /* MCP55 Ethernet Controller */ - .vendor = PCI_VENDOR_ID_NVIDIA, - 
.device = PCI_DEVICE_ID_NVIDIA_NVENET_14, - .subvendor = PCI_ANY_ID, - .subdevice = PCI_ANY_ID, - .driver_data = DEV_NEED_LASTPACKET1|DEV_IRQMASK_2|DEV_NEED_TIMERIRQ|DEV_NEED_LINKTIMER, + PCI_DEVICE(PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_NVENET_14), + .driver_data = DEV_NEED_LASTPACKET1|DEV_IRQMASK_2|DEV_NEED_TIMERIRQ| + DEV_NEED_LINKTIMER|DEV_HAS_LARGEDESC, }, { /* MCP55 Ethernet Controller */ - .vendor = PCI_VENDOR_ID_NVIDIA, - .device = PCI_DEVICE_ID_NVIDIA_NVENET_15, - .subvendor = PCI_ANY_ID, - .subdevice = PCI_ANY_ID, - .driver_data = DEV_NEED_LASTPACKET1|DEV_IRQMASK_2|DEV_NEED_TIMERIRQ|DEV_NEED_LINKTIMER, + PCI_DEVICE(PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_NVENET_15), + .driver_data = DEV_NEED_LASTPACKET1|DEV_IRQMASK_2|DEV_NEED_TIMERIRQ| + DEV_NEED_LINKTIMER|DEV_HAS_LARGEDESC, }, {0,}, }; From c2dba06dae7d6c4d15b83ea12d8c601cffd0aee9 Mon Sep 17 00:00:00 2001 From: Manfred Spraul Date: Sun, 31 Jul 2005 18:29:47 +0200 Subject: [PATCH 051/584] [PATCH] forcedeth: rewritten tx irq handling This is a multi-part message in MIME format. Signed-off-by: Jeff Garzik --- drivers/net/forcedeth.c | 111 +++++++++++++++++++++++----------------- 1 file changed, 63 insertions(+), 48 deletions(-) diff --git a/drivers/net/forcedeth.c b/drivers/net/forcedeth.c index 9c49c5ec89bf..746ad0178f8c 100644 --- a/drivers/net/forcedeth.c +++ b/drivers/net/forcedeth.c @@ -87,6 +87,8 @@ * 0.35: 26 Jun 2005: Support for MCP55 added. * 0.36: 28 Jun 2005: Add jumbo frame support. * 0.37: 10 Jul 2005: Additional ethtool support, cleanup of pci id list + * 0.38: 16 Jul 2005: tx irq rewrite: Use global flags instead of + * per-packet flags. * * Known bugs: * We suspect that on some hardware no TX done interrupts are generated. @@ -98,7 +100,7 @@ * DEV_NEED_TIMERIRQ will not harm you on sane hardware, only generating a few * superfluous timer interrupts from the nic. 
*/ -#define FORCEDETH_VERSION "0.37" +#define FORCEDETH_VERSION "0.38" #define DRV_NAME "forcedeth" #include @@ -133,12 +135,9 @@ * Hardware access: */ -#define DEV_NEED_LASTPACKET1 0x0001 /* set LASTPACKET1 in tx flags */ -#define DEV_IRQMASK_1 0x0002 /* use NVREG_IRQMASK_WANTED_1 for irq mask */ -#define DEV_IRQMASK_2 0x0004 /* use NVREG_IRQMASK_WANTED_2 for irq mask */ -#define DEV_NEED_TIMERIRQ 0x0008 /* set the timer irq flag in the irq mask */ -#define DEV_NEED_LINKTIMER 0x0010 /* poll link settings. Relies on the timer irq */ -#define DEV_HAS_LARGEDESC 0x0020 /* device supports jumbo frames and needs packet format 2 */ +#define DEV_NEED_TIMERIRQ 0x0001 /* set the timer irq flag in the irq mask */ +#define DEV_NEED_LINKTIMER 0x0002 /* poll link settings. Relies on the timer irq */ +#define DEV_HAS_LARGEDESC 0x0004 /* device supports jumbo frames and needs packet format 2 */ enum { NvRegIrqStatus = 0x000, @@ -149,13 +148,16 @@ enum { #define NVREG_IRQ_RX 0x0002 #define NVREG_IRQ_RX_NOBUF 0x0004 #define NVREG_IRQ_TX_ERR 0x0008 -#define NVREG_IRQ_TX2 0x0010 +#define NVREG_IRQ_TX_OK 0x0010 #define NVREG_IRQ_TIMER 0x0020 #define NVREG_IRQ_LINK 0x0040 +#define NVREG_IRQ_TX_ERROR 0x0080 #define NVREG_IRQ_TX1 0x0100 -#define NVREG_IRQMASK_WANTED_1 0x005f -#define NVREG_IRQMASK_WANTED_2 0x0147 -#define NVREG_IRQ_UNKNOWN (~(NVREG_IRQ_RX_ERROR|NVREG_IRQ_RX|NVREG_IRQ_RX_NOBUF|NVREG_IRQ_TX_ERR|NVREG_IRQ_TX2|NVREG_IRQ_TIMER|NVREG_IRQ_LINK|NVREG_IRQ_TX1)) +#define NVREG_IRQMASK_WANTED 0x00df + +#define NVREG_IRQ_UNKNOWN (~(NVREG_IRQ_RX_ERROR|NVREG_IRQ_RX|NVREG_IRQ_RX_NOBUF|NVREG_IRQ_TX_ERR| \ + NVREG_IRQ_TX_OK|NVREG_IRQ_TIMER|NVREG_IRQ_LINK|NVREG_IRQ_TX_ERROR| \ + NVREG_IRQ_TX1)) NvRegUnknownSetupReg6 = 0x008, #define NVREG_UNKSETUP6_VAL 3 @@ -296,7 +298,7 @@ struct ring_desc { #define NV_TX_LASTPACKET (1<<16) #define NV_TX_RETRYERROR (1<<19) -#define NV_TX_LASTPACKET1 (1<<24) +#define NV_TX_FORCED_INTERRUPT (1<<24) #define NV_TX_DEFERRED (1<<26) #define NV_TX_CARRIERLOST 
(1<<27) #define NV_TX_LATECOLLISION (1<<28) @@ -306,7 +308,7 @@ struct ring_desc { #define NV_TX2_LASTPACKET (1<<29) #define NV_TX2_RETRYERROR (1<<18) -#define NV_TX2_LASTPACKET1 (1<<23) +#define NV_TX2_FORCED_INTERRUPT (1<<30) #define NV_TX2_DEFERRED (1<<25) #define NV_TX2_CARRIERLOST (1<<26) #define NV_TX2_LATECOLLISION (1<<27) @@ -1013,9 +1015,39 @@ static void nv_tx_timeout(struct net_device *dev) struct fe_priv *np = get_nvpriv(dev); u8 __iomem *base = get_hwbase(dev); - dprintk(KERN_DEBUG "%s: Got tx_timeout. irq: %08x\n", dev->name, + printk(KERN_INFO "%s: Got tx_timeout. irq: %08x\n", dev->name, readl(base + NvRegIrqStatus) & NVREG_IRQSTAT_MASK); + { + int i; + + printk(KERN_INFO "%s: Ring at %lx: next %d nic %d\n", + dev->name, (unsigned long)np->ring_addr, + np->next_tx, np->nic_tx); + printk(KERN_INFO "%s: Dumping tx registers\n", dev->name); + for (i=0;i<0x400;i+= 32) { + printk(KERN_INFO "%3x: %08x %08x %08x %08x %08x %08x %08x %08x\n", + i, + readl(base + i + 0), readl(base + i + 4), + readl(base + i + 8), readl(base + i + 12), + readl(base + i + 16), readl(base + i + 20), + readl(base + i + 24), readl(base + i + 28)); + } + printk(KERN_INFO "%s: Dumping tx ring\n", dev->name); + for (i=0;i<TX_RING;i+= 4) { + printk(KERN_INFO "%03x: %08x %08x // %08x %08x // %08x %08x // %08x %08x\n", + i, + le32_to_cpu(np->tx_ring[i].PacketBuffer), + le32_to_cpu(np->tx_ring[i].FlagLen), + le32_to_cpu(np->tx_ring[i+1].PacketBuffer), + le32_to_cpu(np->tx_ring[i+1].FlagLen), + le32_to_cpu(np->tx_ring[i+2].PacketBuffer), + le32_to_cpu(np->tx_ring[i+2].FlagLen), + le32_to_cpu(np->tx_ring[i+3].PacketBuffer), + le32_to_cpu(np->tx_ring[i+3].FlagLen)); + } + } + spin_lock_irq(&np->lock); /* 1) stop tx engine */ @@ -1557,7 +1589,7 @@ static irqreturn_t nv_nic_irq(int foo, void *data, struct pt_regs *regs) if (!(events & np->irqmask)) break; - if (events & (NVREG_IRQ_TX1|NVREG_IRQ_TX2|NVREG_IRQ_TX_ERR)) { + if (events & (NVREG_IRQ_TX1|NVREG_IRQ_TX_OK|NVREG_IRQ_TX_ERROR|NVREG_IRQ_TX_ERR)) { spin_lock(&np->lock); nv_tx_done(dev); spin_unlock(&np->lock); @@ -2213,17 +2245,10 @@ static int __devinit
nv_probe(struct pci_dev *pci_dev, const struct pci_device_i if (np->desc_ver == DESC_VER_1) { np->tx_flags = NV_TX_LASTPACKET|NV_TX_VALID; - if (id->driver_data & DEV_NEED_LASTPACKET1) - np->tx_flags |= NV_TX_LASTPACKET1; } else { np->tx_flags = NV_TX2_LASTPACKET|NV_TX2_VALID; - if (id->driver_data & DEV_NEED_LASTPACKET1) - np->tx_flags |= NV_TX2_LASTPACKET1; } - if (id->driver_data & DEV_IRQMASK_1) - np->irqmask = NVREG_IRQMASK_WANTED_1; - if (id->driver_data & DEV_IRQMASK_2) - np->irqmask = NVREG_IRQMASK_WANTED_2; + np->irqmask = NVREG_IRQMASK_WANTED; if (id->driver_data & DEV_NEED_TIMERIRQ) np->irqmask |= NVREG_IRQ_TIMER; if (id->driver_data & DEV_NEED_LINKTIMER) { @@ -2329,73 +2354,63 @@ static void __devexit nv_remove(struct pci_dev *pci_dev) static struct pci_device_id pci_tbl[] = { { /* nForce Ethernet Controller */ PCI_DEVICE(PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_NVENET_1), - .driver_data = DEV_IRQMASK_1|DEV_NEED_TIMERIRQ|DEV_NEED_LINKTIMER, + .driver_data = DEV_NEED_TIMERIRQ|DEV_NEED_LINKTIMER, }, { /* nForce2 Ethernet Controller */ PCI_DEVICE(PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_NVENET_2), - .driver_data = DEV_NEED_LASTPACKET1|DEV_IRQMASK_2|DEV_NEED_TIMERIRQ|DEV_NEED_LINKTIMER, + .driver_data = DEV_NEED_TIMERIRQ|DEV_NEED_LINKTIMER, }, { /* nForce3 Ethernet Controller */ PCI_DEVICE(PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_NVENET_3), - .driver_data = DEV_NEED_LASTPACKET1|DEV_IRQMASK_2|DEV_NEED_TIMERIRQ|DEV_NEED_LINKTIMER, + .driver_data = DEV_NEED_TIMERIRQ|DEV_NEED_LINKTIMER, }, { /* nForce3 Ethernet Controller */ PCI_DEVICE(PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_NVENET_4), - .driver_data = DEV_NEED_LASTPACKET1|DEV_IRQMASK_2|DEV_NEED_TIMERIRQ| - DEV_NEED_LINKTIMER|DEV_HAS_LARGEDESC, + .driver_data = DEV_NEED_TIMERIRQ|DEV_NEED_LINKTIMER|DEV_HAS_LARGEDESC, }, { /* nForce3 Ethernet Controller */ PCI_DEVICE(PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_NVENET_5), - .driver_data = DEV_NEED_LASTPACKET1|DEV_IRQMASK_2|DEV_NEED_TIMERIRQ| - 
DEV_NEED_LINKTIMER|DEV_HAS_LARGEDESC, + .driver_data = DEV_NEED_TIMERIRQ|DEV_NEED_LINKTIMER|DEV_HAS_LARGEDESC, }, { /* nForce3 Ethernet Controller */ PCI_DEVICE(PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_NVENET_6), - .driver_data = DEV_NEED_LASTPACKET1|DEV_IRQMASK_2|DEV_NEED_TIMERIRQ| - DEV_NEED_LINKTIMER|DEV_HAS_LARGEDESC, + .driver_data = DEV_NEED_TIMERIRQ|DEV_NEED_LINKTIMER|DEV_HAS_LARGEDESC, }, { /* nForce3 Ethernet Controller */ PCI_DEVICE(PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_NVENET_7), - .driver_data = DEV_NEED_LASTPACKET1|DEV_IRQMASK_2|DEV_NEED_TIMERIRQ| - DEV_NEED_LINKTIMER|DEV_HAS_LARGEDESC, + .driver_data = DEV_NEED_TIMERIRQ|DEV_NEED_LINKTIMER|DEV_HAS_LARGEDESC, }, { /* CK804 Ethernet Controller */ PCI_DEVICE(PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_NVENET_8), - .driver_data = DEV_NEED_LASTPACKET1|DEV_IRQMASK_2|DEV_NEED_TIMERIRQ| - DEV_NEED_LINKTIMER|DEV_HAS_LARGEDESC, + .driver_data = DEV_NEED_TIMERIRQ|DEV_NEED_LINKTIMER|DEV_HAS_LARGEDESC, }, { /* CK804 Ethernet Controller */ PCI_DEVICE(PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_NVENET_9), - .driver_data = DEV_NEED_LASTPACKET1|DEV_IRQMASK_2|DEV_NEED_TIMERIRQ| - DEV_NEED_LINKTIMER|DEV_HAS_LARGEDESC, + .driver_data = DEV_NEED_TIMERIRQ|DEV_NEED_LINKTIMER|DEV_HAS_LARGEDESC, }, { /* MCP04 Ethernet Controller */ PCI_DEVICE(PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_NVENET_10), - .driver_data = DEV_NEED_LASTPACKET1|DEV_IRQMASK_2|DEV_NEED_TIMERIRQ| - DEV_NEED_LINKTIMER|DEV_HAS_LARGEDESC, + .driver_data = DEV_NEED_TIMERIRQ|DEV_NEED_LINKTIMER|DEV_HAS_LARGEDESC, }, { /* MCP04 Ethernet Controller */ PCI_DEVICE(PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_NVENET_11), - .driver_data = DEV_NEED_LASTPACKET1|DEV_IRQMASK_2|DEV_NEED_TIMERIRQ| - DEV_NEED_LINKTIMER|DEV_HAS_LARGEDESC, + .driver_data = DEV_NEED_TIMERIRQ|DEV_NEED_LINKTIMER|DEV_HAS_LARGEDESC, }, { /* MCP51 Ethernet Controller */ PCI_DEVICE(PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_NVENET_12), - .driver_data = 
DEV_NEED_LASTPACKET1|DEV_IRQMASK_2|DEV_NEED_TIMERIRQ|DEV_NEED_LINKTIMER, + .driver_data = DEV_NEED_TIMERIRQ|DEV_NEED_LINKTIMER, }, { /* MCP51 Ethernet Controller */ PCI_DEVICE(PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_NVENET_13), - .driver_data = DEV_NEED_LASTPACKET1|DEV_IRQMASK_2|DEV_NEED_TIMERIRQ|DEV_NEED_LINKTIMER, + .driver_data = DEV_NEED_TIMERIRQ|DEV_NEED_LINKTIMER, }, { /* MCP55 Ethernet Controller */ PCI_DEVICE(PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_NVENET_14), - .driver_data = DEV_NEED_LASTPACKET1|DEV_IRQMASK_2|DEV_NEED_TIMERIRQ| - DEV_NEED_LINKTIMER|DEV_HAS_LARGEDESC, + .driver_data = DEV_NEED_TIMERIRQ|DEV_NEED_LINKTIMER|DEV_HAS_LARGEDESC, }, { /* MCP55 Ethernet Controller */ PCI_DEVICE(PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_NVENET_15), - .driver_data = DEV_NEED_LASTPACKET1|DEV_IRQMASK_2|DEV_NEED_TIMERIRQ| - DEV_NEED_LINKTIMER|DEV_HAS_LARGEDESC, + .driver_data = DEV_NEED_TIMERIRQ|DEV_NEED_LINKTIMER|DEV_HAS_LARGEDESC, }, {0,}, }; From ee73362cdd7d9b8166424f5f9e3176c629ac5cb2 Mon Sep 17 00:00:00 2001 From: Manfred Spraul Date: Sun, 31 Jul 2005 18:32:26 +0200 Subject: [PATCH 052/584] [PATCH] forcedeth: 64-bit DMA support This is a multi-part message in MIME format. Signed-off-by: Jeff Garzik --- drivers/net/forcedeth.c | 210 ++++++++++++++++++++++++++++++---------- 1 file changed, 161 insertions(+), 49 deletions(-) diff --git a/drivers/net/forcedeth.c b/drivers/net/forcedeth.c index 746ad0178f8c..4d38acbac4ef 100644 --- a/drivers/net/forcedeth.c +++ b/drivers/net/forcedeth.c @@ -89,6 +89,7 @@ * 0.37: 10 Jul 2005: Additional ethtool support, cleanup of pci id list * 0.38: 16 Jul 2005: tx irq rewrite: Use global flags instead of * per-packet flags. + * 0.39: 18 Jul 2005: Add 64bit descriptor support. * * Known bugs: * We suspect that on some hardware no TX done interrupts are generated. @@ -100,7 +101,7 @@ * DEV_NEED_TIMERIRQ will not harm you on sane hardware, only generating a few * superfluous timer interrupts from the nic. 
*/ -#define FORCEDETH_VERSION "0.38" +#define FORCEDETH_VERSION "0.39" #define DRV_NAME "forcedeth" #include @@ -138,6 +139,7 @@ #define DEV_NEED_TIMERIRQ 0x0001 /* set the timer irq flag in the irq mask */ #define DEV_NEED_LINKTIMER 0x0002 /* poll link settings. Relies on the timer irq */ #define DEV_HAS_LARGEDESC 0x0004 /* device supports jumbo frames and needs packet format 2 */ +#define DEV_HAS_HIGH_DMA 0x0008 /* device supports 64bit dma */ enum { NvRegIrqStatus = 0x000, @@ -291,6 +293,18 @@ struct ring_desc { u32 FlagLen; }; +struct ring_desc_ex { + u32 PacketBufferHigh; + u32 PacketBufferLow; + u32 Reserved; + u32 FlagLen; +}; + +typedef union _ring_type { + struct ring_desc* orig; + struct ring_desc_ex* ex; +} ring_type; + #define FLAG_MASK_V1 0xffff0000 #define FLAG_MASK_V2 0xffffc000 #define LEN_MASK_V1 (0xffffffff ^ FLAG_MASK_V1) @@ -405,6 +419,7 @@ struct ring_desc { */ #define DESC_VER_1 0x0 #define DESC_VER_2 (0x02100|NVREG_TXRXCTL_RXCHECK) +#define DESC_VER_3 (0x02200|NVREG_TXRXCTL_RXCHECK) /* PHY defines */ #define PHY_OUI_MARVELL 0x5043 @@ -477,7 +492,7 @@ struct fe_priv { /* rx specific fields. * Locking: Within irq hander or disable_irq+spin_lock(&np->lock); */ - struct ring_desc *rx_ring; + ring_type rx_ring; unsigned int cur_rx, refill_rx; struct sk_buff *rx_skbuff[RX_RING]; dma_addr_t rx_dma[RX_RING]; @@ -494,7 +509,7 @@ struct fe_priv { /* * tx specific fields. */ - struct ring_desc *tx_ring; + ring_type tx_ring; unsigned int next_tx, nic_tx; struct sk_buff *tx_skbuff[TX_RING]; dma_addr_t tx_dma[TX_RING]; @@ -529,6 +544,11 @@ static inline u32 nv_descr_getlength(struct ring_desc *prd, u32 v) & ((v == DESC_VER_1) ? 
LEN_MASK_V1 : LEN_MASK_V2); } +static inline u32 nv_descr_getlength_ex(struct ring_desc_ex *prd, u32 v) +{ + return le32_to_cpu(prd->FlagLen) & LEN_MASK_V2; +} + static int reg_delay(struct net_device *dev, int offset, u32 mask, u32 target, int delay, int delaymax, const char *msg) { @@ -813,9 +833,16 @@ static int nv_alloc_rx(struct net_device *dev) } np->rx_dma[nr] = pci_map_single(np->pci_dev, skb->data, skb->len, PCI_DMA_FROMDEVICE); - np->rx_ring[nr].PacketBuffer = cpu_to_le32(np->rx_dma[nr]); - wmb(); - np->rx_ring[nr].FlagLen = cpu_to_le32(np->rx_buf_sz | NV_RX_AVAIL); + if (np->desc_ver == DESC_VER_1 || np->desc_ver == DESC_VER_2) { + np->rx_ring.orig[nr].PacketBuffer = cpu_to_le32(np->rx_dma[nr]); + wmb(); + np->rx_ring.orig[nr].FlagLen = cpu_to_le32(np->rx_buf_sz | NV_RX_AVAIL); + } else { + np->rx_ring.ex[nr].PacketBufferHigh = cpu_to_le64(np->rx_dma[nr]) >> 32; + np->rx_ring.ex[nr].PacketBufferLow = cpu_to_le64(np->rx_dma[nr]) & 0x0FFFFFFFF; + wmb(); + np->rx_ring.ex[nr].FlagLen = cpu_to_le32(np->rx_buf_sz | NV_RX2_AVAIL); + } dprintk(KERN_DEBUG "%s: nv_alloc_rx: Packet %d marked as Available\n", dev->name, refill_rx); refill_rx++; @@ -849,7 +876,10 @@ static void nv_init_rx(struct net_device *dev) np->cur_rx = RX_RING; np->refill_rx = 0; for (i = 0; i < RX_RING; i++) - np->rx_ring[i].FlagLen = 0; + if (np->desc_ver == DESC_VER_1 || np->desc_ver == DESC_VER_2) + np->rx_ring.orig[i].FlagLen = 0; + else + np->rx_ring.ex[i].FlagLen = 0; } static void nv_init_tx(struct net_device *dev) @@ -859,7 +889,10 @@ static void nv_init_tx(struct net_device *dev) np->next_tx = np->nic_tx = 0; for (i = 0; i < TX_RING; i++) - np->tx_ring[i].FlagLen = 0; + if (np->desc_ver == DESC_VER_1 || np->desc_ver == DESC_VER_2) + np->tx_ring.orig[i].FlagLen = 0; + else + np->tx_ring.ex[i].FlagLen = 0; } static int nv_init_ring(struct net_device *dev) @@ -874,7 +907,10 @@ static void nv_drain_tx(struct net_device *dev) struct fe_priv *np = get_nvpriv(dev); int i; for (i = 0; i < 
TX_RING; i++) { - np->tx_ring[i].FlagLen = 0; + if (np->desc_ver == DESC_VER_1 || np->desc_ver == DESC_VER_2) + np->tx_ring.orig[i].FlagLen = 0; + else + np->tx_ring.ex[i].FlagLen = 0; if (np->tx_skbuff[i]) { pci_unmap_single(np->pci_dev, np->tx_dma[i], np->tx_skbuff[i]->len, @@ -891,7 +927,10 @@ static void nv_drain_rx(struct net_device *dev) struct fe_priv *np = get_nvpriv(dev); int i; for (i = 0; i < RX_RING; i++) { - np->rx_ring[i].FlagLen = 0; + if (np->desc_ver == DESC_VER_1 || np->desc_ver == DESC_VER_2) + np->rx_ring.orig[i].FlagLen = 0; + else + np->rx_ring.ex[i].FlagLen = 0; wmb(); if (np->rx_skbuff[i]) { pci_unmap_single(np->pci_dev, np->rx_dma[i], @@ -922,11 +961,19 @@ static int nv_start_xmit(struct sk_buff *skb, struct net_device *dev) np->tx_dma[nr] = pci_map_single(np->pci_dev, skb->data,skb->len, PCI_DMA_TODEVICE); - np->tx_ring[nr].PacketBuffer = cpu_to_le32(np->tx_dma[nr]); + if (np->desc_ver == DESC_VER_1 || np->desc_ver == DESC_VER_2) + np->tx_ring.orig[nr].PacketBuffer = cpu_to_le32(np->tx_dma[nr]); + else { + np->tx_ring.ex[nr].PacketBufferHigh = cpu_to_le64(np->tx_dma[nr]) >> 32; + np->tx_ring.ex[nr].PacketBufferLow = cpu_to_le64(np->tx_dma[nr]) & 0x0FFFFFFFF; + } spin_lock_irq(&np->lock); wmb(); - np->tx_ring[nr].FlagLen = cpu_to_le32( (skb->len-1) | np->tx_flags ); + if (np->desc_ver == DESC_VER_1 || np->desc_ver == DESC_VER_2) + np->tx_ring.orig[nr].FlagLen = cpu_to_le32( (skb->len-1) | np->tx_flags ); + else + np->tx_ring.ex[nr].FlagLen = cpu_to_le32( (skb->len-1) | np->tx_flags ); dprintk(KERN_DEBUG "%s: nv_start_xmit: packet packet %d queued for transmission.\n", dev->name, np->next_tx); { @@ -964,7 +1011,10 @@ static void nv_tx_done(struct net_device *dev) while (np->nic_tx != np->next_tx) { i = np->nic_tx % TX_RING; - Flags = le32_to_cpu(np->tx_ring[i].FlagLen); + if (np->desc_ver == DESC_VER_1 || np->desc_ver == DESC_VER_2) + Flags = le32_to_cpu(np->tx_ring.orig[i].FlagLen); + else + Flags = le32_to_cpu(np->tx_ring.ex[i].FlagLen); 
dprintk(KERN_DEBUG "%s: nv_tx_done: looking at packet %d, Flags 0x%x.\n", dev->name, np->nic_tx, Flags); @@ -1035,16 +1085,33 @@ static void nv_tx_timeout(struct net_device *dev) } printk(KERN_INFO "%s: Dumping tx ring\n", dev->name); for (i=0;i<TX_RING;i+= 4) { - printk(KERN_INFO "%03x: %08x %08x // %08x %08x // %08x %08x // %08x %08x\n", - i, - le32_to_cpu(np->tx_ring[i].PacketBuffer), - le32_to_cpu(np->tx_ring[i].FlagLen), - le32_to_cpu(np->tx_ring[i+1].PacketBuffer), - le32_to_cpu(np->tx_ring[i+1].FlagLen), - le32_to_cpu(np->tx_ring[i+2].PacketBuffer), - le32_to_cpu(np->tx_ring[i+2].FlagLen), - le32_to_cpu(np->tx_ring[i+3].PacketBuffer), - le32_to_cpu(np->tx_ring[i+3].FlagLen)); + if (np->desc_ver == DESC_VER_1 || np->desc_ver == DESC_VER_2) { + printk(KERN_INFO "%03x: %08x %08x // %08x %08x // %08x %08x // %08x %08x\n", + i, + le32_to_cpu(np->tx_ring.orig[i].PacketBuffer), + le32_to_cpu(np->tx_ring.orig[i].FlagLen), + le32_to_cpu(np->tx_ring.orig[i+1].PacketBuffer), + le32_to_cpu(np->tx_ring.orig[i+1].FlagLen), + le32_to_cpu(np->tx_ring.orig[i+2].PacketBuffer), + le32_to_cpu(np->tx_ring.orig[i+2].FlagLen), + le32_to_cpu(np->tx_ring.orig[i+3].PacketBuffer), + le32_to_cpu(np->tx_ring.orig[i+3].FlagLen)); + } else { + printk(KERN_INFO "%03x: %08x %08x %08x // %08x %08x %08x // %08x %08x %08x // %08x %08x %08x\n", + i, + le32_to_cpu(np->tx_ring.ex[i].PacketBufferHigh), + le32_to_cpu(np->tx_ring.ex[i].PacketBufferLow), + le32_to_cpu(np->tx_ring.ex[i].FlagLen), + le32_to_cpu(np->tx_ring.ex[i+1].PacketBufferHigh), + le32_to_cpu(np->tx_ring.ex[i+1].PacketBufferLow), + le32_to_cpu(np->tx_ring.ex[i+1].FlagLen), + le32_to_cpu(np->tx_ring.ex[i+2].PacketBufferHigh), + le32_to_cpu(np->tx_ring.ex[i+2].PacketBufferLow), + le32_to_cpu(np->tx_ring.ex[i+2].FlagLen), + le32_to_cpu(np->tx_ring.ex[i+3].PacketBufferHigh), + le32_to_cpu(np->tx_ring.ex[i+3].PacketBufferLow), + le32_to_cpu(np->tx_ring.ex[i+3].FlagLen)); + } } } @@ -1061,7 +1128,10 @@ static void nv_tx_timeout(struct net_device *dev) printk(KERN_DEBUG "%s: tx_timeout: dead entries!\n", dev->name); nv_drain_tx(dev); np->next_tx =
np->nic_tx = 0; - writel((u32) (np->ring_addr + RX_RING*sizeof(struct ring_desc)), base + NvRegTxRingPhysAddr); + if (np->desc_ver == DESC_VER_1 || np->desc_ver == DESC_VER_2) + writel((u32) (np->ring_addr + RX_RING*sizeof(struct ring_desc)), base + NvRegTxRingPhysAddr); + else + writel((u32) (np->ring_addr + RX_RING*sizeof(struct ring_desc_ex)), base + NvRegTxRingPhysAddr); netif_wake_queue(dev); } @@ -1136,8 +1206,13 @@ static void nv_rx_process(struct net_device *dev) break; /* we scanned the whole ring - do not continue */ i = np->cur_rx % RX_RING; - Flags = le32_to_cpu(np->rx_ring[i].FlagLen); - len = nv_descr_getlength(&np->rx_ring[i], np->desc_ver); + if (np->desc_ver == DESC_VER_1 || np->desc_ver == DESC_VER_2) { + Flags = le32_to_cpu(np->rx_ring.orig[i].FlagLen); + len = nv_descr_getlength(&np->rx_ring.orig[i], np->desc_ver); + } else { + Flags = le32_to_cpu(np->rx_ring.ex[i].FlagLen); + len = nv_descr_getlength_ex(&np->rx_ring.ex[i], np->desc_ver); + } dprintk(KERN_DEBUG "%s: nv_rx_process: looking at packet %d, Flags 0x%x.\n", dev->name, np->cur_rx, Flags); @@ -1321,7 +1396,10 @@ static int nv_change_mtu(struct net_device *dev, int new_mtu) /* reinit nic view of the rx queue */ writel(np->rx_buf_sz, base + NvRegOffloadConfig); writel((u32) np->ring_addr, base + NvRegRxRingPhysAddr); - writel((u32) (np->ring_addr + RX_RING*sizeof(struct ring_desc)), base + NvRegTxRingPhysAddr); + if (np->desc_ver == DESC_VER_1 || np->desc_ver == DESC_VER_2) + writel((u32) (np->ring_addr + RX_RING*sizeof(struct ring_desc)), base + NvRegTxRingPhysAddr); + else + writel((u32) (np->ring_addr + RX_RING*sizeof(struct ring_desc_ex)), base + NvRegTxRingPhysAddr); writel( ((RX_RING-1) << NVREG_RINGSZ_RXSHIFT) + ((TX_RING-1) << NVREG_RINGSZ_TXSHIFT), base + NvRegRingSizes); pci_push(base); @@ -1982,7 +2060,10 @@ static int nv_open(struct net_device *dev) /* 4) give hw rings */ writel((u32) np->ring_addr, base + NvRegRxRingPhysAddr); - writel((u32) (np->ring_addr + 
RX_RING*sizeof(struct ring_desc)), base + NvRegTxRingPhysAddr); + if (np->desc_ver == DESC_VER_1 || np->desc_ver == DESC_VER_2) + writel((u32) (np->ring_addr + RX_RING*sizeof(struct ring_desc)), base + NvRegTxRingPhysAddr); + else + writel((u32) (np->ring_addr + RX_RING*sizeof(struct ring_desc_ex)), base + NvRegTxRingPhysAddr); writel( ((RX_RING-1) << NVREG_RINGSZ_RXSHIFT) + ((TX_RING-1) << NVREG_RINGSZ_TXSHIFT), base + NvRegRingSizes); @@ -2173,24 +2254,48 @@ static int __devinit nv_probe(struct pci_dev *pci_dev, const struct pci_device_i } /* handle different descriptor versions */ - np->desc_ver = DESC_VER_1; - np->pkt_limit = NV_PKTLIMIT_1; - if (id->driver_data & DEV_HAS_LARGEDESC) { + if (id->driver_data & DEV_HAS_HIGH_DMA) { + /* packet format 3: supports 40-bit addressing */ + np->desc_ver = DESC_VER_3; + if (pci_set_dma_mask(pci_dev, 0x0000007fffffffffULL)) { + printk(KERN_INFO "forcedeth: 64-bit DMA failed, using 32-bit addressing for device %s.\n", + pci_name(pci_dev)); + } + } else if (id->driver_data & DEV_HAS_LARGEDESC) { + /* packet format 2: supports jumbo frames */ np->desc_ver = DESC_VER_2; - np->pkt_limit = NV_PKTLIMIT_2; + } else { + /* original packet format */ + np->desc_ver = DESC_VER_1; } - + + np->pkt_limit = NV_PKTLIMIT_1; + if (id->driver_data & DEV_HAS_LARGEDESC) + np->pkt_limit = NV_PKTLIMIT_2; + err = -ENOMEM; np->base = ioremap(addr, NV_PCI_REGSZ); if (!np->base) goto out_relreg; dev->base_addr = (unsigned long)np->base; + dev->irq = pci_dev->irq; - np->rx_ring = pci_alloc_consistent(pci_dev, sizeof(struct ring_desc) * (RX_RING + TX_RING), - &np->ring_addr); - if (!np->rx_ring) - goto out_unmap; - np->tx_ring = &np->rx_ring[RX_RING]; + + if (np->desc_ver == DESC_VER_1 || np->desc_ver == DESC_VER_2) { + np->rx_ring.orig = pci_alloc_consistent(pci_dev, + sizeof(struct ring_desc) * (RX_RING + TX_RING), + &np->ring_addr); + if (!np->rx_ring.orig) + goto out_unmap; + np->tx_ring.orig = &np->rx_ring.orig[RX_RING]; + } else { + 
np->rx_ring.ex = pci_alloc_consistent(pci_dev, + sizeof(struct ring_desc_ex) * (RX_RING + TX_RING), + &np->ring_addr); + if (!np->rx_ring.ex) + goto out_unmap; + np->tx_ring.ex = &np->rx_ring.ex[RX_RING]; + } dev->open = nv_open; dev->stop = nv_close; @@ -2313,8 +2418,12 @@ static int __devinit nv_probe(struct pci_dev *pci_dev, const struct pci_device_i return 0; out_freering: - pci_free_consistent(np->pci_dev, sizeof(struct ring_desc) * (RX_RING + TX_RING), - np->rx_ring, np->ring_addr); + if (np->desc_ver == DESC_VER_1 || np->desc_ver == DESC_VER_2) + pci_free_consistent(np->pci_dev, sizeof(struct ring_desc) * (RX_RING + TX_RING), + np->rx_ring.orig, np->ring_addr); + else + pci_free_consistent(np->pci_dev, sizeof(struct ring_desc_ex) * (RX_RING + TX_RING), + np->rx_ring.ex, np->ring_addr); pci_set_drvdata(pci_dev, NULL); out_unmap: iounmap(get_hwbase(dev)); @@ -2343,7 +2452,10 @@ static void __devexit nv_remove(struct pci_dev *pci_dev) writel(np->orig_mac[1], base + NvRegMacAddrB); /* free all structures */ - pci_free_consistent(np->pci_dev, sizeof(struct ring_desc) * (RX_RING + TX_RING), np->rx_ring, np->ring_addr); + if (np->desc_ver == DESC_VER_1 || np->desc_ver == DESC_VER_2) + pci_free_consistent(np->pci_dev, sizeof(struct ring_desc) * (RX_RING + TX_RING), np->rx_ring.orig, np->ring_addr); + else + pci_free_consistent(np->pci_dev, sizeof(struct ring_desc_ex) * (RX_RING + TX_RING), np->rx_ring.ex, np->ring_addr); iounmap(get_hwbase(dev)); pci_release_regions(pci_dev); pci_disable_device(pci_dev); @@ -2382,35 +2494,35 @@ static struct pci_device_id pci_tbl[] = { }, { /* CK804 Ethernet Controller */ PCI_DEVICE(PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_NVENET_8), - .driver_data = DEV_NEED_TIMERIRQ|DEV_NEED_LINKTIMER|DEV_HAS_LARGEDESC, + .driver_data = DEV_NEED_TIMERIRQ|DEV_NEED_LINKTIMER|DEV_HAS_LARGEDESC|DEV_HAS_HIGH_DMA, }, { /* CK804 Ethernet Controller */ PCI_DEVICE(PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_NVENET_9), - .driver_data = 
DEV_NEED_TIMERIRQ|DEV_NEED_LINKTIMER|DEV_HAS_LARGEDESC, + .driver_data = DEV_NEED_TIMERIRQ|DEV_NEED_LINKTIMER|DEV_HAS_LARGEDESC|DEV_HAS_HIGH_DMA, }, { /* MCP04 Ethernet Controller */ PCI_DEVICE(PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_NVENET_10), - .driver_data = DEV_NEED_TIMERIRQ|DEV_NEED_LINKTIMER|DEV_HAS_LARGEDESC, + .driver_data = DEV_NEED_TIMERIRQ|DEV_NEED_LINKTIMER|DEV_HAS_LARGEDESC|DEV_HAS_HIGH_DMA, }, { /* MCP04 Ethernet Controller */ PCI_DEVICE(PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_NVENET_11), - .driver_data = DEV_NEED_TIMERIRQ|DEV_NEED_LINKTIMER|DEV_HAS_LARGEDESC, + .driver_data = DEV_NEED_TIMERIRQ|DEV_NEED_LINKTIMER|DEV_HAS_LARGEDESC|DEV_HAS_HIGH_DMA, }, { /* MCP51 Ethernet Controller */ PCI_DEVICE(PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_NVENET_12), - .driver_data = DEV_NEED_TIMERIRQ|DEV_NEED_LINKTIMER, + .driver_data = DEV_NEED_TIMERIRQ|DEV_NEED_LINKTIMER|DEV_HAS_HIGH_DMA, }, { /* MCP51 Ethernet Controller */ PCI_DEVICE(PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_NVENET_13), - .driver_data = DEV_NEED_TIMERIRQ|DEV_NEED_LINKTIMER, + .driver_data = DEV_NEED_TIMERIRQ|DEV_NEED_LINKTIMER|DEV_HAS_HIGH_DMA, }, { /* MCP55 Ethernet Controller */ PCI_DEVICE(PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_NVENET_14), - .driver_data = DEV_NEED_TIMERIRQ|DEV_NEED_LINKTIMER|DEV_HAS_LARGEDESC, + .driver_data = DEV_NEED_TIMERIRQ|DEV_NEED_LINKTIMER|DEV_HAS_LARGEDESC|DEV_HAS_HIGH_DMA, }, { /* MCP55 Ethernet Controller */ PCI_DEVICE(PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_NVENET_15), - .driver_data = DEV_NEED_TIMERIRQ|DEV_NEED_LINKTIMER|DEV_HAS_LARGEDESC, + .driver_data = DEV_NEED_TIMERIRQ|DEV_NEED_LINKTIMER|DEV_HAS_LARGEDESC|DEV_HAS_HIGH_DMA, }, {0,}, }; From 72b317825728942383b0c2e35016d29bbfb4df00 Mon Sep 17 00:00:00 2001 From: Manfred Spraul Date: Sun, 31 Jul 2005 18:33:34 +0200 Subject: [PATCH 053/584] [PATCH] forcedeth: Add set_mac_address support This is a multi-part message in MIME format. 
Signed-off-by: Jeff Garzik --- drivers/net/forcedeth.c | 63 ++++++++++++++++++++++++++++++++++------- 1 file changed, 52 insertions(+), 11 deletions(-) diff --git a/drivers/net/forcedeth.c b/drivers/net/forcedeth.c index 4d38acbac4ef..1e691024868f 100644 --- a/drivers/net/forcedeth.c +++ b/drivers/net/forcedeth.c @@ -90,6 +90,7 @@ * 0.38: 16 Jul 2005: tx irq rewrite: Use global flags instead of * per-packet flags. * 0.39: 18 Jul 2005: Add 64bit descriptor support. + * 0.40: 19 Jul 2005: Add support for mac address change. * * Known bugs: * We suspect that on some hardware no TX done interrupts are generated. @@ -101,7 +102,7 @@ * DEV_NEED_TIMERIRQ will not harm you on sane hardware, only generating a few * superfluous timer interrupts from the nic. */ -#define FORCEDETH_VERSION "0.39" +#define FORCEDETH_VERSION "0.40" #define DRV_NAME "forcedeth" #include @@ -1416,6 +1417,54 @@ static int nv_change_mtu(struct net_device *dev, int new_mtu) return 0; } +static void nv_copy_mac_to_hw(struct net_device *dev) +{ + u8 *base = get_hwbase(dev); + u32 mac[2]; + + mac[0] = (dev->dev_addr[0] << 0) + (dev->dev_addr[1] << 8) + + (dev->dev_addr[2] << 16) + (dev->dev_addr[3] << 24); + mac[1] = (dev->dev_addr[4] << 0) + (dev->dev_addr[5] << 8); + + writel(mac[0], base + NvRegMacAddrA); + writel(mac[1], base + NvRegMacAddrB); +} + +/* + * nv_set_mac_address: dev->set_mac_address function + * Called with rtnl_lock() held. 
+ */ +static int nv_set_mac_address(struct net_device *dev, void *addr) +{ + struct fe_priv *np = get_nvpriv(dev); + struct sockaddr *macaddr = (struct sockaddr*)addr; + + if(!is_valid_ether_addr(macaddr->sa_data)) + return -EADDRNOTAVAIL; + + /* synchronized against open : rtnl_lock() held by caller */ + memcpy(dev->dev_addr, macaddr->sa_data, ETH_ALEN); + + if (netif_running(dev)) { + spin_lock_bh(&dev->xmit_lock); + spin_lock_irq(&np->lock); + + /* stop rx engine */ + nv_stop_rx(dev); + + /* set mac address */ + nv_copy_mac_to_hw(dev); + + /* restart rx engine */ + nv_start_rx(dev); + spin_unlock_irq(&np->lock); + spin_unlock_bh(&dev->xmit_lock); + } else { + nv_copy_mac_to_hw(dev); + } + return 0; +} + /* * nv_set_multicast: dev->set_multicast function * Called with dev->xmit_lock held. @@ -2047,16 +2096,7 @@ static int nv_open(struct net_device *dev) np->in_shutdown = 0; /* 3) set mac address */ - { - u32 mac[2]; - - mac[0] = (dev->dev_addr[0] << 0) + (dev->dev_addr[1] << 8) + - (dev->dev_addr[2] << 16) + (dev->dev_addr[3] << 24); - mac[1] = (dev->dev_addr[4] << 0) + (dev->dev_addr[5] << 8); - - writel(mac[0], base + NvRegMacAddrA); - writel(mac[1], base + NvRegMacAddrB); - } + nv_copy_mac_to_hw(dev); /* 4) give hw rings */ writel((u32) np->ring_addr, base + NvRegRxRingPhysAddr); @@ -2302,6 +2342,7 @@ static int __devinit nv_probe(struct pci_dev *pci_dev, const struct pci_device_i dev->hard_start_xmit = nv_start_xmit; dev->get_stats = nv_get_stats; dev->change_mtu = nv_change_mtu; + dev->set_mac_address = nv_set_mac_address; dev->set_multicast_list = nv_set_multicast; #ifdef CONFIG_NET_POLL_CONTROLLER dev->poll_controller = nv_poll_controller; From b3df9f813bc7b9db62ae0c90b8990b1cebf97345 Mon Sep 17 00:00:00 2001 From: Manfred Spraul Date: Sun, 31 Jul 2005 18:38:58 +0200 Subject: [PATCH 054/584] [PATCH] forcedeth: write back original mac address during ifdown This is a multi-part message in MIME format. 
Signed-off-by: Jeff Garzik --- drivers/net/forcedeth.c | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/drivers/net/forcedeth.c b/drivers/net/forcedeth.c index 1e691024868f..f165ae973985 100644 --- a/drivers/net/forcedeth.c +++ b/drivers/net/forcedeth.c @@ -91,6 +91,8 @@ * per-packet flags. * 0.39: 18 Jul 2005: Add 64bit descriptor support. * 0.40: 19 Jul 2005: Add support for mac address change. + * 0.41: 30 Jul 2005: Write back original MAC in nv_close instead + * of nv_remove * * Known bugs: * We suspect that on some hardware no TX done interrupts are generated. @@ -102,7 +104,7 @@ * DEV_NEED_TIMERIRQ will not harm you on sane hardware, only generating a few * superfluous timer interrupts from the nic. */ -#define FORCEDETH_VERSION "0.40" +#define FORCEDETH_VERSION "0.41" #define DRV_NAME "forcedeth" #include @@ -2230,6 +2232,12 @@ static int nv_close(struct net_device *dev) if (np->wolenabled) nv_start_rx(dev); + /* special op: write back the misordered MAC address - otherwise + * the next nv_probe would see a wrong address. + */ + writel(np->orig_mac[0], base + NvRegMacAddrA); + writel(np->orig_mac[1], base + NvRegMacAddrB); + /* FIXME: power down nic */ return 0; @@ -2482,16 +2490,9 @@ static void __devexit nv_remove(struct pci_dev *pci_dev) { struct net_device *dev = pci_get_drvdata(pci_dev); struct fe_priv *np = get_nvpriv(dev); - u8 __iomem *base = get_hwbase(dev); unregister_netdev(dev); - /* special op: write back the misordered MAC address - otherwise - * the next nv_probe would see a wrong address. 
- */ - writel(np->orig_mac[0], base + NvRegMacAddrA); - writel(np->orig_mac[1], base + NvRegMacAddrB); - /* free all structures */ if (np->desc_ver == DESC_VER_1 || np->desc_ver == DESC_VER_2) pci_free_consistent(np->pci_dev, sizeof(struct ring_desc) * (RX_RING + TX_RING), np->rx_ring.orig, np->ring_addr); From 8a60a07129fad60bba779a2a4038c7518b167fc7 Mon Sep 17 00:00:00 2001 From: Jeff Garzik Date: Sun, 31 Jul 2005 13:13:24 -0400 Subject: [PATCH 055/584] libata: trim trailing whitespace. Also, fixup a tabs-to-spaces block of code in ata_piix. --- drivers/scsi/ata_piix.c | 14 ++-- drivers/scsi/libata-core.c | 4 +- drivers/scsi/libata.h | 2 +- drivers/scsi/sata_qstor.c | 2 +- drivers/scsi/sata_sil.c | 4 +- drivers/scsi/sata_sis.c | 2 +- drivers/scsi/sata_svw.c | 10 +-- drivers/scsi/sata_sx4.c | 138 ++++++++++++++++++------------------- drivers/scsi/sata_uli.c | 2 +- drivers/scsi/sata_via.c | 2 +- drivers/scsi/sata_vsc.c | 2 +- include/linux/libata.h | 2 +- 12 files changed, 92 insertions(+), 92 deletions(-) diff --git a/drivers/scsi/ata_piix.c b/drivers/scsi/ata_piix.c index a2cfade2c1c6..9f1bdfbd8d0a 100644 --- a/drivers/scsi/ata_piix.c +++ b/drivers/scsi/ata_piix.c @@ -629,13 +629,13 @@ static int piix_init_one (struct pci_dev *pdev, const struct pci_device_id *ent) port_info[1] = NULL; if (port_info[0]->host_flags & PIIX_FLAG_AHCI) { - u8 tmp; - pci_read_config_byte(pdev, PIIX_SCC, &tmp); - if (tmp == PIIX_AHCI_DEVICE) { - int rc = piix_disable_ahci(pdev); - if (rc) - return rc; - } + u8 tmp; + pci_read_config_byte(pdev, PIIX_SCC, &tmp); + if (tmp == PIIX_AHCI_DEVICE) { + int rc = piix_disable_ahci(pdev); + if (rc) + return rc; + } } if (port_info[0]->host_flags & PIIX_FLAG_COMBINED) { diff --git a/drivers/scsi/libata-core.c b/drivers/scsi/libata-core.c index 73b1f72b7e43..6e56af23957b 100644 --- a/drivers/scsi/libata-core.c +++ b/drivers/scsi/libata-core.c @@ -1304,12 +1304,12 @@ static inline u8 ata_dev_knobble(struct ata_port *ap) /** * ata_dev_config - Run 
device specific handlers and check for * SATA->PATA bridges - * @ap: Bus + * @ap: Bus * @i: Device * * LOCKING: */ - + void ata_dev_config(struct ata_port *ap, unsigned int i) { /* limit bridge transfers to udma5, 200 sectors */ diff --git a/drivers/scsi/libata.h b/drivers/scsi/libata.h index d90430bbb0de..91b68eedb3c9 100644 --- a/drivers/scsi/libata.h +++ b/drivers/scsi/libata.h @@ -72,7 +72,7 @@ extern unsigned int ata_scsiop_report_luns(struct ata_scsi_args *args, u8 *rbuf, extern void ata_scsi_badcmd(struct scsi_cmnd *cmd, void (*done)(struct scsi_cmnd *), u8 asc, u8 ascq); -extern void ata_scsi_rbuf_fill(struct ata_scsi_args *args, +extern void ata_scsi_rbuf_fill(struct ata_scsi_args *args, unsigned int (*actor) (struct ata_scsi_args *args, u8 *rbuf, unsigned int buflen)); diff --git a/drivers/scsi/sata_qstor.c b/drivers/scsi/sata_qstor.c index 1383e8a28d72..dca9ed7ac760 100644 --- a/drivers/scsi/sata_qstor.c +++ b/drivers/scsi/sata_qstor.c @@ -431,7 +431,7 @@ static inline unsigned int qs_intr_mmio(struct ata_host_set *host_set) continue; DPRINTK("ata%u: protocol %d (dev_stat 0x%X)\n", ap->id, qc->tf.protocol, status); - + /* complete taskfile transaction */ pp->state = qs_state_idle; ata_qc_complete(qc, status); diff --git a/drivers/scsi/sata_sil.c b/drivers/scsi/sata_sil.c index 49ed557a4b66..a1b81d43b11f 100644 --- a/drivers/scsi/sata_sil.c +++ b/drivers/scsi/sata_sil.c @@ -323,13 +323,13 @@ static void sil_dev_config(struct ata_port *ap, struct ata_device *dev) while ((len > 0) && (s[len - 1] == ' ')) len--; - for (n = 0; sil_blacklist[n].product; n++) + for (n = 0; sil_blacklist[n].product; n++) if (!memcmp(sil_blacklist[n].product, s, strlen(sil_blacklist[n].product))) { quirks = sil_blacklist[n].quirk; break; } - + /* limit requests to 15 sectors */ if (quirks & SIL_QUIRK_MOD15WRITE) { printk(KERN_INFO "ata%u(%u): applying Seagate errata fix\n", diff --git a/drivers/scsi/sata_sis.c b/drivers/scsi/sata_sis.c index e418b89c6b9d..b250ae0c7773 100644 --- 
a/drivers/scsi/sata_sis.c +++ b/drivers/scsi/sata_sis.c @@ -234,7 +234,7 @@ static int sis_init_one (struct pci_dev *pdev, const struct pci_device_id *ent) pci_read_config_dword(pdev, SIS_GENCTL, &genctl); if ((genctl & GENCTL_IOMAPPED_SCR) == 0) probe_ent->host_flags |= SIS_FLAG_CFGSCR; - + /* if hardware thinks SCRs are in IO space, but there are * no IO resources assigned, change to PCI cfg space. */ diff --git a/drivers/scsi/sata_svw.c b/drivers/scsi/sata_svw.c index 858e07185dbd..6fd2ce1ffcd8 100644 --- a/drivers/scsi/sata_svw.c +++ b/drivers/scsi/sata_svw.c @@ -195,18 +195,18 @@ static void k2_bmdma_start_mmio (struct ata_queued_cmd *qc) /* start host DMA transaction */ dmactl = readb(mmio + ATA_DMA_CMD); writeb(dmactl | ATA_DMA_START, mmio + ATA_DMA_CMD); - /* There is a race condition in certain SATA controllers that can - be seen when the r/w command is given to the controller before the + /* There is a race condition in certain SATA controllers that can + be seen when the r/w command is given to the controller before the host DMA is started. On a Read command, the controller would initiate the command to the drive even before it sees the DMA start. When there - are very fast drives connected to the controller, or when the data request + are very fast drives connected to the controller, or when the data request hits in the drive cache, there is the possibility that the drive returns a part or all of the requested data to the controller before the DMA start is issued. In this case, the controller would become confused as to what to do with the data. In the worst case when all the data is returned back to the controller, the controller could hang. In other cases it could return partial data returning in data corruption. 
This problem has been seen in PPC systems and can also appear - on an system with very fast disks, where the SATA controller is sitting behind a + on an system with very fast disks, where the SATA controller is sitting behind a number of bridges, and hence there is significant latency between the r/w command and the start command. */ /* issue r/w command if the access is to ATA*/ @@ -214,7 +214,7 @@ static void k2_bmdma_start_mmio (struct ata_queued_cmd *qc) ap->ops->exec_command(ap, &qc->tf); } - + static u8 k2_stat_check_status(struct ata_port *ap) { return readl((void *) ap->ioaddr.status_addr); diff --git a/drivers/scsi/sata_sx4.c b/drivers/scsi/sata_sx4.c index 140cea05de3f..8e59868b24bb 100644 --- a/drivers/scsi/sata_sx4.c +++ b/drivers/scsi/sata_sx4.c @@ -94,7 +94,7 @@ enum { PDC_DIMM1_CONTROL_OFFSET = 0x84, PDC_SDRAM_CONTROL_OFFSET = 0x88, PDC_I2C_WRITE = 0x00000000, - PDC_I2C_READ = 0x00000040, + PDC_I2C_READ = 0x00000040, PDC_I2C_START = 0x00000080, PDC_I2C_MASK_INT = 0x00000020, PDC_I2C_COMPLETE = 0x00010000, @@ -105,16 +105,16 @@ enum { PDC_DIMM_SPD_COLUMN_NUM = 4, PDC_DIMM_SPD_MODULE_ROW = 5, PDC_DIMM_SPD_TYPE = 11, - PDC_DIMM_SPD_FRESH_RATE = 12, - PDC_DIMM_SPD_BANK_NUM = 17, + PDC_DIMM_SPD_FRESH_RATE = 12, + PDC_DIMM_SPD_BANK_NUM = 17, PDC_DIMM_SPD_CAS_LATENCY = 18, - PDC_DIMM_SPD_ATTRIBUTE = 21, + PDC_DIMM_SPD_ATTRIBUTE = 21, PDC_DIMM_SPD_ROW_PRE_CHARGE = 27, - PDC_DIMM_SPD_ROW_ACTIVE_DELAY = 28, + PDC_DIMM_SPD_ROW_ACTIVE_DELAY = 28, PDC_DIMM_SPD_RAS_CAS_DELAY = 29, PDC_DIMM_SPD_ACTIVE_PRECHARGE = 30, PDC_DIMM_SPD_SYSTEM_FREQ = 126, - PDC_CTL_STATUS = 0x08, + PDC_CTL_STATUS = 0x08, PDC_DIMM_WINDOW_CTLR = 0x0C, PDC_TIME_CONTROL = 0x3C, PDC_TIME_PERIOD = 0x40, @@ -157,15 +157,15 @@ static void pdc_exec_command_mmio(struct ata_port *ap, struct ata_taskfile *tf); static void pdc20621_host_stop(struct ata_host_set *host_set); static unsigned int pdc20621_dimm_init(struct ata_probe_ent *pe); static int pdc20621_detect_dimm(struct ata_probe_ent *pe); 
-static unsigned int pdc20621_i2c_read(struct ata_probe_ent *pe, +static unsigned int pdc20621_i2c_read(struct ata_probe_ent *pe, u32 device, u32 subaddr, u32 *pdata); static int pdc20621_prog_dimm0(struct ata_probe_ent *pe); static unsigned int pdc20621_prog_dimm_global(struct ata_probe_ent *pe); #ifdef ATA_VERBOSE_DEBUG -static void pdc20621_get_from_dimm(struct ata_probe_ent *pe, +static void pdc20621_get_from_dimm(struct ata_probe_ent *pe, void *psource, u32 offset, u32 size); #endif -static void pdc20621_put_to_dimm(struct ata_probe_ent *pe, +static void pdc20621_put_to_dimm(struct ata_probe_ent *pe, void *psource, u32 offset, u32 size); static void pdc20621_irq_clear(struct ata_port *ap); static int pdc20621_qc_issue_prot(struct ata_queued_cmd *qc); @@ -922,7 +922,7 @@ static void pdc_sata_setup_port(struct ata_ioports *port, unsigned long base) #ifdef ATA_VERBOSE_DEBUG -static void pdc20621_get_from_dimm(struct ata_probe_ent *pe, void *psource, +static void pdc20621_get_from_dimm(struct ata_probe_ent *pe, void *psource, u32 offset, u32 size) { u32 window_size; @@ -936,9 +936,9 @@ static void pdc20621_get_from_dimm(struct ata_probe_ent *pe, void *psource, /* hard-code chip #0 */ mmio += PDC_CHIP0_OFS; - page_mask = 0x00; - window_size = 0x2000 * 4; /* 32K byte uchar size */ - idx = (u16) (offset / window_size); + page_mask = 0x00; + window_size = 0x2000 * 4; /* 32K byte uchar size */ + idx = (u16) (offset / window_size); writel(0x01, mmio + PDC_GENERAL_CTLR); readl(mmio + PDC_GENERAL_CTLR); @@ -947,19 +947,19 @@ static void pdc20621_get_from_dimm(struct ata_probe_ent *pe, void *psource, offset -= (idx * window_size); idx++; - dist = ((long) (window_size - (offset + size))) >= 0 ? size : + dist = ((long) (window_size - (offset + size))) >= 0 ? 
size : (long) (window_size - offset); - memcpy_fromio((char *) psource, (char *) (dimm_mmio + offset / 4), + memcpy_fromio((char *) psource, (char *) (dimm_mmio + offset / 4), dist); - psource += dist; + psource += dist; size -= dist; for (; (long) size >= (long) window_size ;) { writel(0x01, mmio + PDC_GENERAL_CTLR); readl(mmio + PDC_GENERAL_CTLR); writel(((idx) << page_mask), mmio + PDC_DIMM_WINDOW_CTLR); readl(mmio + PDC_DIMM_WINDOW_CTLR); - memcpy_fromio((char *) psource, (char *) (dimm_mmio), + memcpy_fromio((char *) psource, (char *) (dimm_mmio), window_size / 4); psource += window_size; size -= window_size; @@ -971,14 +971,14 @@ static void pdc20621_get_from_dimm(struct ata_probe_ent *pe, void *psource, readl(mmio + PDC_GENERAL_CTLR); writel(((idx) << page_mask), mmio + PDC_DIMM_WINDOW_CTLR); readl(mmio + PDC_DIMM_WINDOW_CTLR); - memcpy_fromio((char *) psource, (char *) (dimm_mmio), + memcpy_fromio((char *) psource, (char *) (dimm_mmio), size / 4); } } #endif -static void pdc20621_put_to_dimm(struct ata_probe_ent *pe, void *psource, +static void pdc20621_put_to_dimm(struct ata_probe_ent *pe, void *psource, u32 offset, u32 size) { u32 window_size; @@ -989,16 +989,16 @@ static void pdc20621_put_to_dimm(struct ata_probe_ent *pe, void *psource, struct pdc_host_priv *hpriv = pe->private_data; void *dimm_mmio = hpriv->dimm_mmio; - /* hard-code chip #0 */ + /* hard-code chip #0 */ mmio += PDC_CHIP0_OFS; - page_mask = 0x00; - window_size = 0x2000 * 4; /* 32K byte uchar size */ + page_mask = 0x00; + window_size = 0x2000 * 4; /* 32K byte uchar size */ idx = (u16) (offset / window_size); writel(((idx) << page_mask), mmio + PDC_DIMM_WINDOW_CTLR); readl(mmio + PDC_DIMM_WINDOW_CTLR); - offset -= (idx * window_size); + offset -= (idx * window_size); idx++; dist = ((long)(s32)(window_size - (offset + size))) >= 0 ? 
size : (long) (window_size - offset); @@ -1006,12 +1006,12 @@ static void pdc20621_put_to_dimm(struct ata_probe_ent *pe, void *psource, writel(0x01, mmio + PDC_GENERAL_CTLR); readl(mmio + PDC_GENERAL_CTLR); - psource += dist; + psource += dist; size -= dist; for (; (long) size >= (long) window_size ;) { writel(((idx) << page_mask), mmio + PDC_DIMM_WINDOW_CTLR); readl(mmio + PDC_DIMM_WINDOW_CTLR); - memcpy_toio((char *) (dimm_mmio), (char *) psource, + memcpy_toio((char *) (dimm_mmio), (char *) psource, window_size / 4); writel(0x01, mmio + PDC_GENERAL_CTLR); readl(mmio + PDC_GENERAL_CTLR); @@ -1019,7 +1019,7 @@ static void pdc20621_put_to_dimm(struct ata_probe_ent *pe, void *psource, size -= window_size; idx ++; } - + if (size) { writel(((idx) << page_mask), mmio + PDC_DIMM_WINDOW_CTLR); readl(mmio + PDC_DIMM_WINDOW_CTLR); @@ -1030,12 +1030,12 @@ static void pdc20621_put_to_dimm(struct ata_probe_ent *pe, void *psource, } -static unsigned int pdc20621_i2c_read(struct ata_probe_ent *pe, u32 device, +static unsigned int pdc20621_i2c_read(struct ata_probe_ent *pe, u32 device, u32 subaddr, u32 *pdata) { void *mmio = pe->mmio_base; u32 i2creg = 0; - u32 status; + u32 status; u32 count =0; /* hard-code chip #0 */ @@ -1049,7 +1049,7 @@ static unsigned int pdc20621_i2c_read(struct ata_probe_ent *pe, u32 device, readl(mmio + PDC_I2C_ADDR_DATA_OFFSET); /* Write Control to perform read operation, mask int */ - writel(PDC_I2C_READ | PDC_I2C_START | PDC_I2C_MASK_INT, + writel(PDC_I2C_READ | PDC_I2C_START | PDC_I2C_MASK_INT, mmio + PDC_I2C_CONTROL_OFFSET); for (count = 0; count <= 1000; count ++) { @@ -1062,26 +1062,26 @@ static unsigned int pdc20621_i2c_read(struct ata_probe_ent *pe, u32 device, } *pdata = (status >> 8) & 0x000000ff; - return 1; + return 1; } static int pdc20621_detect_dimm(struct ata_probe_ent *pe) { u32 data=0 ; - if (pdc20621_i2c_read(pe, PDC_DIMM0_SPD_DEV_ADDRESS, + if (pdc20621_i2c_read(pe, PDC_DIMM0_SPD_DEV_ADDRESS, PDC_DIMM_SPD_SYSTEM_FREQ, &data)) { if 
(data == 100) return 100; } else return 0; - + if (pdc20621_i2c_read(pe, PDC_DIMM0_SPD_DEV_ADDRESS, 9, &data)) { - if(data <= 0x75) + if(data <= 0x75) return 133; } else return 0; - + return 0; } @@ -1091,15 +1091,15 @@ static int pdc20621_prog_dimm0(struct ata_probe_ent *pe) u32 spd0[50]; u32 data = 0; int size, i; - u8 bdimmsize; + u8 bdimmsize; void *mmio = pe->mmio_base; static const struct { unsigned int reg; unsigned int ofs; } pdc_i2c_read_data [] = { - { PDC_DIMM_SPD_TYPE, 11 }, + { PDC_DIMM_SPD_TYPE, 11 }, { PDC_DIMM_SPD_FRESH_RATE, 12 }, - { PDC_DIMM_SPD_COLUMN_NUM, 4 }, + { PDC_DIMM_SPD_COLUMN_NUM, 4 }, { PDC_DIMM_SPD_ATTRIBUTE, 21 }, { PDC_DIMM_SPD_ROW_NUM, 3 }, { PDC_DIMM_SPD_BANK_NUM, 17 }, @@ -1108,7 +1108,7 @@ static int pdc20621_prog_dimm0(struct ata_probe_ent *pe) { PDC_DIMM_SPD_ROW_ACTIVE_DELAY, 28 }, { PDC_DIMM_SPD_RAS_CAS_DELAY, 29 }, { PDC_DIMM_SPD_ACTIVE_PRECHARGE, 30 }, - { PDC_DIMM_SPD_CAS_LATENCY, 18 }, + { PDC_DIMM_SPD_CAS_LATENCY, 18 }, }; /* hard-code chip #0 */ @@ -1116,17 +1116,17 @@ static int pdc20621_prog_dimm0(struct ata_probe_ent *pe) for(i=0; i spd0[28]) - ? spd0[29] : spd0[28]) + 9) / 10) - 1) << 10; + data |= (((((spd0[29] > spd0[28]) + ? spd0[29] : spd0[28]) + 9) / 10) - 1) << 10; data |= ((spd0[30] - spd0[29] + 9) / 10 - 2) << 12; - - if (spd0[18] & 0x08) + + if (spd0[18] & 0x08) data |= ((0x03) << 14); else if (spd0[18] & 0x04) data |= ((0x02) << 14); @@ -1135,7 +1135,7 @@ static int pdc20621_prog_dimm0(struct ata_probe_ent *pe) else data |= (0 << 14); - /* + /* Calculate the size of bDIMMSize (power of 2) and merge the DIMM size by program start/end address. 
*/ @@ -1145,9 +1145,9 @@ static int pdc20621_prog_dimm0(struct ata_probe_ent *pe) data |= (((size / 16) - 1) << 16); data |= (0 << 23); data |= 8; - writel(data, mmio + PDC_DIMM0_CONTROL_OFFSET); + writel(data, mmio + PDC_DIMM0_CONTROL_OFFSET); readl(mmio + PDC_DIMM0_CONTROL_OFFSET); - return size; + return size; } @@ -1167,12 +1167,12 @@ static unsigned int pdc20621_prog_dimm_global(struct ata_probe_ent *pe) Refresh Enable (bit 17) */ - data = 0x022259F1; + data = 0x022259F1; writel(data, mmio + PDC_SDRAM_CONTROL_OFFSET); readl(mmio + PDC_SDRAM_CONTROL_OFFSET); /* Turn on for ECC */ - pdc20621_i2c_read(pe, PDC_DIMM0_SPD_DEV_ADDRESS, + pdc20621_i2c_read(pe, PDC_DIMM0_SPD_DEV_ADDRESS, PDC_DIMM_SPD_TYPE, &spd0); if (spd0 == 0x02) { data |= (0x01 << 16); @@ -1186,22 +1186,22 @@ static unsigned int pdc20621_prog_dimm_global(struct ata_probe_ent *pe) data |= (1<<19); writel(data, mmio + PDC_SDRAM_CONTROL_OFFSET); - error = 1; + error = 1; for (i = 1; i <= 10; i++) { /* polling ~5 secs */ data = readl(mmio + PDC_SDRAM_CONTROL_OFFSET); if (!(data & (1<<19))) { error = 0; - break; + break; } msleep(i*100); } return error; } - + static unsigned int pdc20621_dimm_init(struct ata_probe_ent *pe) { - int speed, size, length; + int speed, size, length; u32 addr,spd0,pci_status; u32 tmp=0; u32 time_period=0; @@ -1228,7 +1228,7 @@ static unsigned int pdc20621_dimm_init(struct ata_probe_ent *pe) /* Wait 3 seconds */ msleep(3000); - /* + /* When timer is enabled, counter is decreased every internal clock cycle. */ @@ -1236,24 +1236,24 @@ static unsigned int pdc20621_dimm_init(struct ata_probe_ent *pe) tcount = readl(mmio + PDC_TIME_COUNTER); VPRINTK("Time Counter Register (0x44): 0x%x\n", tcount); - /* + /* If SX4 is on PCI-X bus, after 3 seconds, the timer counter register should be >= (0xffffffff - 3x10^8). 
*/ if(tcount >= PCI_X_TCOUNT) { ticks = (time_period - tcount); VPRINTK("Num counters 0x%x (%d)\n", ticks, ticks); - + clock = (ticks / 300000); VPRINTK("10 * Internal clk = 0x%x (%d)\n", clock, clock); - + clock = (clock * 33); VPRINTK("10 * Internal clk * 33 = 0x%x (%d)\n", clock, clock); /* PLL F Param (bit 22:16) */ fparam = (1400000 / clock) - 2; VPRINTK("PLL F Param: 0x%x (%d)\n", fparam, fparam); - + /* OD param = 0x2 (bit 31:30), R param = 0x5 (bit 29:25) */ pci_status = (0x8a001824 | (fparam << 16)); } else @@ -1264,21 +1264,21 @@ static unsigned int pdc20621_dimm_init(struct ata_probe_ent *pe) writel(pci_status, mmio + PDC_CTL_STATUS); readl(mmio + PDC_CTL_STATUS); - /* + /* Read SPD of DIMM by I2C interface, and program the DIMM Module Controller. */ if (!(speed = pdc20621_detect_dimm(pe))) { - printk(KERN_ERR "Detect Local DIMM Fail\n"); + printk(KERN_ERR "Detect Local DIMM Fail\n"); return 1; /* DIMM error */ } VPRINTK("Local DIMM Speed = %d\n", speed); - /* Programming DIMM0 Module Control Register (index_CID0:80h) */ + /* Programming DIMM0 Module Control Register (index_CID0:80h) */ size = pdc20621_prog_dimm0(pe); VPRINTK("Local DIMM Size = %dMB\n",size); - /* Programming DIMM Module Global Control Register (index_CID0:88h) */ + /* Programming DIMM Module Global Control Register (index_CID0:88h) */ if (pdc20621_prog_dimm_global(pe)) { printk(KERN_ERR "Programming DIMM Module Global Control Register Fail\n"); return 1; @@ -1297,30 +1297,30 @@ static unsigned int pdc20621_dimm_init(struct ata_probe_ent *pe) pdc20621_put_to_dimm(pe, (void *) test_parttern1, 0x10040, 40); pdc20621_get_from_dimm(pe, (void *) test_parttern2, 0x40, 40); - printk(KERN_ERR "%x, %x, %s\n", test_parttern2[0], + printk(KERN_ERR "%x, %x, %s\n", test_parttern2[0], test_parttern2[1], &(test_parttern2[2])); - pdc20621_get_from_dimm(pe, (void *) test_parttern2, 0x10040, + pdc20621_get_from_dimm(pe, (void *) test_parttern2, 0x10040, 40); - printk(KERN_ERR "%x, %x, %s\n", 
test_parttern2[0], + printk(KERN_ERR "%x, %x, %s\n", test_parttern2[0], test_parttern2[1], &(test_parttern2[2])); pdc20621_put_to_dimm(pe, (void *) test_parttern1, 0x40, 40); pdc20621_get_from_dimm(pe, (void *) test_parttern2, 0x40, 40); - printk(KERN_ERR "%x, %x, %s\n", test_parttern2[0], + printk(KERN_ERR "%x, %x, %s\n", test_parttern2[0], test_parttern2[1], &(test_parttern2[2])); } #endif /* ECC initiliazation. */ - pdc20621_i2c_read(pe, PDC_DIMM0_SPD_DEV_ADDRESS, + pdc20621_i2c_read(pe, PDC_DIMM0_SPD_DEV_ADDRESS, PDC_DIMM_SPD_TYPE, &spd0); if (spd0 == 0x02) { VPRINTK("Start ECC initialization\n"); addr = 0; length = size * 1024 * 1024; while (addr < length) { - pdc20621_put_to_dimm(pe, (void *) &tmp, addr, + pdc20621_put_to_dimm(pe, (void *) &tmp, addr, sizeof(u32)); addr += sizeof(u32); } diff --git a/drivers/scsi/sata_uli.c b/drivers/scsi/sata_uli.c index a71fb54eebd3..eb202a73bc0e 100644 --- a/drivers/scsi/sata_uli.c +++ b/drivers/scsi/sata_uli.c @@ -214,7 +214,7 @@ static int uli_init_one (struct pci_dev *pdev, const struct pci_device_id *ent) rc = -ENOMEM; goto err_out_regions; } - + switch (board_idx) { case uli_5287: probe_ent->port[0].scr_addr = ULI5287_BASE; diff --git a/drivers/scsi/sata_via.c b/drivers/scsi/sata_via.c index f43183c19a12..feff10980487 100644 --- a/drivers/scsi/sata_via.c +++ b/drivers/scsi/sata_via.c @@ -347,7 +347,7 @@ static int svia_init_one (struct pci_dev *pdev, const struct pci_device_id *ent) probe_ent = vt6420_init_probe_ent(pdev); else probe_ent = vt6421_init_probe_ent(pdev); - + if (!probe_ent) { printk(KERN_ERR DRV_NAME "(%s): out of memory\n", pci_name(pdev)); diff --git a/drivers/scsi/sata_vsc.c b/drivers/scsi/sata_vsc.c index c5e09dc6f3de..cb3a6d89cf00 100644 --- a/drivers/scsi/sata_vsc.c +++ b/drivers/scsi/sata_vsc.c @@ -342,7 +342,7 @@ static int __devinit vsc_sata_init_one (struct pci_dev *pdev, const struct pci_d pci_set_master(pdev); - /* + /* * Config offset 0x98 is "Extended Control and Status Register 0" * 
Default value is (1 << 28). All bits except bit 28 are reserved in * DPA mode. If bit 28 is set, LED 0 reflects all ports' activity. diff --git a/include/linux/libata.h b/include/linux/libata.h index 6cd9ba63563b..85b0aaee0ef8 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -644,7 +644,7 @@ static inline void scr_write(struct ata_port *ap, unsigned int reg, u32 val) ap->ops->scr_write(ap, reg, val); } -static inline void scr_write_flush(struct ata_port *ap, unsigned int reg, +static inline void scr_write_flush(struct ata_port *ap, unsigned int reg, u32 val) { ap->ops->scr_write(ap, reg, val); From 24d109422787119337cd83732feef930d6a23f5c Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Fri, 5 Aug 2005 21:52:18 +1000 Subject: [PATCH 056/584] drm: fix ioctl direction in r128 getparam Set the IOWR correctly for r128 getparam. From: Egbert Eich Signed-off-by: Dave Airlie --- drivers/char/drm/r128_drm.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/char/drm/r128_drm.h b/drivers/char/drm/r128_drm.h index 0cba17d1e0ff..b616cd3ed2cd 100644 --- a/drivers/char/drm/r128_drm.h +++ b/drivers/char/drm/r128_drm.h @@ -215,7 +215,7 @@ typedef struct drm_r128_sarea { #define DRM_IOCTL_R128_INDIRECT DRM_IOWR(DRM_COMMAND_BASE + DRM_R128_INDIRECT, drm_r128_indirect_t) #define DRM_IOCTL_R128_FULLSCREEN DRM_IOW( DRM_COMMAND_BASE + DRM_R128_FULLSCREEN, drm_r128_fullscreen_t) #define DRM_IOCTL_R128_CLEAR2 DRM_IOW( DRM_COMMAND_BASE + DRM_R128_CLEAR2, drm_r128_clear2_t) -#define DRM_IOCTL_R128_GETPARAM DRM_IOW( DRM_COMMAND_BASE + DRM_R128_GETPARAM, drm_r128_getparam_t) +#define DRM_IOCTL_R128_GETPARAM DRM_IOWR( DRM_COMMAND_BASE + DRM_R128_GETPARAM, drm_r128_getparam_t) #define DRM_IOCTL_R128_FLIP DRM_IO( DRM_COMMAND_BASE + DRM_R128_FLIP) typedef struct drm_r128_init { From c73681e77b40697d16ada777adf2c6dc4db05917 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Fri, 5 Aug 2005 22:02:48 +1000 Subject: [PATCH 057/584] drm: copy the right data 
back to userspace for getreserved contexts ioctl This fixes the information copied back to userspace by the get reserved contexts ioctl. From: Egbert Eich Signed-off-by: Dave Airlie --- drivers/char/drm/drm_context.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/char/drm/drm_context.c b/drivers/char/drm/drm_context.c index a7cfabd1ca2e..5af46c9830ec 100644 --- a/drivers/char/drm/drm_context.c +++ b/drivers/char/drm/drm_context.c @@ -369,7 +369,7 @@ int drm_resctx( struct inode *inode, struct file *filp, for ( i = 0 ; i < DRM_RESERVED_CONTEXTS ; i++ ) { ctx.handle = i; if ( copy_to_user( &res.contexts[i], - &i, sizeof(i) ) ) + &ctx, sizeof(ctx) ) ) return -EFAULT; } } From d1f2b55ad2c11f46e30547a9f7754e99b478348e Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Fri, 5 Aug 2005 22:11:22 +1000 Subject: [PATCH 058/584] drm: updated DRM map patch for 32/64 bit systems I basically combined Paul's patches with additions that I had made for PCI scatter gather. I also tried more carefully to avoid problems with the same token assigned multiple times while trying to use the base address in the token if possible to gain as much backward compatibility as possible for broken DRI clients. 
From: Paul Mackerras and Egbert Eich Signed-off-by: Dave Airlie --- drivers/char/drm/drmP.h | 14 +++--- drivers/char/drm/drm_bufs.c | 81 +++++++++++++++++++++++++--------- drivers/char/drm/drm_context.c | 15 ++++++- drivers/char/drm/drm_ioctl.c | 2 +- drivers/char/drm/drm_proc.c | 4 +- drivers/char/drm/drm_scatter.c | 11 ++++- drivers/char/drm/drm_vm.c | 20 ++++----- drivers/char/drm/ffb_drv.c | 5 +-- drivers/char/drm/i810_dma.c | 1 + drivers/char/drm/i830_dma.c | 1 + drivers/char/drm/mga_dma.c | 1 + drivers/char/drm/mga_drm.h | 2 +- drivers/char/drm/mga_ioc32.c | 67 ++++++++++++++++++++++++++++ drivers/char/drm/r128_cce.c | 6 ++- drivers/char/drm/radeon_cp.c | 1 + 15 files changed, 177 insertions(+), 54 deletions(-) diff --git a/drivers/char/drm/drmP.h b/drivers/char/drm/drmP.h index fb2af9279148..0a4358996970 100644 --- a/drivers/char/drm/drmP.h +++ b/drivers/char/drm/drmP.h @@ -539,6 +539,7 @@ typedef struct drm_dma_handle { typedef struct drm_map_list { struct list_head head; /**< list head */ drm_map_t *map; /**< mapping */ + unsigned int user_token; } drm_map_list_t; typedef drm_map_t drm_local_map_t; @@ -759,6 +760,7 @@ typedef struct drm_device { struct drm_driver *driver; drm_local_map_t *agp_buffer_map; + unsigned int agp_buffer_token; drm_head_t primary; /**< primary screen head */ } drm_device_t; @@ -1048,16 +1050,12 @@ static __inline__ void drm_core_ioremapfree(struct drm_map *map, struct drm_devi drm_ioremapfree( map->handle, map->size, dev ); } -static __inline__ struct drm_map *drm_core_findmap(struct drm_device *dev, unsigned long offset) +static __inline__ struct drm_map *drm_core_findmap(struct drm_device *dev, unsigned int token) { - struct list_head *_list; - list_for_each( _list, &dev->maplist->head ) { - drm_map_list_t *_entry = list_entry( _list, drm_map_list_t, head ); - if ( _entry->map && - _entry->map->offset == offset ) { + drm_map_list_t *_entry; + list_for_each_entry(_entry, &dev->maplist->head, head) + if (_entry->user_token == 
token) return _entry->map; - } - } return NULL; } diff --git a/drivers/char/drm/drm_bufs.c b/drivers/char/drm/drm_bufs.c index fcc8d244f46f..d1e0b106c261 100644 --- a/drivers/char/drm/drm_bufs.c +++ b/drivers/char/drm/drm_bufs.c @@ -64,13 +64,41 @@ static drm_local_map_t *drm_find_matching_map(drm_device_t *dev, return NULL; } -#ifdef CONFIG_COMPAT /* - * Used to allocate 32-bit handles for _DRM_SHM regions - * The 0x10000000 value is chosen to be out of the way of - * FB/register and GART physical addresses. + * Used to allocate 32-bit handles for mappings. */ -static unsigned int map32_handle = 0x10000000; +#define START_RANGE 0x10000000 +#define END_RANGE 0x40000000 + +#ifdef _LP64 +static __inline__ unsigned int HandleID(unsigned long lhandle, drm_device_t *dev) +{ + static unsigned int map32_handle = START_RANGE; + unsigned int hash; + + if (lhandle & 0xffffffff00000000) { + hash = map32_handle; + map32_handle += PAGE_SIZE; + if (map32_handle > END_RANGE) + map32_handle = START_RANGE; + } else + hash = lhandle; + + while (1) { + drm_map_list_t *_entry; + list_for_each_entry(_entry, &dev->maplist->head,head) { + if (_entry->user_token == hash) + break; + } + if (&_entry->head == &dev->maplist->head) + return hash; + + hash += PAGE_SIZE; + map32_handle += PAGE_SIZE; + } +} +#else +# define HandleID(x,dev) (unsigned int)(x) #endif /** @@ -198,7 +226,7 @@ int drm_addmap(drm_device_t * dev, unsigned int offset, drm_free(map, sizeof(*map), DRM_MEM_MAPS); return -EINVAL; } - map->offset += dev->sg->handle; + map->offset += (unsigned long)dev->sg->virtual; break; case _DRM_CONSISTENT: /* dma_addr_t is 64bit on i386 with CONFIG_HIGHMEM64G, @@ -229,12 +257,11 @@ int drm_addmap(drm_device_t * dev, unsigned int offset, down(&dev->struct_sem); list_add(&list->head, &dev->maplist->head); -#ifdef CONFIG_COMPAT - /* Assign a 32-bit handle for _DRM_SHM mappings */ + /* Assign a 32-bit handle */ /* We do it here so that dev->struct_sem protects the increment */ - if (map->type 
== _DRM_SHM) - map->offset = map32_handle += PAGE_SIZE; -#endif + list->user_token = HandleID(map->type==_DRM_SHM + ? (unsigned long)map->handle + : map->offset, dev); up(&dev->struct_sem); *map_ptr = map; @@ -251,6 +278,7 @@ int drm_addmap_ioctl(struct inode *inode, struct file *filp, drm_map_t *map_ptr; drm_map_t __user *argp = (void __user *)arg; int err; + unsigned long handle = 0; if (!(filp->f_mode & 3)) return -EACCES; /* Require read/write */ @@ -259,22 +287,29 @@ int drm_addmap_ioctl(struct inode *inode, struct file *filp, return -EFAULT; } - err = drm_addmap( dev, map.offset, map.size, map.type, map.flags, - &map_ptr ); + err = drm_addmap(dev, map.offset, map.size, map.type, map.flags, + &map_ptr); if (err) { return err; } - if (copy_to_user(argp, map_ptr, sizeof(*map_ptr))) - return -EFAULT; - if (map_ptr->type != _DRM_SHM) { - if (copy_to_user(&argp->handle, &map_ptr->offset, - sizeof(map_ptr->offset))) + { + drm_map_list_t *_entry; + list_for_each_entry(_entry, &dev->maplist->head, head) { + if (_entry->map == map_ptr) + handle = _entry->user_token; + } + if (!handle) return -EFAULT; } + + if (copy_to_user(argp, map_ptr, sizeof(*map_ptr))) + return -EFAULT; + if (put_user(handle, &argp->handle)) + return -EFAULT; return 0; -} + } /** @@ -388,7 +423,7 @@ int drm_rmmap_ioctl(struct inode *inode, struct file *filp, drm_map_list_t *r_list = list_entry(list, drm_map_list_t, head); if (r_list->map && - r_list->map->handle == request.handle && + r_list->user_token == (unsigned long) request.handle && r_list->map->flags & _DRM_REMOVABLE) { map = r_list->map; break; @@ -939,7 +974,8 @@ static int drm_addbufs_sg(drm_device_t *dev, drm_buf_desc_t *request) buf->offset = (dma->byte_count + offset); buf->bus_address = agp_offset + offset; - buf->address = (void *)(agp_offset + offset + dev->sg->handle); + buf->address = (void *)(agp_offset + offset + + (unsigned long)dev->sg->virtual); buf->next = NULL; buf->waiting = 0; buf->pending = 0; @@ -1456,6 +1492,7 @@ int 
drm_mapbufs( struct inode *inode, struct file *filp, || (drm_core_check_feature(dev, DRIVER_FB_DMA) && (dma->flags & _DRM_DMA_USE_FB))) { drm_map_t *map = dev->agp_buffer_map; + unsigned long token = dev->agp_buffer_token; if ( !map ) { retcode = -EINVAL; @@ -1470,7 +1507,7 @@ int drm_mapbufs( struct inode *inode, struct file *filp, virtual = do_mmap( filp, 0, map->size, PROT_READ | PROT_WRITE, MAP_SHARED, - (unsigned long)map->offset ); + token ); #if LINUX_VERSION_CODE <= 0x020402 up( ¤t->mm->mmap_sem ); #else diff --git a/drivers/char/drm/drm_context.c b/drivers/char/drm/drm_context.c index 5af46c9830ec..f515567e5b6f 100644 --- a/drivers/char/drm/drm_context.c +++ b/drivers/char/drm/drm_context.c @@ -212,6 +212,7 @@ int drm_getsareactx(struct inode *inode, struct file *filp, drm_ctx_priv_map_t __user *argp = (void __user *)arg; drm_ctx_priv_map_t request; drm_map_t *map; + drm_map_list_t *_entry; if (copy_from_user(&request, argp, sizeof(request))) return -EFAULT; @@ -225,7 +226,17 @@ int drm_getsareactx(struct inode *inode, struct file *filp, map = dev->context_sareas[request.ctx_id]; up(&dev->struct_sem); - request.handle = (void *) map->offset; + request.handle = 0; + list_for_each_entry(_entry, &dev->maplist->head,head) { + if (_entry->map == map) { + request.handle = (void *)(unsigned long)_entry->user_token; + break; + } + } + if (request.handle == 0) + return -EINVAL; + + if (copy_to_user(argp, &request, sizeof(request))) return -EFAULT; return 0; @@ -262,7 +273,7 @@ int drm_setsareactx(struct inode *inode, struct file *filp, list_for_each(list, &dev->maplist->head) { r_list = list_entry(list, drm_map_list_t, head); if (r_list->map - && r_list->map->offset == (unsigned long) request.handle) + && r_list->user_token == (unsigned long) request.handle) goto found; } bad: diff --git a/drivers/char/drm/drm_ioctl.c b/drivers/char/drm/drm_ioctl.c index 39afda0ccabe..d2ed3ba5aca9 100644 --- a/drivers/char/drm/drm_ioctl.c +++ b/drivers/char/drm/drm_ioctl.c @@ 
-208,7 +208,7 @@ int drm_getmap( struct inode *inode, struct file *filp, map.size = r_list->map->size; map.type = r_list->map->type; map.flags = r_list->map->flags; - map.handle = r_list->map->handle; + map.handle = (void *)(unsigned long) r_list->user_token; map.mtrr = r_list->map->mtrr; up(&dev->struct_sem); diff --git a/drivers/char/drm/drm_proc.c b/drivers/char/drm/drm_proc.c index f4154cc71abb..32d2bb99462c 100644 --- a/drivers/char/drm/drm_proc.c +++ b/drivers/char/drm/drm_proc.c @@ -235,13 +235,13 @@ static int drm__vm_info(char *buf, char **start, off_t offset, int request, type = "??"; else type = types[map->type]; - DRM_PROC_PRINT("%4d 0x%08lx 0x%08lx %4.4s 0x%02x 0x%08lx ", + DRM_PROC_PRINT("%4d 0x%08lx 0x%08lx %4.4s 0x%02x 0x%08x ", i, map->offset, map->size, type, map->flags, - (unsigned long)map->handle); + r_list->user_token); if (map->mtrr < 0) { DRM_PROC_PRINT("none\n"); } else { diff --git a/drivers/char/drm/drm_scatter.c b/drivers/char/drm/drm_scatter.c index 54fddb6ea2d1..ed267d49bc6a 100644 --- a/drivers/char/drm/drm_scatter.c +++ b/drivers/char/drm/drm_scatter.c @@ -61,6 +61,12 @@ void drm_sg_cleanup( drm_sg_mem_t *entry ) DRM_MEM_SGLISTS ); } +#ifdef _LP64 +# define ScatterHandle(x) (unsigned int)((x >> 32) + (x & ((1L << 32) - 1))) +#else +# define ScatterHandle(x) (unsigned int)(x) +#endif + int drm_sg_alloc( struct inode *inode, struct file *filp, unsigned int cmd, unsigned long arg ) { @@ -133,12 +139,13 @@ int drm_sg_alloc( struct inode *inode, struct file *filp, */ memset( entry->virtual, 0, pages << PAGE_SHIFT ); - entry->handle = (unsigned long)entry->virtual; + entry->handle = ScatterHandle((unsigned long)entry->virtual); DRM_DEBUG( "sg alloc handle = %08lx\n", entry->handle ); DRM_DEBUG( "sg alloc virtual = %p\n", entry->virtual ); - for ( i = entry->handle, j = 0 ; j < pages ; i += PAGE_SIZE, j++ ) { + for (i = (unsigned long)entry->virtual, j = 0; j < pages; + i += PAGE_SIZE, j++) { entry->pagelist[j] = vmalloc_to_page((void *)i); 
if (!entry->pagelist[j]) goto failed; diff --git a/drivers/char/drm/drm_vm.c b/drivers/char/drm/drm_vm.c index 675d2397def9..99b5c86f7513 100644 --- a/drivers/char/drm/drm_vm.c +++ b/drivers/char/drm/drm_vm.c @@ -73,12 +73,13 @@ static __inline__ struct page *drm_do_vm_nopage(struct vm_area_struct *vma, r_list = list_entry(list, drm_map_list_t, head); map = r_list->map; if (!map) continue; - if (map->offset == VM_OFFSET(vma)) break; + if (r_list->user_token == VM_OFFSET(vma)) + break; } if (map && map->type == _DRM_AGP) { unsigned long offset = address - vma->vm_start; - unsigned long baddr = VM_OFFSET(vma) + offset; + unsigned long baddr = map->offset + offset; struct drm_agp_mem *agpmem; struct page *page; @@ -304,7 +305,7 @@ static __inline__ struct page *drm_do_vm_sg_nopage(struct vm_area_struct *vma, offset = address - vma->vm_start; - map_offset = map->offset - dev->sg->handle; + map_offset = map->offset - (unsigned long)dev->sg->virtual; page_offset = (offset >> PAGE_SHIFT) + (map_offset >> PAGE_SHIFT); page = entry->pagelist[page_offset]; get_page(page); @@ -568,13 +569,12 @@ int drm_mmap(struct file *filp, struct vm_area_struct *vma) for performance, even if the list was a bit longer. */ list_for_each(list, &dev->maplist->head) { - unsigned long off; r_list = list_entry(list, drm_map_list_t, head); map = r_list->map; if (!map) continue; - off = dev->driver->get_map_ofs(map); - if (off == VM_OFFSET(vma)) break; + if (r_list->user_token == VM_OFFSET(vma)) + break; } if (!map || ((map->flags&_DRM_RESTRICTED) && !capable(CAP_SYS_ADMIN))) @@ -613,7 +613,7 @@ int drm_mmap(struct file *filp, struct vm_area_struct *vma) /* fall through to _DRM_FRAME_BUFFER... 
*/ case _DRM_FRAME_BUFFER: case _DRM_REGISTERS: - if (VM_OFFSET(vma) >= __pa(high_memory)) { + if (map->offset >= __pa(high_memory)) { #if defined(__i386__) || defined(__x86_64__) if (boot_cpu_data.x86 > 3 && map->type != _DRM_AGP) { pgprot_val(vma->vm_page_prot) |= _PAGE_PCD; @@ -636,12 +636,12 @@ int drm_mmap(struct file *filp, struct vm_area_struct *vma) offset = dev->driver->get_reg_ofs(dev); #ifdef __sparc__ if (io_remap_pfn_range(DRM_RPR_ARG(vma) vma->vm_start, - (VM_OFFSET(vma) + offset) >> PAGE_SHIFT, + (map->offset + offset) >> PAGE_SHIFT, vma->vm_end - vma->vm_start, vma->vm_page_prot)) #else if (io_remap_pfn_range(vma, vma->vm_start, - (VM_OFFSET(vma) + offset) >> PAGE_SHIFT, + (map->offset + offset) >> PAGE_SHIFT, vma->vm_end - vma->vm_start, vma->vm_page_prot)) #endif @@ -649,7 +649,7 @@ int drm_mmap(struct file *filp, struct vm_area_struct *vma) DRM_DEBUG(" Type = %d; start = 0x%lx, end = 0x%lx," " offset = 0x%lx\n", map->type, - vma->vm_start, vma->vm_end, VM_OFFSET(vma) + offset); + vma->vm_start, vma->vm_end, map->offset + offset); vma->vm_ops = &drm_vm_ops; break; case _DRM_SHM: diff --git a/drivers/char/drm/ffb_drv.c b/drivers/char/drm/ffb_drv.c index ec614fff8f04..1bd0d55ee0f0 100644 --- a/drivers/char/drm/ffb_drv.c +++ b/drivers/char/drm/ffb_drv.c @@ -152,14 +152,11 @@ static drm_map_t *ffb_find_map(struct file *filp, unsigned long off) return NULL; list_for_each(list, &dev->maplist->head) { - unsigned long uoff; - r_list = (drm_map_list_t *)list; map = r_list->map; if (!map) continue; - uoff = (map->offset & 0xffffffff); - if (uoff == off) + if (r_list->user_token == off) return map; } diff --git a/drivers/char/drm/i810_dma.c b/drivers/char/drm/i810_dma.c index 0a9ac1f2e215..f9fd5abd774b 100644 --- a/drivers/char/drm/i810_dma.c +++ b/drivers/char/drm/i810_dma.c @@ -351,6 +351,7 @@ static int i810_dma_initialize(drm_device_t *dev, DRM_ERROR("can not find mmio map!\n"); return -EINVAL; } + dev->agp_buffer_token = init->buffers_offset; 
dev->agp_buffer_map = drm_core_findmap(dev, init->buffers_offset); if (!dev->agp_buffer_map) { dev->dev_private = (void *)dev_priv; diff --git a/drivers/char/drm/i830_dma.c b/drivers/char/drm/i830_dma.c index 80d8966397c1..697cefccd007 100644 --- a/drivers/char/drm/i830_dma.c +++ b/drivers/char/drm/i830_dma.c @@ -358,6 +358,7 @@ static int i830_dma_initialize(drm_device_t *dev, DRM_ERROR("can not find mmio map!\n"); return -EINVAL; } + dev->agp_buffer_token = init->buffers_offset; dev->agp_buffer_map = drm_core_findmap(dev, init->buffers_offset); if(!dev->agp_buffer_map) { dev->dev_private = (void *)dev_priv; diff --git a/drivers/char/drm/mga_dma.c b/drivers/char/drm/mga_dma.c index 7899e281d062..567b425b784f 100644 --- a/drivers/char/drm/mga_dma.c +++ b/drivers/char/drm/mga_dma.c @@ -825,6 +825,7 @@ static int mga_do_init_dma( drm_device_t *dev, drm_mga_init_t *init ) DRM_ERROR("failed to find primary dma region!\n"); return DRM_ERR(EINVAL); } + dev->agp_buffer_token = init->buffers_offset; dev->agp_buffer_map = drm_core_findmap(dev, init->buffers_offset); if (!dev->agp_buffer_map) { DRM_ERROR("failed to find dma buffer region!\n"); diff --git a/drivers/char/drm/mga_drm.h b/drivers/char/drm/mga_drm.h index 2d8aa790379e..d20aab3bd57b 100644 --- a/drivers/char/drm/mga_drm.h +++ b/drivers/char/drm/mga_drm.h @@ -312,7 +312,7 @@ typedef struct drm_mga_dma_bootstrap { * an IOMMU) is being used for "AGP" textures. */ /*@{*/ - drm_handle_t texture_handle; /**< Handle used to map AGP textures. */ + unsigned long texture_handle; /**< Handle used to map AGP textures. */ uint32_t texture_size; /**< Size of the AGP texture region. 
*/ /*@}*/ diff --git a/drivers/char/drm/mga_ioc32.c b/drivers/char/drm/mga_ioc32.c index bc745cfa2095..77d738e75a4d 100644 --- a/drivers/char/drm/mga_ioc32.c +++ b/drivers/char/drm/mga_ioc32.c @@ -129,9 +129,76 @@ static int compat_mga_getparam(struct file *file, unsigned int cmd, DRM_IOCTL_MGA_GETPARAM, (unsigned long)getparam); } +typedef struct drm_mga_drm_bootstrap32 { + u32 texture_handle; + u32 texture_size; + u32 primary_size; + u32 secondary_bin_count; + u32 secondary_bin_size; + u32 agp_mode; + u8 agp_size; +} drm_mga_dma_bootstrap32_t; + +static int compat_mga_dma_bootstrap(struct file *file, unsigned int cmd, + unsigned long arg) +{ + drm_mga_dma_bootstrap32_t dma_bootstrap32; + drm_mga_dma_bootstrap_t __user *dma_bootstrap; + int err; + + if (copy_from_user(&dma_bootstrap32, (void __user *)arg, + sizeof(dma_bootstrap32))) + return -EFAULT; + + dma_bootstrap = compat_alloc_user_space(sizeof(*dma_bootstrap)); + if (!access_ok(VERIFY_WRITE, dma_bootstrap, sizeof(*dma_bootstrap)) + || __put_user(dma_bootstrap32.texture_handle, + &dma_bootstrap->texture_handle) + || __put_user(dma_bootstrap32.texture_size, + &dma_bootstrap->texture_size) + || __put_user(dma_bootstrap32.primary_size, + &dma_bootstrap->primary_size) + || __put_user(dma_bootstrap32.secondary_bin_count, + &dma_bootstrap->secondary_bin_count) + || __put_user(dma_bootstrap32.secondary_bin_size, + &dma_bootstrap->secondary_bin_size) + || __put_user(dma_bootstrap32.agp_mode, &dma_bootstrap->agp_mode) + || __put_user(dma_bootstrap32.agp_size, &dma_bootstrap->agp_size)) + return -EFAULT; + + err = drm_ioctl(file->f_dentry->d_inode, file, + DRM_IOCTL_MGA_DMA_BOOTSTRAP, + (unsigned long)dma_bootstrap); + if (err) + return err; + + if (__get_user(dma_bootstrap32.texture_handle, + &dma_bootstrap->texture_handle) + || __get_user(dma_bootstrap32.texture_size, + &dma_bootstrap->texture_size) + || __get_user(dma_bootstrap32.primary_size, + &dma_bootstrap->primary_size) + || 
__get_user(dma_bootstrap32.secondary_bin_count, + &dma_bootstrap->secondary_bin_count) + || __get_user(dma_bootstrap32.secondary_bin_size, + &dma_bootstrap->secondary_bin_size) + || __get_user(dma_bootstrap32.agp_mode, + &dma_bootstrap->agp_mode) + || __get_user(dma_bootstrap32.agp_size, + &dma_bootstrap->agp_size)) + return -EFAULT; + + if (copy_to_user((void __user *)arg, &dma_bootstrap32, + sizeof(dma_bootstrap32))) + return -EFAULT; + + return 0; +} + drm_ioctl_compat_t *mga_compat_ioctls[] = { [DRM_MGA_INIT] = compat_mga_init, [DRM_MGA_GETPARAM] = compat_mga_getparam, + [DRM_MGA_DMA_BOOTSTRAP] = compat_mga_dma_bootstrap, }; /** diff --git a/drivers/char/drm/r128_cce.c b/drivers/char/drm/r128_cce.c index 08ed8d01d9d9..895152206b31 100644 --- a/drivers/char/drm/r128_cce.c +++ b/drivers/char/drm/r128_cce.c @@ -326,7 +326,8 @@ static void r128_cce_init_ring_buffer( drm_device_t *dev, ring_start = dev_priv->cce_ring->offset - dev->agp->base; else #endif - ring_start = dev_priv->cce_ring->offset - dev->sg->handle; + ring_start = dev_priv->cce_ring->offset - + (unsigned long)dev->sg->virtual; R128_WRITE( R128_PM4_BUFFER_OFFSET, ring_start | R128_AGP_OFFSET ); @@ -487,6 +488,7 @@ static int r128_do_init_cce( drm_device_t *dev, drm_r128_init_t *init ) r128_do_cleanup_cce( dev ); return DRM_ERR(EINVAL); } + dev->agp_buffer_token = init->buffers_offset; dev->agp_buffer_map = drm_core_findmap(dev, init->buffers_offset); if(!dev->agp_buffer_map) { DRM_ERROR("could not find dma buffer region!\n"); @@ -537,7 +539,7 @@ static int r128_do_init_cce( drm_device_t *dev, drm_r128_init_t *init ) dev_priv->cce_buffers_offset = dev->agp->base; else #endif - dev_priv->cce_buffers_offset = dev->sg->handle; + dev_priv->cce_buffers_offset = (unsigned long)dev->sg->virtual; dev_priv->ring.start = (u32 *)dev_priv->cce_ring->handle; dev_priv->ring.end = ((u32 *)dev_priv->cce_ring->handle diff --git a/drivers/char/drm/radeon_cp.c b/drivers/char/drm/radeon_cp.c index 
8255cc6fdc28..f24a27c4dd17 100644 --- a/drivers/char/drm/radeon_cp.c +++ b/drivers/char/drm/radeon_cp.c @@ -1407,6 +1407,7 @@ static int radeon_do_init_cp( drm_device_t *dev, drm_radeon_init_t *init ) radeon_do_cleanup_cp(dev); return DRM_ERR(EINVAL); } + dev->agp_buffer_token = init->buffers_offset; dev->agp_buffer_map = drm_core_findmap(dev, init->buffers_offset); if(!dev->agp_buffer_map) { DRM_ERROR("could not find dma buffer region!\n"); From db215327c62c2db533afb322761fa04ea6244164 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Fri, 5 Aug 2005 22:13:15 +1000 Subject: [PATCH 059/584] drm: switch drm_handle_t to unsigned int This converts the drm_handle_t to unsigned int. This is currently safe to do as we don't pass these across the kernel/user boundary, but userspace does use these, but no-one builds userspace against the kernel headers at present so it is okay to switch over the kernel copy of drm.h at this point. (The CVS tree will switch over soon in sync with some Mesa changes) From: Egbert Eich Signed-off-by: Dave Airlie --- drivers/char/drm/drm.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/char/drm/drm.h b/drivers/char/drm/drm.h index 3a4745f76cf0..fc6598a81acd 100644 --- a/drivers/char/drm/drm.h +++ b/drivers/char/drm/drm.h @@ -98,7 +98,7 @@ #define _DRM_LOCKING_CONTEXT(lock) ((lock) & ~(_DRM_LOCK_HELD|_DRM_LOCK_CONT)) -typedef unsigned long drm_handle_t; +typedef unsigned int drm_handle_t; typedef unsigned int drm_context_t; typedef unsigned int drm_drawable_t; typedef unsigned int drm_magic_t; From 1fad99499afdd2730adb1d53413b91580b1f0662 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Fri, 5 Aug 2005 22:40:34 +1000 Subject: [PATCH 060/584] drm: remove the gamma driver The gamma driver has been broken for quite a while, it doesn't build, we don't have a userspace, mine is in Ireland etc... 
Signed-off-by: Dave Airlie --- drivers/char/drm/Kconfig | 7 - drivers/char/drm/Makefile | 2 - drivers/char/drm/drm_pciids.h | 4 - drivers/char/drm/gamma_context.h | 492 ---------------- drivers/char/drm/gamma_dma.c | 946 ------------------------------- drivers/char/drm/gamma_drm.h | 90 --- drivers/char/drm/gamma_drv.c | 59 -- drivers/char/drm/gamma_drv.h | 147 ----- drivers/char/drm/gamma_lists.h | 215 ------- drivers/char/drm/gamma_lock.h | 140 ----- drivers/char/drm/gamma_old_dma.h | 313 ---------- 11 files changed, 2415 deletions(-) delete mode 100644 drivers/char/drm/gamma_context.h delete mode 100644 drivers/char/drm/gamma_dma.c delete mode 100644 drivers/char/drm/gamma_drm.h delete mode 100644 drivers/char/drm/gamma_drv.c delete mode 100644 drivers/char/drm/gamma_drv.h delete mode 100644 drivers/char/drm/gamma_lists.h delete mode 100644 drivers/char/drm/gamma_lock.h delete mode 100644 drivers/char/drm/gamma_old_dma.h diff --git a/drivers/char/drm/Kconfig b/drivers/char/drm/Kconfig index d6c50312aec6..f31b9706ef65 100644 --- a/drivers/char/drm/Kconfig +++ b/drivers/char/drm/Kconfig @@ -23,13 +23,6 @@ config DRM_TDFX Choose this option if you have a 3dfx Banshee or Voodoo3 (or later), graphics card. If M is selected, the module will be called tdfx. -config DRM_GAMMA - tristate "3dlabs GMX 2000" - depends on DRM && BROKEN - help - This is the old gamma driver, please tell me if it might actually - work. 
- config DRM_R128 tristate "ATI Rage 128" depends on DRM && PCI diff --git a/drivers/char/drm/Makefile b/drivers/char/drm/Makefile index ddd941045b1f..3f0cf8e9cc50 100644 --- a/drivers/char/drm/Makefile +++ b/drivers/char/drm/Makefile @@ -8,7 +8,6 @@ drm-objs := drm_auth.o drm_bufs.o drm_context.o drm_dma.o drm_drawable.o \ drm_agpsupport.o drm_scatter.o ati_pcigart.o drm_pci.o \ drm_sysfs.o -gamma-objs := gamma_drv.o gamma_dma.o tdfx-objs := tdfx_drv.o r128-objs := r128_drv.o r128_cce.o r128_state.o r128_irq.o mga-objs := mga_drv.o mga_dma.o mga_state.o mga_warp.o mga_irq.o @@ -29,7 +28,6 @@ i915-objs += i915_ioc32.o endif obj-$(CONFIG_DRM) += drm.o -obj-$(CONFIG_DRM_GAMMA) += gamma.o obj-$(CONFIG_DRM_TDFX) += tdfx.o obj-$(CONFIG_DRM_R128) += r128.o obj-$(CONFIG_DRM_RADEON)+= radeon.o diff --git a/drivers/char/drm/drm_pciids.h b/drivers/char/drm/drm_pciids.h index 4f317ec092ee..8e264f9c1a1e 100644 --- a/drivers/char/drm/drm_pciids.h +++ b/drivers/char/drm/drm_pciids.h @@ -182,10 +182,6 @@ {0x8086, 0x2572, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0}, \ {0, 0, 0} -#define gamma_PCI_IDS \ - {0x3d3d, 0x0008, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0}, \ - {0, 0, 0} - #define savage_PCI_IDS \ {0x5333, 0x8a22, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0}, \ {0x5333, 0x8a23, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0}, \ diff --git a/drivers/char/drm/gamma_context.h b/drivers/char/drm/gamma_context.h deleted file mode 100644 index d11b507f87ee..000000000000 --- a/drivers/char/drm/gamma_context.h +++ /dev/null @@ -1,492 +0,0 @@ -/* drm_context.h -- IOCTLs for generic contexts -*- linux-c -*- - * Created: Fri Nov 24 18:31:37 2000 by gareth@valinux.com - * - * Copyright 1999, 2000 Precision Insight, Inc., Cedar Park, Texas. - * Copyright 2000 VA Linux Systems, Inc., Sunnyvale, California. - * All Rights Reserved. 
- * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * VA LINUX SYSTEMS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - * - * Authors: - * Rickard E. (Rik) Faith - * Gareth Hughes - * ChangeLog: - * 2001-11-16 Torsten Duwe - * added context constructor/destructor hooks, - * needed by SiS driver's memory management. - */ - -/* ================================================================ - * Old-style context support -- only used by gamma. - */ - - -/* The drm_read and drm_write_string code (especially that which manages - the circular buffer), is based on Alessandro Rubini's LINUX DEVICE - DRIVERS (Cambridge: O'Reilly, 1998), pages 111-113. 
*/ - -ssize_t gamma_fops_read(struct file *filp, char __user *buf, size_t count, loff_t *off) -{ - drm_file_t *priv = filp->private_data; - drm_device_t *dev = priv->dev; - int left; - int avail; - int send; - int cur; - - DRM_DEBUG("%p, %p\n", dev->buf_rp, dev->buf_wp); - - while (dev->buf_rp == dev->buf_wp) { - DRM_DEBUG(" sleeping\n"); - if (filp->f_flags & O_NONBLOCK) { - return -EAGAIN; - } - interruptible_sleep_on(&dev->buf_readers); - if (signal_pending(current)) { - DRM_DEBUG(" interrupted\n"); - return -ERESTARTSYS; - } - DRM_DEBUG(" awake\n"); - } - - left = (dev->buf_rp + DRM_BSZ - dev->buf_wp) % DRM_BSZ; - avail = DRM_BSZ - left; - send = DRM_MIN(avail, count); - - while (send) { - if (dev->buf_wp > dev->buf_rp) { - cur = DRM_MIN(send, dev->buf_wp - dev->buf_rp); - } else { - cur = DRM_MIN(send, dev->buf_end - dev->buf_rp); - } - if (copy_to_user(buf, dev->buf_rp, cur)) - return -EFAULT; - dev->buf_rp += cur; - if (dev->buf_rp == dev->buf_end) dev->buf_rp = dev->buf; - send -= cur; - } - - wake_up_interruptible(&dev->buf_writers); - return DRM_MIN(avail, count); -} - - -/* In an incredibly convoluted setup, the kernel module actually calls - * back into the X server to perform context switches on behalf of the - * 3d clients. 
- */ -int DRM(write_string)(drm_device_t *dev, const char *s) -{ - int left = (dev->buf_rp + DRM_BSZ - dev->buf_wp) % DRM_BSZ; - int send = strlen(s); - int count; - - DRM_DEBUG("%d left, %d to send (%p, %p)\n", - left, send, dev->buf_rp, dev->buf_wp); - - if (left == 1 || dev->buf_wp != dev->buf_rp) { - DRM_ERROR("Buffer not empty (%d left, wp = %p, rp = %p)\n", - left, - dev->buf_wp, - dev->buf_rp); - } - - while (send) { - if (dev->buf_wp >= dev->buf_rp) { - count = DRM_MIN(send, dev->buf_end - dev->buf_wp); - if (count == left) --count; /* Leave a hole */ - } else { - count = DRM_MIN(send, dev->buf_rp - dev->buf_wp - 1); - } - strncpy(dev->buf_wp, s, count); - dev->buf_wp += count; - if (dev->buf_wp == dev->buf_end) dev->buf_wp = dev->buf; - send -= count; - } - - if (dev->buf_async) kill_fasync(&dev->buf_async, SIGIO, POLL_IN); - - DRM_DEBUG("waking\n"); - wake_up_interruptible(&dev->buf_readers); - return 0; -} - -unsigned int gamma_fops_poll(struct file *filp, struct poll_table_struct *wait) -{ - drm_file_t *priv = filp->private_data; - drm_device_t *dev = priv->dev; - - poll_wait(filp, &dev->buf_readers, wait); - if (dev->buf_wp != dev->buf_rp) return POLLIN | POLLRDNORM; - return 0; -} - -int DRM(context_switch)(drm_device_t *dev, int old, int new) -{ - char buf[64]; - drm_queue_t *q; - - if (test_and_set_bit(0, &dev->context_flag)) { - DRM_ERROR("Reentering -- FIXME\n"); - return -EBUSY; - } - - DRM_DEBUG("Context switch from %d to %d\n", old, new); - - if (new >= dev->queue_count) { - clear_bit(0, &dev->context_flag); - return -EINVAL; - } - - if (new == dev->last_context) { - clear_bit(0, &dev->context_flag); - return 0; - } - - q = dev->queuelist[new]; - atomic_inc(&q->use_count); - if (atomic_read(&q->use_count) == 1) { - atomic_dec(&q->use_count); - clear_bit(0, &dev->context_flag); - return -EINVAL; - } - - /* This causes the X server to wake up & do a bunch of hardware - * interaction to actually effect the context switch. 
- */ - sprintf(buf, "C %d %d\n", old, new); - DRM(write_string)(dev, buf); - - atomic_dec(&q->use_count); - - return 0; -} - -int DRM(context_switch_complete)(drm_device_t *dev, int new) -{ - drm_device_dma_t *dma = dev->dma; - - dev->last_context = new; /* PRE/POST: This is the _only_ writer. */ - dev->last_switch = jiffies; - - if (!_DRM_LOCK_IS_HELD(dev->lock.hw_lock->lock)) { - DRM_ERROR("Lock isn't held after context switch\n"); - } - - if (!dma || !(dma->next_buffer && dma->next_buffer->while_locked)) { - if (DRM(lock_free)(dev, &dev->lock.hw_lock->lock, - DRM_KERNEL_CONTEXT)) { - DRM_ERROR("Cannot free lock\n"); - } - } - - clear_bit(0, &dev->context_flag); - wake_up_interruptible(&dev->context_wait); - - return 0; -} - -static int DRM(init_queue)(drm_device_t *dev, drm_queue_t *q, drm_ctx_t *ctx) -{ - DRM_DEBUG("\n"); - - if (atomic_read(&q->use_count) != 1 - || atomic_read(&q->finalization) - || atomic_read(&q->block_count)) { - DRM_ERROR("New queue is already in use: u%d f%d b%d\n", - atomic_read(&q->use_count), - atomic_read(&q->finalization), - atomic_read(&q->block_count)); - } - - atomic_set(&q->finalization, 0); - atomic_set(&q->block_count, 0); - atomic_set(&q->block_read, 0); - atomic_set(&q->block_write, 0); - atomic_set(&q->total_queued, 0); - atomic_set(&q->total_flushed, 0); - atomic_set(&q->total_locks, 0); - - init_waitqueue_head(&q->write_queue); - init_waitqueue_head(&q->read_queue); - init_waitqueue_head(&q->flush_queue); - - q->flags = ctx->flags; - - DRM(waitlist_create)(&q->waitlist, dev->dma->buf_count); - - return 0; -} - - -/* drm_alloc_queue: -PRE: 1) dev->queuelist[0..dev->queue_count] is allocated and will not - disappear (so all deallocation must be done after IOCTLs are off) - 2) dev->queue_count < dev->queue_slots - 3) dev->queuelist[i].use_count == 0 and - dev->queuelist[i].finalization == 0 if i not in use -POST: 1) dev->queuelist[i].use_count == 1 - 2) dev->queue_count < dev->queue_slots */ - -static int 
DRM(alloc_queue)(drm_device_t *dev) -{ - int i; - drm_queue_t *queue; - int oldslots; - int newslots; - /* Check for a free queue */ - for (i = 0; i < dev->queue_count; i++) { - atomic_inc(&dev->queuelist[i]->use_count); - if (atomic_read(&dev->queuelist[i]->use_count) == 1 - && !atomic_read(&dev->queuelist[i]->finalization)) { - DRM_DEBUG("%d (free)\n", i); - return i; - } - atomic_dec(&dev->queuelist[i]->use_count); - } - /* Allocate a new queue */ - down(&dev->struct_sem); - - queue = DRM(alloc)(sizeof(*queue), DRM_MEM_QUEUES); - memset(queue, 0, sizeof(*queue)); - atomic_set(&queue->use_count, 1); - - ++dev->queue_count; - if (dev->queue_count >= dev->queue_slots) { - oldslots = dev->queue_slots * sizeof(*dev->queuelist); - if (!dev->queue_slots) dev->queue_slots = 1; - dev->queue_slots *= 2; - newslots = dev->queue_slots * sizeof(*dev->queuelist); - - dev->queuelist = DRM(realloc)(dev->queuelist, - oldslots, - newslots, - DRM_MEM_QUEUES); - if (!dev->queuelist) { - up(&dev->struct_sem); - DRM_DEBUG("out of memory\n"); - return -ENOMEM; - } - } - dev->queuelist[dev->queue_count-1] = queue; - - up(&dev->struct_sem); - DRM_DEBUG("%d (new)\n", dev->queue_count - 1); - return dev->queue_count - 1; -} - -int DRM(resctx)(struct inode *inode, struct file *filp, - unsigned int cmd, unsigned long arg) -{ - drm_ctx_res_t __user *argp = (void __user *)arg; - drm_ctx_res_t res; - drm_ctx_t ctx; - int i; - - DRM_DEBUG("%d\n", DRM_RESERVED_CONTEXTS); - if (copy_from_user(&res, argp, sizeof(res))) - return -EFAULT; - if (res.count >= DRM_RESERVED_CONTEXTS) { - memset(&ctx, 0, sizeof(ctx)); - for (i = 0; i < DRM_RESERVED_CONTEXTS; i++) { - ctx.handle = i; - if (copy_to_user(&res.contexts[i], - &i, - sizeof(i))) - return -EFAULT; - } - } - res.count = DRM_RESERVED_CONTEXTS; - if (copy_to_user(argp, &res, sizeof(res))) - return -EFAULT; - return 0; -} - -int DRM(addctx)(struct inode *inode, struct file *filp, - unsigned int cmd, unsigned long arg) -{ - drm_file_t *priv = 
filp->private_data; - drm_device_t *dev = priv->dev; - drm_ctx_t ctx; - drm_ctx_t __user *argp = (void __user *)arg; - - if (copy_from_user(&ctx, argp, sizeof(ctx))) - return -EFAULT; - if ((ctx.handle = DRM(alloc_queue)(dev)) == DRM_KERNEL_CONTEXT) { - /* Init kernel's context and get a new one. */ - DRM(init_queue)(dev, dev->queuelist[ctx.handle], &ctx); - ctx.handle = DRM(alloc_queue)(dev); - } - DRM(init_queue)(dev, dev->queuelist[ctx.handle], &ctx); - DRM_DEBUG("%d\n", ctx.handle); - if (copy_to_user(argp, &ctx, sizeof(ctx))) - return -EFAULT; - return 0; -} - -int DRM(modctx)(struct inode *inode, struct file *filp, - unsigned int cmd, unsigned long arg) -{ - drm_file_t *priv = filp->private_data; - drm_device_t *dev = priv->dev; - drm_ctx_t ctx; - drm_queue_t *q; - - if (copy_from_user(&ctx, (drm_ctx_t __user *)arg, sizeof(ctx))) - return -EFAULT; - - DRM_DEBUG("%d\n", ctx.handle); - - if (ctx.handle < 0 || ctx.handle >= dev->queue_count) return -EINVAL; - q = dev->queuelist[ctx.handle]; - - atomic_inc(&q->use_count); - if (atomic_read(&q->use_count) == 1) { - /* No longer in use */ - atomic_dec(&q->use_count); - return -EINVAL; - } - - if (DRM_BUFCOUNT(&q->waitlist)) { - atomic_dec(&q->use_count); - return -EBUSY; - } - - q->flags = ctx.flags; - - atomic_dec(&q->use_count); - return 0; -} - -int DRM(getctx)(struct inode *inode, struct file *filp, - unsigned int cmd, unsigned long arg) -{ - drm_file_t *priv = filp->private_data; - drm_device_t *dev = priv->dev; - drm_ctx_t __user *argp = (void __user *)arg; - drm_ctx_t ctx; - drm_queue_t *q; - - if (copy_from_user(&ctx, argp, sizeof(ctx))) - return -EFAULT; - - DRM_DEBUG("%d\n", ctx.handle); - - if (ctx.handle >= dev->queue_count) return -EINVAL; - q = dev->queuelist[ctx.handle]; - - atomic_inc(&q->use_count); - if (atomic_read(&q->use_count) == 1) { - /* No longer in use */ - atomic_dec(&q->use_count); - return -EINVAL; - } - - ctx.flags = q->flags; - atomic_dec(&q->use_count); - - if (copy_to_user(argp, 
&ctx, sizeof(ctx))) - return -EFAULT; - - return 0; -} - -int DRM(switchctx)(struct inode *inode, struct file *filp, - unsigned int cmd, unsigned long arg) -{ - drm_file_t *priv = filp->private_data; - drm_device_t *dev = priv->dev; - drm_ctx_t ctx; - - if (copy_from_user(&ctx, (drm_ctx_t __user *)arg, sizeof(ctx))) - return -EFAULT; - DRM_DEBUG("%d\n", ctx.handle); - return DRM(context_switch)(dev, dev->last_context, ctx.handle); -} - -int DRM(newctx)(struct inode *inode, struct file *filp, - unsigned int cmd, unsigned long arg) -{ - drm_file_t *priv = filp->private_data; - drm_device_t *dev = priv->dev; - drm_ctx_t ctx; - - if (copy_from_user(&ctx, (drm_ctx_t __user *)arg, sizeof(ctx))) - return -EFAULT; - DRM_DEBUG("%d\n", ctx.handle); - DRM(context_switch_complete)(dev, ctx.handle); - - return 0; -} - -int DRM(rmctx)(struct inode *inode, struct file *filp, - unsigned int cmd, unsigned long arg) -{ - drm_file_t *priv = filp->private_data; - drm_device_t *dev = priv->dev; - drm_ctx_t ctx; - drm_queue_t *q; - drm_buf_t *buf; - - if (copy_from_user(&ctx, (drm_ctx_t __user *)arg, sizeof(ctx))) - return -EFAULT; - DRM_DEBUG("%d\n", ctx.handle); - - if (ctx.handle >= dev->queue_count) return -EINVAL; - q = dev->queuelist[ctx.handle]; - - atomic_inc(&q->use_count); - if (atomic_read(&q->use_count) == 1) { - /* No longer in use */ - atomic_dec(&q->use_count); - return -EINVAL; - } - - atomic_inc(&q->finalization); /* Mark queue in finalization state */ - atomic_sub(2, &q->use_count); /* Mark queue as unused (pending - finalization) */ - - while (test_and_set_bit(0, &dev->interrupt_flag)) { - schedule(); - if (signal_pending(current)) { - clear_bit(0, &dev->interrupt_flag); - return -EINTR; - } - } - /* Remove queued buffers */ - while ((buf = DRM(waitlist_get)(&q->waitlist))) { - DRM(free_buffer)(dev, buf); - } - clear_bit(0, &dev->interrupt_flag); - - /* Wakeup blocked processes */ - wake_up_interruptible(&q->read_queue); - wake_up_interruptible(&q->write_queue); - 
wake_up_interruptible(&q->flush_queue); - - /* Finalization over. Queue is made - available when both use_count and - finalization become 0, which won't - happen until all the waiting processes - stop waiting. */ - atomic_dec(&q->finalization); - return 0; -} - diff --git a/drivers/char/drm/gamma_dma.c b/drivers/char/drm/gamma_dma.c deleted file mode 100644 index e486fb8d31e9..000000000000 --- a/drivers/char/drm/gamma_dma.c +++ /dev/null @@ -1,946 +0,0 @@ -/* gamma_dma.c -- DMA support for GMX 2000 -*- linux-c -*- - * Created: Fri Mar 19 14:30:16 1999 by faith@precisioninsight.com - * - * Copyright 1999 Precision Insight, Inc., Cedar Park, Texas. - * Copyright 2000 VA Linux Systems, Inc., Sunnyvale, California. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. - * - * Authors: - * Rickard E. 
(Rik) Faith - * - */ - -#include "gamma.h" -#include "drmP.h" -#include "drm.h" -#include "gamma_drm.h" -#include "gamma_drv.h" - -#include /* For task queue support */ -#include - -static inline void gamma_dma_dispatch(drm_device_t *dev, unsigned long address, - unsigned long length) -{ - drm_gamma_private_t *dev_priv = - (drm_gamma_private_t *)dev->dev_private; - mb(); - while ( GAMMA_READ(GAMMA_INFIFOSPACE) < 2) - cpu_relax(); - - GAMMA_WRITE(GAMMA_DMAADDRESS, address); - - while (GAMMA_READ(GAMMA_GCOMMANDSTATUS) != 4) - cpu_relax(); - - GAMMA_WRITE(GAMMA_DMACOUNT, length / 4); -} - -void gamma_dma_quiescent_single(drm_device_t *dev) -{ - drm_gamma_private_t *dev_priv = - (drm_gamma_private_t *)dev->dev_private; - while (GAMMA_READ(GAMMA_DMACOUNT)) - cpu_relax(); - - while (GAMMA_READ(GAMMA_INFIFOSPACE) < 2) - cpu_relax(); - - GAMMA_WRITE(GAMMA_FILTERMODE, 1 << 10); - GAMMA_WRITE(GAMMA_SYNC, 0); - - do { - while (!GAMMA_READ(GAMMA_OUTFIFOWORDS)) - cpu_relax(); - } while (GAMMA_READ(GAMMA_OUTPUTFIFO) != GAMMA_SYNC_TAG); -} - -void gamma_dma_quiescent_dual(drm_device_t *dev) -{ - drm_gamma_private_t *dev_priv = - (drm_gamma_private_t *)dev->dev_private; - while (GAMMA_READ(GAMMA_DMACOUNT)) - cpu_relax(); - - while (GAMMA_READ(GAMMA_INFIFOSPACE) < 3) - cpu_relax(); - - GAMMA_WRITE(GAMMA_BROADCASTMASK, 3); - GAMMA_WRITE(GAMMA_FILTERMODE, 1 << 10); - GAMMA_WRITE(GAMMA_SYNC, 0); - - /* Read from first MX */ - do { - while (!GAMMA_READ(GAMMA_OUTFIFOWORDS)) - cpu_relax(); - } while (GAMMA_READ(GAMMA_OUTPUTFIFO) != GAMMA_SYNC_TAG); - - /* Read from second MX */ - do { - while (!GAMMA_READ(GAMMA_OUTFIFOWORDS + 0x10000)) - cpu_relax(); - } while (GAMMA_READ(GAMMA_OUTPUTFIFO + 0x10000) != GAMMA_SYNC_TAG); -} - -void gamma_dma_ready(drm_device_t *dev) -{ - drm_gamma_private_t *dev_priv = - (drm_gamma_private_t *)dev->dev_private; - while (GAMMA_READ(GAMMA_DMACOUNT)) - cpu_relax(); -} - -static inline int gamma_dma_is_ready(drm_device_t *dev) -{ - drm_gamma_private_t 
*dev_priv = - (drm_gamma_private_t *)dev->dev_private; - return (!GAMMA_READ(GAMMA_DMACOUNT)); -} - -irqreturn_t gamma_driver_irq_handler( DRM_IRQ_ARGS ) -{ - drm_device_t *dev = (drm_device_t *)arg; - drm_device_dma_t *dma = dev->dma; - drm_gamma_private_t *dev_priv = - (drm_gamma_private_t *)dev->dev_private; - - /* FIXME: should check whether we're actually interested in the interrupt? */ - atomic_inc(&dev->counts[6]); /* _DRM_STAT_IRQ */ - - while (GAMMA_READ(GAMMA_INFIFOSPACE) < 3) - cpu_relax(); - - GAMMA_WRITE(GAMMA_GDELAYTIMER, 0xc350/2); /* 0x05S */ - GAMMA_WRITE(GAMMA_GCOMMANDINTFLAGS, 8); - GAMMA_WRITE(GAMMA_GINTFLAGS, 0x2001); - if (gamma_dma_is_ready(dev)) { - /* Free previous buffer */ - if (test_and_set_bit(0, &dev->dma_flag)) - return IRQ_HANDLED; - if (dma->this_buffer) { - gamma_free_buffer(dev, dma->this_buffer); - dma->this_buffer = NULL; - } - clear_bit(0, &dev->dma_flag); - - /* Dispatch new buffer */ - schedule_work(&dev->work); - } - return IRQ_HANDLED; -} - -/* Only called by gamma_dma_schedule. */ -static int gamma_do_dma(drm_device_t *dev, int locked) -{ - unsigned long address; - unsigned long length; - drm_buf_t *buf; - int retcode = 0; - drm_device_dma_t *dma = dev->dma; - - if (test_and_set_bit(0, &dev->dma_flag)) return -EBUSY; - - - if (!dma->next_buffer) { - DRM_ERROR("No next_buffer\n"); - clear_bit(0, &dev->dma_flag); - return -EINVAL; - } - - buf = dma->next_buffer; - /* WE NOW ARE ON LOGICAL PAGES!! 
- using page table setup in dma_init */ - /* So we pass the buffer index value into the physical page offset */ - address = buf->idx << 12; - length = buf->used; - - DRM_DEBUG("context %d, buffer %d (%ld bytes)\n", - buf->context, buf->idx, length); - - if (buf->list == DRM_LIST_RECLAIM) { - gamma_clear_next_buffer(dev); - gamma_free_buffer(dev, buf); - clear_bit(0, &dev->dma_flag); - return -EINVAL; - } - - if (!length) { - DRM_ERROR("0 length buffer\n"); - gamma_clear_next_buffer(dev); - gamma_free_buffer(dev, buf); - clear_bit(0, &dev->dma_flag); - return 0; - } - - if (!gamma_dma_is_ready(dev)) { - clear_bit(0, &dev->dma_flag); - return -EBUSY; - } - - if (buf->while_locked) { - if (!_DRM_LOCK_IS_HELD(dev->lock.hw_lock->lock)) { - DRM_ERROR("Dispatching buffer %d from pid %d" - " \"while locked\", but no lock held\n", - buf->idx, current->pid); - } - } else { - if (!locked && !gamma_lock_take(&dev->lock.hw_lock->lock, - DRM_KERNEL_CONTEXT)) { - clear_bit(0, &dev->dma_flag); - return -EBUSY; - } - } - - if (dev->last_context != buf->context - && !(dev->queuelist[buf->context]->flags - & _DRM_CONTEXT_PRESERVED)) { - /* PRE: dev->last_context != buf->context */ - if (DRM(context_switch)(dev, dev->last_context, - buf->context)) { - DRM(clear_next_buffer)(dev); - DRM(free_buffer)(dev, buf); - } - retcode = -EBUSY; - goto cleanup; - - /* POST: we will wait for the context - switch and will dispatch on a later call - when dev->last_context == buf->context. - NOTE WE HOLD THE LOCK THROUGHOUT THIS - TIME! */ - } - - gamma_clear_next_buffer(dev); - buf->pending = 1; - buf->waiting = 0; - buf->list = DRM_LIST_PEND; - - /* WE NOW ARE ON LOGICAL PAGES!!! 
- overriding address */ - address = buf->idx << 12; - - gamma_dma_dispatch(dev, address, length); - gamma_free_buffer(dev, dma->this_buffer); - dma->this_buffer = buf; - - atomic_inc(&dev->counts[7]); /* _DRM_STAT_DMA */ - atomic_add(length, &dev->counts[8]); /* _DRM_STAT_PRIMARY */ - - if (!buf->while_locked && !dev->context_flag && !locked) { - if (gamma_lock_free(dev, &dev->lock.hw_lock->lock, - DRM_KERNEL_CONTEXT)) { - DRM_ERROR("\n"); - } - } -cleanup: - - clear_bit(0, &dev->dma_flag); - - - return retcode; -} - -static void gamma_dma_timer_bh(unsigned long dev) -{ - gamma_dma_schedule((drm_device_t *)dev, 0); -} - -void gamma_irq_immediate_bh(void *dev) -{ - gamma_dma_schedule(dev, 0); -} - -int gamma_dma_schedule(drm_device_t *dev, int locked) -{ - int next; - drm_queue_t *q; - drm_buf_t *buf; - int retcode = 0; - int processed = 0; - int missed; - int expire = 20; - drm_device_dma_t *dma = dev->dma; - - if (test_and_set_bit(0, &dev->interrupt_flag)) { - /* Not reentrant */ - atomic_inc(&dev->counts[10]); /* _DRM_STAT_MISSED */ - return -EBUSY; - } - missed = atomic_read(&dev->counts[10]); - - -again: - if (dev->context_flag) { - clear_bit(0, &dev->interrupt_flag); - return -EBUSY; - } - if (dma->next_buffer) { - /* Unsent buffer that was previously - selected, but that couldn't be sent - because the lock could not be obtained - or the DMA engine wasn't ready. Try - again. 
*/ - if (!(retcode = gamma_do_dma(dev, locked))) ++processed; - } else { - do { - next = gamma_select_queue(dev, gamma_dma_timer_bh); - if (next >= 0) { - q = dev->queuelist[next]; - buf = gamma_waitlist_get(&q->waitlist); - dma->next_buffer = buf; - dma->next_queue = q; - if (buf && buf->list == DRM_LIST_RECLAIM) { - gamma_clear_next_buffer(dev); - gamma_free_buffer(dev, buf); - } - } - } while (next >= 0 && !dma->next_buffer); - if (dma->next_buffer) { - if (!(retcode = gamma_do_dma(dev, locked))) { - ++processed; - } - } - } - - if (--expire) { - if (missed != atomic_read(&dev->counts[10])) { - if (gamma_dma_is_ready(dev)) goto again; - } - if (processed && gamma_dma_is_ready(dev)) { - processed = 0; - goto again; - } - } - - clear_bit(0, &dev->interrupt_flag); - - return retcode; -} - -static int gamma_dma_priority(struct file *filp, - drm_device_t *dev, drm_dma_t *d) -{ - unsigned long address; - unsigned long length; - int must_free = 0; - int retcode = 0; - int i; - int idx; - drm_buf_t *buf; - drm_buf_t *last_buf = NULL; - drm_device_dma_t *dma = dev->dma; - int *send_indices = NULL; - int *send_sizes = NULL; - - DECLARE_WAITQUEUE(entry, current); - - /* Turn off interrupt handling */ - while (test_and_set_bit(0, &dev->interrupt_flag)) { - schedule(); - if (signal_pending(current)) return -EINTR; - } - if (!(d->flags & _DRM_DMA_WHILE_LOCKED)) { - while (!gamma_lock_take(&dev->lock.hw_lock->lock, - DRM_KERNEL_CONTEXT)) { - schedule(); - if (signal_pending(current)) { - clear_bit(0, &dev->interrupt_flag); - return -EINTR; - } - } - ++must_free; - } - - send_indices = DRM(alloc)(d->send_count * sizeof(*send_indices), - DRM_MEM_DRIVER); - if (send_indices == NULL) - return -ENOMEM; - if (copy_from_user(send_indices, d->send_indices, - d->send_count * sizeof(*send_indices))) { - retcode = -EFAULT; - goto cleanup; - } - - send_sizes = DRM(alloc)(d->send_count * sizeof(*send_sizes), - DRM_MEM_DRIVER); - if (send_sizes == NULL) - return -ENOMEM; - if 
(copy_from_user(send_sizes, d->send_sizes, - d->send_count * sizeof(*send_sizes))) { - retcode = -EFAULT; - goto cleanup; - } - - for (i = 0; i < d->send_count; i++) { - idx = send_indices[i]; - if (idx < 0 || idx >= dma->buf_count) { - DRM_ERROR("Index %d (of %d max)\n", - send_indices[i], dma->buf_count - 1); - continue; - } - buf = dma->buflist[ idx ]; - if (buf->filp != filp) { - DRM_ERROR("Process %d using buffer not owned\n", - current->pid); - retcode = -EINVAL; - goto cleanup; - } - if (buf->list != DRM_LIST_NONE) { - DRM_ERROR("Process %d using buffer on list %d\n", - current->pid, buf->list); - retcode = -EINVAL; - goto cleanup; - } - /* This isn't a race condition on - buf->list, since our concern is the - buffer reclaim during the time the - process closes the /dev/drm? handle, so - it can't also be doing DMA. */ - buf->list = DRM_LIST_PRIO; - buf->used = send_sizes[i]; - buf->context = d->context; - buf->while_locked = d->flags & _DRM_DMA_WHILE_LOCKED; - address = (unsigned long)buf->address; - length = buf->used; - if (!length) { - DRM_ERROR("0 length buffer\n"); - } - if (buf->pending) { - DRM_ERROR("Sending pending buffer:" - " buffer %d, offset %d\n", - send_indices[i], i); - retcode = -EINVAL; - goto cleanup; - } - if (buf->waiting) { - DRM_ERROR("Sending waiting buffer:" - " buffer %d, offset %d\n", - send_indices[i], i); - retcode = -EINVAL; - goto cleanup; - } - buf->pending = 1; - - if (dev->last_context != buf->context - && !(dev->queuelist[buf->context]->flags - & _DRM_CONTEXT_PRESERVED)) { - add_wait_queue(&dev->context_wait, &entry); - current->state = TASK_INTERRUPTIBLE; - /* PRE: dev->last_context != buf->context */ - DRM(context_switch)(dev, dev->last_context, - buf->context); - /* POST: we will wait for the context - switch and will dispatch on a later call - when dev->last_context == buf->context. - NOTE WE HOLD THE LOCK THROUGHOUT THIS - TIME! 
*/ - schedule(); - current->state = TASK_RUNNING; - remove_wait_queue(&dev->context_wait, &entry); - if (signal_pending(current)) { - retcode = -EINTR; - goto cleanup; - } - if (dev->last_context != buf->context) { - DRM_ERROR("Context mismatch: %d %d\n", - dev->last_context, - buf->context); - } - } - - gamma_dma_dispatch(dev, address, length); - atomic_inc(&dev->counts[9]); /* _DRM_STAT_SPECIAL */ - atomic_add(length, &dev->counts[8]); /* _DRM_STAT_PRIMARY */ - - if (last_buf) { - gamma_free_buffer(dev, last_buf); - } - last_buf = buf; - } - - -cleanup: - if (last_buf) { - gamma_dma_ready(dev); - gamma_free_buffer(dev, last_buf); - } - if (send_indices) - DRM(free)(send_indices, d->send_count * sizeof(*send_indices), - DRM_MEM_DRIVER); - if (send_sizes) - DRM(free)(send_sizes, d->send_count * sizeof(*send_sizes), - DRM_MEM_DRIVER); - - if (must_free && !dev->context_flag) { - if (gamma_lock_free(dev, &dev->lock.hw_lock->lock, - DRM_KERNEL_CONTEXT)) { - DRM_ERROR("\n"); - } - } - clear_bit(0, &dev->interrupt_flag); - return retcode; -} - -static int gamma_dma_send_buffers(struct file *filp, - drm_device_t *dev, drm_dma_t *d) -{ - DECLARE_WAITQUEUE(entry, current); - drm_buf_t *last_buf = NULL; - int retcode = 0; - drm_device_dma_t *dma = dev->dma; - int send_index; - - if (get_user(send_index, &d->send_indices[d->send_count-1])) - return -EFAULT; - - if (d->flags & _DRM_DMA_BLOCK) { - last_buf = dma->buflist[send_index]; - add_wait_queue(&last_buf->dma_wait, &entry); - } - - if ((retcode = gamma_dma_enqueue(filp, d))) { - if (d->flags & _DRM_DMA_BLOCK) - remove_wait_queue(&last_buf->dma_wait, &entry); - return retcode; - } - - gamma_dma_schedule(dev, 0); - - if (d->flags & _DRM_DMA_BLOCK) { - DRM_DEBUG("%d waiting\n", current->pid); - for (;;) { - current->state = TASK_INTERRUPTIBLE; - if (!last_buf->waiting && !last_buf->pending) - break; /* finished */ - schedule(); - if (signal_pending(current)) { - retcode = -EINTR; /* Can't restart */ - break; - } - } - 
current->state = TASK_RUNNING; - DRM_DEBUG("%d running\n", current->pid); - remove_wait_queue(&last_buf->dma_wait, &entry); - if (!retcode - || (last_buf->list==DRM_LIST_PEND && !last_buf->pending)) { - if (!waitqueue_active(&last_buf->dma_wait)) { - gamma_free_buffer(dev, last_buf); - } - } - if (retcode) { - DRM_ERROR("ctx%d w%d p%d c%ld i%d l%d pid:%d\n", - d->context, - last_buf->waiting, - last_buf->pending, - (long)DRM_WAITCOUNT(dev, d->context), - last_buf->idx, - last_buf->list, - current->pid); - } - } - return retcode; -} - -int gamma_dma(struct inode *inode, struct file *filp, unsigned int cmd, - unsigned long arg) -{ - drm_file_t *priv = filp->private_data; - drm_device_t *dev = priv->dev; - drm_device_dma_t *dma = dev->dma; - int retcode = 0; - drm_dma_t __user *argp = (void __user *)arg; - drm_dma_t d; - - if (copy_from_user(&d, argp, sizeof(d))) - return -EFAULT; - - if (d.send_count < 0 || d.send_count > dma->buf_count) { - DRM_ERROR("Process %d trying to send %d buffers (of %d max)\n", - current->pid, d.send_count, dma->buf_count); - return -EINVAL; - } - - if (d.request_count < 0 || d.request_count > dma->buf_count) { - DRM_ERROR("Process %d trying to get %d buffers (of %d max)\n", - current->pid, d.request_count, dma->buf_count); - return -EINVAL; - } - - if (d.send_count) { - if (d.flags & _DRM_DMA_PRIORITY) - retcode = gamma_dma_priority(filp, dev, &d); - else - retcode = gamma_dma_send_buffers(filp, dev, &d); - } - - d.granted_count = 0; - - if (!retcode && d.request_count) { - retcode = gamma_dma_get_buffers(filp, &d); - } - - DRM_DEBUG("%d returning, granted = %d\n", - current->pid, d.granted_count); - if (copy_to_user(argp, &d, sizeof(d))) - return -EFAULT; - - return retcode; -} - -/* ============================================================= - * DMA initialization, cleanup - */ - -static int gamma_do_init_dma( drm_device_t *dev, drm_gamma_init_t *init ) -{ - drm_gamma_private_t *dev_priv; - drm_device_dma_t *dma = dev->dma; - drm_buf_t 
*buf; - int i; - struct list_head *list; - unsigned long *pgt; - - DRM_DEBUG( "%s\n", __FUNCTION__ ); - - dev_priv = DRM(alloc)( sizeof(drm_gamma_private_t), - DRM_MEM_DRIVER ); - if ( !dev_priv ) - return -ENOMEM; - - dev->dev_private = (void *)dev_priv; - - memset( dev_priv, 0, sizeof(drm_gamma_private_t) ); - - dev_priv->num_rast = init->num_rast; - - list_for_each(list, &dev->maplist->head) { - drm_map_list_t *r_list = list_entry(list, drm_map_list_t, head); - if( r_list->map && - r_list->map->type == _DRM_SHM && - r_list->map->flags & _DRM_CONTAINS_LOCK ) { - dev_priv->sarea = r_list->map; - break; - } - } - - dev_priv->mmio0 = drm_core_findmap(dev, init->mmio0); - dev_priv->mmio1 = drm_core_findmap(dev, init->mmio1); - dev_priv->mmio2 = drm_core_findmap(dev, init->mmio2); - dev_priv->mmio3 = drm_core_findmap(dev, init->mmio3); - - dev_priv->sarea_priv = (drm_gamma_sarea_t *) - ((u8 *)dev_priv->sarea->handle + - init->sarea_priv_offset); - - if (init->pcimode) { - buf = dma->buflist[GLINT_DRI_BUF_COUNT]; - pgt = buf->address; - - for (i = 0; i < GLINT_DRI_BUF_COUNT; i++) { - buf = dma->buflist[i]; - *pgt = virt_to_phys((void*)buf->address) | 0x07; - pgt++; - } - - buf = dma->buflist[GLINT_DRI_BUF_COUNT]; - } else { - dev->agp_buffer_map = drm_core_findmap(dev, init->buffers_offset); - drm_core_ioremap( dev->agp_buffer_map, dev); - - buf = dma->buflist[GLINT_DRI_BUF_COUNT]; - pgt = buf->address; - - for (i = 0; i < GLINT_DRI_BUF_COUNT; i++) { - buf = dma->buflist[i]; - *pgt = (unsigned long)buf->address + 0x07; - pgt++; - } - - buf = dma->buflist[GLINT_DRI_BUF_COUNT]; - - while (GAMMA_READ(GAMMA_INFIFOSPACE) < 1); - GAMMA_WRITE( GAMMA_GDMACONTROL, 0xe); - } - while (GAMMA_READ(GAMMA_INFIFOSPACE) < 2); - GAMMA_WRITE( GAMMA_PAGETABLEADDR, virt_to_phys((void*)buf->address) ); - GAMMA_WRITE( GAMMA_PAGETABLELENGTH, 2 ); - - return 0; -} - -int gamma_do_cleanup_dma( drm_device_t *dev ) -{ - DRM_DEBUG( "%s\n", __FUNCTION__ ); - - /* Make sure interrupts are disabled 
here because the uninstall ioctl - * may not have been called from userspace and after dev_private - * is freed, it's too late. - */ - if (drm_core_check_feature(dev, DRIVER_HAVE_IRQ)) - if ( dev->irq_enabled ) - DRM(irq_uninstall)(dev); - - if ( dev->dev_private ) { - - if ( dev->agp_buffer_map != NULL ) - drm_core_ioremapfree( dev->agp_buffer_map, dev ); - - DRM(free)( dev->dev_private, sizeof(drm_gamma_private_t), - DRM_MEM_DRIVER ); - dev->dev_private = NULL; - } - - return 0; -} - -int gamma_dma_init( struct inode *inode, struct file *filp, - unsigned int cmd, unsigned long arg ) -{ - drm_file_t *priv = filp->private_data; - drm_device_t *dev = priv->dev; - drm_gamma_init_t init; - - LOCK_TEST_WITH_RETURN( dev, filp ); - - if ( copy_from_user( &init, (drm_gamma_init_t __user *)arg, sizeof(init) ) ) - return -EFAULT; - - switch ( init.func ) { - case GAMMA_INIT_DMA: - return gamma_do_init_dma( dev, &init ); - case GAMMA_CLEANUP_DMA: - return gamma_do_cleanup_dma( dev ); - } - - return -EINVAL; -} - -static int gamma_do_copy_dma( drm_device_t *dev, drm_gamma_copy_t *copy ) -{ - drm_device_dma_t *dma = dev->dma; - unsigned int *screenbuf; - - DRM_DEBUG( "%s\n", __FUNCTION__ ); - - /* We've DRM_RESTRICTED this DMA buffer */ - - screenbuf = dma->buflist[ GLINT_DRI_BUF_COUNT + 1 ]->address; - -#if 0 - *buffer++ = 0x180; /* Tag (FilterMode) */ - *buffer++ = 0x200; /* Allow FBColor through */ - *buffer++ = 0x53B; /* Tag */ - *buffer++ = copy->Pitch; - *buffer++ = 0x53A; /* Tag */ - *buffer++ = copy->SrcAddress; - *buffer++ = 0x539; /* Tag */ - *buffer++ = copy->WidthHeight; /* Initiates transfer */ - *buffer++ = 0x53C; /* Tag - DMAOutputAddress */ - *buffer++ = virt_to_phys((void*)screenbuf); - *buffer++ = 0x53D; /* Tag - DMAOutputCount */ - *buffer++ = copy->Count; /* Reads HostOutFifo BLOCKS until ..*/ - - /* Data now sitting in dma->buflist[ GLINT_DRI_BUF_COUNT + 1 ] */ - /* Now put it back to the screen */ - - *buffer++ = 0x180; /* Tag (FilterMode) */ - *buffer++ 
= 0x400; /* Allow Sync through */ - *buffer++ = 0x538; /* Tag - DMARectangleReadTarget */ - *buffer++ = 0x155; /* FBSourceData | count */ - *buffer++ = 0x537; /* Tag */ - *buffer++ = copy->Pitch; - *buffer++ = 0x536; /* Tag */ - *buffer++ = copy->DstAddress; - *buffer++ = 0x535; /* Tag */ - *buffer++ = copy->WidthHeight; /* Initiates transfer */ - *buffer++ = 0x530; /* Tag - DMAAddr */ - *buffer++ = virt_to_phys((void*)screenbuf); - *buffer++ = 0x531; - *buffer++ = copy->Count; /* initiates DMA transfer of color data */ -#endif - - /* need to dispatch it now */ - - return 0; -} - -int gamma_dma_copy( struct inode *inode, struct file *filp, - unsigned int cmd, unsigned long arg ) -{ - drm_file_t *priv = filp->private_data; - drm_device_t *dev = priv->dev; - drm_gamma_copy_t copy; - - if ( copy_from_user( ©, (drm_gamma_copy_t __user *)arg, sizeof(copy) ) ) - return -EFAULT; - - return gamma_do_copy_dma( dev, © ); -} - -/* ============================================================= - * Per Context SAREA Support - */ - -int gamma_getsareactx(struct inode *inode, struct file *filp, - unsigned int cmd, unsigned long arg) -{ - drm_file_t *priv = filp->private_data; - drm_device_t *dev = priv->dev; - drm_ctx_priv_map_t __user *argp = (void __user *)arg; - drm_ctx_priv_map_t request; - drm_map_t *map; - - if (copy_from_user(&request, argp, sizeof(request))) - return -EFAULT; - - down(&dev->struct_sem); - if ((int)request.ctx_id >= dev->max_context) { - up(&dev->struct_sem); - return -EINVAL; - } - - map = dev->context_sareas[request.ctx_id]; - up(&dev->struct_sem); - - request.handle = map->handle; - if (copy_to_user(argp, &request, sizeof(request))) - return -EFAULT; - return 0; -} - -int gamma_setsareactx(struct inode *inode, struct file *filp, - unsigned int cmd, unsigned long arg) -{ - drm_file_t *priv = filp->private_data; - drm_device_t *dev = priv->dev; - drm_ctx_priv_map_t request; - drm_map_t *map = NULL; - drm_map_list_t *r_list; - struct list_head *list; - - if 
(copy_from_user(&request, - (drm_ctx_priv_map_t __user *)arg, - sizeof(request))) - return -EFAULT; - - down(&dev->struct_sem); - r_list = NULL; - list_for_each(list, &dev->maplist->head) { - r_list = list_entry(list, drm_map_list_t, head); - if(r_list->map && - r_list->map->handle == request.handle) break; - } - if (list == &(dev->maplist->head)) { - up(&dev->struct_sem); - return -EINVAL; - } - map = r_list->map; - up(&dev->struct_sem); - - if (!map) return -EINVAL; - - down(&dev->struct_sem); - if ((int)request.ctx_id >= dev->max_context) { - up(&dev->struct_sem); - return -EINVAL; - } - dev->context_sareas[request.ctx_id] = map; - up(&dev->struct_sem); - return 0; -} - -void gamma_driver_irq_preinstall( drm_device_t *dev ) { - drm_gamma_private_t *dev_priv = - (drm_gamma_private_t *)dev->dev_private; - - while(GAMMA_READ(GAMMA_INFIFOSPACE) < 2) - cpu_relax(); - - GAMMA_WRITE( GAMMA_GCOMMANDMODE, 0x00000004 ); - GAMMA_WRITE( GAMMA_GDMACONTROL, 0x00000000 ); -} - -void gamma_driver_irq_postinstall( drm_device_t *dev ) { - drm_gamma_private_t *dev_priv = - (drm_gamma_private_t *)dev->dev_private; - - while(GAMMA_READ(GAMMA_INFIFOSPACE) < 3) - cpu_relax(); - - GAMMA_WRITE( GAMMA_GINTENABLE, 0x00002001 ); - GAMMA_WRITE( GAMMA_COMMANDINTENABLE, 0x00000008 ); - GAMMA_WRITE( GAMMA_GDELAYTIMER, 0x00039090 ); -} - -void gamma_driver_irq_uninstall( drm_device_t *dev ) { - drm_gamma_private_t *dev_priv = - (drm_gamma_private_t *)dev->dev_private; - if (!dev_priv) - return; - - while(GAMMA_READ(GAMMA_INFIFOSPACE) < 3) - cpu_relax(); - - GAMMA_WRITE( GAMMA_GDELAYTIMER, 0x00000000 ); - GAMMA_WRITE( GAMMA_COMMANDINTENABLE, 0x00000000 ); - GAMMA_WRITE( GAMMA_GINTENABLE, 0x00000000 ); -} - -extern drm_ioctl_desc_t DRM(ioctls)[]; - -static int gamma_driver_preinit(drm_device_t *dev) -{ - /* reset the finish ioctl */ - DRM(ioctls)[DRM_IOCTL_NR(DRM_IOCTL_FINISH)].func = DRM(finish); - return 0; -} - -static void gamma_driver_pretakedown(drm_device_t *dev) -{ - 
gamma_do_cleanup_dma(dev); -} - -static void gamma_driver_dma_ready(drm_device_t *dev) -{ - gamma_dma_ready(dev); -} - -static int gamma_driver_dma_quiescent(drm_device_t *dev) -{ - drm_gamma_private_t *dev_priv = ( - drm_gamma_private_t *)dev->dev_private; - if (dev_priv->num_rast == 2) - gamma_dma_quiescent_dual(dev); - else gamma_dma_quiescent_single(dev); - return 0; -} - -void gamma_driver_register_fns(drm_device_t *dev) -{ - dev->driver_features = DRIVER_USE_AGP | DRIVER_USE_MTRR | DRIVER_PCI_DMA | DRIVER_HAVE_DMA | DRIVER_HAVE_IRQ; - DRM(fops).read = gamma_fops_read; - DRM(fops).poll = gamma_fops_poll; - dev->driver.preinit = gamma_driver_preinit; - dev->driver.pretakedown = gamma_driver_pretakedown; - dev->driver.dma_ready = gamma_driver_dma_ready; - dev->driver.dma_quiescent = gamma_driver_dma_quiescent; - dev->driver.dma_flush_block_and_flush = gamma_flush_block_and_flush; - dev->driver.dma_flush_unblock = gamma_flush_unblock; -} diff --git a/drivers/char/drm/gamma_drm.h b/drivers/char/drm/gamma_drm.h deleted file mode 100644 index 20819ded0e15..000000000000 --- a/drivers/char/drm/gamma_drm.h +++ /dev/null @@ -1,90 +0,0 @@ -#ifndef _GAMMA_DRM_H_ -#define _GAMMA_DRM_H_ - -typedef struct _drm_gamma_tex_region { - unsigned char next, prev; /* indices to form a circular LRU */ - unsigned char in_use; /* owned by a client, or free? */ - int age; /* tracked by clients to update local LRU's */ -} drm_gamma_tex_region_t; - -typedef struct { - unsigned int GDeltaMode; - unsigned int GDepthMode; - unsigned int GGeometryMode; - unsigned int GTransformMode; -} drm_gamma_context_regs_t; - -typedef struct _drm_gamma_sarea { - drm_gamma_context_regs_t context_state; - - unsigned int dirty; - - - /* Maintain an LRU of contiguous regions of texture space. If - * you think you own a region of texture memory, and it has an - * age different to the one you set, then you are mistaken and - * it has been stolen by another client. 
If global texAge - * hasn't changed, there is no need to walk the list. - * - * These regions can be used as a proxy for the fine-grained - * texture information of other clients - by maintaining them - * in the same lru which is used to age their own textures, - * clients have an approximate lru for the whole of global - * texture space, and can make informed decisions as to which - * areas to kick out. There is no need to choose whether to - * kick out your own texture or someone else's - simply eject - * them all in LRU order. - */ - -#define GAMMA_NR_TEX_REGIONS 64 - drm_gamma_tex_region_t texList[GAMMA_NR_TEX_REGIONS+1]; - /* Last elt is sentinal */ - int texAge; /* last time texture was uploaded */ - int last_enqueue; /* last time a buffer was enqueued */ - int last_dispatch; /* age of the most recently dispatched buffer */ - int last_quiescent; /* */ - int ctxOwner; /* last context to upload state */ - - int vertex_prim; -} drm_gamma_sarea_t; - -/* WARNING: If you change any of these defines, make sure to change the - * defines in the Xserver file (xf86drmGamma.h) - */ - -/* Gamma specific ioctls - * The device specific ioctl range is 0x40 to 0x79. 
- */ -#define DRM_IOCTL_GAMMA_INIT DRM_IOW( 0x40, drm_gamma_init_t) -#define DRM_IOCTL_GAMMA_COPY DRM_IOW( 0x41, drm_gamma_copy_t) - -typedef struct drm_gamma_copy { - unsigned int DMAOutputAddress; - unsigned int DMAOutputCount; - unsigned int DMAReadGLINTSource; - unsigned int DMARectangleWriteAddress; - unsigned int DMARectangleWriteLinePitch; - unsigned int DMARectangleWrite; - unsigned int DMARectangleReadAddress; - unsigned int DMARectangleReadLinePitch; - unsigned int DMARectangleRead; - unsigned int DMARectangleReadTarget; -} drm_gamma_copy_t; - -typedef struct drm_gamma_init { - enum { - GAMMA_INIT_DMA = 0x01, - GAMMA_CLEANUP_DMA = 0x02 - } func; - - int sarea_priv_offset; - int pcimode; - unsigned int mmio0; - unsigned int mmio1; - unsigned int mmio2; - unsigned int mmio3; - unsigned int buffers_offset; - int num_rast; -} drm_gamma_init_t; - -#endif /* _GAMMA_DRM_H_ */ diff --git a/drivers/char/drm/gamma_drv.c b/drivers/char/drm/gamma_drv.c deleted file mode 100644 index e7e64b62792a..000000000000 --- a/drivers/char/drm/gamma_drv.c +++ /dev/null @@ -1,59 +0,0 @@ -/* gamma.c -- 3dlabs GMX 2000 driver -*- linux-c -*- - * Created: Mon Jan 4 08:58:31 1999 by faith@precisioninsight.com - * - * Copyright 1999 Precision Insight, Inc., Cedar Park, Texas. - * Copyright 2000 VA Linux Systems, Inc., Sunnyvale, California. - * All Rights Reserved. 
- * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. - * - * Authors: - * Rickard E. (Rik) Faith - * Gareth Hughes - */ - -#include -#include "gamma.h" -#include "drmP.h" -#include "drm.h" -#include "gamma_drm.h" -#include "gamma_drv.h" - -#include "drm_auth.h" -#include "drm_agpsupport.h" -#include "drm_bufs.h" -#include "gamma_context.h" /* NOTE! 
*/ -#include "drm_dma.h" -#include "gamma_old_dma.h" /* NOTE */ -#include "drm_drawable.h" -#include "drm_drv.h" - -#include "drm_fops.h" -#include "drm_init.h" -#include "drm_ioctl.h" -#include "drm_irq.h" -#include "gamma_lists.h" /* NOTE */ -#include "drm_lock.h" -#include "gamma_lock.h" /* NOTE */ -#include "drm_memory.h" -#include "drm_proc.h" -#include "drm_vm.h" -#include "drm_stub.h" -#include "drm_scatter.h" diff --git a/drivers/char/drm/gamma_drv.h b/drivers/char/drm/gamma_drv.h deleted file mode 100644 index 146fcc6253cd..000000000000 --- a/drivers/char/drm/gamma_drv.h +++ /dev/null @@ -1,147 +0,0 @@ -/* gamma_drv.h -- Private header for 3dlabs GMX 2000 driver -*- linux-c -*- - * Created: Mon Jan 4 10:05:05 1999 by faith@precisioninsight.com - * - * Copyright 1999 Precision Insight, Inc., Cedar Park, Texas. - * Copyright 2000 VA Linux Systems, Inc., Sunnyvale, California. - * All rights reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL - * PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. - * - * Authors: - * Rickard E. (Rik) Faith - * - */ - -#ifndef _GAMMA_DRV_H_ -#define _GAMMA_DRV_H_ - -typedef struct drm_gamma_private { - drm_gamma_sarea_t *sarea_priv; - drm_map_t *sarea; - drm_map_t *mmio0; - drm_map_t *mmio1; - drm_map_t *mmio2; - drm_map_t *mmio3; - int num_rast; -} drm_gamma_private_t; - - /* gamma_dma.c */ -extern int gamma_dma_init( struct inode *inode, struct file *filp, - unsigned int cmd, unsigned long arg ); -extern int gamma_dma_copy( struct inode *inode, struct file *filp, - unsigned int cmd, unsigned long arg ); - -extern int gamma_do_cleanup_dma( drm_device_t *dev ); -extern void gamma_dma_ready(drm_device_t *dev); -extern void gamma_dma_quiescent_single(drm_device_t *dev); -extern void gamma_dma_quiescent_dual(drm_device_t *dev); - - /* gamma_dma.c */ -extern int gamma_dma_schedule(drm_device_t *dev, int locked); -extern int gamma_dma(struct inode *inode, struct file *filp, - unsigned int cmd, unsigned long arg); -extern int gamma_find_devices(void); -extern int gamma_found(void); - -/* Gamma-specific code pulled from drm_fops.h: - */ -extern int DRM(finish)(struct inode *inode, struct file *filp, - unsigned int cmd, unsigned long arg); -extern int DRM(flush_unblock)(drm_device_t *dev, int context, - drm_lock_flags_t flags); -extern int DRM(flush_block_and_flush)(drm_device_t *dev, int context, - drm_lock_flags_t flags); - -/* Gamma-specific code pulled from drm_dma.h: - */ -extern void DRM(clear_next_buffer)(drm_device_t *dev); -extern int DRM(select_queue)(drm_device_t *dev, - void (*wrapper)(unsigned long)); -extern int DRM(dma_enqueue)(struct file *filp, drm_dma_t *dma); -extern int DRM(dma_get_buffers)(struct file *filp, drm_dma_t *dma); - - -/* 
Gamma-specific code pulled from drm_lists.h (now renamed gamma_lists.h): - */ -extern int DRM(waitlist_create)(drm_waitlist_t *bl, int count); -extern int DRM(waitlist_destroy)(drm_waitlist_t *bl); -extern int DRM(waitlist_put)(drm_waitlist_t *bl, drm_buf_t *buf); -extern drm_buf_t *DRM(waitlist_get)(drm_waitlist_t *bl); -extern int DRM(freelist_create)(drm_freelist_t *bl, int count); -extern int DRM(freelist_destroy)(drm_freelist_t *bl); -extern int DRM(freelist_put)(drm_device_t *dev, drm_freelist_t *bl, - drm_buf_t *buf); -extern drm_buf_t *DRM(freelist_get)(drm_freelist_t *bl, int block); - -/* externs for gamma changes to the ops */ -extern struct file_operations DRM(fops); -extern unsigned int gamma_fops_poll(struct file *filp, struct poll_table_struct *wait); -extern ssize_t gamma_fops_read(struct file *filp, char __user *buf, size_t count, loff_t *off); - - -#define GLINT_DRI_BUF_COUNT 256 - -#define GAMMA_OFF(reg) \ - ((reg < 0x1000) \ - ? reg \ - : ((reg < 0x10000) \ - ? (reg - 0x1000) \ - : ((reg < 0x11000) \ - ? (reg - 0x10000) \ - : (reg - 0x11000)))) - -#define GAMMA_BASE(reg) ((unsigned long) \ - ((reg < 0x1000) ? dev_priv->mmio0->handle : \ - ((reg < 0x10000) ? dev_priv->mmio1->handle : \ - ((reg < 0x11000) ? 
dev_priv->mmio2->handle : \ - dev_priv->mmio3->handle)))) -#define GAMMA_ADDR(reg) (GAMMA_BASE(reg) + GAMMA_OFF(reg)) -#define GAMMA_DEREF(reg) *(__volatile__ int *)GAMMA_ADDR(reg) -#define GAMMA_READ(reg) GAMMA_DEREF(reg) -#define GAMMA_WRITE(reg,val) do { GAMMA_DEREF(reg) = val; } while (0) - -#define GAMMA_BROADCASTMASK 0x9378 -#define GAMMA_COMMANDINTENABLE 0x0c48 -#define GAMMA_DMAADDRESS 0x0028 -#define GAMMA_DMACOUNT 0x0030 -#define GAMMA_FILTERMODE 0x8c00 -#define GAMMA_GCOMMANDINTFLAGS 0x0c50 -#define GAMMA_GCOMMANDMODE 0x0c40 -#define GAMMA_QUEUED_DMA_MODE 1<<1 -#define GAMMA_GCOMMANDSTATUS 0x0c60 -#define GAMMA_GDELAYTIMER 0x0c38 -#define GAMMA_GDMACONTROL 0x0060 -#define GAMMA_USE_AGP 1<<1 -#define GAMMA_GINTENABLE 0x0808 -#define GAMMA_GINTFLAGS 0x0810 -#define GAMMA_INFIFOSPACE 0x0018 -#define GAMMA_OUTFIFOWORDS 0x0020 -#define GAMMA_OUTPUTFIFO 0x2000 -#define GAMMA_SYNC 0x8c40 -#define GAMMA_SYNC_TAG 0x0188 -#define GAMMA_PAGETABLEADDR 0x0C00 -#define GAMMA_PAGETABLELENGTH 0x0C08 - -#define GAMMA_PASSTHROUGH 0x1FE -#define GAMMA_DMAADDRTAG 0x530 -#define GAMMA_DMACOUNTTAG 0x531 -#define GAMMA_COMMANDINTTAG 0x532 - -#endif diff --git a/drivers/char/drm/gamma_lists.h b/drivers/char/drm/gamma_lists.h deleted file mode 100644 index 2d93f412b96b..000000000000 --- a/drivers/char/drm/gamma_lists.h +++ /dev/null @@ -1,215 +0,0 @@ -/* drm_lists.h -- Buffer list handling routines -*- linux-c -*- - * Created: Mon Apr 19 20:54:22 1999 by faith@valinux.com - * - * Copyright 1999 Precision Insight, Inc., Cedar Park, Texas. - * Copyright 2000 VA Linux Systems, Inc., Sunnyvale, California. - * All Rights Reserved. 
- * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * VA LINUX SYSTEMS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - * - * Authors: - * Rickard E. 
(Rik) Faith - * Gareth Hughes - */ - -#include "drmP.h" - - -int DRM(waitlist_create)(drm_waitlist_t *bl, int count) -{ - if (bl->count) return -EINVAL; - - bl->bufs = DRM(alloc)((bl->count + 2) * sizeof(*bl->bufs), - DRM_MEM_BUFLISTS); - - if(!bl->bufs) return -ENOMEM; - memset(bl->bufs, 0, sizeof(*bl->bufs)); - bl->count = count; - bl->rp = bl->bufs; - bl->wp = bl->bufs; - bl->end = &bl->bufs[bl->count+1]; - spin_lock_init(&bl->write_lock); - spin_lock_init(&bl->read_lock); - return 0; -} - -int DRM(waitlist_destroy)(drm_waitlist_t *bl) -{ - if (bl->rp != bl->wp) return -EINVAL; - if (bl->bufs) DRM(free)(bl->bufs, - (bl->count + 2) * sizeof(*bl->bufs), - DRM_MEM_BUFLISTS); - bl->count = 0; - bl->bufs = NULL; - bl->rp = NULL; - bl->wp = NULL; - bl->end = NULL; - return 0; -} - -int DRM(waitlist_put)(drm_waitlist_t *bl, drm_buf_t *buf) -{ - int left; - unsigned long flags; - - left = DRM_LEFTCOUNT(bl); - if (!left) { - DRM_ERROR("Overflow while adding buffer %d from filp %p\n", - buf->idx, buf->filp); - return -EINVAL; - } - buf->list = DRM_LIST_WAIT; - - spin_lock_irqsave(&bl->write_lock, flags); - *bl->wp = buf; - if (++bl->wp >= bl->end) bl->wp = bl->bufs; - spin_unlock_irqrestore(&bl->write_lock, flags); - - return 0; -} - -drm_buf_t *DRM(waitlist_get)(drm_waitlist_t *bl) -{ - drm_buf_t *buf; - unsigned long flags; - - spin_lock_irqsave(&bl->read_lock, flags); - buf = *bl->rp; - if (bl->rp == bl->wp) { - spin_unlock_irqrestore(&bl->read_lock, flags); - return NULL; - } - if (++bl->rp >= bl->end) bl->rp = bl->bufs; - spin_unlock_irqrestore(&bl->read_lock, flags); - - return buf; -} - -int DRM(freelist_create)(drm_freelist_t *bl, int count) -{ - atomic_set(&bl->count, 0); - bl->next = NULL; - init_waitqueue_head(&bl->waiting); - bl->low_mark = 0; - bl->high_mark = 0; - atomic_set(&bl->wfh, 0); - spin_lock_init(&bl->lock); - ++bl->initialized; - return 0; -} - -int DRM(freelist_destroy)(drm_freelist_t *bl) -{ - atomic_set(&bl->count, 0); - bl->next = NULL; - 
return 0; -} - -int DRM(freelist_put)(drm_device_t *dev, drm_freelist_t *bl, drm_buf_t *buf) -{ - drm_device_dma_t *dma = dev->dma; - - if (!dma) { - DRM_ERROR("No DMA support\n"); - return 1; - } - - if (buf->waiting || buf->pending || buf->list == DRM_LIST_FREE) { - DRM_ERROR("Freed buffer %d: w%d, p%d, l%d\n", - buf->idx, buf->waiting, buf->pending, buf->list); - } - if (!bl) return 1; - buf->list = DRM_LIST_FREE; - - spin_lock(&bl->lock); - buf->next = bl->next; - bl->next = buf; - spin_unlock(&bl->lock); - - atomic_inc(&bl->count); - if (atomic_read(&bl->count) > dma->buf_count) { - DRM_ERROR("%d of %d buffers free after addition of %d\n", - atomic_read(&bl->count), dma->buf_count, buf->idx); - return 1; - } - /* Check for high water mark */ - if (atomic_read(&bl->wfh) && atomic_read(&bl->count)>=bl->high_mark) { - atomic_set(&bl->wfh, 0); - wake_up_interruptible(&bl->waiting); - } - return 0; -} - -static drm_buf_t *DRM(freelist_try)(drm_freelist_t *bl) -{ - drm_buf_t *buf; - - if (!bl) return NULL; - - /* Get buffer */ - spin_lock(&bl->lock); - if (!bl->next) { - spin_unlock(&bl->lock); - return NULL; - } - buf = bl->next; - bl->next = bl->next->next; - spin_unlock(&bl->lock); - - atomic_dec(&bl->count); - buf->next = NULL; - buf->list = DRM_LIST_NONE; - if (buf->waiting || buf->pending) { - DRM_ERROR("Free buffer %d: w%d, p%d, l%d\n", - buf->idx, buf->waiting, buf->pending, buf->list); - } - - return buf; -} - -drm_buf_t *DRM(freelist_get)(drm_freelist_t *bl, int block) -{ - drm_buf_t *buf = NULL; - DECLARE_WAITQUEUE(entry, current); - - if (!bl || !bl->initialized) return NULL; - - /* Check for low water mark */ - if (atomic_read(&bl->count) <= bl->low_mark) /* Became low */ - atomic_set(&bl->wfh, 1); - if (atomic_read(&bl->wfh)) { - if (block) { - add_wait_queue(&bl->waiting, &entry); - for (;;) { - current->state = TASK_INTERRUPTIBLE; - if (!atomic_read(&bl->wfh) - && (buf = DRM(freelist_try)(bl))) break; - schedule(); - if (signal_pending(current)) 
break; - } - current->state = TASK_RUNNING; - remove_wait_queue(&bl->waiting, &entry); - } - return buf; - } - - return DRM(freelist_try)(bl); -} - diff --git a/drivers/char/drm/gamma_lock.h b/drivers/char/drm/gamma_lock.h deleted file mode 100644 index ddec67e4ed16..000000000000 --- a/drivers/char/drm/gamma_lock.h +++ /dev/null @@ -1,140 +0,0 @@ -/* lock.c -- IOCTLs for locking -*- linux-c -*- - * Created: Tue Feb 2 08:37:54 1999 by faith@valinux.com - * - * Copyright 1999 Precision Insight, Inc., Cedar Park, Texas. - * Copyright 2000 VA Linux Systems, Inc., Sunnyvale, California. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * VA LINUX SYSTEMS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - * - * Authors: - * Rickard E. 
(Rik) Faith - * Gareth Hughes - */ - - -/* Gamma-specific code extracted from drm_lock.h: - */ -static int DRM(flush_queue)(drm_device_t *dev, int context) -{ - DECLARE_WAITQUEUE(entry, current); - int ret = 0; - drm_queue_t *q = dev->queuelist[context]; - - DRM_DEBUG("\n"); - - atomic_inc(&q->use_count); - if (atomic_read(&q->use_count) > 1) { - atomic_inc(&q->block_write); - add_wait_queue(&q->flush_queue, &entry); - atomic_inc(&q->block_count); - for (;;) { - current->state = TASK_INTERRUPTIBLE; - if (!DRM_BUFCOUNT(&q->waitlist)) break; - schedule(); - if (signal_pending(current)) { - ret = -EINTR; /* Can't restart */ - break; - } - } - atomic_dec(&q->block_count); - current->state = TASK_RUNNING; - remove_wait_queue(&q->flush_queue, &entry); - } - atomic_dec(&q->use_count); - - /* NOTE: block_write is still incremented! - Use drm_flush_unlock_queue to decrement. */ - return ret; -} - -static int DRM(flush_unblock_queue)(drm_device_t *dev, int context) -{ - drm_queue_t *q = dev->queuelist[context]; - - DRM_DEBUG("\n"); - - atomic_inc(&q->use_count); - if (atomic_read(&q->use_count) > 1) { - if (atomic_read(&q->block_write)) { - atomic_dec(&q->block_write); - wake_up_interruptible(&q->write_queue); - } - } - atomic_dec(&q->use_count); - return 0; -} - -int DRM(flush_block_and_flush)(drm_device_t *dev, int context, - drm_lock_flags_t flags) -{ - int ret = 0; - int i; - - DRM_DEBUG("\n"); - - if (flags & _DRM_LOCK_FLUSH) { - ret = DRM(flush_queue)(dev, DRM_KERNEL_CONTEXT); - if (!ret) ret = DRM(flush_queue)(dev, context); - } - if (flags & _DRM_LOCK_FLUSH_ALL) { - for (i = 0; !ret && i < dev->queue_count; i++) { - ret = DRM(flush_queue)(dev, i); - } - } - return ret; -} - -int DRM(flush_unblock)(drm_device_t *dev, int context, drm_lock_flags_t flags) -{ - int ret = 0; - int i; - - DRM_DEBUG("\n"); - - if (flags & _DRM_LOCK_FLUSH) { - ret = DRM(flush_unblock_queue)(dev, DRM_KERNEL_CONTEXT); - if (!ret) ret = DRM(flush_unblock_queue)(dev, context); - } - if (flags & 
_DRM_LOCK_FLUSH_ALL) { - for (i = 0; !ret && i < dev->queue_count; i++) { - ret = DRM(flush_unblock_queue)(dev, i); - } - } - - return ret; -} - -int DRM(finish)(struct inode *inode, struct file *filp, unsigned int cmd, - unsigned long arg) -{ - drm_file_t *priv = filp->private_data; - drm_device_t *dev = priv->dev; - int ret = 0; - drm_lock_t lock; - - DRM_DEBUG("\n"); - - if (copy_from_user(&lock, (drm_lock_t __user *)arg, sizeof(lock))) - return -EFAULT; - ret = DRM(flush_block_and_flush)(dev, lock.context, lock.flags); - DRM(flush_unblock)(dev, lock.context, lock.flags); - return ret; -} diff --git a/drivers/char/drm/gamma_old_dma.h b/drivers/char/drm/gamma_old_dma.h deleted file mode 100644 index abdd454aab9f..000000000000 --- a/drivers/char/drm/gamma_old_dma.h +++ /dev/null @@ -1,313 +0,0 @@ -/* drm_dma.c -- DMA IOCTL and function support -*- linux-c -*- - * Created: Fri Mar 19 14:30:16 1999 by faith@valinux.com - * - * Copyright 1999, 2000 Precision Insight, Inc., Cedar Park, Texas. - * Copyright 2000 VA Linux Systems, Inc., Sunnyvale, California. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL - * VA LINUX SYSTEMS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - * - * Authors: - * Rickard E. (Rik) Faith - * Gareth Hughes - */ - - -/* Gamma-specific code pulled from drm_dma.h: - */ - -void DRM(clear_next_buffer)(drm_device_t *dev) -{ - drm_device_dma_t *dma = dev->dma; - - dma->next_buffer = NULL; - if (dma->next_queue && !DRM_BUFCOUNT(&dma->next_queue->waitlist)) { - wake_up_interruptible(&dma->next_queue->flush_queue); - } - dma->next_queue = NULL; -} - -int DRM(select_queue)(drm_device_t *dev, void (*wrapper)(unsigned long)) -{ - int i; - int candidate = -1; - int j = jiffies; - - if (!dev) { - DRM_ERROR("No device\n"); - return -1; - } - if (!dev->queuelist || !dev->queuelist[DRM_KERNEL_CONTEXT]) { - /* This only happens between the time the - interrupt is initialized and the time - the queues are initialized. */ - return -1; - } - - /* Doing "while locked" DMA? */ - if (DRM_WAITCOUNT(dev, DRM_KERNEL_CONTEXT)) { - return DRM_KERNEL_CONTEXT; - } - - /* If there are buffers on the last_context - queue, and we have not been executing - this context very long, continue to - execute this context. 
*/ - if (dev->last_switch <= j - && dev->last_switch + DRM_TIME_SLICE > j - && DRM_WAITCOUNT(dev, dev->last_context)) { - return dev->last_context; - } - - /* Otherwise, find a candidate */ - for (i = dev->last_checked + 1; i < dev->queue_count; i++) { - if (DRM_WAITCOUNT(dev, i)) { - candidate = dev->last_checked = i; - break; - } - } - - if (candidate < 0) { - for (i = 0; i < dev->queue_count; i++) { - if (DRM_WAITCOUNT(dev, i)) { - candidate = dev->last_checked = i; - break; - } - } - } - - if (wrapper - && candidate >= 0 - && candidate != dev->last_context - && dev->last_switch <= j - && dev->last_switch + DRM_TIME_SLICE > j) { - if (dev->timer.expires != dev->last_switch + DRM_TIME_SLICE) { - del_timer(&dev->timer); - dev->timer.function = wrapper; - dev->timer.data = (unsigned long)dev; - dev->timer.expires = dev->last_switch+DRM_TIME_SLICE; - add_timer(&dev->timer); - } - return -1; - } - - return candidate; -} - - -int DRM(dma_enqueue)(struct file *filp, drm_dma_t *d) -{ - drm_file_t *priv = filp->private_data; - drm_device_t *dev = priv->dev; - int i; - drm_queue_t *q; - drm_buf_t *buf; - int idx; - int while_locked = 0; - drm_device_dma_t *dma = dev->dma; - int *ind; - int err; - DECLARE_WAITQUEUE(entry, current); - - DRM_DEBUG("%d\n", d->send_count); - - if (d->flags & _DRM_DMA_WHILE_LOCKED) { - int context = dev->lock.hw_lock->lock; - - if (!_DRM_LOCK_IS_HELD(context)) { - DRM_ERROR("No lock held during \"while locked\"" - " request\n"); - return -EINVAL; - } - if (d->context != _DRM_LOCKING_CONTEXT(context) - && _DRM_LOCKING_CONTEXT(context) != DRM_KERNEL_CONTEXT) { - DRM_ERROR("Lock held by %d while %d makes" - " \"while locked\" request\n", - _DRM_LOCKING_CONTEXT(context), - d->context); - return -EINVAL; - } - q = dev->queuelist[DRM_KERNEL_CONTEXT]; - while_locked = 1; - } else { - q = dev->queuelist[d->context]; - } - - - atomic_inc(&q->use_count); - if (atomic_read(&q->block_write)) { - add_wait_queue(&q->write_queue, &entry); - 
atomic_inc(&q->block_count); - for (;;) { - current->state = TASK_INTERRUPTIBLE; - if (!atomic_read(&q->block_write)) break; - schedule(); - if (signal_pending(current)) { - atomic_dec(&q->use_count); - remove_wait_queue(&q->write_queue, &entry); - return -EINTR; - } - } - atomic_dec(&q->block_count); - current->state = TASK_RUNNING; - remove_wait_queue(&q->write_queue, &entry); - } - - ind = DRM(alloc)(d->send_count * sizeof(int), DRM_MEM_DRIVER); - if (!ind) - return -ENOMEM; - - if (copy_from_user(ind, d->send_indices, d->send_count * sizeof(int))) { - err = -EFAULT; - goto out; - } - - err = -EINVAL; - for (i = 0; i < d->send_count; i++) { - idx = ind[i]; - if (idx < 0 || idx >= dma->buf_count) { - DRM_ERROR("Index %d (of %d max)\n", - ind[i], dma->buf_count - 1); - goto out; - } - buf = dma->buflist[ idx ]; - if (buf->filp != filp) { - DRM_ERROR("Process %d using buffer not owned\n", - current->pid); - goto out; - } - if (buf->list != DRM_LIST_NONE) { - DRM_ERROR("Process %d using buffer %d on list %d\n", - current->pid, buf->idx, buf->list); - goto out; - } - buf->used = ind[i]; - buf->while_locked = while_locked; - buf->context = d->context; - if (!buf->used) { - DRM_ERROR("Queueing 0 length buffer\n"); - } - if (buf->pending) { - DRM_ERROR("Queueing pending buffer:" - " buffer %d, offset %d\n", - ind[i], i); - goto out; - } - if (buf->waiting) { - DRM_ERROR("Queueing waiting buffer:" - " buffer %d, offset %d\n", - ind[i], i); - goto out; - } - buf->waiting = 1; - if (atomic_read(&q->use_count) == 1 - || atomic_read(&q->finalization)) { - DRM(free_buffer)(dev, buf); - } else { - DRM(waitlist_put)(&q->waitlist, buf); - atomic_inc(&q->total_queued); - } - } - atomic_dec(&q->use_count); - - return 0; - -out: - DRM(free)(ind, d->send_count * sizeof(int), DRM_MEM_DRIVER); - atomic_dec(&q->use_count); - return err; -} - -static int DRM(dma_get_buffers_of_order)(struct file *filp, drm_dma_t *d, - int order) -{ - drm_file_t *priv = filp->private_data; - drm_device_t 
*dev = priv->dev; - int i; - drm_buf_t *buf; - drm_device_dma_t *dma = dev->dma; - - for (i = d->granted_count; i < d->request_count; i++) { - buf = DRM(freelist_get)(&dma->bufs[order].freelist, - d->flags & _DRM_DMA_WAIT); - if (!buf) break; - if (buf->pending || buf->waiting) { - DRM_ERROR("Free buffer %d in use: filp %p (w%d, p%d)\n", - buf->idx, - buf->filp, - buf->waiting, - buf->pending); - } - buf->filp = filp; - if (copy_to_user(&d->request_indices[i], - &buf->idx, - sizeof(buf->idx))) - return -EFAULT; - - if (copy_to_user(&d->request_sizes[i], - &buf->total, - sizeof(buf->total))) - return -EFAULT; - - ++d->granted_count; - } - return 0; -} - - -int DRM(dma_get_buffers)(struct file *filp, drm_dma_t *dma) -{ - int order; - int retcode = 0; - int tmp_order; - - order = DRM(order)(dma->request_size); - - dma->granted_count = 0; - retcode = DRM(dma_get_buffers_of_order)(filp, dma, order); - - if (dma->granted_count < dma->request_count - && (dma->flags & _DRM_DMA_SMALLER_OK)) { - for (tmp_order = order - 1; - !retcode - && dma->granted_count < dma->request_count - && tmp_order >= DRM_MIN_ORDER; - --tmp_order) { - - retcode = DRM(dma_get_buffers_of_order)(filp, dma, - tmp_order); - } - } - - if (dma->granted_count < dma->request_count - && (dma->flags & _DRM_DMA_LARGER_OK)) { - for (tmp_order = order + 1; - !retcode - && dma->granted_count < dma->request_count - && tmp_order <= DRM_MAX_ORDER; - ++tmp_order) { - - retcode = DRM(dma_get_buffers_of_order)(filp, dma, - tmp_order); - } - } - return 0; -} - From aa0ca6b4bb818406d4769edb9ff115500c8e4090 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Fri, 5 Aug 2005 23:09:14 +1000 Subject: [PATCH 061/584] drm: fix warning in drm_pci.c Signed-off-by: Adrian Bunk Signed-off-by: Dave Airlie --- drivers/char/drm/drm_pci.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/char/drm/drm_pci.c b/drivers/char/drm/drm_pci.c index 3e452e8967fa..09ed712c1a7f 100644 --- 
a/drivers/char/drm/drm_pci.c +++ b/drivers/char/drm/drm_pci.c @@ -50,7 +50,7 @@ drm_dma_handle_t *drm_pci_alloc(drm_device_t * dev, size_t size, size_t align, dma_addr_t maxaddr) { drm_dma_handle_t *dmah; -#if DRM_DEBUG_MEMORY +#ifdef DRM_DEBUG_MEMORY int area = DRM_MEM_DMA; spin_lock(&drm_mem_lock); @@ -81,7 +81,7 @@ drm_dma_handle_t *drm_pci_alloc(drm_device_t * dev, size_t size, size_t align, dmah->size = size; dmah->vaddr = pci_alloc_consistent(dev->pdev, size, &dmah->busaddr); -#if DRM_DEBUG_MEMORY +#ifdef DRM_DEBUG_MEMORY if (dmah->vaddr == NULL) { spin_lock(&drm_mem_lock); ++drm_mem_stats[area].fail_count; @@ -116,14 +116,14 @@ EXPORT_SYMBOL(drm_pci_alloc); void __drm_pci_free(drm_device_t * dev, drm_dma_handle_t *dmah) { -#if DRM_DEBUG_MEMORY +#ifdef DRM_DEBUG_MEMORY int area = DRM_MEM_DMA; int alloc_count; int free_count; #endif if (!dmah->vaddr) { -#if DRM_DEBUG_MEMORY +#ifdef DRM_DEBUG_MEMORY DRM_MEM_ERROR(area, "Attempt to free address 0\n"); #endif } else { @@ -131,7 +131,7 @@ __drm_pci_free(drm_device_t * dev, drm_dma_handle_t *dmah) dmah->busaddr); } -#if DRM_DEBUG_MEMORY +#ifdef DRM_DEBUG_MEMORY spin_lock(&drm_mem_lock); free_count = ++drm_mem_stats[area].free_count; alloc_count = drm_mem_stats[area].succeed_count; From d27c9b548ad79c14830c57355dbe3a35f970532a Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Sun, 7 Aug 2005 15:19:58 +1000 Subject: [PATCH 062/584] drm: remove version.h and any version checks.. This patch removes all the drm kernel conditionals from the kernel DRM tree. 
Signed-off-by: Dave Airlie --- drivers/char/drm/drmP.h | 34 --------------------------- drivers/char/drm/drm_bufs.c | 16 ------------- drivers/char/drm/drm_vm.c | 47 ------------------------------------- drivers/char/drm/i810_dma.c | 5 ---- drivers/char/drm/i830_dma.c | 5 ---- 5 files changed, 107 deletions(-) diff --git a/drivers/char/drm/drmP.h b/drivers/char/drm/drmP.h index 0a4358996970..6f98701dfe15 100644 --- a/drivers/char/drm/drmP.h +++ b/drivers/char/drm/drmP.h @@ -53,7 +53,6 @@ #include #include #include -#include #include #include /* For (un)lock_kernel */ #include @@ -161,36 +160,7 @@ #define pte_unmap(pte) #endif -#if LINUX_VERSION_CODE < KERNEL_VERSION(2,4,19) -static inline struct page * vmalloc_to_page(void * vmalloc_addr) -{ - unsigned long addr = (unsigned long) vmalloc_addr; - struct page *page = NULL; - pgd_t *pgd = pgd_offset_k(addr); - pmd_t *pmd; - pte_t *ptep, pte; - - if (!pgd_none(*pgd)) { - pmd = pmd_offset(pgd, addr); - if (!pmd_none(*pmd)) { - preempt_disable(); - ptep = pte_offset_map(pmd, addr); - pte = *ptep; - if (pte_present(pte)) - page = pte_page(pte); - pte_unmap(ptep); - preempt_enable(); - } - } - return page; -} -#endif - -#if LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0) -#define DRM_RPR_ARG(vma) -#else #define DRM_RPR_ARG(vma) vma, -#endif #define VM_OFFSET(vma) ((vma)->vm_pgoff << PAGE_SHIFT) @@ -746,11 +716,7 @@ typedef struct drm_device { int pci_slot; /**< PCI slot number */ int pci_func; /**< PCI function number */ #ifdef __alpha__ -#if LINUX_VERSION_CODE < KERNEL_VERSION(2,4,3) - struct pci_controler *hose; -#else struct pci_controller *hose; -#endif #endif drm_sg_mem_t *sg; /**< Scatter gather memory */ unsigned long *ctx_bitmap; /**< context bitmap */ diff --git a/drivers/char/drm/drm_bufs.c b/drivers/char/drm/drm_bufs.c index d1e0b106c261..b70802035d9e 100644 --- a/drivers/char/drm/drm_bufs.c +++ b/drivers/char/drm/drm_bufs.c @@ -1499,34 +1499,18 @@ int drm_mapbufs( struct inode *inode, struct file *filp, goto done; } 
-#if LINUX_VERSION_CODE <= 0x020402 - down( &current->mm->mmap_sem ); -#else down_write( &current->mm->mmap_sem ); -#endif virtual = do_mmap( filp, 0, map->size, PROT_READ | PROT_WRITE, MAP_SHARED, token ); -#if LINUX_VERSION_CODE <= 0x020402 - up( &current->mm->mmap_sem ); -#else up_write( &current->mm->mmap_sem ); -#endif } else { -#if LINUX_VERSION_CODE <= 0x020402 - down( &current->mm->mmap_sem ); -#else down_write( &current->mm->mmap_sem ); -#endif virtual = do_mmap( filp, 0, dma->byte_count, PROT_READ | PROT_WRITE, MAP_SHARED, 0 ); -#if LINUX_VERSION_CODE <= 0x020402 - up( &current->mm->mmap_sem ); -#else up_write( &current->mm->mmap_sem ); -#endif } if ( virtual > -1024UL ) { /* Real error */ diff --git a/drivers/char/drm/drm_vm.c b/drivers/char/drm/drm_vm.c index 99b5c86f7513..292fa7167b26 100644 --- a/drivers/char/drm/drm_vm.c +++ b/drivers/char/drm/drm_vm.c @@ -314,8 +314,6 @@ static __inline__ struct page *drm_do_vm_sg_nopage(struct vm_area_struct *vma, } -#if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,0) - static struct page *drm_vm_nopage(struct vm_area_struct *vma, unsigned long address, int *type) { @@ -344,35 +342,6 @@ static struct page *drm_vm_sg_nopage(struct vm_area_struct *vma, return drm_do_vm_sg_nopage(vma, address); } -#else /* LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,0) */ - -static struct page *drm_vm_nopage(struct vm_area_struct *vma, - unsigned long address, - int unused) { - return drm_do_vm_nopage(vma, address); -} - -static struct page *drm_vm_shm_nopage(struct vm_area_struct *vma, - unsigned long address, - int unused) { - return drm_do_vm_shm_nopage(vma, address); -} - -static struct page *drm_vm_dma_nopage(struct vm_area_struct *vma, - unsigned long address, - int unused) { - return drm_do_vm_dma_nopage(vma, address); -} - -static struct page *drm_vm_sg_nopage(struct vm_area_struct *vma, - unsigned long address, - int unused) { - return drm_do_vm_sg_nopage(vma, address); -} - -#endif - - /** AGP virtual memory operations */ static struct vm_operations_struct drm_vm_ops = { .nopage
= drm_vm_nopage, @@ -496,11 +465,7 @@ static int drm_mmap_dma(struct file *filp, struct vm_area_struct *vma) vma->vm_ops = &drm_vm_dma_ops; -#if LINUX_VERSION_CODE <= 0x02040e /* KERNEL_VERSION(2,4,14) */ - vma->vm_flags |= VM_LOCKED | VM_SHM; /* Don't swap */ -#else vma->vm_flags |= VM_RESERVED; /* Don't swap */ -#endif vma->vm_file = filp; /* Needed for drm_vm_open() */ drm_vm_open(vma); @@ -660,29 +625,17 @@ int drm_mmap(struct file *filp, struct vm_area_struct *vma) vma->vm_private_data = (void *)map; /* Don't let this area swap. Change when DRM_KERNEL advisory is supported. */ -#if LINUX_VERSION_CODE <= 0x02040e /* KERNEL_VERSION(2,4,14) */ - vma->vm_flags |= VM_LOCKED; -#else vma->vm_flags |= VM_RESERVED; -#endif break; case _DRM_SCATTER_GATHER: vma->vm_ops = &drm_vm_sg_ops; vma->vm_private_data = (void *)map; -#if LINUX_VERSION_CODE <= 0x02040e /* KERNEL_VERSION(2,4,14) */ - vma->vm_flags |= VM_LOCKED; -#else vma->vm_flags |= VM_RESERVED; -#endif break; default: return -EINVAL; /* This should never happen. 
*/ } -#if LINUX_VERSION_CODE <= 0x02040e /* KERNEL_VERSION(2,4,14) */ - vma->vm_flags |= VM_LOCKED | VM_SHM; /* Don't swap */ -#else vma->vm_flags |= VM_RESERVED; /* Don't swap */ -#endif vma->vm_file = filp; /* Needed for drm_vm_open() */ drm_vm_open(vma); diff --git a/drivers/char/drm/i810_dma.c b/drivers/char/drm/i810_dma.c index f9fd5abd774b..2f1659b96fd1 100644 --- a/drivers/char/drm/i810_dma.c +++ b/drivers/char/drm/i810_dma.c @@ -45,11 +45,6 @@ #define I810_BUF_UNMAPPED 0 #define I810_BUF_MAPPED 1 -#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,4,2) -#define down_write down -#define up_write up -#endif - static drm_buf_t *i810_freelist_get(drm_device_t *dev) { drm_device_dma_t *dma = dev->dma; diff --git a/drivers/char/drm/i830_dma.c b/drivers/char/drm/i830_dma.c index 697cefccd007..6f89d5796ef3 100644 --- a/drivers/char/drm/i830_dma.c +++ b/drivers/char/drm/i830_dma.c @@ -47,11 +47,6 @@ #define I830_BUF_UNMAPPED 0 #define I830_BUF_MAPPED 1 -#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,4,2) -#define down_write down -#define up_write up -#endif - static drm_buf_t *i830_freelist_get(drm_device_t *dev) { drm_device_dma_t *dma = dev->dma; From 282a16749ba63256bcdce2766817f46aaac4dc20 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Sun, 7 Aug 2005 15:43:54 +1000 Subject: [PATCH 063/584] drm: add savage driver Add driver for savage chipsets. 
From: Felix Kuehling Signed-off-by: Dave Airlie --- drivers/char/drm/Kconfig | 7 + drivers/char/drm/Makefile | 3 + drivers/char/drm/savage_bci.c | 1096 +++++++++++++++++++++++++++++ drivers/char/drm/savage_drm.h | 209 ++++++ drivers/char/drm/savage_drv.c | 112 +++ drivers/char/drm/savage_drv.h | 579 ++++++++++++++++ drivers/char/drm/savage_state.c | 1146 +++++++++++++++++++++++++++++++ 7 files changed, 3152 insertions(+) create mode 100644 drivers/char/drm/savage_bci.c create mode 100644 drivers/char/drm/savage_drm.h create mode 100644 drivers/char/drm/savage_drv.c create mode 100644 drivers/char/drm/savage_drv.h create mode 100644 drivers/char/drm/savage_state.c diff --git a/drivers/char/drm/Kconfig b/drivers/char/drm/Kconfig index f31b9706ef65..56ace9d5e2ae 100644 --- a/drivers/char/drm/Kconfig +++ b/drivers/char/drm/Kconfig @@ -96,3 +96,10 @@ config DRM_VIA Choose this option if you have a Via unichrome or compatible video chipset. If M is selected the module will be called via. +config DRM_SAVAGE + tristate "Savage video cards" + depends on DRM + help + Choose this option if you have a Savage3D/4/SuperSavage/Pro/Twister + chipset. If M is selected the module will be called savage. 
+ diff --git a/drivers/char/drm/Makefile b/drivers/char/drm/Makefile index 3f0cf8e9cc50..1945138cb8fb 100644 --- a/drivers/char/drm/Makefile +++ b/drivers/char/drm/Makefile @@ -17,6 +17,7 @@ i915-objs := i915_drv.o i915_dma.o i915_irq.o i915_mem.o radeon-objs := radeon_drv.o radeon_cp.o radeon_state.o radeon_mem.o radeon_irq.o ffb-objs := ffb_drv.o ffb_context.o sis-objs := sis_drv.o sis_ds.o sis_mm.o +savage-objs := savage_drv.o savage_bci.o savage_state.o via-objs := via_irq.o via_drv.o via_ds.o via_map.o via_mm.o via_dma.o via_verifier.o via_video.o ifeq ($(CONFIG_COMPAT),y) @@ -37,5 +38,7 @@ obj-$(CONFIG_DRM_I830) += i830.o obj-$(CONFIG_DRM_I915) += i915.o obj-$(CONFIG_DRM_FFB) += ffb.o obj-$(CONFIG_DRM_SIS) += sis.o +obj-$(CONFIG_DRM_SAVAGE)+= savage.o obj-$(CONFIG_DRM_VIA) +=via.o + diff --git a/drivers/char/drm/savage_bci.c b/drivers/char/drm/savage_bci.c new file mode 100644 index 000000000000..2fd40bac7c97 --- /dev/null +++ b/drivers/char/drm/savage_bci.c @@ -0,0 +1,1096 @@ +/* savage_bci.c -- BCI support for Savage + * + * Copyright 2004 Felix Kuehling + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sub license, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NON-INFRINGEMENT. 
IN NO EVENT SHALL FELIX KUEHLING BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF + * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ +#include "drmP.h" +#include "savage_drm.h" +#include "savage_drv.h" + +/* Need a long timeout for shadow status updates can take a while + * and so can waiting for events when the queue is full. */ +#define SAVAGE_DEFAULT_USEC_TIMEOUT 1000000 /* 1s */ +#define SAVAGE_EVENT_USEC_TIMEOUT 5000000 /* 5s */ +#define SAVAGE_FREELIST_DEBUG 0 + +static int +savage_bci_wait_fifo_shadow(drm_savage_private_t *dev_priv, unsigned int n) +{ + uint32_t mask = dev_priv->status_used_mask; + uint32_t threshold = dev_priv->bci_threshold_hi; + uint32_t status; + int i; + +#if SAVAGE_BCI_DEBUG + if (n > dev_priv->cob_size + SAVAGE_BCI_FIFO_SIZE - threshold) + DRM_ERROR("Trying to emit %d words " + "(more than guaranteed space in COB)\n", n); +#endif + + for (i = 0; i < SAVAGE_DEFAULT_USEC_TIMEOUT; i++) { + DRM_MEMORYBARRIER(); + status = dev_priv->status_ptr[0]; + if ((status & mask) < threshold) + return 0; + DRM_UDELAY(1); + } + +#if SAVAGE_BCI_DEBUG + DRM_ERROR("failed!\n"); + DRM_INFO(" status=0x%08x, threshold=0x%08x\n", status, threshold); +#endif + return DRM_ERR(EBUSY); +} + +static int +savage_bci_wait_fifo_s3d(drm_savage_private_t *dev_priv, unsigned int n) +{ + uint32_t maxUsed = dev_priv->cob_size + SAVAGE_BCI_FIFO_SIZE - n; + uint32_t status; + int i; + + for (i = 0; i < SAVAGE_DEFAULT_USEC_TIMEOUT; i++) { + status = SAVAGE_READ(SAVAGE_STATUS_WORD0); + if ((status & SAVAGE_FIFO_USED_MASK_S3D) <= maxUsed) + return 0; + DRM_UDELAY(1); + } + +#if SAVAGE_BCI_DEBUG + DRM_ERROR("failed!\n"); + DRM_INFO(" status=0x%08x\n", status); +#endif + return DRM_ERR(EBUSY); +} + +static int +savage_bci_wait_fifo_s4(drm_savage_private_t *dev_priv, unsigned int n) +{ + uint32_t maxUsed = dev_priv->cob_size + SAVAGE_BCI_FIFO_SIZE - n; + 
uint32_t status; + int i; + + for (i = 0; i < SAVAGE_DEFAULT_USEC_TIMEOUT; i++) { + status = SAVAGE_READ(SAVAGE_ALT_STATUS_WORD0); + if ((status & SAVAGE_FIFO_USED_MASK_S4) <= maxUsed) + return 0; + DRM_UDELAY(1); + } + +#if SAVAGE_BCI_DEBUG + DRM_ERROR("failed!\n"); + DRM_INFO(" status=0x%08x\n", status); +#endif + return DRM_ERR(EBUSY); +} + +/* + * Waiting for events. + * + * The BIOS resets the event tag to 0 on mode changes. Therefore we + * never emit 0 to the event tag. If we find a 0 event tag we know the + * BIOS stomped on it and return success assuming that the BIOS waited + * for engine idle. + * + * Note: if the Xserver uses the event tag it has to follow the same + * rule. Otherwise there may be glitches every 2^16 events. + */ +static int +savage_bci_wait_event_shadow(drm_savage_private_t *dev_priv, uint16_t e) +{ + uint32_t status; + int i; + + for (i = 0; i < SAVAGE_EVENT_USEC_TIMEOUT; i++) { + DRM_MEMORYBARRIER(); + status = dev_priv->status_ptr[1]; + if ((((status & 0xffff) - e) & 0xffff) <= 0x7fff || + (status & 0xffff) == 0) + return 0; + DRM_UDELAY(1); + } + +#if SAVAGE_BCI_DEBUG + DRM_ERROR("failed!\n"); + DRM_INFO(" status=0x%08x, e=0x%04x\n", status, e); +#endif + + return DRM_ERR(EBUSY); +} + +static int +savage_bci_wait_event_reg(drm_savage_private_t *dev_priv, uint16_t e) +{ + uint32_t status; + int i; + + for (i = 0; i < SAVAGE_EVENT_USEC_TIMEOUT; i++) { + status = SAVAGE_READ(SAVAGE_STATUS_WORD1); + if ((((status & 0xffff) - e) & 0xffff) <= 0x7fff || + (status & 0xffff) == 0) + return 0; + DRM_UDELAY(1); + } + +#if SAVAGE_BCI_DEBUG + DRM_ERROR("failed!\n"); + DRM_INFO(" status=0x%08x, e=0x%04x\n", status, e); +#endif + + return DRM_ERR(EBUSY); +} + +uint16_t savage_bci_emit_event(drm_savage_private_t *dev_priv, + unsigned int flags) +{ + uint16_t count; + BCI_LOCALS; + + if (dev_priv->status_ptr) { + /* coordinate with Xserver */ + count = dev_priv->status_ptr[1023]; + if (count < dev_priv->event_counter) + dev_priv->event_wrap++; + }
else { + count = dev_priv->event_counter; + } + count = (count + 1) & 0xffff; + if (count == 0) { + count++; /* See the comment above savage_wait_event_*. */ + dev_priv->event_wrap++; + } + dev_priv->event_counter = count; + if (dev_priv->status_ptr) + dev_priv->status_ptr[1023] = (uint32_t)count; + + if ((flags & (SAVAGE_WAIT_2D | SAVAGE_WAIT_3D))) { + unsigned int wait_cmd = BCI_CMD_WAIT; + if ((flags & SAVAGE_WAIT_2D)) + wait_cmd |= BCI_CMD_WAIT_2D; + if ((flags & SAVAGE_WAIT_3D)) + wait_cmd |= BCI_CMD_WAIT_3D; + BEGIN_BCI(2); + BCI_WRITE(wait_cmd); + } else { + BEGIN_BCI(1); + } + BCI_WRITE(BCI_CMD_UPDATE_EVENT_TAG | (uint32_t)count); + + return count; +} + +/* + * Freelist management + */ +static int savage_freelist_init(drm_device_t *dev) +{ + drm_savage_private_t *dev_priv = dev->dev_private; + drm_device_dma_t *dma = dev->dma; + drm_buf_t *buf; + drm_savage_buf_priv_t *entry; + int i; + DRM_DEBUG("count=%d\n", dma->buf_count); + + dev_priv->head.next = &dev_priv->tail; + dev_priv->head.prev = NULL; + dev_priv->head.buf = NULL; + + dev_priv->tail.next = NULL; + dev_priv->tail.prev = &dev_priv->head; + dev_priv->tail.buf = NULL; + + for (i = 0; i < dma->buf_count; i++) { + buf = dma->buflist[i]; + entry = buf->dev_private; + + SET_AGE(&entry->age, 0, 0); + entry->buf = buf; + + entry->next = dev_priv->head.next; + entry->prev = &dev_priv->head; + dev_priv->head.next->prev = entry; + dev_priv->head.next = entry; + } + + return 0; +} + +static drm_buf_t *savage_freelist_get(drm_device_t *dev) +{ + drm_savage_private_t *dev_priv = dev->dev_private; + drm_savage_buf_priv_t *tail = dev_priv->tail.prev; + uint16_t event; + unsigned int wrap; + DRM_DEBUG("\n"); + + UPDATE_EVENT_COUNTER(); + if (dev_priv->status_ptr) + event = dev_priv->status_ptr[1] & 0xffff; + else + event = SAVAGE_READ(SAVAGE_STATUS_WORD1) & 0xffff; + wrap = dev_priv->event_wrap; + if (event > dev_priv->event_counter) + wrap--; /* hardware hasn't passed the last wrap yet */ + + DRM_DEBUG(" 
tail=0x%04x %d\n", tail->age.event, tail->age.wrap); + DRM_DEBUG(" head=0x%04x %d\n", event, wrap); + + if (tail->buf && (TEST_AGE(&tail->age, event, wrap) || event == 0)) { + drm_savage_buf_priv_t *next = tail->next; + drm_savage_buf_priv_t *prev = tail->prev; + prev->next = next; + next->prev = prev; + tail->next = tail->prev = NULL; + return tail->buf; + } + + DRM_DEBUG("returning NULL, tail->buf=%p!\n", tail->buf); + return NULL; +} + +void savage_freelist_put(drm_device_t *dev, drm_buf_t *buf) +{ + drm_savage_private_t *dev_priv = dev->dev_private; + drm_savage_buf_priv_t *entry = buf->dev_private, *prev, *next; + + DRM_DEBUG("age=0x%04x wrap=%d\n", entry->age.event, entry->age.wrap); + + if (entry->next != NULL || entry->prev != NULL) { + DRM_ERROR("entry already on freelist.\n"); + return; + } + + prev = &dev_priv->head; + next = prev->next; + prev->next = entry; + next->prev = entry; + entry->prev = prev; + entry->next = next; +} + +/* + * Command DMA + */ +static int savage_dma_init(drm_savage_private_t *dev_priv) +{ + unsigned int i; + + dev_priv->nr_dma_pages = dev_priv->cmd_dma->size / + (SAVAGE_DMA_PAGE_SIZE*4); + dev_priv->dma_pages = drm_alloc(sizeof(drm_savage_dma_page_t) * + dev_priv->nr_dma_pages, + DRM_MEM_DRIVER); + if (dev_priv->dma_pages == NULL) + return DRM_ERR(ENOMEM); + + for (i = 0; i < dev_priv->nr_dma_pages; ++i) { + SET_AGE(&dev_priv->dma_pages[i].age, 0, 0); + dev_priv->dma_pages[i].used = 0; + dev_priv->dma_pages[i].flushed = 0; + } + SET_AGE(&dev_priv->last_dma_age, 0, 0); + + dev_priv->first_dma_page = 0; + dev_priv->current_dma_page = 0; + + return 0; +} + +void savage_dma_reset(drm_savage_private_t *dev_priv) +{ + uint16_t event; + unsigned int wrap, i; + event = savage_bci_emit_event(dev_priv, 0); + wrap = dev_priv->event_wrap; + for (i = 0; i < dev_priv->nr_dma_pages; ++i) { + SET_AGE(&dev_priv->dma_pages[i].age, event, wrap); + dev_priv->dma_pages[i].used = 0; + dev_priv->dma_pages[i].flushed = 0; + } + 
SET_AGE(&dev_priv->last_dma_age, event, wrap); + dev_priv->first_dma_page = dev_priv->current_dma_page = 0; +} + +void savage_dma_wait(drm_savage_private_t *dev_priv, unsigned int page) +{ + uint16_t event; + unsigned int wrap; + + /* Faked DMA buffer pages don't age. */ + if (dev_priv->cmd_dma == &dev_priv->fake_dma) + return; + + UPDATE_EVENT_COUNTER(); + if (dev_priv->status_ptr) + event = dev_priv->status_ptr[1] & 0xffff; + else + event = SAVAGE_READ(SAVAGE_STATUS_WORD1) & 0xffff; + wrap = dev_priv->event_wrap; + if (event > dev_priv->event_counter) + wrap--; /* hardware hasn't passed the last wrap yet */ + + if (dev_priv->dma_pages[page].age.wrap > wrap || + (dev_priv->dma_pages[page].age.wrap == wrap && + dev_priv->dma_pages[page].age.event > event)) { + if (dev_priv->wait_evnt(dev_priv, + dev_priv->dma_pages[page].age.event) + < 0) + DRM_ERROR("wait_evnt failed!\n"); + } +} + +uint32_t *savage_dma_alloc(drm_savage_private_t *dev_priv, unsigned int n) +{ + unsigned int cur = dev_priv->current_dma_page; + unsigned int rest = SAVAGE_DMA_PAGE_SIZE - + dev_priv->dma_pages[cur].used; + unsigned int nr_pages = (n - rest + SAVAGE_DMA_PAGE_SIZE-1) / + SAVAGE_DMA_PAGE_SIZE; + uint32_t *dma_ptr; + unsigned int i; + + DRM_DEBUG("cur=%u, cur->used=%u, n=%u, rest=%u, nr_pages=%u\n", + cur, dev_priv->dma_pages[cur].used, n, rest, nr_pages); + + if (cur + nr_pages < dev_priv->nr_dma_pages) { + dma_ptr = (uint32_t *)dev_priv->cmd_dma->handle + + cur*SAVAGE_DMA_PAGE_SIZE + + dev_priv->dma_pages[cur].used; + if (n < rest) + rest = n; + dev_priv->dma_pages[cur].used += rest; + n -= rest; + cur++; + } else { + dev_priv->dma_flush(dev_priv); + nr_pages = (n + SAVAGE_DMA_PAGE_SIZE-1) / SAVAGE_DMA_PAGE_SIZE; + for (i = cur; i < dev_priv->nr_dma_pages; ++i) { + dev_priv->dma_pages[i].age = dev_priv->last_dma_age; + dev_priv->dma_pages[i].used = 0; + dev_priv->dma_pages[i].flushed = 0; + } + dma_ptr = (uint32_t *)dev_priv->cmd_dma->handle; + dev_priv->first_dma_page = cur = 0; + } + 
for (i = cur; nr_pages > 0; ++i, --nr_pages) { +#if SAVAGE_DMA_DEBUG + if (dev_priv->dma_pages[i].used) { + DRM_ERROR("unflushed page %u: used=%u\n", + i, dev_priv->dma_pages[i].used); + } +#endif + if (n > SAVAGE_DMA_PAGE_SIZE) + dev_priv->dma_pages[i].used = SAVAGE_DMA_PAGE_SIZE; + else + dev_priv->dma_pages[i].used = n; + n -= SAVAGE_DMA_PAGE_SIZE; + } + dev_priv->current_dma_page = --i; /* i is one past the last page handed out */ + + DRM_DEBUG("cur=%u, cur->used=%u, n=%u\n", + i, dev_priv->dma_pages[i].used, n); + + savage_dma_wait(dev_priv, dev_priv->current_dma_page); + + return dma_ptr; +} + +static void savage_dma_flush(drm_savage_private_t *dev_priv) +{ + unsigned int first = dev_priv->first_dma_page; + unsigned int cur = dev_priv->current_dma_page; + uint16_t event; + unsigned int wrap, pad, align, len, i; + unsigned long phys_addr; + BCI_LOCALS; + + if (first == cur && + dev_priv->dma_pages[cur].used == dev_priv->dma_pages[cur].flushed) + return; /* nothing was added since the last flush */ + + /* pad length to multiples of 2 entries + * align start of next DMA block to multiples of 8 entries */ + pad = -dev_priv->dma_pages[cur].used & 1; /* 0 or 1 entries */ + align = -(dev_priv->dma_pages[cur].used + pad) & 7; /* 0..7 entries */ + + DRM_DEBUG("first=%u, cur=%u, first->flushed=%u, cur->used=%u, " + "pad=%u, align=%u\n", + first, cur, dev_priv->dma_pages[first].flushed, + dev_priv->dma_pages[cur].used, pad, align); + + /* pad with noops (BCI_CMD_WAIT entries) */ + if (pad) { + uint32_t *dma_ptr = (uint32_t *)dev_priv->cmd_dma->handle + + cur * SAVAGE_DMA_PAGE_SIZE + + dev_priv->dma_pages[cur].used; + dev_priv->dma_pages[cur].used += pad; + while(pad != 0) { + *dma_ptr++ = BCI_CMD_WAIT; + pad--; + } + } + + DRM_MEMORYBARRIER(); + + /* do flush ... 
*/ + phys_addr = dev_priv->cmd_dma->offset + + (first * SAVAGE_DMA_PAGE_SIZE + + dev_priv->dma_pages[first].flushed) * 4; + len = (cur - first) * SAVAGE_DMA_PAGE_SIZE + + dev_priv->dma_pages[cur].used - + dev_priv->dma_pages[first].flushed; + + DRM_DEBUG("phys_addr=%lx, len=%u\n", + phys_addr | dev_priv->dma_type, len); + + BEGIN_BCI(3); + BCI_SET_REGISTERS(SAVAGE_DMABUFADDR, 1); + BCI_WRITE(phys_addr | dev_priv->dma_type); + BCI_DMA(len); + + /* fix alignment of the start of the next block */ + dev_priv->dma_pages[cur].used += align; + + /* age DMA pages */ + event = savage_bci_emit_event(dev_priv, 0); + wrap = dev_priv->event_wrap; + for (i = first; i < cur; ++i) { + SET_AGE(&dev_priv->dma_pages[i].age, event, wrap); + dev_priv->dma_pages[i].used = 0; + dev_priv->dma_pages[i].flushed = 0; + } + /* age the current page only when it's full */ + if (dev_priv->dma_pages[cur].used == SAVAGE_DMA_PAGE_SIZE) { + SET_AGE(&dev_priv->dma_pages[cur].age, event, wrap); + dev_priv->dma_pages[cur].used = 0; + dev_priv->dma_pages[cur].flushed = 0; + /* advance to next page */ + cur++; + if (cur == dev_priv->nr_dma_pages) + cur = 0; + dev_priv->first_dma_page = dev_priv->current_dma_page = cur; + } else { + dev_priv->first_dma_page = cur; + dev_priv->dma_pages[cur].flushed = dev_priv->dma_pages[i].used; + } + SET_AGE(&dev_priv->last_dma_age, event, wrap); + + DRM_DEBUG("first=cur=%u, cur->used=%u, cur->flushed=%u\n", cur, + dev_priv->dma_pages[cur].used, + dev_priv->dma_pages[cur].flushed); +} + +static void savage_fake_dma_flush(drm_savage_private_t *dev_priv) +{ + unsigned int i, j; + BCI_LOCALS; + + if (dev_priv->first_dma_page == dev_priv->current_dma_page && + dev_priv->dma_pages[dev_priv->current_dma_page].used == 0) + return; + + DRM_DEBUG("first=%u, cur=%u, cur->used=%u\n", + dev_priv->first_dma_page, dev_priv->current_dma_page, + dev_priv->dma_pages[dev_priv->current_dma_page].used); + + for (i = dev_priv->first_dma_page; + i <= dev_priv->current_dma_page && 
dev_priv->dma_pages[i].used; + ++i) { + uint32_t *dma_ptr = (uint32_t *)dev_priv->cmd_dma->handle + + i * SAVAGE_DMA_PAGE_SIZE; +#if SAVAGE_DMA_DEBUG + /* Sanity check: all pages except the last one must be full. */ + if (i < dev_priv->current_dma_page && + dev_priv->dma_pages[i].used != SAVAGE_DMA_PAGE_SIZE) { + DRM_ERROR("partial DMA page %u: used=%u\n", + i, dev_priv->dma_pages[i].used); + } +#endif + BEGIN_BCI(dev_priv->dma_pages[i].used); + for (j = 0; j < dev_priv->dma_pages[i].used; ++j) { + BCI_WRITE(dma_ptr[j]); /* replay the buffered dwords through the BCI */ + } + dev_priv->dma_pages[i].used = 0; + } + + /* reset to first page */ + dev_priv->first_dma_page = dev_priv->current_dma_page = 0; +} + +/* + * Initialize mappings. On Savage4 and SavageIX the alignment + * and size of the aperture is not suitable for automatic MTRR setup + * in drm_addmap. Therefore we do it manually before the maps are + * initialized. We also need to take care of deleting the MTRRs in + * postcleanup. + */ +int savage_preinit(drm_device_t *dev, unsigned long chipset) +{ + drm_savage_private_t *dev_priv; + unsigned long mmio_base, fb_base, fb_size, aperture_base; + /* fb_rsrc and aper_rsrc aren't really used currently, but still exist + * in case we decide we need information on the BAR for BSD in the + * future. 
+ */ + unsigned int fb_rsrc, aper_rsrc; + int ret = 0; + + dev_priv = drm_alloc(sizeof(drm_savage_private_t), DRM_MEM_DRIVER); + if (dev_priv == NULL) + return DRM_ERR(ENOMEM); + + memset(dev_priv, 0, sizeof(drm_savage_private_t)); + dev->dev_private = (void *)dev_priv; + dev_priv->chipset = (enum savage_family)chipset; + + dev_priv->mtrr[0].handle = -1; /* negative handle: no MTRR to delete in savage_postcleanup */ + dev_priv->mtrr[1].handle = -1; + dev_priv->mtrr[2].handle = -1; + if (S3_SAVAGE3D_SERIES(dev_priv->chipset)) { + fb_rsrc = 0; + fb_base = drm_get_resource_start(dev, 0); + fb_size = SAVAGE_FB_SIZE_S3; + mmio_base = fb_base + SAVAGE_FB_SIZE_S3; + aper_rsrc = 0; + aperture_base = fb_base + SAVAGE_APERTURE_OFFSET; + /* this should always be true */ + if (drm_get_resource_len(dev, 0) == 0x08000000) { + /* Don't make MMIO write-combining! We need 3 + * MTRRs. */ + dev_priv->mtrr[0].base = fb_base; + dev_priv->mtrr[0].size = 0x01000000; + dev_priv->mtrr[0].handle = mtrr_add( + dev_priv->mtrr[0].base, dev_priv->mtrr[0].size, + MTRR_TYPE_WRCOMB, 1); + dev_priv->mtrr[1].base = fb_base+0x02000000; + dev_priv->mtrr[1].size = 0x02000000; + dev_priv->mtrr[1].handle = mtrr_add( + dev_priv->mtrr[1].base, dev_priv->mtrr[1].size, + MTRR_TYPE_WRCOMB, 1); + dev_priv->mtrr[2].base = fb_base+0x04000000; + dev_priv->mtrr[2].size = 0x04000000; + dev_priv->mtrr[2].handle = mtrr_add( + dev_priv->mtrr[2].base, dev_priv->mtrr[2].size, + MTRR_TYPE_WRCOMB, 1); + } else { + DRM_ERROR("strange pci_resource_len %08lx\n", + drm_get_resource_len(dev, 0)); + } + } else if (chipset != S3_SUPERSAVAGE && chipset != S3_SAVAGE2000) { + mmio_base = drm_get_resource_start(dev, 0); + fb_rsrc = 1; + fb_base = drm_get_resource_start(dev, 1); + fb_size = SAVAGE_FB_SIZE_S4; + aper_rsrc = 1; + aperture_base = fb_base + SAVAGE_APERTURE_OFFSET; + /* this should always be true */ + if (drm_get_resource_len(dev, 1) == 0x08000000) { + /* Can use one MTRR to cover both fb and + * aperture. 
*/ + dev_priv->mtrr[0].base = fb_base; + dev_priv->mtrr[0].size = 0x08000000; + dev_priv->mtrr[0].handle = mtrr_add( + dev_priv->mtrr[0].base, dev_priv->mtrr[0].size, + MTRR_TYPE_WRCOMB, 1); + } else { + DRM_ERROR("strange pci_resource_len %08lx\n", + drm_get_resource_len(dev, 1)); + } + } else { + mmio_base = drm_get_resource_start(dev, 0); + fb_rsrc = 1; + fb_base = drm_get_resource_start(dev, 1); + fb_size = drm_get_resource_len(dev, 1); + aper_rsrc = 2; + aperture_base = drm_get_resource_start(dev, 2); + /* Automatic MTRR setup will do the right thing. */ + } + + ret = drm_addmap(dev, mmio_base, SAVAGE_MMIO_SIZE, _DRM_REGISTERS, + _DRM_READ_ONLY, &dev_priv->mmio); + if (ret) + return ret; + + ret = drm_addmap(dev, fb_base, fb_size, _DRM_FRAME_BUFFER, + _DRM_WRITE_COMBINING, &dev_priv->fb); + if (ret) + return ret; + + ret = drm_addmap(dev, aperture_base, SAVAGE_APERTURE_SIZE, + _DRM_FRAME_BUFFER, _DRM_WRITE_COMBINING, + &dev_priv->aperture); + if (ret) + return ret; + + return ret; +} + +/* + * Delete MTRRs and free device-private data. 
+ */ +int savage_postcleanup(drm_device_t *dev) +{ + drm_savage_private_t *dev_priv = dev->dev_private; + int i; + + for (i = 0; i < 3; ++i) + if (dev_priv->mtrr[i].handle >= 0) /* only MTRRs that savage_preinit registered */ + mtrr_del(dev_priv->mtrr[i].handle, + dev_priv->mtrr[i].base, + dev_priv->mtrr[i].size); + + drm_free(dev_priv, sizeof(drm_savage_private_t), DRM_MEM_DRIVER); + + return 0; +} + +static int savage_do_init_bci(drm_device_t *dev, drm_savage_init_t *init) +{ + drm_savage_private_t *dev_priv = dev->dev_private; + + if (init->fb_bpp != 16 && init->fb_bpp != 32) { + DRM_ERROR("invalid frame buffer bpp %d!\n", init->fb_bpp); + return DRM_ERR(EINVAL); + } + if (init->depth_bpp != 16 && init->depth_bpp != 32) { + DRM_ERROR("invalid depth buffer bpp %d!\n", init->depth_bpp); /* report the value that was rejected */ + return DRM_ERR(EINVAL); + } + if (init->dma_type != SAVAGE_DMA_AGP && + init->dma_type != SAVAGE_DMA_PCI) { + DRM_ERROR("invalid dma memory type %d!\n", init->dma_type); + return DRM_ERR(EINVAL); + } + + dev_priv->cob_size = init->cob_size; + dev_priv->bci_threshold_lo = init->bci_threshold_lo; + dev_priv->bci_threshold_hi = init->bci_threshold_hi; + dev_priv->dma_type = init->dma_type; + + dev_priv->fb_bpp = init->fb_bpp; + dev_priv->front_offset = init->front_offset; + dev_priv->front_pitch = init->front_pitch; + dev_priv->back_offset = init->back_offset; + dev_priv->back_pitch = init->back_pitch; + dev_priv->depth_bpp = init->depth_bpp; + dev_priv->depth_offset = init->depth_offset; + dev_priv->depth_pitch = init->depth_pitch; + + dev_priv->texture_offset = init->texture_offset; + dev_priv->texture_size = init->texture_size; + + DRM_GETSAREA(); + if (!dev_priv->sarea) { + DRM_ERROR("could not find sarea!\n"); + savage_do_cleanup_bci(dev); + return DRM_ERR(EINVAL); + } + if (init->status_offset != 0) { + dev_priv->status = drm_core_findmap(dev, init->status_offset); + if (!dev_priv->status) { + DRM_ERROR("could not find shadow status region!\n"); + savage_do_cleanup_bci(dev); + return DRM_ERR(EINVAL); + } + } else { + 
dev_priv->status = NULL; + } + if (dev_priv->dma_type == SAVAGE_DMA_AGP && init->buffers_offset) { + dev->agp_buffer_map = drm_core_findmap(dev, + init->buffers_offset); + if (!dev->agp_buffer_map) { + DRM_ERROR("could not find DMA buffer region!\n"); + savage_do_cleanup_bci(dev); + return DRM_ERR(EINVAL); + } + drm_core_ioremap(dev->agp_buffer_map, dev); + if (!dev->agp_buffer_map->handle) { /* map pointer was checked above; test ->handle as the cmd_dma path below does */ + DRM_ERROR("failed to ioremap DMA buffer region!\n"); + savage_do_cleanup_bci(dev); + return DRM_ERR(ENOMEM); + } + } + if (init->agp_textures_offset) { + dev_priv->agp_textures = + drm_core_findmap(dev, init->agp_textures_offset); + if (!dev_priv->agp_textures) { + DRM_ERROR("could not find agp texture region!\n"); + savage_do_cleanup_bci(dev); + return DRM_ERR(EINVAL); + } + } else { + dev_priv->agp_textures = NULL; + } + + if (init->cmd_dma_offset) { + if (S3_SAVAGE3D_SERIES(dev_priv->chipset)) { + DRM_ERROR("command DMA not supported on " + "Savage3D/MX/IX.\n"); + savage_do_cleanup_bci(dev); + return DRM_ERR(EINVAL); + } + if (dev->dma && dev->dma->buflist) { + DRM_ERROR("command and vertex DMA not supported " + "at the same time.\n"); + savage_do_cleanup_bci(dev); + return DRM_ERR(EINVAL); + } + dev_priv->cmd_dma = drm_core_findmap(dev, init->cmd_dma_offset); + if (!dev_priv->cmd_dma) { + DRM_ERROR("could not find command DMA region!\n"); + savage_do_cleanup_bci(dev); + return DRM_ERR(EINVAL); + } + if (dev_priv->dma_type == SAVAGE_DMA_AGP) { + if (dev_priv->cmd_dma->type != _DRM_AGP) { + DRM_ERROR("AGP command DMA region is not a " + "_DRM_AGP map!\n"); + savage_do_cleanup_bci(dev); + return DRM_ERR(EINVAL); + } + drm_core_ioremap(dev_priv->cmd_dma, dev); + if (!dev_priv->cmd_dma->handle) { + DRM_ERROR("failed to ioremap command " + "DMA region!\n"); + savage_do_cleanup_bci(dev); + return DRM_ERR(ENOMEM); + } + } else if (dev_priv->cmd_dma->type != _DRM_CONSISTENT) { + DRM_ERROR("PCI command DMA region is not a " + "_DRM_CONSISTENT map!\n"); + savage_do_cleanup_bci(dev); + 
return DRM_ERR(EINVAL); + } + } else { + dev_priv->cmd_dma = NULL; + } + + dev_priv->dma_flush = savage_dma_flush; + if (!dev_priv->cmd_dma) { + DRM_DEBUG("falling back to faked command DMA.\n"); + dev_priv->fake_dma.offset = 0; + dev_priv->fake_dma.size = SAVAGE_FAKE_DMA_SIZE; + dev_priv->fake_dma.type = _DRM_SHM; + dev_priv->fake_dma.handle = drm_alloc(SAVAGE_FAKE_DMA_SIZE, + DRM_MEM_DRIVER); + if (!dev_priv->fake_dma.handle) { + DRM_ERROR("could not allocate faked DMA buffer!\n"); + savage_do_cleanup_bci(dev); + return DRM_ERR(ENOMEM); + } + dev_priv->cmd_dma = &dev_priv->fake_dma; + dev_priv->dma_flush = savage_fake_dma_flush; + } + + dev_priv->sarea_priv = + (drm_savage_sarea_t *)((uint8_t *)dev_priv->sarea->handle + + init->sarea_priv_offset); + + /* setup bitmap descriptors */ + { + unsigned int color_tile_format; + unsigned int depth_tile_format; + unsigned int front_stride, back_stride, depth_stride; + if (dev_priv->chipset <= S3_SAVAGE4) { + color_tile_format = dev_priv->fb_bpp == 16 ? + SAVAGE_BD_TILE_16BPP : SAVAGE_BD_TILE_32BPP; + depth_tile_format = dev_priv->depth_bpp == 16 ? 
+ SAVAGE_BD_TILE_16BPP : SAVAGE_BD_TILE_32BPP; + } else { + color_tile_format = SAVAGE_BD_TILE_DEST; + depth_tile_format = SAVAGE_BD_TILE_DEST; + } + front_stride = dev_priv->front_pitch / (dev_priv->fb_bpp/8); + back_stride = dev_priv-> back_pitch / (dev_priv->fb_bpp/8); + depth_stride = dev_priv->depth_pitch / (dev_priv->depth_bpp/8); + + dev_priv->front_bd = front_stride | SAVAGE_BD_BW_DISABLE | + (dev_priv->fb_bpp << SAVAGE_BD_BPP_SHIFT) | + (color_tile_format << SAVAGE_BD_TILE_SHIFT); + + dev_priv-> back_bd = back_stride | SAVAGE_BD_BW_DISABLE | + (dev_priv->fb_bpp << SAVAGE_BD_BPP_SHIFT) | + (color_tile_format << SAVAGE_BD_TILE_SHIFT); + + dev_priv->depth_bd = depth_stride | SAVAGE_BD_BW_DISABLE | + (dev_priv->depth_bpp << SAVAGE_BD_BPP_SHIFT) | + (depth_tile_format << SAVAGE_BD_TILE_SHIFT); + } + + /* setup status and bci ptr */ + dev_priv->event_counter = 0; + dev_priv->event_wrap = 0; + dev_priv->bci_ptr = (volatile uint32_t *) + ((uint8_t *)dev_priv->mmio->handle + SAVAGE_BCI_OFFSET); + if (S3_SAVAGE3D_SERIES(dev_priv->chipset)) { + dev_priv->status_used_mask = SAVAGE_FIFO_USED_MASK_S3D; + } else { + dev_priv->status_used_mask = SAVAGE_FIFO_USED_MASK_S4; + } + if (dev_priv->status != NULL) { + dev_priv->status_ptr = + (volatile uint32_t *)dev_priv->status->handle; + dev_priv->wait_fifo = savage_bci_wait_fifo_shadow; + dev_priv->wait_evnt = savage_bci_wait_event_shadow; + dev_priv->status_ptr[1023] = dev_priv->event_counter; + } else { + dev_priv->status_ptr = NULL; + if (S3_SAVAGE3D_SERIES(dev_priv->chipset)) { + dev_priv->wait_fifo = savage_bci_wait_fifo_s3d; + } else { + dev_priv->wait_fifo = savage_bci_wait_fifo_s4; + } + dev_priv->wait_evnt = savage_bci_wait_event_reg; + } + + /* cliprect functions */ + if (S3_SAVAGE3D_SERIES(dev_priv->chipset)) + dev_priv->emit_clip_rect = savage_emit_clip_rect_s3d; + else + dev_priv->emit_clip_rect = savage_emit_clip_rect_s4; + + if (savage_freelist_init(dev) < 0) { + DRM_ERROR("could not initialize freelist\n"); + 
savage_do_cleanup_bci(dev); + return DRM_ERR(ENOMEM); + } + + if (savage_dma_init(dev_priv) < 0) { + DRM_ERROR("could not initialize command DMA\n"); + savage_do_cleanup_bci(dev); + return DRM_ERR(ENOMEM); + } + + return 0; +} + +int savage_do_cleanup_bci(drm_device_t *dev) +{ + drm_savage_private_t *dev_priv = dev->dev_private; + + if (dev_priv->cmd_dma == &dev_priv->fake_dma) { + if (dev_priv->fake_dma.handle) + drm_free(dev_priv->fake_dma.handle, + SAVAGE_FAKE_DMA_SIZE, DRM_MEM_DRIVER); + } else if (dev_priv->cmd_dma && dev_priv->cmd_dma->handle && + dev_priv->cmd_dma->type == _DRM_AGP && + dev_priv->dma_type == SAVAGE_DMA_AGP) + drm_core_ioremapfree(dev_priv->cmd_dma, dev); + + if (dev_priv->dma_type == SAVAGE_DMA_AGP && + dev->agp_buffer_map && dev->agp_buffer_map->handle) { + drm_core_ioremapfree(dev->agp_buffer_map, dev); + /* make sure the next instance (which may be running + * in PCI mode) doesn't try to use an old + * agp_buffer_map. */ + dev->agp_buffer_map = NULL; + } + + if (dev_priv->dma_pages) + drm_free(dev_priv->dma_pages, + sizeof(drm_savage_dma_page_t)*dev_priv->nr_dma_pages, + DRM_MEM_DRIVER); + + return 0; +} + +static int savage_bci_init(DRM_IOCTL_ARGS) +{ + DRM_DEVICE; + drm_savage_init_t init; + + LOCK_TEST_WITH_RETURN(dev, filp); + + DRM_COPY_FROM_USER_IOCTL(init, (drm_savage_init_t __user *)data, + sizeof(init)); + + switch (init.func) { + case SAVAGE_INIT_BCI: + return savage_do_init_bci(dev, &init); + case SAVAGE_CLEANUP_BCI: + return savage_do_cleanup_bci(dev); + } + + return DRM_ERR(EINVAL); +} + +static int savage_bci_event_emit(DRM_IOCTL_ARGS) +{ + DRM_DEVICE; + drm_savage_private_t *dev_priv = dev->dev_private; + drm_savage_event_emit_t event; + + DRM_DEBUG("\n"); + + LOCK_TEST_WITH_RETURN(dev, filp); + + DRM_COPY_FROM_USER_IOCTL(event, (drm_savage_event_emit_t __user *)data, + sizeof(event)); + + event.count = savage_bci_emit_event(dev_priv, event.flags); + event.count |= dev_priv->event_wrap << 16; + 
DRM_COPY_TO_USER_IOCTL(&((drm_savage_event_emit_t __user *)data)->count, + event.count, sizeof(event.count)); + return 0; +} + +static int savage_bci_event_wait(DRM_IOCTL_ARGS) +{ + DRM_DEVICE; + drm_savage_private_t *dev_priv = dev->dev_private; + drm_savage_event_wait_t event; + unsigned int event_e, hw_e; + unsigned int event_w, hw_w; + + DRM_DEBUG("\n"); + + DRM_COPY_FROM_USER_IOCTL(event, (drm_savage_event_wait_t __user *)data, + sizeof(event)); + + UPDATE_EVENT_COUNTER(); + if (dev_priv->status_ptr) + hw_e = dev_priv->status_ptr[1] & 0xffff; + else + hw_e = SAVAGE_READ(SAVAGE_STATUS_WORD1) & 0xffff; + hw_w = dev_priv->event_wrap; + if (hw_e > dev_priv->event_counter) + hw_w--; /* hardware hasn't passed the last wrap yet */ + + event_e = event.count & 0xffff; + event_w = event.count >> 16; + + /* Don't need to wait if + * - event counter wrapped since the event was emitted or + * - the hardware has advanced up to or over the event to wait for. + */ + if (event_w < hw_w || (event_w == hw_w && event_e <= hw_e) ) + return 0; + else + return dev_priv->wait_evnt(dev_priv, event_e); +} + +/* + * DMA buffer management + */ + +static int savage_bci_get_buffers(DRMFILE filp, drm_device_t *dev, drm_dma_t *d) +{ + drm_buf_t *buf; + int i; + + for (i = d->granted_count; i < d->request_count; i++) { + buf = savage_freelist_get(dev); + if (!buf) + return DRM_ERR(EAGAIN); + + buf->filp = filp; + + if (DRM_COPY_TO_USER(&d->request_indices[i], + &buf->idx, sizeof(buf->idx))) + return DRM_ERR(EFAULT); + if (DRM_COPY_TO_USER(&d->request_sizes[i], + &buf->total, sizeof(buf->total))) + return DRM_ERR(EFAULT); + + d->granted_count++; + } + return 0; +} + +int savage_bci_buffers(DRM_IOCTL_ARGS) +{ + DRM_DEVICE; + drm_device_dma_t *dma = dev->dma; + drm_dma_t d; + int ret = 0; + + LOCK_TEST_WITH_RETURN(dev, filp); + + DRM_COPY_FROM_USER_IOCTL(d, (drm_dma_t __user *)data, sizeof(d)); + + /* Please don't send us buffers. 
+ */ + if (d.send_count != 0) { + DRM_ERROR("Process %d trying to send %d buffers via drmDMA\n", + DRM_CURRENTPID, d.send_count); + return DRM_ERR(EINVAL); + } + + /* We'll send you buffers. + */ + if (d.request_count < 0 || d.request_count > dma->buf_count) { + DRM_ERROR("Process %d trying to get %d buffers (of %d max)\n", + DRM_CURRENTPID, d.request_count, dma->buf_count); + return DRM_ERR(EINVAL); + } + + d.granted_count = 0; + + if (d.request_count) { + ret = savage_bci_get_buffers(filp, dev, &d); + } + + DRM_COPY_TO_USER_IOCTL((drm_dma_t __user *)data, d, sizeof(d)); + + return ret; +} + +void savage_reclaim_buffers(drm_device_t *dev, DRMFILE filp) { + drm_device_dma_t *dma = dev->dma; + drm_savage_private_t *dev_priv = dev->dev_private; + int i; + + if (!dma) + return; + if (!dev_priv) + return; + if (!dma->buflist) + return; + + /*i830_flush_queue(dev);*/ + + for (i = 0; i < dma->buf_count; i++) { + drm_buf_t *buf = dma->buflist[i]; + drm_savage_buf_priv_t *buf_priv = buf->dev_private; + + if (buf->filp == filp && buf_priv && + buf_priv->next == NULL && buf_priv->prev == NULL) { + uint16_t event; + DRM_DEBUG("reclaimed from client\n"); + event = savage_bci_emit_event(dev_priv, SAVAGE_WAIT_3D); + SET_AGE(&buf_priv->age, event, dev_priv->event_wrap); + savage_freelist_put(dev, buf); + } + } + + drm_core_reclaim_buffers(dev, filp); +} + + +drm_ioctl_desc_t savage_ioctls[] = { + [DRM_IOCTL_NR(DRM_SAVAGE_BCI_INIT)] = {savage_bci_init, 1, 1}, + [DRM_IOCTL_NR(DRM_SAVAGE_BCI_CMDBUF)] = {savage_bci_cmdbuf, 1, 0}, + [DRM_IOCTL_NR(DRM_SAVAGE_BCI_EVENT_EMIT)] = {savage_bci_event_emit, 1, 0}, + [DRM_IOCTL_NR(DRM_SAVAGE_BCI_EVENT_WAIT)] = {savage_bci_event_wait, 1, 0}, +}; + +int savage_max_ioctl = DRM_ARRAY_SIZE(savage_ioctls); diff --git a/drivers/char/drm/savage_drm.h b/drivers/char/drm/savage_drm.h new file mode 100644 index 000000000000..6526c9aa7589 --- /dev/null +++ b/drivers/char/drm/savage_drm.h @@ -0,0 +1,209 @@ +/* savage_drm.h -- Public header for the savage 
driver + * + * Copyright 2004 Felix Kuehling + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sub license, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NON-INFRINGEMENT. IN NO EVENT SHALL FELIX KUEHLING BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF + * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#ifndef __SAVAGE_DRM_H__ +#define __SAVAGE_DRM_H__ + +#ifndef __SAVAGE_SAREA_DEFINES__ +#define __SAVAGE_SAREA_DEFINES__ + +/* 2 heaps (1 for card, 1 for agp), each divided into upto 128 + * regions, subject to a minimum region size of (1<<16) == 64k. + * + * Clients may subdivide regions internally, but when sharing between + * clients, the region size is the minimum granularity. + */ + +#define SAVAGE_CARD_HEAP 0 +#define SAVAGE_AGP_HEAP 1 +#define SAVAGE_NR_TEX_HEAPS 2 +#define SAVAGE_NR_TEX_REGIONS 16 +#define SAVAGE_LOG_MIN_TEX_REGION_SIZE 16 + +#endif /* __SAVAGE_SAREA_DEFINES__ */ + +typedef struct _drm_savage_sarea { + /* LRU lists for texture memory in agp space and on the card. 
+ */ + drm_tex_region_t texList[SAVAGE_NR_TEX_HEAPS][SAVAGE_NR_TEX_REGIONS+1]; + unsigned int texAge[SAVAGE_NR_TEX_HEAPS]; + + /* Mechanism to validate card state. + */ + int ctxOwner; +} drm_savage_sarea_t, *drm_savage_sarea_ptr; + +/* Savage-specific ioctls + */ +#define DRM_SAVAGE_BCI_INIT 0x00 +#define DRM_SAVAGE_BCI_CMDBUF 0x01 +#define DRM_SAVAGE_BCI_EVENT_EMIT 0x02 +#define DRM_SAVAGE_BCI_EVENT_WAIT 0x03 + +#define DRM_IOCTL_SAVAGE_INIT DRM_IOW( DRM_COMMAND_BASE + DRM_SAVAGE_BCI_INIT, drm_savage_init_t) +#define DRM_IOCTL_SAVAGE_CMDBUF DRM_IOW( DRM_COMMAND_BASE + DRM_SAVAGE_BCI_CMDBUF, drm_savage_cmdbuf_t) +#define DRM_IOCTL_SAVAGE_EVENT_EMIT DRM_IOWR(DRM_COMMAND_BASE + DRM_SAVAGE_BCI_EVENT_EMIT, drm_savage_event_emit_t) +#define DRM_IOCTL_SAVAGE_EVENT_WAIT DRM_IOW( DRM_COMMAND_BASE + DRM_SAVAGE_BCI_EVENT_WAIT, drm_savage_event_wait_t) + +#define SAVAGE_DMA_PCI 1 +#define SAVAGE_DMA_AGP 3 +typedef struct drm_savage_init { + enum { + SAVAGE_INIT_BCI = 1, + SAVAGE_CLEANUP_BCI = 2 + } func; + unsigned int sarea_priv_offset; + + /* some parameters */ + unsigned int cob_size; + unsigned int bci_threshold_lo, bci_threshold_hi; + unsigned int dma_type; + + /* frame buffer layout */ + unsigned int fb_bpp; + unsigned int front_offset, front_pitch; + unsigned int back_offset, back_pitch; + unsigned int depth_bpp; + unsigned int depth_offset, depth_pitch; + + /* local textures */ + unsigned int texture_offset; + unsigned int texture_size; + + /* physical locations of non-permanent maps */ + unsigned long status_offset; + unsigned long buffers_offset; + unsigned long agp_textures_offset; + unsigned long cmd_dma_offset; +} drm_savage_init_t; + +typedef union drm_savage_cmd_header drm_savage_cmd_header_t; +typedef struct drm_savage_cmdbuf { + /* command buffer in client's address space */ + drm_savage_cmd_header_t __user *cmd_addr; + unsigned int size; /* size of the command buffer in 64bit units */ + + unsigned int dma_idx; /* DMA buffer index to use */ + int discard; /* 
discard DMA buffer when done */ + /* vertex buffer in client's address space */ + unsigned int __user *vb_addr; + unsigned int vb_size; /* size of client vertex buffer in bytes */ + unsigned int vb_stride; /* stride of vertices in 32bit words */ + /* boxes in client's address space */ + drm_clip_rect_t __user *box_addr; + unsigned int nbox; /* number of clipping boxes */ +} drm_savage_cmdbuf_t; + +#define SAVAGE_WAIT_2D 0x1 /* wait for 2D idle before updating event tag */ +#define SAVAGE_WAIT_3D 0x2 /* wait for 3D idle before updating event tag */ +#define SAVAGE_WAIT_IRQ 0x4 /* emit or wait for IRQ, not implemented yet */ +typedef struct drm_savage_event { + unsigned int count; + unsigned int flags; +} drm_savage_event_emit_t, drm_savage_event_wait_t; + +/* Commands for the cmdbuf ioctl + */ +#define SAVAGE_CMD_STATE 0 /* a range of state registers */ +#define SAVAGE_CMD_DMA_PRIM 1 /* vertices from DMA buffer */ +#define SAVAGE_CMD_VB_PRIM 2 /* vertices from client vertex buffer */ +#define SAVAGE_CMD_DMA_IDX 3 /* indexed vertices from DMA buffer */ +#define SAVAGE_CMD_VB_IDX 4 /* indexed vertices client vertex buffer */ +#define SAVAGE_CMD_CLEAR 5 /* clear buffers */ +#define SAVAGE_CMD_SWAP 6 /* swap buffers */ + +/* Primitive types +*/ +#define SAVAGE_PRIM_TRILIST 0 /* triangle list */ +#define SAVAGE_PRIM_TRISTRIP 1 /* triangle strip */ +#define SAVAGE_PRIM_TRIFAN 2 /* triangle fan */ +#define SAVAGE_PRIM_TRILIST_201 3 /* reorder verts for correct flat + * shading on s3d */ + +/* Skip flags (vertex format) + */ +#define SAVAGE_SKIP_Z 0x01 +#define SAVAGE_SKIP_W 0x02 +#define SAVAGE_SKIP_C0 0x04 +#define SAVAGE_SKIP_C1 0x08 +#define SAVAGE_SKIP_S0 0x10 +#define SAVAGE_SKIP_T0 0x20 +#define SAVAGE_SKIP_ST0 0x30 +#define SAVAGE_SKIP_S1 0x40 +#define SAVAGE_SKIP_T1 0x80 +#define SAVAGE_SKIP_ST1 0xc0 +#define SAVAGE_SKIP_ALL_S3D 0x3f +#define SAVAGE_SKIP_ALL_S4 0xff + +/* Buffer names for clear command + */ +#define SAVAGE_FRONT 0x1 +#define SAVAGE_BACK 0x2 
+#define SAVAGE_DEPTH 0x4 + +/* 64-bit command header + */ +union drm_savage_cmd_header { + struct { + unsigned char cmd; /* command */ + unsigned char pad0; + unsigned short pad1; + unsigned short pad2; + unsigned short pad3; + } cmd; /* generic */ + struct { + unsigned char cmd; + unsigned char global; /* need idle engine? */ + unsigned short count; /* number of consecutive registers */ + unsigned short start; /* first register */ + unsigned short pad3; + } state; /* SAVAGE_CMD_STATE */ + struct { + unsigned char cmd; + unsigned char prim; /* primitive type */ + unsigned short skip; /* vertex format (skip flags) */ + unsigned short count; /* number of vertices */ + unsigned short start; /* first vertex in DMA/vertex buffer */ + } prim; /* SAVAGE_CMD_DMA_PRIM, SAVAGE_CMD_VB_PRIM */ + struct { + unsigned char cmd; + unsigned char prim; + unsigned short skip; + unsigned short count; /* number of indices that follow */ + unsigned short pad3; + } idx; /* SAVAGE_CMD_DMA_IDX, SAVAGE_CMD_VB_IDX */ + struct { + unsigned char cmd; + unsigned char pad0; + unsigned short pad1; + unsigned int flags; + } clear0; /* SAVAGE_CMD_CLEAR */ + struct { + unsigned int mask; + unsigned int value; + } clear1; /* SAVAGE_CMD_CLEAR data */ +}; + +#endif diff --git a/drivers/char/drm/savage_drv.c b/drivers/char/drm/savage_drv.c new file mode 100644 index 000000000000..ac8d270427ca --- /dev/null +++ b/drivers/char/drm/savage_drv.c @@ -0,0 +1,112 @@ +/* savage_drv.c -- Savage driver for Linux + * + * Copyright 2004 Felix Kuehling + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sub license, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NON-INFRINGEMENT. IN NO EVENT SHALL FELIX KUEHLING BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF + * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ */ + +#include +#include "drmP.h" +#include "savage_drm.h" +#include "savage_drv.h" + +#include "drm_pciids.h" + +static int postinit( struct drm_device *dev, unsigned long flags ) +{ + DRM_INFO( "Initialized %s %d.%d.%d %s on minor %d: %s\n", + DRIVER_NAME, + DRIVER_MAJOR, + DRIVER_MINOR, + DRIVER_PATCHLEVEL, + DRIVER_DATE, + dev->primary.minor, + pci_pretty_name(dev->pdev) + ); + return 0; +} + +static int version( drm_version_t *version ) +{ + int len; + + version->version_major = DRIVER_MAJOR; + version->version_minor = DRIVER_MINOR; + version->version_patchlevel = DRIVER_PATCHLEVEL; + DRM_COPY( version->name, DRIVER_NAME ); + DRM_COPY( version->date, DRIVER_DATE ); + DRM_COPY( version->desc, DRIVER_DESC ); + return 0; +} + +static struct pci_device_id pciidlist[] = { + savage_PCI_IDS +}; + +extern drm_ioctl_desc_t savage_ioctls[]; +extern int savage_max_ioctl; + +static struct drm_driver driver = { + .driver_features = + DRIVER_USE_AGP | DRIVER_USE_MTRR | + DRIVER_HAVE_DMA | DRIVER_PCI_DMA, + .dev_priv_size = sizeof(drm_savage_buf_priv_t), + .preinit = savage_preinit, + .postinit = postinit, + .postcleanup = savage_postcleanup, + .reclaim_buffers = savage_reclaim_buffers, + .get_map_ofs = drm_core_get_map_ofs, + .get_reg_ofs = drm_core_get_reg_ofs, + .version = version, + .ioctls = savage_ioctls, + .dma_ioctl = savage_bci_buffers, + .fops = { + .owner = THIS_MODULE, + .open = drm_open, + .release = drm_release, + .ioctl = drm_ioctl, + .mmap = drm_mmap, + .poll = drm_poll, + .fasync = drm_fasync, + }, + .pci_driver = { + .name = DRIVER_NAME, + .id_table = pciidlist, + } +}; + +static int __init savage_init(void) +{ + driver.num_ioctls = savage_max_ioctl; + return drm_init(&driver); +} + +static void __exit savage_exit(void) +{ + drm_exit(&driver); +} + +module_init(savage_init); +module_exit(savage_exit); + +MODULE_AUTHOR( DRIVER_AUTHOR ); +MODULE_DESCRIPTION( DRIVER_DESC ); +MODULE_LICENSE("GPL and additional rights"); diff --git 
a/drivers/char/drm/savage_drv.h b/drivers/char/drm/savage_drv.h new file mode 100644 index 000000000000..a45434944658 --- /dev/null +++ b/drivers/char/drm/savage_drv.h @@ -0,0 +1,579 @@ +/* savage_drv.h -- Private header for the savage driver + * + * Copyright 2004 Felix Kuehling + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sub license, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NON-INFRINGEMENT. IN NO EVENT SHALL FELIX KUEHLING BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF + * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ */ + +#ifndef __SAVAGE_DRV_H__ +#define __SAVAGE_DRV_H__ + +#define DRIVER_AUTHOR "Felix Kuehling" + +#define DRIVER_NAME "savage" +#define DRIVER_DESC "Savage3D/MX/IX, Savage4, SuperSavage, Twister, ProSavage[DDR]" +#define DRIVER_DATE "20050313" + +#define DRIVER_MAJOR 2 +#define DRIVER_MINOR 4 +#define DRIVER_PATCHLEVEL 1 +/* Interface history: + * + * 1.x The DRM driver from the VIA/S3 code drop, basically a dummy + * 2.0 The first real DRM + * 2.1 Scissors registers managed by the DRM, 3D operations clipped by + * cliprects of the cmdbuf ioctl + * 2.2 Implemented SAVAGE_CMD_DMA_IDX and SAVAGE_CMD_VB_IDX + * 2.3 Event counters used by BCI_EVENT_EMIT/WAIT ioctls are now 32 bits + * wide and thus very long lived (unlikely to ever wrap). The size + * in the struct was 32 bits before, but only 16 bits were used + * 2.4 Implemented command DMA. Now drm_savage_init_t.cmd_dma_offset is + * actually used + */ + +typedef struct drm_savage_age { + uint16_t event; + unsigned int wrap; +} drm_savage_age_t; + +typedef struct drm_savage_buf_priv { + struct drm_savage_buf_priv *next; + struct drm_savage_buf_priv *prev; + drm_savage_age_t age; + drm_buf_t *buf; +} drm_savage_buf_priv_t; + +typedef struct drm_savage_dma_page { + drm_savage_age_t age; + unsigned int used, flushed; +} drm_savage_dma_page_t; +#define SAVAGE_DMA_PAGE_SIZE 1024 /* in dwords */ +/* Fake DMA buffer size in bytes. 4 pages. Allows a maximum command + * size of 16kbytes or 4k entries. Minimum requirement would be + * 10kbytes for 255 40-byte vertices in one drawing command. 
*/
+#define SAVAGE_FAKE_DMA_SIZE (SAVAGE_DMA_PAGE_SIZE*4*4)
+
+/* interesting bits of hardware state that are saved in dev_priv */
+typedef union {
+	struct drm_savage_common_state {
+		uint32_t vbaddr;
+	} common;
+	struct {
+		unsigned char pad[sizeof(struct drm_savage_common_state)];
+		uint32_t texctrl, texaddr;
+		uint32_t scstart, new_scstart;
+		uint32_t scend, new_scend;
+	} s3d;
+	struct {
+		unsigned char pad[sizeof(struct drm_savage_common_state)];
+		uint32_t texdescr, texaddr0, texaddr1;
+		uint32_t drawctrl0, new_drawctrl0;
+		uint32_t drawctrl1, new_drawctrl1;
+	} s4;
+} drm_savage_state_t;
+
+/* these chip tags should match the ones in the 2D driver in savage_regs.h. */
+enum savage_family {
+	S3_UNKNOWN = 0,
+	S3_SAVAGE3D,
+	S3_SAVAGE_MX,
+	S3_SAVAGE4,
+	S3_PROSAVAGE,
+	S3_TWISTER,
+	S3_PROSAVAGEDDR,
+	S3_SUPERSAVAGE,
+	S3_SAVAGE2000,
+	S3_LAST
+};
+/* Chip-family predicates. Most expand (chip) more than once, so pass an expression without side effects. */
+#define S3_SAVAGE3D_SERIES(chip)  (((chip)>=S3_SAVAGE3D) && ((chip)<=S3_SAVAGE_MX))
+
+#define S3_SAVAGE4_SERIES(chip)  (((chip)==S3_SAVAGE4)            \
+                                  || ((chip)==S3_PROSAVAGE)       \
+                                  || ((chip)==S3_TWISTER)         \
+                                  || ((chip)==S3_PROSAVAGEDDR))
+
+#define	S3_SAVAGE_MOBILE_SERIES(chip)	(((chip)==S3_SAVAGE_MX) || ((chip)==S3_SUPERSAVAGE))
+
+#define S3_SAVAGE_SERIES(chip)    (((chip)>=S3_SAVAGE3D) && ((chip)<=S3_SAVAGE2000))
+
+#define S3_MOBILE_TWISTER_SERIES(chip)   (((chip)==S3_TWISTER)    \
+                                          ||((chip)==S3_PROSAVAGEDDR))
+
+/* flags */
+#define SAVAGE_IS_AGP 1
+
+typedef struct drm_savage_private {
+	drm_savage_sarea_t *sarea_priv;
+
+	drm_savage_buf_priv_t head, tail;
+
+	/* who am I? 
*/ + enum savage_family chipset; + + unsigned int cob_size; + unsigned int bci_threshold_lo, bci_threshold_hi; + unsigned int dma_type; + + /* frame buffer layout */ + unsigned int fb_bpp; + unsigned int front_offset, front_pitch; + unsigned int back_offset, back_pitch; + unsigned int depth_bpp; + unsigned int depth_offset, depth_pitch; + + /* bitmap descriptors for swap and clear */ + unsigned int front_bd, back_bd, depth_bd; + + /* local textures */ + unsigned int texture_offset; + unsigned int texture_size; + + /* memory regions in physical memory */ + drm_local_map_t *sarea; + drm_local_map_t *mmio; + drm_local_map_t *fb; + drm_local_map_t *aperture; + drm_local_map_t *status; + drm_local_map_t *agp_textures; + drm_local_map_t *cmd_dma; + drm_local_map_t fake_dma; + + struct { + int handle; + unsigned long base, size; + } mtrr[3]; + + /* BCI and status-related stuff */ + volatile uint32_t *status_ptr, *bci_ptr; + uint32_t status_used_mask; + uint16_t event_counter; + unsigned int event_wrap; + + /* Savage4 command DMA */ + drm_savage_dma_page_t *dma_pages; + unsigned int nr_dma_pages, first_dma_page, current_dma_page; + drm_savage_age_t last_dma_age; + + /* saved hw state for global/local check on S3D */ + uint32_t hw_draw_ctrl, hw_zbuf_ctrl; + /* and for scissors (global, so don't emit if not changed) */ + uint32_t hw_scissors_start, hw_scissors_end; + + drm_savage_state_t state; + + /* after emitting a wait cmd Savage3D needs 63 nops before next DMA */ + unsigned int waiting; + + /* config/hardware-dependent function pointers */ + int (*wait_fifo)(struct drm_savage_private *dev_priv, unsigned int n); + int (*wait_evnt)(struct drm_savage_private *dev_priv, uint16_t e); + /* Err, there is a macro wait_event in include/linux/wait.h. + * Avoid unwanted macro expansion. 
*/ + void (*emit_clip_rect)(struct drm_savage_private *dev_priv, + drm_clip_rect_t *pbox); + void (*dma_flush)(struct drm_savage_private *dev_priv); +} drm_savage_private_t; + +/* ioctls */ +extern int savage_bci_cmdbuf(DRM_IOCTL_ARGS); +extern int savage_bci_buffers(DRM_IOCTL_ARGS); + +/* BCI functions */ +extern uint16_t savage_bci_emit_event(drm_savage_private_t *dev_priv, + unsigned int flags); +extern void savage_freelist_put(drm_device_t *dev, drm_buf_t *buf); +extern void savage_dma_reset(drm_savage_private_t *dev_priv); +extern void savage_dma_wait(drm_savage_private_t *dev_priv, unsigned int page); +extern uint32_t *savage_dma_alloc(drm_savage_private_t *dev_priv, + unsigned int n); +extern int savage_preinit(drm_device_t *dev, unsigned long chipset); +extern int savage_postcleanup(drm_device_t *dev); +extern int savage_do_cleanup_bci(drm_device_t *dev); +extern void savage_reclaim_buffers(drm_device_t *dev, DRMFILE filp); + +/* state functions */ +extern void savage_emit_clip_rect_s3d(drm_savage_private_t *dev_priv, + drm_clip_rect_t *pbox); +extern void savage_emit_clip_rect_s4(drm_savage_private_t *dev_priv, + drm_clip_rect_t *pbox); + +#define SAVAGE_FB_SIZE_S3 0x01000000 /* 16MB */ +#define SAVAGE_FB_SIZE_S4 0x02000000 /* 32MB */ +#define SAVAGE_MMIO_SIZE 0x00080000 /* 512kB */ +#define SAVAGE_APERTURE_OFFSET 0x02000000 /* 32MB */ +#define SAVAGE_APERTURE_SIZE 0x05000000 /* 5 tiled surfaces, 16MB each */ + +#define SAVAGE_BCI_OFFSET 0x00010000 /* offset of the BCI region + * inside the MMIO region */ +#define SAVAGE_BCI_FIFO_SIZE 32 /* number of entries in on-chip + * BCI FIFO */ + +/* + * MMIO registers + */ +#define SAVAGE_STATUS_WORD0 0x48C00 +#define SAVAGE_STATUS_WORD1 0x48C04 +#define SAVAGE_ALT_STATUS_WORD0 0x48C60 + +#define SAVAGE_FIFO_USED_MASK_S3D 0x0001ffff +#define SAVAGE_FIFO_USED_MASK_S4 0x001fffff + +/* Copied from savage_bci.h in the 2D driver with some renaming. 
*/ + +/* Bitmap descriptors */ +#define SAVAGE_BD_STRIDE_SHIFT 0 +#define SAVAGE_BD_BPP_SHIFT 16 +#define SAVAGE_BD_TILE_SHIFT 24 +#define SAVAGE_BD_BW_DISABLE (1<<28) +/* common: */ +#define SAVAGE_BD_TILE_LINEAR 0 +/* savage4, MX, IX, 3D */ +#define SAVAGE_BD_TILE_16BPP 2 +#define SAVAGE_BD_TILE_32BPP 3 +/* twister, prosavage, DDR, supersavage, 2000 */ +#define SAVAGE_BD_TILE_DEST 1 +#define SAVAGE_BD_TILE_TEXTURE 2 +/* GBD - BCI enable */ +/* savage4, MX, IX, 3D */ +#define SAVAGE_GBD_BCI_ENABLE 8 +/* twister, prosavage, DDR, supersavage, 2000 */ +#define SAVAGE_GBD_BCI_ENABLE_TWISTER 0 + +#define SAVAGE_GBD_BIG_ENDIAN 4 +#define SAVAGE_GBD_LITTLE_ENDIAN 0 +#define SAVAGE_GBD_64 1 + +/* Global Bitmap Descriptor */ +#define SAVAGE_BCI_GLB_BD_LOW 0x8168 +#define SAVAGE_BCI_GLB_BD_HIGH 0x816C + +/* + * BCI registers + */ +/* Savage4/Twister/ProSavage 3D registers */ +#define SAVAGE_DRAWLOCALCTRL_S4 0x1e +#define SAVAGE_TEXPALADDR_S4 0x1f +#define SAVAGE_TEXCTRL0_S4 0x20 +#define SAVAGE_TEXCTRL1_S4 0x21 +#define SAVAGE_TEXADDR0_S4 0x22 +#define SAVAGE_TEXADDR1_S4 0x23 +#define SAVAGE_TEXBLEND0_S4 0x24 +#define SAVAGE_TEXBLEND1_S4 0x25 +#define SAVAGE_TEXXPRCLR_S4 0x26 /* never used */ +#define SAVAGE_TEXDESCR_S4 0x27 +#define SAVAGE_FOGTABLE_S4 0x28 +#define SAVAGE_FOGCTRL_S4 0x30 +#define SAVAGE_STENCILCTRL_S4 0x31 +#define SAVAGE_ZBUFCTRL_S4 0x32 +#define SAVAGE_ZBUFOFF_S4 0x33 +#define SAVAGE_DESTCTRL_S4 0x34 +#define SAVAGE_DRAWCTRL0_S4 0x35 +#define SAVAGE_DRAWCTRL1_S4 0x36 +#define SAVAGE_ZWATERMARK_S4 0x37 +#define SAVAGE_DESTTEXRWWATERMARK_S4 0x38 +#define SAVAGE_TEXBLENDCOLOR_S4 0x39 +/* Savage3D/MX/IX 3D registers */ +#define SAVAGE_TEXPALADDR_S3D 0x18 +#define SAVAGE_TEXXPRCLR_S3D 0x19 /* never used */ +#define SAVAGE_TEXADDR_S3D 0x1A +#define SAVAGE_TEXDESCR_S3D 0x1B +#define SAVAGE_TEXCTRL_S3D 0x1C +#define SAVAGE_FOGTABLE_S3D 0x20 +#define SAVAGE_FOGCTRL_S3D 0x30 +#define SAVAGE_DRAWCTRL_S3D 0x31 +#define SAVAGE_ZBUFCTRL_S3D 0x32 +#define 
SAVAGE_ZBUFOFF_S3D 0x33 +#define SAVAGE_DESTCTRL_S3D 0x34 +#define SAVAGE_SCSTART_S3D 0x35 +#define SAVAGE_SCEND_S3D 0x36 +#define SAVAGE_ZWATERMARK_S3D 0x37 +#define SAVAGE_DESTTEXRWWATERMARK_S3D 0x38 +/* common stuff */ +#define SAVAGE_VERTBUFADDR 0x3e +#define SAVAGE_BITPLANEWTMASK 0xd7 +#define SAVAGE_DMABUFADDR 0x51 + +/* texture enable bits (needed for tex addr checking) */ +#define SAVAGE_TEXCTRL_TEXEN_MASK 0x00010000 /* S3D */ +#define SAVAGE_TEXDESCR_TEX0EN_MASK 0x02000000 /* S4 */ +#define SAVAGE_TEXDESCR_TEX1EN_MASK 0x04000000 /* S4 */ + +/* Global fields in Savage4/Twister/ProSavage 3D registers: + * + * All texture registers and DrawLocalCtrl are local. All other + * registers are global. */ + +/* Global fields in Savage3D/MX/IX 3D registers: + * + * All texture registers are local. DrawCtrl and ZBufCtrl are + * partially local. All other registers are global. + * + * DrawCtrl global fields: cullMode, alphaTestCmpFunc, alphaTestEn, alphaRefVal + * ZBufCtrl global fields: zCmpFunc, zBufEn + */ +#define SAVAGE_DRAWCTRL_S3D_GLOBAL 0x03f3c00c +#define SAVAGE_ZBUFCTRL_S3D_GLOBAL 0x00000027 + +/* Masks for scissor bits (drawCtrl[01] on s4, scissorStart/End on s3d) + */ +#define SAVAGE_SCISSOR_MASK_S4 0x00fff7ff +#define SAVAGE_SCISSOR_MASK_S3D 0x07ff07ff + +/* + * BCI commands + */ +#define BCI_CMD_NOP 0x40000000 +#define BCI_CMD_RECT 0x48000000 +#define BCI_CMD_RECT_XP 0x01000000 +#define BCI_CMD_RECT_YP 0x02000000 +#define BCI_CMD_SCANLINE 0x50000000 +#define BCI_CMD_LINE 0x5C000000 +#define BCI_CMD_LINE_LAST_PIXEL 0x58000000 +#define BCI_CMD_BYTE_TEXT 0x63000000 +#define BCI_CMD_NT_BYTE_TEXT 0x67000000 +#define BCI_CMD_BIT_TEXT 0x6C000000 +#define BCI_CMD_GET_ROP(cmd) (((cmd) >> 16) & 0xFF) +#define BCI_CMD_SET_ROP(cmd, rop) ((cmd) |= ((rop & 0xFF) << 16)) +#define BCI_CMD_SEND_COLOR 0x00008000 + +#define BCI_CMD_CLIP_NONE 0x00000000 +#define BCI_CMD_CLIP_CURRENT 0x00002000 +#define BCI_CMD_CLIP_LR 0x00004000 +#define BCI_CMD_CLIP_NEW 0x00006000 + 
+#define BCI_CMD_DEST_GBD                 0x00000000
+#define BCI_CMD_DEST_PBD                 0x00000800
+#define BCI_CMD_DEST_PBD_NEW             0x00000C00
+#define BCI_CMD_DEST_SBD                 0x00001000
+#define BCI_CMD_DEST_SBD_NEW             0x00001400
+
+#define BCI_CMD_SRC_TRANSPARENT          0x00000200
+#define BCI_CMD_SRC_SOLID                0x00000000
+#define BCI_CMD_SRC_GBD                  0x00000020
+#define BCI_CMD_SRC_COLOR                0x00000040
+#define BCI_CMD_SRC_MONO                 0x00000060
+#define BCI_CMD_SRC_PBD_COLOR            0x00000080
+#define BCI_CMD_SRC_PBD_MONO             0x000000A0
+#define BCI_CMD_SRC_PBD_COLOR_NEW        0x000000C0
+#define BCI_CMD_SRC_PBD_MONO_NEW         0x000000E0
+#define BCI_CMD_SRC_SBD_COLOR            0x00000100
+#define BCI_CMD_SRC_SBD_MONO             0x00000120
+#define BCI_CMD_SRC_SBD_COLOR_NEW        0x00000140
+#define BCI_CMD_SRC_SBD_MONO_NEW         0x00000160
+
+#define BCI_CMD_PAT_TRANSPARENT          0x00000010
+#define BCI_CMD_PAT_NONE                 0x00000000
+#define BCI_CMD_PAT_COLOR                0x00000002
+#define BCI_CMD_PAT_MONO                 0x00000003
+#define BCI_CMD_PAT_PBD_COLOR            0x00000004
+#define BCI_CMD_PAT_PBD_MONO             0x00000005
+#define BCI_CMD_PAT_PBD_COLOR_NEW        0x00000006
+#define BCI_CMD_PAT_PBD_MONO_NEW         0x00000007
+#define BCI_CMD_PAT_SBD_COLOR            0x00000008
+#define BCI_CMD_PAT_SBD_MONO             0x00000009
+#define BCI_CMD_PAT_SBD_COLOR_NEW        0x0000000A
+#define BCI_CMD_PAT_SBD_MONO_NEW         0x0000000B
+
+#define BCI_BD_BW_DISABLE                0x10000000
+#define BCI_BD_TILE_MASK                 0x03000000
+#define BCI_BD_TILE_NONE                 0x00000000
+#define BCI_BD_TILE_16                   0x02000000
+#define BCI_BD_TILE_32                   0x03000000
+#define BCI_BD_GET_BPP(bd)               (((bd) >> 16) & 0xFF)
+#define BCI_BD_SET_BPP(bd, bpp)          ((bd) |= (((bpp) & 0xFF) << 16))
+#define BCI_BD_GET_STRIDE(bd)            ((bd) & 0xFFFF)
+#define BCI_BD_SET_STRIDE(bd, st)        ((bd) |= ((st) & 0xFFFF))
+
+#define BCI_CMD_SET_REGISTER            0x96000000
+
+#define BCI_CMD_WAIT                    0xC0000000
+#define BCI_CMD_WAIT_3D                 0x00010000
+#define BCI_CMD_WAIT_2D                 0x00020000
+
+#define BCI_CMD_UPDATE_EVENT_TAG        0x98000000
+
+#define BCI_CMD_DRAW_PRIM               0x80000000
+#define BCI_CMD_DRAW_INDEXED_PRIM       0x88000000
+#define BCI_CMD_DRAW_CONT               0x01000000
+#define BCI_CMD_DRAW_TRILIST            0x00000000
+#define BCI_CMD_DRAW_TRISTRIP           0x02000000
+#define BCI_CMD_DRAW_TRIFAN             0x04000000
+#define BCI_CMD_DRAW_SKIPFLAGS          0x000000ff
+#define BCI_CMD_DRAW_NO_Z		0x00000001
+#define BCI_CMD_DRAW_NO_W		0x00000002
+#define BCI_CMD_DRAW_NO_CD		0x00000004
+#define BCI_CMD_DRAW_NO_CS		0x00000008
+#define BCI_CMD_DRAW_NO_U0		0x00000010
+#define BCI_CMD_DRAW_NO_V0		0x00000020
+#define BCI_CMD_DRAW_NO_UV0		0x00000030
+#define BCI_CMD_DRAW_NO_U1		0x00000040
+#define BCI_CMD_DRAW_NO_V1		0x00000080
+#define BCI_CMD_DRAW_NO_UV1		0x000000c0
+
+#define BCI_CMD_DMA			0xa8000000
+/* Pack two 12-bit fields into one dword: first macro argument in the low half, second in the high half (BCI_CLIP_TL/BR take the high-half value first). */
+#define BCI_W_H(w, h)                ((((h) << 16) | (w)) & 0x0FFF0FFF)
+#define BCI_X_Y(x, y)                ((((y) << 16) | (x)) & 0x0FFF0FFF)
+#define BCI_X_W(x, w)                ((((w) << 16) | (x)) & 0x0FFF0FFF)
+#define BCI_CLIP_LR(l, r)            ((((r) << 16) | (l)) & 0x0FFF0FFF)
+#define BCI_CLIP_TL(t, l)            ((((t) << 16) | (l)) & 0x0FFF0FFF)
+#define BCI_CLIP_BR(b, r)            ((((b) << 16) | (r)) & 0x0FFF0FFF)
+
+#define BCI_LINE_X_Y(x, y)           (((y) << 16) | ((x) & 0xFFFF))
+#define BCI_LINE_STEPS(diag, axi)    (((axi) << 16) | ((diag) & 0xFFFF))
+#define BCI_LINE_MISC(maj, ym, xp, yp, err) \
+	(((maj) & 0x1FFF) | \
+	((ym) ? 1<<13 : 0) | \
+	((xp) ? 1<<14 : 0) | \
+	((yp) ? 1<<15 : 0) | \
+	((err) << 16))
+
+/*
+ * common commands
+ */
+#define BCI_SET_REGISTERS( first, n )			\
+	BCI_WRITE(BCI_CMD_SET_REGISTER |		\
+		  ((uint32_t)(n) & 0xff) << 16 |	\
+		  ((uint32_t)(first) & 0xffff))
+#define DMA_SET_REGISTERS( first, n )			\
+	DMA_WRITE(BCI_CMD_SET_REGISTER |		\
+		  ((uint32_t)(n) & 0xff) << 16 |	\
+		  ((uint32_t)(first) & 0xffff))
+
+#define BCI_DRAW_PRIMITIVE(n, type, skip)         \
+        BCI_WRITE(BCI_CMD_DRAW_PRIM | (type) | (skip) | \
+		  ((n) << 16))
+#define DMA_DRAW_PRIMITIVE(n, type, skip)         \
+        DMA_WRITE(BCI_CMD_DRAW_PRIM | (type) | (skip) | \
+		  ((n) << 16))
+
+#define BCI_DRAW_INDICES_S3D(n, type, i0)         \
+        BCI_WRITE(BCI_CMD_DRAW_INDEXED_PRIM | (type) |  \
+		  ((n) << 16) | (i0))
+
+#define BCI_DRAW_INDICES_S4(n, type, skip)        \
+        BCI_WRITE(BCI_CMD_DRAW_INDEXED_PRIM | (type) |  \
+                  (skip) | ((n) << 16))
+
+#define BCI_DMA(n)	\
+	BCI_WRITE(BCI_CMD_DMA | (((n) >> 1) - 1))
+
+/*
+ * access to MMIO
+ */
+#define SAVAGE_READ(reg)	DRM_READ32(  dev_priv->mmio, (reg) )
+#define SAVAGE_WRITE(reg)	DRM_WRITE32( dev_priv->mmio, (reg) ) /* NOTE(review): passes no value to store -- confirm against DRM_WRITE32's parameter list */
+
+/*
+ * access to the burst command interface (BCI)
+ */
+#define SAVAGE_BCI_DEBUG 1
+
+#define BCI_LOCALS    volatile uint32_t *bci_ptr;
+
+#define BEGIN_BCI( n ) do {			\
+	dev_priv->wait_fifo(dev_priv, (n));	\
+	bci_ptr = dev_priv->bci_ptr;		\
+} while(0)
+
+#define BCI_WRITE( val ) *bci_ptr++ = (uint32_t)(val)
+/* Copies n dwords one by one from src into the BCI; uses the _UNCHECKED accessor, so presumably the caller validates the range first -- confirm. */
+#define BCI_COPY_FROM_USER(src,n) do {				\
+    unsigned int i;						\
+    for (i = 0; i < (n); ++i) {					\
+	uint32_t val;						\
+	DRM_GET_USER_UNCHECKED(val, &((uint32_t*)(src))[i]);	\
+	BCI_WRITE(val);						\
+    }								\
+} while(0)
+
+/*
+ * command DMA support
+ */
+#define SAVAGE_DMA_DEBUG 1
+
+#define DMA_LOCALS   uint32_t *dma_ptr;
+
+#define BEGIN_DMA( n ) do {						\
+	unsigned int cur = dev_priv->current_dma_page;			\
+	unsigned int rest = SAVAGE_DMA_PAGE_SIZE -			\
+		dev_priv->dma_pages[cur].used;				\
+	if ((n) > rest) {						\
+		dma_ptr = savage_dma_alloc(dev_priv, (n));		\
+	} else { /* fast path for small allocations */			\
+		dma_ptr = (uint32_t *)dev_priv->cmd_dma->handle +	\
+			cur * SAVAGE_DMA_PAGE_SIZE +			\
+			dev_priv->dma_pages[cur].used;			\
+		if (dev_priv->dma_pages[cur].used == 0)			\
+			savage_dma_wait(dev_priv, cur);			\
+		dev_priv->dma_pages[cur].used += (n);			\
+	}								\
+} while(0)
+
+#define DMA_WRITE( val ) *dma_ptr++ = (uint32_t)(val)
+/* Copies n dwords from src to dma_ptr and advances dma_ptr by n. */
+#define DMA_COPY_FROM_USER(src,n) do {				\
+	DRM_COPY_FROM_USER_UNCHECKED(dma_ptr, (src), (n)*4);	\
+	dma_ptr += (n);						\
+} while(0)
+
+#if SAVAGE_DMA_DEBUG
+#define DMA_COMMIT() do {						\
+	unsigned int cur = dev_priv->current_dma_page;			\
+	uint32_t *expected = (uint32_t *)dev_priv->cmd_dma->handle +	\
+			cur * SAVAGE_DMA_PAGE_SIZE +			\
+			dev_priv->dma_pages[cur].used;			\
+	if (dma_ptr != expected) {					\
+		DRM_ERROR("DMA allocation and use don't match: "	\
+			  "%p != %p\n", expected, dma_ptr);		\
+		savage_dma_reset(dev_priv);				\
+	}								\
+} while(0)
+#else
+#define DMA_COMMIT() do {/* nothing */} while(0)
+#endif
+
+#define DMA_FLUSH() dev_priv->dma_flush(dev_priv)
+
+/* Buffer aging via event tag
+ */
+
+#define UPDATE_EVENT_COUNTER( ) do {			\
+	if (dev_priv->status_ptr) {			\
+		uint16_t count;				\
+		/* coordinate with Xserver */		\
+		count = dev_priv->status_ptr[1023];	\
+		if (count < dev_priv->event_counter)	\
+			dev_priv->event_wrap++;		\
+		dev_priv->event_counter = count;	\
+	}						\
+} while(0)
+
+#define SET_AGE( age, e, w ) do {	\
+	(age)->event = (e);		\
+	(age)->wrap = (w);		\
+} while(0)
+
+#define TEST_AGE( age, e, w )				\
+	( (age)->wrap < (w) || ( (age)->wrap == (w) && (age)->event <= (e) ) )
+
+#endif /* __SAVAGE_DRV_H__ */
diff --git a/drivers/char/drm/savage_state.c b/drivers/char/drm/savage_state.c
new file mode 100644
index 000000000000..475695a00083
--- /dev/null
+++ b/drivers/char/drm/savage_state.c
@@ -0,0 +1,1146 @@
+/* savage_state.c -- State and drawing support for Savage
+ *
+ * Copyright 2004 Felix Kuehling
+ * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sub license, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NON-INFRINGEMENT. IN NO EVENT SHALL FELIX KUEHLING BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF + * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ */
+#include "drmP.h"
+#include "savage_drm.h"
+#include "savage_drv.h"
+/* Merge pbox into the pending S3D scissor start/end values and emit both registers only when they differ from the last ones emitted. */
+void savage_emit_clip_rect_s3d(drm_savage_private_t *dev_priv,
+			       drm_clip_rect_t *pbox)
+{
+	uint32_t scstart = dev_priv->state.s3d.new_scstart;
+	uint32_t scend   = dev_priv->state.s3d.new_scend;
+	scstart = (scstart & ~SAVAGE_SCISSOR_MASK_S3D) |
+		((uint32_t)pbox->x1 & 0x000007ff) |
+		(((uint32_t)pbox->y1 << 16) & 0x07ff0000);
+	scend   = (scend   & ~SAVAGE_SCISSOR_MASK_S3D) |
+		(((uint32_t)pbox->x2-1) & 0x000007ff) |
+		((((uint32_t)pbox->y2-1) << 16) & 0x07ff0000);
+	if (scstart != dev_priv->state.s3d.scstart ||
+	    scend   != dev_priv->state.s3d.scend) {
+		DMA_LOCALS;
+		BEGIN_DMA(4);
+		DMA_WRITE(BCI_CMD_WAIT|BCI_CMD_WAIT_3D);
+		DMA_SET_REGISTERS(SAVAGE_SCSTART_S3D, 2);
+		DMA_WRITE(scstart);
+		DMA_WRITE(scend);
+		dev_priv->state.s3d.scstart = scstart;
+		dev_priv->state.s3d.scend   = scend;
+		dev_priv->waiting = 1;
+		DMA_COMMIT();
+	}
+}
+/* Same as the S3D variant, but the scissor bits live in drawctrl0/drawctrl1 with y shifted by 12. */
+void savage_emit_clip_rect_s4(drm_savage_private_t *dev_priv,
+			      drm_clip_rect_t *pbox)
+{
+	uint32_t drawctrl0 = dev_priv->state.s4.new_drawctrl0;
+	uint32_t drawctrl1 = dev_priv->state.s4.new_drawctrl1;
+	drawctrl0 = (drawctrl0 & ~SAVAGE_SCISSOR_MASK_S4) |
+		((uint32_t)pbox->x1 & 0x000007ff) |
+		(((uint32_t)pbox->y1 << 12) & 0x00fff000);
+	drawctrl1 = (drawctrl1 & ~SAVAGE_SCISSOR_MASK_S4) |
+		(((uint32_t)pbox->x2-1) & 0x000007ff) |
+		((((uint32_t)pbox->y2-1) << 12) & 0x00fff000);
+	if (drawctrl0 != dev_priv->state.s4.drawctrl0 ||
+	    drawctrl1 != dev_priv->state.s4.drawctrl1) {
+		DMA_LOCALS;
+		BEGIN_DMA(4);
+		DMA_WRITE(BCI_CMD_WAIT|BCI_CMD_WAIT_3D);
+		DMA_SET_REGISTERS(SAVAGE_DRAWCTRL0_S4, 2);
+		DMA_WRITE(drawctrl0);
+		DMA_WRITE(drawctrl1);
+		dev_priv->state.s4.drawctrl0 = drawctrl0;
+		dev_priv->state.s4.drawctrl1 = drawctrl1;
+		dev_priv->waiting = 1;
+		DMA_COMMIT();
+	}
+}
+/* Check a texture address: (addr & 6) must be 2, bit 0 selects local (0) or AGP (1), and addr with its low 3 bits cleared must lie in that region. */
+static int savage_verify_texaddr(drm_savage_private_t *dev_priv, int unit,
+				 uint32_t addr)
+{
+	if ((addr & 6) != 2) { /* reserved bits */
+		DRM_ERROR("bad texAddr%d %08x (reserved bits)\n", unit, addr);
+		return DRM_ERR(EINVAL);
+	}
+	if (!(addr & 1)) { /* local */
+		addr &= ~7;
+		if (addr <  dev_priv->texture_offset ||
+		    addr >= dev_priv->texture_offset+dev_priv->texture_size) {
+			DRM_ERROR("bad texAddr%d %08x (local addr out of range)\n",
+				  unit, addr);
+			return DRM_ERR(EINVAL);
+		}
+	} else { /* AGP */
+		if (!dev_priv->agp_textures) {
+			DRM_ERROR("bad texAddr%d %08x (AGP not available)\n",
+				  unit, addr);
+			return DRM_ERR(EINVAL);
+		}
+		addr &= ~7;
+		if (addr < dev_priv->agp_textures->offset ||
+		    addr >= (dev_priv->agp_textures->offset +
+			     dev_priv->agp_textures->size)) {
+			DRM_ERROR("bad texAddr%d %08x (AGP addr out of range)\n",
+				  unit, addr);
+			return DRM_ERR(EINVAL);
+		}
+	}
+	return 0;
+}
+/* Capture register reg from the user array when it lies in [start, start+count); SAVE_STATE expands to a bare if, so use it only as a full statement. */
+#define SAVE_STATE(reg,where)			\
+	if(start <= reg && start+count > reg)	\
+		DRM_GET_USER_UNCHECKED(dev_priv->state.where, &regs[reg-start])
+#define SAVE_STATE_MASK(reg,where,mask) do {			\
+	if(start <= reg && start+count > reg) {			\
+		uint32_t tmp;					\
+		DRM_GET_USER_UNCHECKED(tmp, &regs[reg-start]);	\
+		dev_priv->state.where = (tmp & (mask)) |	\
+			(dev_priv->state.where & ~(mask));	\
+	}							\
+} while (0)
+static int savage_verify_state_s3d(drm_savage_private_t *dev_priv,
+				   unsigned int start, unsigned int count,
+				   const uint32_t __user *regs)
+{
+	if (start < SAVAGE_TEXPALADDR_S3D ||
+	    start+count-1 > SAVAGE_DESTTEXRWWATERMARK_S3D) {
+		DRM_ERROR("invalid register range (0x%04x-0x%04x)\n",
+			  start, start+count-1);
+		return DRM_ERR(EINVAL);
+	}
+
+	SAVE_STATE_MASK(SAVAGE_SCSTART_S3D, s3d.new_scstart,
+			~SAVAGE_SCISSOR_MASK_S3D);
+	SAVE_STATE_MASK(SAVAGE_SCEND_S3D, s3d.new_scend,
+			~SAVAGE_SCISSOR_MASK_S3D);
+
+	/* if any texture regs were changed ... */
+	if (start <= SAVAGE_TEXCTRL_S3D &&
+	    start+count > SAVAGE_TEXPALADDR_S3D) {
+		/* ... 
check texture state */ + SAVE_STATE(SAVAGE_TEXCTRL_S3D, s3d.texctrl); + SAVE_STATE(SAVAGE_TEXADDR_S3D, s3d.texaddr); + if (dev_priv->state.s3d.texctrl & SAVAGE_TEXCTRL_TEXEN_MASK) + return savage_verify_texaddr( + dev_priv, 0, dev_priv->state.s3d.texaddr); + } + + return 0; +} + +static int savage_verify_state_s4(drm_savage_private_t *dev_priv, + unsigned int start, unsigned int count, + const uint32_t __user *regs) +{ + int ret = 0; + + if (start < SAVAGE_DRAWLOCALCTRL_S4 || + start+count-1 > SAVAGE_TEXBLENDCOLOR_S4) { + DRM_ERROR("invalid register range (0x%04x-0x%04x)\n", + start, start+count-1); + return DRM_ERR(EINVAL); + } + + SAVE_STATE_MASK(SAVAGE_DRAWCTRL0_S4, s4.new_drawctrl0, + ~SAVAGE_SCISSOR_MASK_S4); + SAVE_STATE_MASK(SAVAGE_DRAWCTRL1_S4, s4.new_drawctrl1, + ~SAVAGE_SCISSOR_MASK_S4); + + /* if any texture regs were changed ... */ + if (start <= SAVAGE_TEXDESCR_S4 && + start+count > SAVAGE_TEXPALADDR_S4) { + /* ... check texture state */ + SAVE_STATE(SAVAGE_TEXDESCR_S4, s4.texdescr); + SAVE_STATE(SAVAGE_TEXADDR0_S4, s4.texaddr0); + SAVE_STATE(SAVAGE_TEXADDR1_S4, s4.texaddr1); + if (dev_priv->state.s4.texdescr & SAVAGE_TEXDESCR_TEX0EN_MASK) + ret |= savage_verify_texaddr( + dev_priv, 0, dev_priv->state.s4.texaddr0); + if (dev_priv->state.s4.texdescr & SAVAGE_TEXDESCR_TEX1EN_MASK) + ret |= savage_verify_texaddr( + dev_priv, 1, dev_priv->state.s4.texaddr1); + } + + return ret; +} +#undef SAVE_STATE +#undef SAVE_STATE_MASK + +static int savage_dispatch_state(drm_savage_private_t *dev_priv, + const drm_savage_cmd_header_t *cmd_header, + const uint32_t __user *regs) +{ + unsigned int count = cmd_header->state.count; + unsigned int start = cmd_header->state.start; + unsigned int count2 = 0; + unsigned int bci_size; + int ret; + DMA_LOCALS; + + if (!count) + return 0; + + if (DRM_VERIFYAREA_READ(regs, count*4)) + return DRM_ERR(EFAULT); + + if (S3_SAVAGE3D_SERIES(dev_priv->chipset)) { + ret = savage_verify_state_s3d(dev_priv, start, count, regs); + if (ret != 
0) + return ret; + /* scissor regs are emitted in savage_dispatch_draw */ + if (start < SAVAGE_SCSTART_S3D) { + if (start+count > SAVAGE_SCEND_S3D+1) + count2 = count - (SAVAGE_SCEND_S3D+1 - start); + if (start+count > SAVAGE_SCSTART_S3D) + count = SAVAGE_SCSTART_S3D - start; + } else if (start <= SAVAGE_SCEND_S3D) { + if (start+count > SAVAGE_SCEND_S3D+1) { + count -= SAVAGE_SCEND_S3D+1 - start; + start = SAVAGE_SCEND_S3D+1; + } else + return 0; + } + } else { + ret = savage_verify_state_s4(dev_priv, start, count, regs); + if (ret != 0) + return ret; + /* scissor regs are emitted in savage_dispatch_draw */ + if (start < SAVAGE_DRAWCTRL0_S4) { + if (start+count > SAVAGE_DRAWCTRL1_S4+1) + count2 = count - (SAVAGE_DRAWCTRL1_S4+1 - start); + if (start+count > SAVAGE_DRAWCTRL0_S4) + count = SAVAGE_DRAWCTRL0_S4 - start; + } else if (start <= SAVAGE_DRAWCTRL1_S4) { + if (start+count > SAVAGE_DRAWCTRL1_S4+1) { + count -= SAVAGE_DRAWCTRL1_S4+1 - start; + start = SAVAGE_DRAWCTRL1_S4+1; + } else + return 0; + } + } + + bci_size = count + (count+254)/255 + count2 + (count2+254)/255; + + if (cmd_header->state.global) { + BEGIN_DMA(bci_size+1); + DMA_WRITE(BCI_CMD_WAIT | BCI_CMD_WAIT_3D); + dev_priv->waiting = 1; + } else { + BEGIN_DMA(bci_size); + } + + do { + while (count > 0) { + unsigned int n = count < 255 ? 
count : 255; + DMA_SET_REGISTERS(start, n); + DMA_COPY_FROM_USER(regs, n); + count -= n; + start += n; + regs += n; + } + start += 2; + regs += 2; + count = count2; + count2 = 0; + } while (count); + + DMA_COMMIT(); + + return 0; +} + +static int savage_dispatch_dma_prim(drm_savage_private_t *dev_priv, + const drm_savage_cmd_header_t *cmd_header, + const drm_buf_t *dmabuf) +{ + unsigned char reorder = 0; + unsigned int prim = cmd_header->prim.prim; + unsigned int skip = cmd_header->prim.skip; + unsigned int n = cmd_header->prim.count; + unsigned int start = cmd_header->prim.start; + unsigned int i; + BCI_LOCALS; + + if (!dmabuf) { + DRM_ERROR("called without dma buffers!\n"); + return DRM_ERR(EINVAL); + } + + if (!n) + return 0; + + switch (prim) { + case SAVAGE_PRIM_TRILIST_201: + reorder = 1; + prim = SAVAGE_PRIM_TRILIST; + case SAVAGE_PRIM_TRILIST: + if (n % 3 != 0) { + DRM_ERROR("wrong number of vertices %u in TRILIST\n", + n); + return DRM_ERR(EINVAL); + } + break; + case SAVAGE_PRIM_TRISTRIP: + case SAVAGE_PRIM_TRIFAN: + if (n < 3) { + DRM_ERROR("wrong number of vertices %u in TRIFAN/STRIP\n", + n); + return DRM_ERR(EINVAL); + } + break; + default: + DRM_ERROR("invalid primitive type %u\n", prim); + return DRM_ERR(EINVAL); + } + + if (S3_SAVAGE3D_SERIES(dev_priv->chipset)) { + if (skip != 0) { + DRM_ERROR("invalid skip flags 0x%04x for DMA\n", + skip); + return DRM_ERR(EINVAL); + } + } else { + unsigned int size = 10 - (skip & 1) - (skip >> 1 & 1) - + (skip >> 2 & 1) - (skip >> 3 & 1) - (skip >> 4 & 1) - + (skip >> 5 & 1) - (skip >> 6 & 1) - (skip >> 7 & 1); + if (skip > SAVAGE_SKIP_ALL_S4 || size != 8) { + DRM_ERROR("invalid skip flags 0x%04x for DMA\n", + skip); + return DRM_ERR(EINVAL); + } + if (reorder) { + DRM_ERROR("TRILIST_201 used on Savage4 hardware\n"); + return DRM_ERR(EINVAL); + } + } + + if (start + n > dmabuf->total/32) { + DRM_ERROR("vertex indices (%u-%u) out of range (0-%u)\n", + start, start + n - 1, dmabuf->total/32); + return 
DRM_ERR(EINVAL); + } + + /* Vertex DMA doesn't work with command DMA at the same time, + * so we use BCI_... to submit commands here. Flush buffered + * faked DMA first. */ + DMA_FLUSH(); + + if (dmabuf->bus_address != dev_priv->state.common.vbaddr) { + BEGIN_BCI(2); + BCI_SET_REGISTERS(SAVAGE_VERTBUFADDR, 1); + BCI_WRITE(dmabuf->bus_address | dev_priv->dma_type); + dev_priv->state.common.vbaddr = dmabuf->bus_address; + } + if (S3_SAVAGE3D_SERIES(dev_priv->chipset) && dev_priv->waiting) { + /* Workaround for what looks like a hardware bug. If a + * WAIT_3D_IDLE was emitted some time before the + * indexed drawing command then the engine will lock + * up. There are two known workarounds: + * WAIT_IDLE_EMPTY or emit at least 63 NOPs. */ + BEGIN_BCI(63); + for (i = 0; i < 63; ++i) + BCI_WRITE(BCI_CMD_WAIT); + dev_priv->waiting = 0; + } + + prim <<= 25; + while (n != 0) { + /* Can emit up to 255 indices (85 triangles) at once. */ + unsigned int count = n > 255 ? 255 : n; + if (reorder) { + /* Need to reorder indices for correct flat + * shading while preserving the clock sense + * for correct culling. Only on Savage3D. 
*/ + int reorder[3] = {-1, -1, -1}; + reorder[start%3] = 2; + + BEGIN_BCI((count+1+1)/2); + BCI_DRAW_INDICES_S3D(count, prim, start+2); + + for (i = start+1; i+1 < start+count; i += 2) + BCI_WRITE((i + reorder[i % 3]) | + ((i+1 + reorder[(i+1) % 3]) << 16)); + if (i < start+count) + BCI_WRITE(i + reorder[i%3]); + } else if (S3_SAVAGE3D_SERIES(dev_priv->chipset)) { + BEGIN_BCI((count+1+1)/2); + BCI_DRAW_INDICES_S3D(count, prim, start); + + for (i = start+1; i+1 < start+count; i += 2) + BCI_WRITE(i | ((i+1) << 16)); + if (i < start+count) + BCI_WRITE(i); + } else { + BEGIN_BCI((count+2+1)/2); + BCI_DRAW_INDICES_S4(count, prim, skip); + + for (i = start; i+1 < start+count; i += 2) + BCI_WRITE(i | ((i+1) << 16)); + if (i < start+count) + BCI_WRITE(i); + } + + start += count; + n -= count; + + prim |= BCI_CMD_DRAW_CONT; + } + + return 0; +} + +static int savage_dispatch_vb_prim(drm_savage_private_t *dev_priv, + const drm_savage_cmd_header_t *cmd_header, + const uint32_t __user *vtxbuf, + unsigned int vb_size, + unsigned int vb_stride) +{ + unsigned char reorder = 0; + unsigned int prim = cmd_header->prim.prim; + unsigned int skip = cmd_header->prim.skip; + unsigned int n = cmd_header->prim.count; + unsigned int start = cmd_header->prim.start; + unsigned int vtx_size; + unsigned int i; + DMA_LOCALS; + + if (!n) + return 0; + + switch (prim) { + case SAVAGE_PRIM_TRILIST_201: + reorder = 1; + prim = SAVAGE_PRIM_TRILIST; + case SAVAGE_PRIM_TRILIST: + if (n % 3 != 0) { + DRM_ERROR("wrong number of vertices %u in TRILIST\n", + n); + return DRM_ERR(EINVAL); + } + break; + case SAVAGE_PRIM_TRISTRIP: + case SAVAGE_PRIM_TRIFAN: + if (n < 3) { + DRM_ERROR("wrong number of vertices %u in TRIFAN/STRIP\n", + n); + return DRM_ERR(EINVAL); + } + break; + default: + DRM_ERROR("invalid primitive type %u\n", prim); + return DRM_ERR(EINVAL); + } + + if (S3_SAVAGE3D_SERIES(dev_priv->chipset)) { + if (skip > SAVAGE_SKIP_ALL_S3D) { + DRM_ERROR("invalid skip flags 0x%04x\n", skip); + return 
DRM_ERR(EINVAL); + } + vtx_size = 8; /* full vertex */ + } else { + if (skip > SAVAGE_SKIP_ALL_S4) { + DRM_ERROR("invalid skip flags 0x%04x\n", skip); + return DRM_ERR(EINVAL); + } + vtx_size = 10; /* full vertex */ + } + + vtx_size -= (skip & 1) + (skip >> 1 & 1) + + (skip >> 2 & 1) + (skip >> 3 & 1) + (skip >> 4 & 1) + + (skip >> 5 & 1) + (skip >> 6 & 1) + (skip >> 7 & 1); + + if (vtx_size > vb_stride) { + DRM_ERROR("vertex size greater than vb stride (%u > %u)\n", + vtx_size, vb_stride); + return DRM_ERR(EINVAL); + } + + if (start + n > vb_size / (vb_stride*4)) { + DRM_ERROR("vertex indices (%u-%u) out of range (0-%u)\n", + start, start + n - 1, vb_size / (vb_stride*4)); + return DRM_ERR(EINVAL); + } + + prim <<= 25; + while (n != 0) { + /* Can emit up to 255 vertices (85 triangles) at once. */ + unsigned int count = n > 255 ? 255 : n; + if (reorder) { + /* Need to reorder vertices for correct flat + * shading while preserving the clock sense + * for correct culling. Only on Savage3D. 
*/ + int reorder[3] = {-1, -1, -1}; + reorder[start%3] = 2; + + BEGIN_DMA(count*vtx_size+1); + DMA_DRAW_PRIMITIVE(count, prim, skip); + + for (i = start; i < start+count; ++i) { + unsigned int j = i + reorder[i % 3]; + DMA_COPY_FROM_USER(&vtxbuf[vb_stride*j], + vtx_size); + } + + DMA_COMMIT(); + } else { + BEGIN_DMA(count*vtx_size+1); + DMA_DRAW_PRIMITIVE(count, prim, skip); + + if (vb_stride == vtx_size) { + DMA_COPY_FROM_USER(&vtxbuf[vb_stride*start], + vtx_size*count); + } else { + for (i = start; i < start+count; ++i) { + DMA_COPY_FROM_USER( + &vtxbuf[vb_stride*i], + vtx_size); + } + } + + DMA_COMMIT(); + } + + start += count; + n -= count; + + prim |= BCI_CMD_DRAW_CONT; + } + + return 0; +} + +static int savage_dispatch_dma_idx(drm_savage_private_t *dev_priv, + const drm_savage_cmd_header_t *cmd_header, + const uint16_t __user *usr_idx, + const drm_buf_t *dmabuf) +{ + unsigned char reorder = 0; + unsigned int prim = cmd_header->idx.prim; + unsigned int skip = cmd_header->idx.skip; + unsigned int n = cmd_header->idx.count; + unsigned int i; + BCI_LOCALS; + + if (!dmabuf) { + DRM_ERROR("called without dma buffers!\n"); + return DRM_ERR(EINVAL); + } + + if (!n) + return 0; + + switch (prim) { + case SAVAGE_PRIM_TRILIST_201: + reorder = 1; + prim = SAVAGE_PRIM_TRILIST; + case SAVAGE_PRIM_TRILIST: + if (n % 3 != 0) { + DRM_ERROR("wrong number of indices %u in TRILIST\n", + n); + return DRM_ERR(EINVAL); + } + break; + case SAVAGE_PRIM_TRISTRIP: + case SAVAGE_PRIM_TRIFAN: + if (n < 3) { + DRM_ERROR("wrong number of indices %u in TRIFAN/STRIP\n", + n); + return DRM_ERR(EINVAL); + } + break; + default: + DRM_ERROR("invalid primitive type %u\n", prim); + return DRM_ERR(EINVAL); + } + + if (S3_SAVAGE3D_SERIES(dev_priv->chipset)) { + if (skip != 0) { + DRM_ERROR("invalid skip flags 0x%04x for DMA\n", + skip); + return DRM_ERR(EINVAL); + } + } else { + unsigned int size = 10 - (skip & 1) - (skip >> 1 & 1) - + (skip >> 2 & 1) - (skip >> 3 & 1) - (skip >> 4 & 1) - + (skip >> 
5 & 1) - (skip >> 6 & 1) - (skip >> 7 & 1); + if (skip > SAVAGE_SKIP_ALL_S4 || size != 8) { + DRM_ERROR("invalid skip flags 0x%04x for DMA\n", + skip); + return DRM_ERR(EINVAL); + } + if (reorder) { + DRM_ERROR("TRILIST_201 used on Savage4 hardware\n"); + return DRM_ERR(EINVAL); + } + } + + /* Vertex DMA doesn't work with command DMA at the same time, + * so we use BCI_... to submit commands here. Flush buffered + * faked DMA first. */ + DMA_FLUSH(); + + if (dmabuf->bus_address != dev_priv->state.common.vbaddr) { + BEGIN_BCI(2); + BCI_SET_REGISTERS(SAVAGE_VERTBUFADDR, 1); + BCI_WRITE(dmabuf->bus_address | dev_priv->dma_type); + dev_priv->state.common.vbaddr = dmabuf->bus_address; + } + if (S3_SAVAGE3D_SERIES(dev_priv->chipset) && dev_priv->waiting) { + /* Workaround for what looks like a hardware bug. If a + * WAIT_3D_IDLE was emitted some time before the + * indexed drawing command then the engine will lock + * up. There are two known workarounds: + * WAIT_IDLE_EMPTY or emit at least 63 NOPs. */ + BEGIN_BCI(63); + for (i = 0; i < 63; ++i) + BCI_WRITE(BCI_CMD_WAIT); + dev_priv->waiting = 0; + } + + prim <<= 25; + while (n != 0) { + /* Can emit up to 255 indices (85 triangles) at once. */ + unsigned int count = n > 255 ? 255 : n; + /* Is it ok to allocate 510 bytes on the stack in an ioctl? */ + uint16_t idx[255]; + + /* Copy and check indices */ + DRM_COPY_FROM_USER_UNCHECKED(idx, usr_idx, count*2); + for (i = 0; i < count; ++i) { + if (idx[i] > dmabuf->total/32) { + DRM_ERROR("idx[%u]=%u out of range (0-%u)\n", + i, idx[i], dmabuf->total/32); + return DRM_ERR(EINVAL); + } + } + + if (reorder) { + /* Need to reorder indices for correct flat + * shading while preserving the clock sense + * for correct culling. Only on Savage3D. 
*/ + int reorder[3] = {2, -1, -1}; + + BEGIN_BCI((count+1+1)/2); + BCI_DRAW_INDICES_S3D(count, prim, idx[2]); + + for (i = 1; i+1 < count; i += 2) + BCI_WRITE(idx[i + reorder[i % 3]] | + (idx[i+1 + reorder[(i+1) % 3]] << 16)); + if (i < count) + BCI_WRITE(idx[i + reorder[i%3]]); + } else if (S3_SAVAGE3D_SERIES(dev_priv->chipset)) { + BEGIN_BCI((count+1+1)/2); + BCI_DRAW_INDICES_S3D(count, prim, idx[0]); + + for (i = 1; i+1 < count; i += 2) + BCI_WRITE(idx[i] | (idx[i+1] << 16)); + if (i < count) + BCI_WRITE(idx[i]); + } else { + BEGIN_BCI((count+2+1)/2); + BCI_DRAW_INDICES_S4(count, prim, skip); + + for (i = 0; i+1 < count; i += 2) + BCI_WRITE(idx[i] | (idx[i+1] << 16)); + if (i < count) + BCI_WRITE(idx[i]); + } + + usr_idx += count; + n -= count; + + prim |= BCI_CMD_DRAW_CONT; + } + + return 0; +} + +static int savage_dispatch_vb_idx(drm_savage_private_t *dev_priv, + const drm_savage_cmd_header_t *cmd_header, + const uint16_t __user *usr_idx, + const uint32_t __user *vtxbuf, + unsigned int vb_size, + unsigned int vb_stride) +{ + unsigned char reorder = 0; + unsigned int prim = cmd_header->idx.prim; + unsigned int skip = cmd_header->idx.skip; + unsigned int n = cmd_header->idx.count; + unsigned int vtx_size; + unsigned int i; + DMA_LOCALS; + + if (!n) + return 0; + + switch (prim) { + case SAVAGE_PRIM_TRILIST_201: + reorder = 1; + prim = SAVAGE_PRIM_TRILIST; + case SAVAGE_PRIM_TRILIST: + if (n % 3 != 0) { + DRM_ERROR("wrong number of indices %u in TRILIST\n", + n); + return DRM_ERR(EINVAL); + } + break; + case SAVAGE_PRIM_TRISTRIP: + case SAVAGE_PRIM_TRIFAN: + if (n < 3) { + DRM_ERROR("wrong number of indices %u in TRIFAN/STRIP\n", + n); + return DRM_ERR(EINVAL); + } + break; + default: + DRM_ERROR("invalid primitive type %u\n", prim); + return DRM_ERR(EINVAL); + } + + if (S3_SAVAGE3D_SERIES(dev_priv->chipset)) { + if (skip > SAVAGE_SKIP_ALL_S3D) { + DRM_ERROR("invalid skip flags 0x%04x\n", skip); + return DRM_ERR(EINVAL); + } + vtx_size = 8; /* full vertex */ + } 
else { + if (skip > SAVAGE_SKIP_ALL_S4) { + DRM_ERROR("invalid skip flags 0x%04x\n", skip); + return DRM_ERR(EINVAL); + } + vtx_size = 10; /* full vertex */ + } + + vtx_size -= (skip & 1) + (skip >> 1 & 1) + + (skip >> 2 & 1) + (skip >> 3 & 1) + (skip >> 4 & 1) + + (skip >> 5 & 1) + (skip >> 6 & 1) + (skip >> 7 & 1); + + if (vtx_size > vb_stride) { + DRM_ERROR("vertex size greater than vb stride (%u > %u)\n", + vtx_size, vb_stride); + return DRM_ERR(EINVAL); + } + + prim <<= 25; + while (n != 0) { + /* Can emit up to 255 vertices (85 triangles) at once. */ + unsigned int count = n > 255 ? 255 : n; + /* Is it ok to allocate 510 bytes on the stack in an ioctl? */ + uint16_t idx[255]; + + /* Copy and check indices */ + DRM_COPY_FROM_USER_UNCHECKED(idx, usr_idx, count*2); + for (i = 0; i < count; ++i) { + if (idx[i] > vb_size / (vb_stride*4)) { + DRM_ERROR("idx[%u]=%u out of range (0-%u)\n", + i, idx[i], vb_size / (vb_stride*4)); + return DRM_ERR(EINVAL); + } + } + + if (reorder) { + /* Need to reorder vertices for correct flat + * shading while preserving the clock sense + * for correct culling. Only on Savage3D. 
*/ + int reorder[3] = {2, -1, -1}; + + BEGIN_DMA(count*vtx_size+1); + DMA_DRAW_PRIMITIVE(count, prim, skip); + + for (i = 0; i < count; ++i) { + unsigned int j = idx[i + reorder[i % 3]]; + DMA_COPY_FROM_USER(&vtxbuf[vb_stride*j], + vtx_size); + } + + DMA_COMMIT(); + } else { + BEGIN_DMA(count*vtx_size+1); + DMA_DRAW_PRIMITIVE(count, prim, skip); + + for (i = 0; i < count; ++i) { + unsigned int j = idx[i]; + DMA_COPY_FROM_USER(&vtxbuf[vb_stride*j], + vtx_size); + } + + DMA_COMMIT(); + } + + usr_idx += count; + n -= count; + + prim |= BCI_CMD_DRAW_CONT; + } + + return 0; +} + +static int savage_dispatch_clear(drm_savage_private_t *dev_priv, + const drm_savage_cmd_header_t *cmd_header, + const drm_savage_cmd_header_t __user *data, + unsigned int nbox, + const drm_clip_rect_t __user *usr_boxes) +{ + unsigned int flags = cmd_header->clear0.flags, mask, value; + unsigned int clear_cmd; + unsigned int i, nbufs; + DMA_LOCALS; + + if (nbox == 0) + return 0; + + DRM_GET_USER_UNCHECKED(mask, &((const drm_savage_cmd_header_t*)data) + ->clear1.mask); + DRM_GET_USER_UNCHECKED(value, &((const drm_savage_cmd_header_t*)data) + ->clear1.value); + + clear_cmd = BCI_CMD_RECT | BCI_CMD_RECT_XP | BCI_CMD_RECT_YP | + BCI_CMD_SEND_COLOR | BCI_CMD_DEST_PBD_NEW; + BCI_CMD_SET_ROP(clear_cmd,0xCC); + + nbufs = ((flags & SAVAGE_FRONT) ? 1 : 0) + + ((flags & SAVAGE_BACK) ? 1 : 0) + + ((flags & SAVAGE_DEPTH) ? 
1 : 0); + if (nbufs == 0) + return 0; + + if (mask != 0xffffffff) { + /* set mask */ + BEGIN_DMA(2); + DMA_SET_REGISTERS(SAVAGE_BITPLANEWTMASK, 1); + DMA_WRITE(mask); + DMA_COMMIT(); + } + for (i = 0; i < nbox; ++i) { + drm_clip_rect_t box; + unsigned int x, y, w, h; + unsigned int buf; + DRM_COPY_FROM_USER_UNCHECKED(&box, &usr_boxes[i], sizeof(box)); + x = box.x1, y = box.y1; + w = box.x2 - box.x1; + h = box.y2 - box.y1; + BEGIN_DMA(nbufs*6); + for (buf = SAVAGE_FRONT; buf <= SAVAGE_DEPTH; buf <<= 1) { + if (!(flags & buf)) + continue; + DMA_WRITE(clear_cmd); + switch(buf) { + case SAVAGE_FRONT: + DMA_WRITE(dev_priv->front_offset); + DMA_WRITE(dev_priv->front_bd); + break; + case SAVAGE_BACK: + DMA_WRITE(dev_priv->back_offset); + DMA_WRITE(dev_priv->back_bd); + break; + case SAVAGE_DEPTH: + DMA_WRITE(dev_priv->depth_offset); + DMA_WRITE(dev_priv->depth_bd); + break; + } + DMA_WRITE(value); + DMA_WRITE(BCI_X_Y(x, y)); + DMA_WRITE(BCI_W_H(w, h)); + } + DMA_COMMIT(); + } + if (mask != 0xffffffff) { + /* reset mask */ + BEGIN_DMA(2); + DMA_SET_REGISTERS(SAVAGE_BITPLANEWTMASK, 1); + DMA_WRITE(0xffffffff); + DMA_COMMIT(); + } + + return 0; +} + +static int savage_dispatch_swap(drm_savage_private_t *dev_priv, + unsigned int nbox, + const drm_clip_rect_t __user *usr_boxes) +{ + unsigned int swap_cmd; + unsigned int i; + DMA_LOCALS; + + if (nbox == 0) + return 0; + + swap_cmd = BCI_CMD_RECT | BCI_CMD_RECT_XP | BCI_CMD_RECT_YP | + BCI_CMD_SRC_PBD_COLOR_NEW | BCI_CMD_DEST_GBD; + BCI_CMD_SET_ROP(swap_cmd,0xCC); + + for (i = 0; i < nbox; ++i) { + drm_clip_rect_t box; + DRM_COPY_FROM_USER_UNCHECKED(&box, &usr_boxes[i], sizeof(box)); + + BEGIN_DMA(6); + DMA_WRITE(swap_cmd); + DMA_WRITE(dev_priv->back_offset); + DMA_WRITE(dev_priv->back_bd); + DMA_WRITE(BCI_X_Y(box.x1, box.y1)); + DMA_WRITE(BCI_X_Y(box.x1, box.y1)); + DMA_WRITE(BCI_W_H(box.x2-box.x1, box.y2-box.y1)); + DMA_COMMIT(); + } + + return 0; +} + +static int savage_dispatch_draw(drm_savage_private_t *dev_priv, + const 
drm_savage_cmd_header_t __user *start, + const drm_savage_cmd_header_t __user *end, + const drm_buf_t *dmabuf, + const unsigned int __user *usr_vtxbuf, + unsigned int vb_size, unsigned int vb_stride, + unsigned int nbox, + const drm_clip_rect_t __user *usr_boxes) +{ + unsigned int i, j; + int ret; + + for (i = 0; i < nbox; ++i) { + drm_clip_rect_t box; + const drm_savage_cmd_header_t __user *usr_cmdbuf; + DRM_COPY_FROM_USER_UNCHECKED(&box, &usr_boxes[i], sizeof(box)); + dev_priv->emit_clip_rect(dev_priv, &box); + + usr_cmdbuf = start; + while (usr_cmdbuf < end) { + drm_savage_cmd_header_t cmd_header; + DRM_COPY_FROM_USER_UNCHECKED(&cmd_header, usr_cmdbuf, + sizeof(cmd_header)); + usr_cmdbuf++; + switch (cmd_header.cmd.cmd) { + case SAVAGE_CMD_DMA_PRIM: + ret = savage_dispatch_dma_prim( + dev_priv, &cmd_header, dmabuf); + break; + case SAVAGE_CMD_VB_PRIM: + ret = savage_dispatch_vb_prim( + dev_priv, &cmd_header, + (const uint32_t __user *)usr_vtxbuf, + vb_size, vb_stride); + break; + case SAVAGE_CMD_DMA_IDX: + j = (cmd_header.idx.count + 3) / 4; + /* j was check in savage_bci_cmdbuf */ + ret = savage_dispatch_dma_idx( + dev_priv, &cmd_header, + (const uint16_t __user *)usr_cmdbuf, + dmabuf); + usr_cmdbuf += j; + break; + case SAVAGE_CMD_VB_IDX: + j = (cmd_header.idx.count + 3) / 4; + /* j was check in savage_bci_cmdbuf */ + ret = savage_dispatch_vb_idx( + dev_priv, &cmd_header, + (const uint16_t __user *)usr_cmdbuf, + (const uint32_t __user *)usr_vtxbuf, + vb_size, vb_stride); + usr_cmdbuf += j; + break; + default: + /* What's the best return code? EFAULT? 
*/ + DRM_ERROR("IMPLEMENTATION ERROR: " + "non-drawing-command %d\n", + cmd_header.cmd.cmd); + return DRM_ERR(EINVAL); + } + + if (ret != 0) + return ret; + } + } + + return 0; +} + +int savage_bci_cmdbuf(DRM_IOCTL_ARGS) +{ + DRM_DEVICE; + drm_savage_private_t *dev_priv = dev->dev_private; + drm_device_dma_t *dma = dev->dma; + drm_buf_t *dmabuf; + drm_savage_cmdbuf_t cmdbuf; + drm_savage_cmd_header_t __user *usr_cmdbuf; + drm_savage_cmd_header_t __user *first_draw_cmd; + unsigned int __user *usr_vtxbuf; + drm_clip_rect_t __user *usr_boxes; + unsigned int i, j; + int ret = 0; + + DRM_DEBUG("\n"); + + LOCK_TEST_WITH_RETURN(dev, filp); + + DRM_COPY_FROM_USER_IOCTL(cmdbuf, (drm_savage_cmdbuf_t __user *)data, + sizeof(cmdbuf)); + + if (dma && dma->buflist) { + if (cmdbuf.dma_idx > dma->buf_count) { + DRM_ERROR("vertex buffer index %u out of range (0-%u)\n", + cmdbuf.dma_idx, dma->buf_count-1); + return DRM_ERR(EINVAL); + } + dmabuf = dma->buflist[cmdbuf.dma_idx]; + } else { + dmabuf = NULL; + } + + usr_cmdbuf = (drm_savage_cmd_header_t __user *)cmdbuf.cmd_addr; + usr_vtxbuf = (unsigned int __user *)cmdbuf.vb_addr; + usr_boxes = (drm_clip_rect_t __user *)cmdbuf.box_addr; + if ((cmdbuf.size && DRM_VERIFYAREA_READ(usr_cmdbuf, cmdbuf.size*8)) || + (cmdbuf.vb_size && DRM_VERIFYAREA_READ( + usr_vtxbuf, cmdbuf.vb_size)) || + (cmdbuf.nbox && DRM_VERIFYAREA_READ( + usr_boxes, cmdbuf.nbox*sizeof(drm_clip_rect_t)))) + return DRM_ERR(EFAULT); + + /* Make sure writes to DMA buffers are finished before sending + * DMA commands to the graphics hardware. */ + DRM_MEMORYBARRIER(); + + /* Coming from user space. Don't know if the Xserver has + * emitted wait commands. Assuming the worst. 
*/ + dev_priv->waiting = 1; + + i = 0; + first_draw_cmd = NULL; + while (i < cmdbuf.size) { + drm_savage_cmd_header_t cmd_header; + DRM_COPY_FROM_USER_UNCHECKED(&cmd_header, usr_cmdbuf, + sizeof(cmd_header)); + usr_cmdbuf++; + i++; + + /* Group drawing commands with same state to minimize + * iterations over clip rects. */ + j = 0; + switch (cmd_header.cmd.cmd) { + case SAVAGE_CMD_DMA_IDX: + case SAVAGE_CMD_VB_IDX: + j = (cmd_header.idx.count + 3) / 4; + if (i + j > cmdbuf.size) { + DRM_ERROR("indexed drawing command extends " + "beyond end of command buffer\n"); + DMA_FLUSH(); + return DRM_ERR(EINVAL); + } + /* fall through */ + case SAVAGE_CMD_DMA_PRIM: + case SAVAGE_CMD_VB_PRIM: + if (!first_draw_cmd) + first_draw_cmd = usr_cmdbuf-1; + usr_cmdbuf += j; + i += j; + break; + default: + if (first_draw_cmd) { + ret = savage_dispatch_draw ( + dev_priv, first_draw_cmd, usr_cmdbuf-1, + dmabuf, usr_vtxbuf, cmdbuf.vb_size, + cmdbuf.vb_stride, + cmdbuf.nbox, usr_boxes); + if (ret != 0) + return ret; + first_draw_cmd = NULL; + } + } + if (first_draw_cmd) + continue; + + switch (cmd_header.cmd.cmd) { + case SAVAGE_CMD_STATE: + j = (cmd_header.state.count + 1) / 2; + if (i + j > cmdbuf.size) { + DRM_ERROR("command SAVAGE_CMD_STATE extends " + "beyond end of command buffer\n"); + DMA_FLUSH(); + return DRM_ERR(EINVAL); + } + ret = savage_dispatch_state( + dev_priv, &cmd_header, + (uint32_t __user *)usr_cmdbuf); + usr_cmdbuf += j; + i += j; + break; + case SAVAGE_CMD_CLEAR: + if (i + 1 > cmdbuf.size) { + DRM_ERROR("command SAVAGE_CMD_CLEAR extends " + "beyond end of command buffer\n"); + DMA_FLUSH(); + return DRM_ERR(EINVAL); + } + ret = savage_dispatch_clear(dev_priv, &cmd_header, + usr_cmdbuf, + cmdbuf.nbox, usr_boxes); + usr_cmdbuf++; + i++; + break; + case SAVAGE_CMD_SWAP: + ret = savage_dispatch_swap(dev_priv, + cmdbuf.nbox, usr_boxes); + break; + default: + DRM_ERROR("invalid command 0x%x\n", cmd_header.cmd.cmd); + DMA_FLUSH(); + return DRM_ERR(EINVAL); + } + + if (ret != 
0) { + DMA_FLUSH(); + return ret; + } + } + + if (first_draw_cmd) { + ret = savage_dispatch_draw ( + dev_priv, first_draw_cmd, usr_cmdbuf, dmabuf, + usr_vtxbuf, cmdbuf.vb_size, cmdbuf.vb_stride, + cmdbuf.nbox, usr_boxes); + if (ret != 0) { + DMA_FLUSH(); + return ret; + } + } + + DMA_FLUSH(); + + if (dmabuf && cmdbuf.discard) { + drm_savage_buf_priv_t *buf_priv = dmabuf->dev_private; + uint16_t event; + event = savage_bci_emit_event(dev_priv, SAVAGE_WAIT_3D); + SET_AGE(&buf_priv->age, event, dev_priv->event_wrap); + savage_freelist_put(dev, dmabuf); + } + + return 0; +} From 3db368f71a91f08c5a93a5bfb6ca1e2de2668e04 Mon Sep 17 00:00:00 2001 From: Jason Gaston Date: Wed, 10 Aug 2005 06:18:43 -0700 Subject: [PATCH 064/584] [PATCH] ahci: AHCI mode SATA patch for Intel ICH7-M DH MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Hello, This patch adds the Intel ICH7-M DH DID to the ahci.c file for AHCI mode SATA support.  This patch was built against the 2.6.13-rc6 kernel.   If acceptable, please apply. Thanks, Jason Gaston Signed-off-by:  Jason Gaston Signed-off-by: Jeff Garzik --- drivers/scsi/ahci.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/scsi/ahci.c b/drivers/scsi/ahci.c index c5623694d10f..0c79cafb1348 100644 --- a/drivers/scsi/ahci.c +++ b/drivers/scsi/ahci.c @@ -269,6 +269,8 @@ static struct pci_device_id ahci_pci_tbl[] = { board_ahci }, /* ESB2 */ { PCI_VENDOR_ID_INTEL, 0x2683, PCI_ANY_ID, PCI_ANY_ID, 0, 0, board_ahci }, /* ESB2 */ + { PCI_VENDOR_ID_INTEL, 0x27c6, PCI_ANY_ID, PCI_ANY_ID, 0, 0, + board_ahci }, /* ICH7-M DH */ { } /* terminate list */ }; From 20346722ec474245446bcbf460594a935a5c0512 Mon Sep 17 00:00:00 2001 From: "raghavendra.koushik@neterion.com" Date: Wed, 3 Aug 2005 12:24:33 -0700 Subject: [PATCH 065/584] [PATCH] S2io: Code cleanup Hi, We are submitting a series of 13 patches to support our Xframe I and Xframe II line of products. 
The patches can be categorized as follows: Patches 1-8 : Changes applicable to both Xframe I and II Patches 9-11: Xframe II specific features Patch 12: Addresses issues found during testing cycle. Patch 13: Incorpoates mostly the review comments from community and some last moment bug fixes. Please review the patches and let us know your comments. Starting with patch 1 below. This patch involves cosmetic changes(tabs and indentation, regrouping of transmit and receive data structures, typecasting, code cleanup). Signed-off-by: Ravinandan Arakali Signed-off-by: Raghavendra Koushik Signed-off-by: Jeff Garzik --- drivers/net/s2io-regs.h | 19 +- drivers/net/s2io.c | 1832 ++++++++++++++++++--------------------- drivers/net/s2io.h | 292 ++++--- 3 files changed, 1022 insertions(+), 1121 deletions(-) diff --git a/drivers/net/s2io-regs.h b/drivers/net/s2io-regs.h index 7092ca6b277e..8746740e6efd 100644 --- a/drivers/net/s2io-regs.h +++ b/drivers/net/s2io-regs.h @@ -77,19 +77,18 @@ typedef struct _XENA_dev_config { #define ADAPTER_ECC_EN BIT(55) u64 serr_source; -#define SERR_SOURCE_PIC BIT(0) -#define SERR_SOURCE_TXDMA BIT(1) -#define SERR_SOURCE_RXDMA BIT(2) +#define SERR_SOURCE_PIC BIT(0) +#define SERR_SOURCE_TXDMA BIT(1) +#define SERR_SOURCE_RXDMA BIT(2) #define SERR_SOURCE_MAC BIT(3) #define SERR_SOURCE_MC BIT(4) #define SERR_SOURCE_XGXS BIT(5) -#define SERR_SOURCE_ANY (SERR_SOURCE_PIC | \ - SERR_SOURCE_TXDMA | \ - SERR_SOURCE_RXDMA | \ - SERR_SOURCE_MAC | \ - SERR_SOURCE_MC | \ - SERR_SOURCE_XGXS) - +#define SERR_SOURCE_ANY (SERR_SOURCE_PIC | \ + SERR_SOURCE_TXDMA | \ + SERR_SOURCE_RXDMA | \ + SERR_SOURCE_MAC | \ + SERR_SOURCE_MC | \ + SERR_SOURCE_XGXS) u8 unused_0[0x800 - 0x120]; diff --git a/drivers/net/s2io.c b/drivers/net/s2io.c index ea638b162d3f..0721e78dd8b0 100644 --- a/drivers/net/s2io.c +++ b/drivers/net/s2io.c @@ -11,29 +11,28 @@ * See the file COPYING in this distribution for more information. 
* * Credits: - * Jeff Garzik : For pointing out the improper error condition - * check in the s2io_xmit routine and also some - * issues in the Tx watch dog function. Also for - * patiently answering all those innumerable + * Jeff Garzik : For pointing out the improper error condition + * check in the s2io_xmit routine and also some + * issues in the Tx watch dog function. Also for + * patiently answering all those innumerable * questions regaring the 2.6 porting issues. * Stephen Hemminger : Providing proper 2.6 porting mechanism for some * macros available only in 2.6 Kernel. - * Francois Romieu : For pointing out all code part that were + * Francois Romieu : For pointing out all code part that were * deprecated and also styling related comments. - * Grant Grundler : For helping me get rid of some Architecture + * Grant Grundler : For helping me get rid of some Architecture * dependent code. * Christopher Hellwig : Some more 2.6 specific issues in the driver. - * + * * The module loadable parameters that are supported by the driver and a brief * explaination of all the variables. - * rx_ring_num : This can be used to program the number of receive rings used - * in the driver. - * rx_ring_len: This defines the number of descriptors each ring can have. This + * rx_ring_num : This can be used to program the number of receive rings used + * in the driver. + * rx_ring_len: This defines the number of descriptors each ring can have. This * is also an array of size 8. * tx_fifo_num: This defines the number of Tx FIFOs thats used int the driver. - * tx_fifo_len: This too is an array of 8. Each element defines the number of + * tx_fifo_len: This too is an array of 8. Each element defines the number of * Tx descriptors that can be associated with each corresponding FIFO. - * in PCI Configuration space. 
************************************************************************/ #include @@ -57,19 +56,19 @@ #include #include -#include #include #include +#include /* local include */ #include "s2io.h" #include "s2io-regs.h" /* S2io Driver name & version. */ -static char s2io_driver_name[] = "s2io"; -static char s2io_driver_version[] = "Version 1.7.7.1"; +static char s2io_driver_name[] = "Neterion"; +static char s2io_driver_version[] = "Version 1.7.7"; -/* +/* * Cards with following subsystem_id have a link state indication * problem, 600B, 600C, 600D, 640B, 640C and 640D. * macro below identifies these cards given the subsystem_id. @@ -86,9 +85,13 @@ static char s2io_driver_version[] = "Version 1.7.7.1"; static inline int rx_buffer_level(nic_t * sp, int rxb_size, int ring) { int level = 0; - if ((sp->pkt_cnt[ring] - rxb_size) > 16) { + mac_info_t *mac_control; + + mac_control = &sp->mac_control; + if ((mac_control->rings[ring].pkt_cnt - rxb_size) > 16) { level = LOW; - if ((sp->pkt_cnt[ring] - rxb_size) < MAX_RXDS_PER_BLOCK) { + if ((mac_control->rings[ring].pkt_cnt - rxb_size) < + MAX_RXDS_PER_BLOCK) { level = PANIC; } } @@ -153,8 +156,7 @@ static char ethtool_stats_keys[][ETH_GSTRING_LEN] = { #define S2IO_TEST_LEN sizeof(s2io_gstrings) / ETH_GSTRING_LEN #define S2IO_STRINGS_LEN S2IO_TEST_LEN * ETH_GSTRING_LEN - -/* +/* * Constants to be programmed into the Xena's registers, to configure * the XAUI. */ @@ -196,8 +198,7 @@ static u64 default_dtx_cfg[] = { END_SIGN }; - -/* +/* * Constants for Fixing the MacAddress problem seen mostly on * Alpha machines. 
*/ @@ -227,6 +228,8 @@ static unsigned int rx_ring_num = 1; static unsigned int rx_ring_sz[MAX_RX_RINGS] = {[0 ...(MAX_RX_RINGS - 1)] = 0 }; static unsigned int Stats_refresh_time = 4; +static unsigned int rts_frm_len[MAX_RX_RINGS] = + {[0 ...(MAX_RX_RINGS - 1)] = 0 }; static unsigned int rmac_pause_time = 65535; static unsigned int mc_pause_threshold_q0q3 = 187; static unsigned int mc_pause_threshold_q4q7 = 187; @@ -237,9 +240,9 @@ static unsigned int rmac_util_period = 5; static unsigned int indicate_max_pkts; #endif -/* +/* * S2IO device table. - * This table lists all the devices that this driver supports. + * This table lists all the devices that this driver supports. */ static struct pci_device_id s2io_tbl[] __devinitdata = { {PCI_VENDOR_ID_S2IO, PCI_DEVICE_ID_S2IO_WIN, @@ -247,9 +250,9 @@ static struct pci_device_id s2io_tbl[] __devinitdata = { {PCI_VENDOR_ID_S2IO, PCI_DEVICE_ID_S2IO_UNI, PCI_ANY_ID, PCI_ANY_ID}, {PCI_VENDOR_ID_S2IO, PCI_DEVICE_ID_HERC_WIN, - PCI_ANY_ID, PCI_ANY_ID}, - {PCI_VENDOR_ID_S2IO, PCI_DEVICE_ID_HERC_UNI, - PCI_ANY_ID, PCI_ANY_ID}, + PCI_ANY_ID, PCI_ANY_ID}, + {PCI_VENDOR_ID_S2IO, PCI_DEVICE_ID_HERC_UNI, + PCI_ANY_ID, PCI_ANY_ID}, {0,} }; @@ -268,8 +271,8 @@ static struct pci_driver s2io_driver = { /** * init_shared_mem - Allocation and Initialization of Memory * @nic: Device private variable. - * Description: The function allocates all the memory areas shared - * between the NIC and the driver. This includes Tx descriptors, + * Description: The function allocates all the memory areas shared + * between the NIC and the driver. This includes Tx descriptors, * Rx descriptors and the statistics block. 
*/ @@ -279,11 +282,11 @@ static int init_shared_mem(struct s2io_nic *nic) void *tmp_v_addr, *tmp_v_addr_next; dma_addr_t tmp_p_addr, tmp_p_addr_next; RxD_block_t *pre_rxd_blk = NULL; - int i, j, blk_cnt; + int i, j, blk_cnt, rx_sz, tx_sz; int lst_size, lst_per_page; struct net_device *dev = nic->dev; #ifdef CONFIG_2BUFF_MODE - unsigned long tmp; + u64 tmp; buffAdd_t *ba; #endif @@ -308,28 +311,34 @@ static int init_shared_mem(struct s2io_nic *nic) } lst_size = (sizeof(TxD_t) * config->max_txds); + tx_sz = lst_size * size; lst_per_page = PAGE_SIZE / lst_size; for (i = 0; i < config->tx_fifo_num; i++) { int fifo_len = config->tx_cfg[i].fifo_len; int list_holder_size = fifo_len * sizeof(list_info_hold_t); - nic->list_info[i] = kmalloc(list_holder_size, GFP_KERNEL); - if (!nic->list_info[i]) { + mac_control->fifos[i].list_info = kmalloc(list_holder_size, + GFP_KERNEL); + if (!mac_control->fifos[i].list_info) { DBG_PRINT(ERR_DBG, "Malloc failed for list_info\n"); return -ENOMEM; } - memset(nic->list_info[i], 0, list_holder_size); + memset(mac_control->fifos[i].list_info, 0, list_holder_size); } for (i = 0; i < config->tx_fifo_num; i++) { int page_num = TXD_MEM_PAGE_CNT(config->tx_cfg[i].fifo_len, lst_per_page); - mac_control->tx_curr_put_info[i].offset = 0; - mac_control->tx_curr_put_info[i].fifo_len = + mac_control->fifos[i].tx_curr_put_info.offset = 0; + mac_control->fifos[i].tx_curr_put_info.fifo_len = config->tx_cfg[i].fifo_len - 1; - mac_control->tx_curr_get_info[i].offset = 0; - mac_control->tx_curr_get_info[i].fifo_len = + mac_control->fifos[i].tx_curr_get_info.offset = 0; + mac_control->fifos[i].tx_curr_get_info.fifo_len = config->tx_cfg[i].fifo_len - 1; + mac_control->fifos[i].fifo_no = i; + mac_control->fifos[i].nic = nic; + mac_control->fifos[i].max_txds = MAX_SKB_FRAGS; + for (j = 0; j < page_num; j++) { int k = 0; dma_addr_t tmp_p; @@ -345,16 +354,15 @@ static int init_shared_mem(struct s2io_nic *nic) while (k < lst_per_page) { int l = (j * lst_per_page) + 
k; if (l == config->tx_cfg[i].fifo_len) - goto end_txd_alloc; - nic->list_info[i][l].list_virt_addr = + break; + mac_control->fifos[i].list_info[l].list_virt_addr = tmp_v + (k * lst_size); - nic->list_info[i][l].list_phy_addr = + mac_control->fifos[i].list_info[l].list_phy_addr = tmp_p + (k * lst_size); k++; } } } - end_txd_alloc: /* Allocation and initialization of RXDs in Rings */ size = 0; @@ -367,21 +375,26 @@ static int init_shared_mem(struct s2io_nic *nic) return FAILURE; } size += config->rx_cfg[i].num_rxd; - nic->block_count[i] = + mac_control->rings[i].block_count = config->rx_cfg[i].num_rxd / (MAX_RXDS_PER_BLOCK + 1); - nic->pkt_cnt[i] = - config->rx_cfg[i].num_rxd - nic->block_count[i]; + mac_control->rings[i].pkt_cnt = + config->rx_cfg[i].num_rxd - mac_control->rings[i].block_count; } + size = (size * (sizeof(RxD_t))); + rx_sz = size; for (i = 0; i < config->rx_ring_num; i++) { - mac_control->rx_curr_get_info[i].block_index = 0; - mac_control->rx_curr_get_info[i].offset = 0; - mac_control->rx_curr_get_info[i].ring_len = + mac_control->rings[i].rx_curr_get_info.block_index = 0; + mac_control->rings[i].rx_curr_get_info.offset = 0; + mac_control->rings[i].rx_curr_get_info.ring_len = config->rx_cfg[i].num_rxd - 1; - mac_control->rx_curr_put_info[i].block_index = 0; - mac_control->rx_curr_put_info[i].offset = 0; - mac_control->rx_curr_put_info[i].ring_len = + mac_control->rings[i].rx_curr_put_info.block_index = 0; + mac_control->rings[i].rx_curr_put_info.offset = 0; + mac_control->rings[i].rx_curr_put_info.ring_len = config->rx_cfg[i].num_rxd - 1; + mac_control->rings[i].nic = nic; + mac_control->rings[i].ring_no = i; + blk_cnt = config->rx_cfg[i].num_rxd / (MAX_RXDS_PER_BLOCK + 1); /* Allocating all the Rx blocks */ @@ -395,32 +408,36 @@ static int init_shared_mem(struct s2io_nic *nic) &tmp_p_addr); if (tmp_v_addr == NULL) { /* - * In case of failure, free_shared_mem() - * is called, which should free any - * memory that was alloced till the + * In case of 
failure, free_shared_mem() + * is called, which should free any + * memory that was alloced till the * failure happened. */ - nic->rx_blocks[i][j].block_virt_addr = + mac_control->rings[i].rx_blocks[j].block_virt_addr = tmp_v_addr; return -ENOMEM; } memset(tmp_v_addr, 0, size); - nic->rx_blocks[i][j].block_virt_addr = tmp_v_addr; - nic->rx_blocks[i][j].block_dma_addr = tmp_p_addr; + mac_control->rings[i].rx_blocks[j].block_virt_addr = + tmp_v_addr; + mac_control->rings[i].rx_blocks[j].block_dma_addr = + tmp_p_addr; } /* Interlinking all Rx Blocks */ for (j = 0; j < blk_cnt; j++) { - tmp_v_addr = nic->rx_blocks[i][j].block_virt_addr; + tmp_v_addr = + mac_control->rings[i].rx_blocks[j].block_virt_addr; tmp_v_addr_next = - nic->rx_blocks[i][(j + 1) % + mac_control->rings[i].rx_blocks[(j + 1) % blk_cnt].block_virt_addr; - tmp_p_addr = nic->rx_blocks[i][j].block_dma_addr; + tmp_p_addr = + mac_control->rings[i].rx_blocks[j].block_dma_addr; tmp_p_addr_next = - nic->rx_blocks[i][(j + 1) % + mac_control->rings[i].rx_blocks[(j + 1) % blk_cnt].block_dma_addr; pre_rxd_blk = (RxD_block_t *) tmp_v_addr; - pre_rxd_blk->reserved_1 = END_OF_BLOCK; /* last RxD + pre_rxd_blk->reserved_1 = END_OF_BLOCK; /* last RxD * marker. */ #ifndef CONFIG_2BUFF_MODE @@ -433,43 +450,43 @@ static int init_shared_mem(struct s2io_nic *nic) } #ifdef CONFIG_2BUFF_MODE - /* + /* * Allocation of Storages for buffer addresses in 2BUFF mode * and the buffers as well. 
*/ for (i = 0; i < config->rx_ring_num; i++) { blk_cnt = config->rx_cfg[i].num_rxd / (MAX_RXDS_PER_BLOCK + 1); - nic->ba[i] = kmalloc((sizeof(buffAdd_t *) * blk_cnt), + mac_control->rings[i].ba = kmalloc((sizeof(buffAdd_t *) * blk_cnt), GFP_KERNEL); - if (!nic->ba[i]) + if (!mac_control->rings[i].ba) return -ENOMEM; for (j = 0; j < blk_cnt; j++) { int k = 0; - nic->ba[i][j] = kmalloc((sizeof(buffAdd_t) * + mac_control->rings[i].ba[j] = kmalloc((sizeof(buffAdd_t) * (MAX_RXDS_PER_BLOCK + 1)), GFP_KERNEL); - if (!nic->ba[i][j]) + if (!mac_control->rings[i].ba[j]) return -ENOMEM; while (k != MAX_RXDS_PER_BLOCK) { - ba = &nic->ba[i][j][k]; + ba = &mac_control->rings[i].ba[j][k]; - ba->ba_0_org = kmalloc + ba->ba_0_org = (void *) kmalloc (BUF0_LEN + ALIGN_SIZE, GFP_KERNEL); if (!ba->ba_0_org) return -ENOMEM; - tmp = (unsigned long) ba->ba_0_org; + tmp = (u64) ba->ba_0_org; tmp += ALIGN_SIZE; - tmp &= ~((unsigned long) ALIGN_SIZE); + tmp &= ~((u64) ALIGN_SIZE); ba->ba_0 = (void *) tmp; - ba->ba_1_org = kmalloc + ba->ba_1_org = (void *) kmalloc (BUF1_LEN + ALIGN_SIZE, GFP_KERNEL); if (!ba->ba_1_org) return -ENOMEM; - tmp = (unsigned long) ba->ba_1_org; + tmp = (u64) ba->ba_1_org; tmp += ALIGN_SIZE; - tmp &= ~((unsigned long) ALIGN_SIZE); + tmp &= ~((u64) ALIGN_SIZE); ba->ba_1 = (void *) tmp; k++; } @@ -483,9 +500,9 @@ static int init_shared_mem(struct s2io_nic *nic) (nic->pdev, size, &mac_control->stats_mem_phy); if (!mac_control->stats_mem) { - /* - * In case of failure, free_shared_mem() is called, which - * should free any memory that was alloced till the + /* + * In case of failure, free_shared_mem() is called, which + * should free any memory that was alloced till the * failure happened. 
*/ return -ENOMEM; @@ -495,15 +512,14 @@ static int init_shared_mem(struct s2io_nic *nic) tmp_v_addr = mac_control->stats_mem; mac_control->stats_info = (StatInfo_t *) tmp_v_addr; memset(tmp_v_addr, 0, size); - DBG_PRINT(INIT_DBG, "%s:Ring Mem PHY: 0x%llx\n", dev->name, (unsigned long long) tmp_p_addr); return SUCCESS; } -/** - * free_shared_mem - Free the allocated Memory +/** + * free_shared_mem - Free the allocated Memory * @nic: Device private variable. * Description: This function is to free all memory locations allocated by * the init_shared_mem() function and return it to the kernel. @@ -533,15 +549,18 @@ static void free_shared_mem(struct s2io_nic *nic) lst_per_page); for (j = 0; j < page_num; j++) { int mem_blks = (j * lst_per_page); - if (!nic->list_info[i][mem_blks].list_virt_addr) + if (!mac_control->fifos[i].list_info[mem_blks]. + list_virt_addr) break; pci_free_consistent(nic->pdev, PAGE_SIZE, - nic->list_info[i][mem_blks]. + mac_control->fifos[i]. + list_info[mem_blks]. list_virt_addr, - nic->list_info[i][mem_blks]. + mac_control->fifos[i]. + list_info[mem_blks]. list_phy_addr); } - kfree(nic->list_info[i]); + kfree(mac_control->fifos[i].list_info); } #ifndef CONFIG_2BUFF_MODE @@ -550,10 +569,12 @@ static void free_shared_mem(struct s2io_nic *nic) size = SIZE_OF_BLOCK; #endif for (i = 0; i < config->rx_ring_num; i++) { - blk_cnt = nic->block_count[i]; + blk_cnt = mac_control->rings[i].block_count; for (j = 0; j < blk_cnt; j++) { - tmp_v_addr = nic->rx_blocks[i][j].block_virt_addr; - tmp_p_addr = nic->rx_blocks[i][j].block_dma_addr; + tmp_v_addr = mac_control->rings[i].rx_blocks[j]. + block_virt_addr; + tmp_p_addr = mac_control->rings[i].rx_blocks[j]. 
+ block_dma_addr; if (tmp_v_addr == NULL) break; pci_free_consistent(nic->pdev, size, @@ -566,35 +587,21 @@ static void free_shared_mem(struct s2io_nic *nic) for (i = 0; i < config->rx_ring_num; i++) { blk_cnt = config->rx_cfg[i].num_rxd / (MAX_RXDS_PER_BLOCK + 1); - if (!nic->ba[i]) - goto end_free; for (j = 0; j < blk_cnt; j++) { int k = 0; - if (!nic->ba[i][j]) { - kfree(nic->ba[i]); - goto end_free; - } + if (!mac_control->rings[i].ba[j]) + continue; while (k != MAX_RXDS_PER_BLOCK) { - buffAdd_t *ba = &nic->ba[i][j][k]; - if (!ba || !ba->ba_0_org || !ba->ba_1_org) - { - kfree(nic->ba[i]); - kfree(nic->ba[i][j]); - if(ba->ba_0_org) - kfree(ba->ba_0_org); - if(ba->ba_1_org) - kfree(ba->ba_1_org); - goto end_free; - } + buffAdd_t *ba = &mac_control->rings[i].ba[j][k]; kfree(ba->ba_0_org); kfree(ba->ba_1_org); k++; } - kfree(nic->ba[i][j]); + kfree(mac_control->rings[i].ba[j]); } - kfree(nic->ba[i]); + if (mac_control->rings[i].ba) + kfree(mac_control->rings[i].ba); } -end_free: #endif if (mac_control->stats_mem) { @@ -605,12 +612,12 @@ end_free: } } -/** - * init_nic - Initialization of hardware +/** + * init_nic - Initialization of hardware * @nic: device peivate variable - * Description: The function sequentially configures every block - * of the H/W from their reset values. - * Return Value: SUCCESS on success and + * Description: The function sequentially configures every block + * of the H/W from their reset values. + * Return Value: SUCCESS on success and * '-1' on failure (endian settings incorrect). 
*/ @@ -626,12 +633,13 @@ static int init_nic(struct s2io_nic *nic) struct config_param *config; int mdio_cnt = 0, dtx_cnt = 0; unsigned long long mem_share; + int mem_size; mac_control = &nic->mac_control; config = &nic->config; - /* Initialize swapper control register */ - if (s2io_set_swapper(nic)) { + /* to set the swapper control on the card */ + if(s2io_set_swapper(nic)) { DBG_PRINT(ERR_DBG,"ERROR: Setting Swapper failed\n"); return -1; } @@ -639,8 +647,8 @@ static int init_nic(struct s2io_nic *nic) /* Remove XGXS from reset state */ val64 = 0; writeq(val64, &bar0->sw_reset); - val64 = readq(&bar0->sw_reset); msleep(500); + val64 = readq(&bar0->sw_reset); /* Enable Receiving broadcasts */ add = &bar0->mac_cfg; @@ -660,18 +668,18 @@ static int init_nic(struct s2io_nic *nic) val64 = dev->mtu; writeq(vBIT(val64, 2, 14), &bar0->rmac_max_pyld_len); - /* - * Configuring the XAUI Interface of Xena. + /* + * Configuring the XAUI Interface of Xena. * *************************************** - * To Configure the Xena's XAUI, one has to write a series - * of 64 bit values into two registers in a particular - * sequence. Hence a macro 'SWITCH_SIGN' has been defined - * which will be defined in the array of configuration values - * (default_dtx_cfg & default_mdio_cfg) at appropriate places - * to switch writing from one regsiter to another. We continue + * To Configure the Xena's XAUI, one has to write a series + * of 64 bit values into two registers in a particular + * sequence. Hence a macro 'SWITCH_SIGN' has been defined + * which will be defined in the array of configuration values + * (default_dtx_cfg & default_mdio_cfg) at appropriate places + * to switch writing from one regsiter to another. We continue * writing these values until we encounter the 'END_SIGN' macro. 
- * For example, After making a series of 21 writes into - * dtx_control register the 'SWITCH_SIGN' appears and hence we + * For example, After making a series of 21 writes into + * dtx_control register the 'SWITCH_SIGN' appears and hence we * start writing into mdio_control until we encounter END_SIGN. */ while (1) { @@ -752,8 +760,8 @@ static int init_nic(struct s2io_nic *nic) DBG_PRINT(INIT_DBG, "Fifo partition at: 0x%p is: 0x%llx\n", &bar0->tx_fifo_partition_0, (unsigned long long) val64); - /* - * Initialization of Tx_PA_CONFIG register to ignore packet + /* + * Initialization of Tx_PA_CONFIG register to ignore packet * integrity checking. */ val64 = readq(&bar0->tx_pa_cfg); @@ -770,54 +778,54 @@ static int init_nic(struct s2io_nic *nic) } writeq(val64, &bar0->rx_queue_priority); - /* - * Allocating equal share of memory to all the + /* + * Allocating equal share of memory to all the * configured Rings. */ val64 = 0; + mem_size = 64; for (i = 0; i < config->rx_ring_num; i++) { switch (i) { case 0: - mem_share = (64 / config->rx_ring_num + - 64 % config->rx_ring_num); + mem_share = (mem_size / config->rx_ring_num + + mem_size % config->rx_ring_num); val64 |= RX_QUEUE_CFG_Q0_SZ(mem_share); continue; case 1: - mem_share = (64 / config->rx_ring_num); + mem_share = (mem_size / config->rx_ring_num); val64 |= RX_QUEUE_CFG_Q1_SZ(mem_share); continue; case 2: - mem_share = (64 / config->rx_ring_num); + mem_share = (mem_size / config->rx_ring_num); val64 |= RX_QUEUE_CFG_Q2_SZ(mem_share); continue; case 3: - mem_share = (64 / config->rx_ring_num); + mem_share = (mem_size / config->rx_ring_num); val64 |= RX_QUEUE_CFG_Q3_SZ(mem_share); continue; case 4: - mem_share = (64 / config->rx_ring_num); + mem_share = (mem_size / config->rx_ring_num); val64 |= RX_QUEUE_CFG_Q4_SZ(mem_share); continue; case 5: - mem_share = (64 / config->rx_ring_num); + mem_share = (mem_size / config->rx_ring_num); val64 |= RX_QUEUE_CFG_Q5_SZ(mem_share); continue; case 6: - mem_share = (64 / 
config->rx_ring_num); + mem_share = (mem_size / config->rx_ring_num); val64 |= RX_QUEUE_CFG_Q6_SZ(mem_share); continue; case 7: - mem_share = (64 / config->rx_ring_num); + mem_share = (mem_size / config->rx_ring_num); val64 |= RX_QUEUE_CFG_Q7_SZ(mem_share); continue; } } writeq(val64, &bar0->rx_queue_cfg); - /* - * Initializing the Tx round robin registers to 0. - * Filling Tx and Rx round robin registers as per the - * number of FIFOs and Rings is still TODO. + /* Initializing the Tx round robin registers to 0 + * filling tx and rx round robin registers as per + * the number of FIFOs and Rings is still TODO */ writeq(0, &bar0->tx_w_round_robin_0); writeq(0, &bar0->tx_w_round_robin_1); @@ -825,30 +833,30 @@ static int init_nic(struct s2io_nic *nic) writeq(0, &bar0->tx_w_round_robin_3); writeq(0, &bar0->tx_w_round_robin_4); - /* + /* * TODO - * Disable Rx steering. Hard coding all packets be steered to - * Queue 0 for now. + * Disable Rx steering. Hard coding all packets to be steered to + * Queue 0 for now. */ val64 = 0x8080808080808080ULL; writeq(val64, &bar0->rts_qos_steering); /* UDP Fix */ val64 = 0; - for (i = 1; i < 8; i++) + for (i = 0; i < 8; i++) writeq(val64, &bar0->rts_frm_len_n[i]); - /* Set rts_frm_len register for fifo 0 */ - writeq(MAC_RTS_FRM_LEN_SET(dev->mtu + 22), - &bar0->rts_frm_len_n[0]); + /* Set the default rts frame length for ring0 */ + writeq(MAC_RTS_FRM_LEN_SET(dev->mtu+22), + &bar0->rts_frm_len_n[0]); - /* Enable statistics */ + /* Program statistics memory */ writeq(mac_control->stats_mem_phy, &bar0->stat_addr); val64 = SET_UPDT_PERIOD(Stats_refresh_time) | STAT_CFG_STAT_RO | STAT_CFG_STAT_EN; writeq(val64, &bar0->stat_cfg); - /* + /* * Initializing the sampling rate for the device to calculate the * bandwidth utilization. 
*/ @@ -857,11 +865,12 @@ static int init_nic(struct s2io_nic *nic) writeq(val64, &bar0->mac_link_util); - /* - * Initializing the Transmit and Receive Traffic Interrupt + /* + * Initializing the Transmit and Receive Traffic Interrupt * Scheme. */ - /* TTI Initialization. Default Tx timer gets us about + /* + * TTI Initialization. Default Tx timer gets us about * 250 interrupts per sec. Continuous interrupts are enabled * by default. */ @@ -880,7 +889,7 @@ static int init_nic(struct s2io_nic *nic) val64 = TTI_CMD_MEM_WE | TTI_CMD_MEM_STROBE_NEW_CMD; writeq(val64, &bar0->tti_command_mem); - /* + /* * Once the operation completes, the Strobe bit of the command * register will be reset. We poll for this particular condition * We wait for a maximum of 500ms for the operation to complete, @@ -917,7 +926,7 @@ static int init_nic(struct s2io_nic *nic) val64 = RTI_CMD_MEM_WE | RTI_CMD_MEM_STROBE_NEW_CMD; writeq(val64, &bar0->rti_command_mem); - /* + /* * Once the operation completes, the Strobe bit of the command * register will be reset. We poll for this particular condition * We wait for a maximum of 500ms for the operation to complete, @@ -926,7 +935,7 @@ static int init_nic(struct s2io_nic *nic) time = 0; while (TRUE) { val64 = readq(&bar0->rti_command_mem); - if (!(val64 & TTI_CMD_MEM_STROBE_NEW_CMD)) { + if (!(val64 & RTI_CMD_MEM_STROBE_NEW_CMD)) { break; } if (time > 10) { @@ -938,15 +947,15 @@ static int init_nic(struct s2io_nic *nic) msleep(50); } - /* - * Initializing proper values as Pause threshold into all + /* + * Initializing proper values as Pause threshold into all * the 8 Queues on Rx side. 
*/ writeq(0xffbbffbbffbbffbbULL, &bar0->mc_pause_thresh_q0q3); writeq(0xffbbffbbffbbffbbULL, &bar0->mc_pause_thresh_q4q7); /* Disable RMAC PAD STRIPPING */ - add = &bar0->mac_cfg; + add = (void *) &bar0->mac_cfg; val64 = readq(&bar0->mac_cfg); val64 &= ~(MAC_CFG_RMAC_STRIP_PAD); writeq(RMAC_CFG_KEY(0x4C0D), &bar0->rmac_cfg_key); @@ -955,8 +964,8 @@ static int init_nic(struct s2io_nic *nic) writel((u32) (val64 >> 32), (add + 4)); val64 = readq(&bar0->mac_cfg); - /* - * Set the time value to be inserted in the pause frame + /* + * Set the time value to be inserted in the pause frame * generated by xena. */ val64 = readq(&bar0->rmac_pause_cfg); @@ -964,7 +973,7 @@ static int init_nic(struct s2io_nic *nic) val64 |= RMAC_PAUSE_HG_PTIME(nic->mac_control.rmac_pause_time); writeq(val64, &bar0->rmac_pause_cfg); - /* + /* * Set the Threshold Limit for Generating the pause frame * If the amount of data in any Queue exceeds ratio of * (mac_control.mc_pause_threshold_q0q3 or q4q7)/256 @@ -988,8 +997,8 @@ static int init_nic(struct s2io_nic *nic) } writeq(val64, &bar0->mc_pause_thresh_q4q7); - /* - * TxDMA will stop Read request if the number of read split has + /* + * TxDMA will stop Read request if the number of read split has * exceeded the limit pointed by shared_splits */ val64 = readq(&bar0->pic_control); @@ -999,14 +1008,14 @@ static int init_nic(struct s2io_nic *nic) return SUCCESS; } -/** - * en_dis_able_nic_intrs - Enable or Disable the interrupts +/** + * en_dis_able_nic_intrs - Enable or Disable the interrupts * @nic: device private variable, * @mask: A mask indicating which Intr block must be modified and, * @flag: A flag indicating whether to enable or disable the Intrs. * Description: This function will either disable or enable the interrupts - * depending on the flag argument. The mask argument can be used to - * enable/disable any Intr block. + * depending on the flag argument. The mask argument can be used to + * enable/disable any Intr block. 
* Return Value: NONE. */ @@ -1024,20 +1033,20 @@ static void en_dis_able_nic_intrs(struct s2io_nic *nic, u16 mask, int flag) temp64 = readq(&bar0->general_int_mask); temp64 &= ~((u64) val64); writeq(temp64, &bar0->general_int_mask); - /* + /* * Disabled all PCIX, Flash, MDIO, IIC and GPIO - * interrupts for now. - * TODO + * interrupts for now. + * TODO */ writeq(DISABLE_ALL_INTRS, &bar0->pic_int_mask); - /* + /* * No MSI Support is available presently, so TTI and * RTI interrupts are also disabled. */ } else if (flag == DISABLE_INTRS) { - /* - * Disable PIC Intrs in the general - * intr mask register + /* + * Disable PIC Intrs in the general + * intr mask register */ writeq(DISABLE_ALL_INTRS, &bar0->pic_int_mask); temp64 = readq(&bar0->general_int_mask); @@ -1055,27 +1064,27 @@ static void en_dis_able_nic_intrs(struct s2io_nic *nic, u16 mask, int flag) temp64 = readq(&bar0->general_int_mask); temp64 &= ~((u64) val64); writeq(temp64, &bar0->general_int_mask); - /* - * Keep all interrupts other than PFC interrupt + /* + * Keep all interrupts other than PFC interrupt * and PCC interrupt disabled in DMA level. 
*/ val64 = DISABLE_ALL_INTRS & ~(TXDMA_PFC_INT_M | TXDMA_PCC_INT_M); writeq(val64, &bar0->txdma_int_mask); - /* - * Enable only the MISC error 1 interrupt in PFC block + /* + * Enable only the MISC error 1 interrupt in PFC block */ val64 = DISABLE_ALL_INTRS & (~PFC_MISC_ERR_1); writeq(val64, &bar0->pfc_err_mask); - /* - * Enable only the FB_ECC error interrupt in PCC block + /* + * Enable only the FB_ECC error interrupt in PCC block */ val64 = DISABLE_ALL_INTRS & (~PCC_FB_ECC_ERR); writeq(val64, &bar0->pcc_err_mask); } else if (flag == DISABLE_INTRS) { - /* - * Disable TxDMA Intrs in the general intr mask - * register + /* + * Disable TxDMA Intrs in the general intr mask + * register */ writeq(DISABLE_ALL_INTRS, &bar0->txdma_int_mask); writeq(DISABLE_ALL_INTRS, &bar0->pfc_err_mask); @@ -1093,15 +1102,15 @@ static void en_dis_able_nic_intrs(struct s2io_nic *nic, u16 mask, int flag) temp64 = readq(&bar0->general_int_mask); temp64 &= ~((u64) val64); writeq(temp64, &bar0->general_int_mask); - /* - * All RxDMA block interrupts are disabled for now - * TODO + /* + * All RxDMA block interrupts are disabled for now + * TODO */ writeq(DISABLE_ALL_INTRS, &bar0->rxdma_int_mask); } else if (flag == DISABLE_INTRS) { - /* - * Disable RxDMA Intrs in the general intr mask - * register + /* + * Disable RxDMA Intrs in the general intr mask + * register */ writeq(DISABLE_ALL_INTRS, &bar0->rxdma_int_mask); temp64 = readq(&bar0->general_int_mask); @@ -1118,8 +1127,8 @@ static void en_dis_able_nic_intrs(struct s2io_nic *nic, u16 mask, int flag) temp64 = readq(&bar0->general_int_mask); temp64 &= ~((u64) val64); writeq(temp64, &bar0->general_int_mask); - /* - * All MAC block error interrupts are disabled for now + /* + * All MAC block error interrupts are disabled for now * except the link status change interrupt. 
* TODO */ @@ -1132,8 +1141,8 @@ static void en_dis_able_nic_intrs(struct s2io_nic *nic, u16 mask, int flag) val64 &= ~((u64) RMAC_LINK_STATE_CHANGE_INT); writeq(val64, &bar0->mac_rmac_err_mask); } else if (flag == DISABLE_INTRS) { - /* - * Disable MAC Intrs in the general intr mask register + /* + * Disable MAC Intrs in the general intr mask register */ writeq(DISABLE_ALL_INTRS, &bar0->mac_int_mask); writeq(DISABLE_ALL_INTRS, @@ -1152,14 +1161,14 @@ static void en_dis_able_nic_intrs(struct s2io_nic *nic, u16 mask, int flag) temp64 = readq(&bar0->general_int_mask); temp64 &= ~((u64) val64); writeq(temp64, &bar0->general_int_mask); - /* + /* * All XGXS block error interrupts are disabled for now - * TODO + * TODO */ writeq(DISABLE_ALL_INTRS, &bar0->xgxs_int_mask); } else if (flag == DISABLE_INTRS) { - /* - * Disable MC Intrs in the general intr mask register + /* + * Disable MC Intrs in the general intr mask register */ writeq(DISABLE_ALL_INTRS, &bar0->xgxs_int_mask); temp64 = readq(&bar0->general_int_mask); @@ -1175,9 +1184,9 @@ static void en_dis_able_nic_intrs(struct s2io_nic *nic, u16 mask, int flag) temp64 = readq(&bar0->general_int_mask); temp64 &= ~((u64) val64); writeq(temp64, &bar0->general_int_mask); - /* - * All MC block error interrupts are disabled for now - * TODO + /* + * All MC block error interrupts are disabled for now. 
+ * TODO */ writeq(DISABLE_ALL_INTRS, &bar0->mc_int_mask); } else if (flag == DISABLE_INTRS) { @@ -1199,14 +1208,14 @@ static void en_dis_able_nic_intrs(struct s2io_nic *nic, u16 mask, int flag) temp64 = readq(&bar0->general_int_mask); temp64 &= ~((u64) val64); writeq(temp64, &bar0->general_int_mask); - /* + /* * Enable all the Tx side interrupts - * writing 0 Enables all 64 TX interrupt levels + * writing 0 Enables all 64 TX interrupt levels */ writeq(0x0, &bar0->tx_traffic_mask); } else if (flag == DISABLE_INTRS) { - /* - * Disable Tx Traffic Intrs in the general intr mask + /* + * Disable Tx Traffic Intrs in the general intr mask * register. */ writeq(DISABLE_ALL_INTRS, &bar0->tx_traffic_mask); @@ -1226,8 +1235,8 @@ static void en_dis_able_nic_intrs(struct s2io_nic *nic, u16 mask, int flag) /* writing 0 Enables all 8 RX interrupt levels */ writeq(0x0, &bar0->rx_traffic_mask); } else if (flag == DISABLE_INTRS) { - /* - * Disable Rx Traffic Intrs in the general intr mask + /* + * Disable Rx Traffic Intrs in the general intr mask * register. */ writeq(DISABLE_ALL_INTRS, &bar0->rx_traffic_mask); @@ -1238,20 +1247,42 @@ static void en_dis_able_nic_intrs(struct s2io_nic *nic, u16 mask, int flag) } } -/** - * verify_xena_quiescence - Checks whether the H/W is ready +static int check_prc_pcc_state(u64 val64, int flag) +{ + int ret = 0; + + if (flag == FALSE) { + if (!(val64 & ADAPTER_STATUS_RMAC_PCC_IDLE) && + ((val64 & ADAPTER_STATUS_RC_PRC_QUIESCENT) == + ADAPTER_STATUS_RC_PRC_QUIESCENT)) { + ret = 1; + } + } else { + if (((val64 & ADAPTER_STATUS_RMAC_PCC_IDLE) == + ADAPTER_STATUS_RMAC_PCC_IDLE) && + (!(val64 & ADAPTER_STATUS_RC_PRC_QUIESCENT) || + ((val64 & ADAPTER_STATUS_RC_PRC_QUIESCENT) == + ADAPTER_STATUS_RC_PRC_QUIESCENT))) { + ret = 1; + } + } + + return ret; +} +/** + * verify_xena_quiescence - Checks whether the H/W is ready * @val64 : Value read from adapter status register. * @flag : indicates if the adapter enable bit was ever written once * before. 
* Description: Returns whether the H/W is ready to go or not. Depending - * on whether adapter enable bit was written or not the comparison + * on whether adapter enable bit was written or not the comparison * differs and the calling function passes the input argument flag to * indicate this. - * Return: 1 If xena is quiescence + * Return: 1 If xena is quiescence * 0 If Xena is not quiescence */ -static int verify_xena_quiescence(u64 val64, int flag) +static int verify_xena_quiescence(nic_t *sp, u64 val64, int flag) { int ret = 0; u64 tmp64 = ~((u64) val64); @@ -1263,25 +1294,7 @@ static int verify_xena_quiescence(u64 val64, int flag) ADAPTER_STATUS_PIC_QUIESCENT | ADAPTER_STATUS_MC_DRAM_READY | ADAPTER_STATUS_MC_QUEUES_READY | ADAPTER_STATUS_M_PLL_LOCK | ADAPTER_STATUS_P_PLL_LOCK))) { - if (flag == FALSE) { - if (!(val64 & ADAPTER_STATUS_RMAC_PCC_IDLE) && - ((val64 & ADAPTER_STATUS_RC_PRC_QUIESCENT) == - ADAPTER_STATUS_RC_PRC_QUIESCENT)) { - - ret = 1; - - } - } else { - if (((val64 & ADAPTER_STATUS_RMAC_PCC_IDLE) == - ADAPTER_STATUS_RMAC_PCC_IDLE) && - (!(val64 & ADAPTER_STATUS_RC_PRC_QUIESCENT) || - ((val64 & ADAPTER_STATUS_RC_PRC_QUIESCENT) == - ADAPTER_STATUS_RC_PRC_QUIESCENT))) { - - ret = 1; - - } - } + ret = check_prc_pcc_state(val64, flag); } return ret; @@ -1290,12 +1303,12 @@ static int verify_xena_quiescence(u64 val64, int flag) /** * fix_mac_address - Fix for Mac addr problem on Alpha platforms * @sp: Pointer to device specifc structure - * Description : + * Description : * New procedure to clear mac address reading problems on Alpha platforms * */ -static void fix_mac_address(nic_t * sp) +void fix_mac_address(nic_t * sp) { XENA_dev_config_t __iomem *bar0 = sp->bar0; u64 val64; @@ -1303,20 +1316,21 @@ static void fix_mac_address(nic_t * sp) while (fix_mac[i] != END_SIGN) { writeq(fix_mac[i++], &bar0->gpio_control); + udelay(10); val64 = readq(&bar0->gpio_control); } } /** - * start_nic - Turns the device on + * start_nic - Turns the device on * @nic : 
device private variable. - * Description: - * This function actually turns the device on. Before this function is - * called,all Registers are configured from their reset states - * and shared memory is allocated but the NIC is still quiescent. On + * Description: + * This function actually turns the device on. Before this function is + * called,all Registers are configured from their reset states + * and shared memory is allocated but the NIC is still quiescent. On * calling this function, the device interrupts are cleared and the NIC is * literally switched on by writing into the adapter control register. - * Return Value: + * Return Value: * SUCCESS on success and -1 on failure. */ @@ -1325,8 +1339,8 @@ static int start_nic(struct s2io_nic *nic) XENA_dev_config_t __iomem *bar0 = nic->bar0; struct net_device *dev = nic->dev; register u64 val64 = 0; - u16 interruptible, i; - u16 subid; + u16 interruptible; + u16 subid, i; mac_info_t *mac_control; struct config_param *config; @@ -1335,7 +1349,7 @@ static int start_nic(struct s2io_nic *nic) /* PRC Initialization and configuration */ for (i = 0; i < config->rx_ring_num; i++) { - writeq((u64) nic->rx_blocks[i][0].block_dma_addr, + writeq((u64) mac_control->rings[i].rx_blocks[0].block_dma_addr, &bar0->prc_rxd0_n[i]); val64 = readq(&bar0->prc_ctrl_n[i]); @@ -1354,7 +1368,7 @@ static int start_nic(struct s2io_nic *nic) writeq(val64, &bar0->rx_pa_cfg); #endif - /* + /* * Enabling MC-RLDRAM. After enabling the device, we timeout * for around 100ms, which is approximately the time required * for the device to be ready for operation. @@ -1364,27 +1378,27 @@ static int start_nic(struct s2io_nic *nic) SPECIAL_REG_WRITE(val64, &bar0->mc_rldram_mrs, UF); val64 = readq(&bar0->mc_rldram_mrs); - msleep(100); /* Delay by around 100 ms. */ + msleep(100); /* Delay by around 100 ms. */ /* Enabling ECC Protection. 
*/ val64 = readq(&bar0->adapter_control); val64 &= ~ADAPTER_ECC_EN; writeq(val64, &bar0->adapter_control); - /* - * Clearing any possible Link state change interrupts that + /* + * Clearing any possible Link state change interrupts that * could have popped up just before Enabling the card. */ val64 = readq(&bar0->mac_rmac_err_reg); if (val64) writeq(val64, &bar0->mac_rmac_err_reg); - /* - * Verify if the device is ready to be enabled, if so enable + /* + * Verify if the device is ready to be enabled, if so enable * it. */ val64 = readq(&bar0->adapter_status); - if (!verify_xena_quiescence(val64, nic->device_enabled_once)) { + if (!verify_xena_quiescence(nic, val64, nic->device_enabled_once)) { DBG_PRINT(ERR_DBG, "%s: device is not ready, ", dev->name); DBG_PRINT(ERR_DBG, "Adapter status reads: 0x%llx\n", (unsigned long long) val64); @@ -1396,12 +1410,12 @@ static int start_nic(struct s2io_nic *nic) RX_MAC_INTR; en_dis_able_nic_intrs(nic, interruptible, ENABLE_INTRS); - /* + /* * With some switches, link might be already up at this point. - * Because of this weird behavior, when we enable laser, - * we may not get link. We need to handle this. We cannot - * figure out which switch is misbehaving. So we are forced to - * make a global change. + * Because of this weird behavior, when we enable laser, + * we may not get link. We need to handle this. We cannot + * figure out which switch is misbehaving. So we are forced to + * make a global change. */ /* Enabling Laser. */ @@ -1416,17 +1430,17 @@ static int start_nic(struct s2io_nic *nic) val64 |= 0x0000800000000000ULL; writeq(val64, &bar0->gpio_control); val64 = 0x0411040400000000ULL; - writeq(val64, (void __iomem *) bar0 + 0x2700); + writeq(val64, (void __iomem *) ((u8 *) bar0 + 0x2700)); } - /* - * Don't see link state interrupts on certain switches, so + /* + * Don't see link state interrupts on certain switches, so * directly scheduling a link state task from here. 
*/ schedule_work(&nic->set_link_task); - /* - * Here we are performing soft reset on XGXS to + /* + * Here we are performing soft reset on XGXS to * force link down. Since link is already up, we will get * link state change interrupt after this reset */ @@ -1443,12 +1457,12 @@ static int start_nic(struct s2io_nic *nic) return SUCCESS; } -/** - * free_tx_buffers - Free all queued Tx buffers +/** + * free_tx_buffers - Free all queued Tx buffers * @nic : device private variable. - * Description: + * Description: * Free all queued Tx buffers. - * Return Value: void + * Return Value: void */ static void free_tx_buffers(struct s2io_nic *nic) @@ -1466,7 +1480,7 @@ static void free_tx_buffers(struct s2io_nic *nic) for (i = 0; i < config->tx_fifo_num; i++) { for (j = 0; j < config->tx_cfg[i].fifo_len - 1; j++) { - txdp = (TxD_t *) nic->list_info[i][j]. + txdp = (TxD_t *) mac_control->fifos[i].list_info[j]. list_virt_addr; skb = (struct sk_buff *) ((unsigned long) txdp-> @@ -1482,16 +1496,16 @@ static void free_tx_buffers(struct s2io_nic *nic) DBG_PRINT(INTR_DBG, "%s:forcibly freeing %d skbs on FIFO%d\n", dev->name, cnt, i); - mac_control->tx_curr_get_info[i].offset = 0; - mac_control->tx_curr_put_info[i].offset = 0; + mac_control->fifos[i].tx_curr_get_info.offset = 0; + mac_control->fifos[i].tx_curr_put_info.offset = 0; } } -/** - * stop_nic - To stop the nic +/** + * stop_nic - To stop the nic * @nic ; device private variable. - * Description: - * This function does exactly the opposite of what the start_nic() + * Description: + * This function does exactly the opposite of what the start_nic() * function does. This function is called to stop the device. * Return Value: * void. 
@@ -1521,11 +1535,11 @@ static void stop_nic(struct s2io_nic *nic) } } -/** - * fill_rx_buffers - Allocates the Rx side skbs +/** + * fill_rx_buffers - Allocates the Rx side skbs * @nic: device private variable - * @ring_no: ring number - * Description: + * @ring_no: ring number + * Description: * The function allocates Rx side skbs and puts the physical * address of these buffers into the RxD buffer pointers, so that the NIC * can DMA the received frame into these locations. @@ -1533,8 +1547,8 @@ static void stop_nic(struct s2io_nic *nic) * 1. single buffer, * 2. three buffer and * 3. Five buffer modes. - * Each mode defines how many fragments the received frame will be split - * up into by the NIC. The frame is split into L3 header, L4 Header, + * Each mode defines how many fragments the received frame will be split + * up into by the NIC. The frame is split into L3 header, L4 Header, * L4 payload in three buffer mode and in 5 buffer mode, L4 payload itself * is split into 3 fragments. As of now only single buffer mode is * supported. @@ -1542,7 +1556,7 @@ static void stop_nic(struct s2io_nic *nic) * SUCCESS on success or an appropriate -ve value on failure. 
*/ -static int fill_rx_buffers(struct s2io_nic *nic, int ring_no) +int fill_rx_buffers(struct s2io_nic *nic, int ring_no) { struct net_device *dev = nic->dev; struct sk_buff *skb; @@ -1550,14 +1564,13 @@ static int fill_rx_buffers(struct s2io_nic *nic, int ring_no) int off, off1, size, block_no, block_no1; int offset, offset1; u32 alloc_tab = 0; - u32 alloc_cnt = nic->pkt_cnt[ring_no] - - atomic_read(&nic->rx_bufs_left[ring_no]); + u32 alloc_cnt; mac_info_t *mac_control; struct config_param *config; #ifdef CONFIG_2BUFF_MODE RxD_t *rxdpnext; int nextblk; - unsigned long tmp; + u64 tmp; buffAdd_t *ba; dma_addr_t rxdpphys; #endif @@ -1567,17 +1580,18 @@ static int fill_rx_buffers(struct s2io_nic *nic, int ring_no) mac_control = &nic->mac_control; config = &nic->config; - + alloc_cnt = mac_control->rings[ring_no].pkt_cnt - + atomic_read(&nic->rx_bufs_left[ring_no]); size = dev->mtu + HEADER_ETHERNET_II_802_3_SIZE + HEADER_802_2_SIZE + HEADER_SNAP_SIZE; while (alloc_tab < alloc_cnt) { - block_no = mac_control->rx_curr_put_info[ring_no]. + block_no = mac_control->rings[ring_no].rx_curr_put_info. block_index; - block_no1 = mac_control->rx_curr_get_info[ring_no]. + block_no1 = mac_control->rings[ring_no].rx_curr_get_info. block_index; - off = mac_control->rx_curr_put_info[ring_no].offset; - off1 = mac_control->rx_curr_get_info[ring_no].offset; + off = mac_control->rings[ring_no].rx_curr_put_info.offset; + off1 = mac_control->rings[ring_no].rx_curr_get_info.offset; #ifndef CONFIG_2BUFF_MODE offset = block_no * (MAX_RXDS_PER_BLOCK + 1) + off; offset1 = block_no1 * (MAX_RXDS_PER_BLOCK + 1) + off1; @@ -1586,7 +1600,7 @@ static int fill_rx_buffers(struct s2io_nic *nic, int ring_no) offset1 = block_no1 * (MAX_RXDS_PER_BLOCK) + off1; #endif - rxdp = nic->rx_blocks[ring_no][block_no]. + rxdp = mac_control->rings[ring_no].rx_blocks[block_no]. 
block_virt_addr + off; if ((offset == offset1) && (rxdp->Host_Control)) { DBG_PRINT(INTR_DBG, "%s: Get and Put", dev->name); @@ -1595,15 +1609,15 @@ static int fill_rx_buffers(struct s2io_nic *nic, int ring_no) } #ifndef CONFIG_2BUFF_MODE if (rxdp->Control_1 == END_OF_BLOCK) { - mac_control->rx_curr_put_info[ring_no]. + mac_control->rings[ring_no].rx_curr_put_info. block_index++; - mac_control->rx_curr_put_info[ring_no]. - block_index %= nic->block_count[ring_no]; - block_no = mac_control->rx_curr_put_info - [ring_no].block_index; + mac_control->rings[ring_no].rx_curr_put_info. + block_index %= mac_control->rings[ring_no].block_count; + block_no = mac_control->rings[ring_no].rx_curr_put_info. + block_index; off++; off %= (MAX_RXDS_PER_BLOCK + 1); - mac_control->rx_curr_put_info[ring_no].offset = + mac_control->rings[ring_no].rx_curr_put_info.offset = off; rxdp = (RxD_t *) ((unsigned long) rxdp->Control_2); DBG_PRINT(INTR_DBG, "%s: Next block at: %p\n", @@ -1611,30 +1625,30 @@ static int fill_rx_buffers(struct s2io_nic *nic, int ring_no) } #ifndef CONFIG_S2IO_NAPI spin_lock_irqsave(&nic->put_lock, flags); - nic->put_pos[ring_no] = + mac_control->rings[ring_no].put_pos = (block_no * (MAX_RXDS_PER_BLOCK + 1)) + off; spin_unlock_irqrestore(&nic->put_lock, flags); #endif #else if (rxdp->Host_Control == END_OF_BLOCK) { - mac_control->rx_curr_put_info[ring_no]. + mac_control->rings[ring_no].rx_curr_put_info. block_index++; - mac_control->rx_curr_put_info[ring_no]. 
- block_index %= nic->block_count[ring_no]; - block_no = mac_control->rx_curr_put_info - [ring_no].block_index; + mac_control->rings[ring_no].rx_curr_put_info.block_index + %= mac_control->rings[ring_no].block_count; + block_no = mac_control->rings[ring_no].rx_curr_put_info + .block_index; off = 0; DBG_PRINT(INTR_DBG, "%s: block%d at: 0x%llx\n", dev->name, block_no, (unsigned long long) rxdp->Control_1); - mac_control->rx_curr_put_info[ring_no].offset = + mac_control->rings[ring_no].rx_curr_put_info.offset = off; - rxdp = nic->rx_blocks[ring_no][block_no]. + rxdp = mac_control->rings[ring_no].rx_blocks[block_no]. block_virt_addr; } #ifndef CONFIG_S2IO_NAPI spin_lock_irqsave(&nic->put_lock, flags); - nic->put_pos[ring_no] = (block_no * + mac_control->rings[ring_no].put_pos = (block_no * (MAX_RXDS_PER_BLOCK + 1)) + off; spin_unlock_irqrestore(&nic->put_lock, flags); #endif @@ -1646,27 +1660,27 @@ static int fill_rx_buffers(struct s2io_nic *nic, int ring_no) if (rxdp->Control_2 & BIT(0)) #endif { - mac_control->rx_curr_put_info[ring_no]. + mac_control->rings[ring_no].rx_curr_put_info. offset = off; goto end; } #ifdef CONFIG_2BUFF_MODE - /* - * RxDs Spanning cache lines will be replenished only - * if the succeeding RxD is also owned by Host. It - * will always be the ((8*i)+3) and ((8*i)+6) - * descriptors for the 48 byte descriptor. The offending + /* + * RxDs Spanning cache lines will be replenished only + * if the succeeding RxD is also owned by Host. It + * will always be the ((8*i)+3) and ((8*i)+6) + * descriptors for the 48 byte descriptor. The offending * decsriptor is of-course the 3rd descriptor. */ - rxdpphys = nic->rx_blocks[ring_no][block_no]. + rxdpphys = mac_control->rings[ring_no].rx_blocks[block_no]. block_dma_addr + (off * sizeof(RxD_t)); if (((u64) (rxdpphys)) % 128 > 80) { - rxdpnext = nic->rx_blocks[ring_no][block_no]. + rxdpnext = mac_control->rings[ring_no].rx_blocks[block_no]. 
block_virt_addr + (off + 1); if (rxdpnext->Host_Control == END_OF_BLOCK) { nextblk = (block_no + 1) % - (nic->block_count[ring_no]); - rxdpnext = nic->rx_blocks[ring_no] + (mac_control->rings[ring_no].block_count); + rxdpnext = mac_control->rings[ring_no].rx_blocks [nextblk].block_virt_addr; } if (rxdpnext->Control_2 & BIT(0)) @@ -1695,9 +1709,9 @@ static int fill_rx_buffers(struct s2io_nic *nic, int ring_no) rxdp->Control_1 |= RXD_OWN_XENA; off++; off %= (MAX_RXDS_PER_BLOCK + 1); - mac_control->rx_curr_put_info[ring_no].offset = off; + mac_control->rings[ring_no].rx_curr_put_info.offset = off; #else - ba = &nic->ba[ring_no][block_no][off]; + ba = &mac_control->rings[ring_no].ba[block_no][off]; skb_reserve(skb, BUF0_LEN); tmp = ((unsigned long) skb->data & ALIGN_SIZE); if (tmp) @@ -1721,8 +1735,9 @@ static int fill_rx_buffers(struct s2io_nic *nic, int ring_no) rxdp->Host_Control = (u64) ((unsigned long) (skb)); rxdp->Control_1 |= RXD_OWN_XENA; off++; - mac_control->rx_curr_put_info[ring_no].offset = off; + mac_control->rings[ring_no].rx_curr_put_info.offset = off; #endif + atomic_inc(&nic->rx_bufs_left[ring_no]); alloc_tab++; } @@ -1732,9 +1747,9 @@ static int fill_rx_buffers(struct s2io_nic *nic, int ring_no) } /** - * free_rx_buffers - Frees all Rx buffers + * free_rx_buffers - Frees all Rx buffers * @sp: device private variable. - * Description: + * Description: * This function will free all Rx buffers allocated by host. * Return Value: * NONE. @@ -1758,7 +1773,8 @@ static void free_rx_buffers(struct s2io_nic *sp) for (i = 0; i < config->rx_ring_num; i++) { for (j = 0, blk = 0; j < config->rx_cfg[i].num_rxd; j++) { off = j % (MAX_RXDS_PER_BLOCK + 1); - rxdp = sp->rx_blocks[i][blk].block_virt_addr + off; + rxdp = mac_control->rings[i].rx_blocks[blk]. 
+ block_virt_addr + off; #ifndef CONFIG_2BUFF_MODE if (rxdp->Control_1 == END_OF_BLOCK) { @@ -1793,7 +1809,7 @@ static void free_rx_buffers(struct s2io_nic *sp) HEADER_SNAP_SIZE, PCI_DMA_FROMDEVICE); #else - ba = &sp->ba[i][blk][off]; + ba = &mac_control->rings[i].ba[blk][off]; pci_unmap_single(sp->pdev, (dma_addr_t) rxdp->Buffer0_ptr, BUF0_LEN, @@ -1813,10 +1829,10 @@ static void free_rx_buffers(struct s2io_nic *sp) } memset(rxdp, 0, sizeof(RxD_t)); } - mac_control->rx_curr_put_info[i].block_index = 0; - mac_control->rx_curr_get_info[i].block_index = 0; - mac_control->rx_curr_put_info[i].offset = 0; - mac_control->rx_curr_get_info[i].offset = 0; + mac_control->rings[i].rx_curr_put_info.block_index = 0; + mac_control->rings[i].rx_curr_get_info.block_index = 0; + mac_control->rings[i].rx_curr_put_info.offset = 0; + mac_control->rings[i].rx_curr_get_info.offset = 0; atomic_set(&sp->rx_bufs_left[i], 0); DBG_PRINT(INIT_DBG, "%s:Freed 0x%x Rx Buffers on ring%d\n", dev->name, buf_cnt, i); @@ -1826,7 +1842,7 @@ static void free_rx_buffers(struct s2io_nic *sp) /** * s2io_poll - Rx interrupt handler for NAPI support * @dev : pointer to the device structure. - * @budget : The number of packets that were budgeted to be processed + * @budget : The number of packets that were budgeted to be processed * during one pass through the 'Poll" function. * Description: * Comes into picture only if NAPI support has been incorporated. It does @@ -1836,160 +1852,35 @@ static void free_rx_buffers(struct s2io_nic *sp) * 0 on success and 1 if there are No Rx packets to be processed. 
*/ -#ifdef CONFIG_S2IO_NAPI +#if defined(CONFIG_S2IO_NAPI) static int s2io_poll(struct net_device *dev, int *budget) { nic_t *nic = dev->priv; - XENA_dev_config_t __iomem *bar0 = nic->bar0; - int pkts_to_process = *budget, pkt_cnt = 0; - register u64 val64 = 0; - rx_curr_get_info_t get_info, put_info; - int i, get_block, put_block, get_offset, put_offset, ring_bufs; -#ifndef CONFIG_2BUFF_MODE - u16 val16, cksum; -#endif - struct sk_buff *skb; - RxD_t *rxdp; + int pkt_cnt = 0, org_pkts_to_process; mac_info_t *mac_control; struct config_param *config; -#ifdef CONFIG_2BUFF_MODE - buffAdd_t *ba; -#endif + XENA_dev_config_t *bar0 = (XENA_dev_config_t *) nic->bar0; + u64 val64; + int i; mac_control = &nic->mac_control; config = &nic->config; - if (pkts_to_process > dev->quota) - pkts_to_process = dev->quota; + nic->pkts_to_process = *budget; + if (nic->pkts_to_process > dev->quota) + nic->pkts_to_process = dev->quota; + org_pkts_to_process = nic->pkts_to_process; val64 = readq(&bar0->rx_traffic_int); writeq(val64, &bar0->rx_traffic_int); for (i = 0; i < config->rx_ring_num; i++) { - get_info = mac_control->rx_curr_get_info[i]; - get_block = get_info.block_index; - put_info = mac_control->rx_curr_put_info[i]; - put_block = put_info.block_index; - ring_bufs = config->rx_cfg[i].num_rxd; - rxdp = nic->rx_blocks[i][get_block].block_virt_addr + - get_info.offset; -#ifndef CONFIG_2BUFF_MODE - get_offset = (get_block * (MAX_RXDS_PER_BLOCK + 1)) + - get_info.offset; - put_offset = (put_block * (MAX_RXDS_PER_BLOCK + 1)) + - put_info.offset; - while ((!(rxdp->Control_1 & RXD_OWN_XENA)) && - (((get_offset + 1) % ring_bufs) != put_offset)) { - if (--pkts_to_process < 0) { - goto no_rx; - } - if (rxdp->Control_1 == END_OF_BLOCK) { - rxdp = - (RxD_t *) ((unsigned long) rxdp-> - Control_2); - get_info.offset++; - get_info.offset %= - (MAX_RXDS_PER_BLOCK + 1); - get_block++; - get_block %= nic->block_count[i]; - mac_control->rx_curr_get_info[i]. 
- offset = get_info.offset; - mac_control->rx_curr_get_info[i]. - block_index = get_block; - continue; - } - get_offset = - (get_block * (MAX_RXDS_PER_BLOCK + 1)) + - get_info.offset; - skb = - (struct sk_buff *) ((unsigned long) rxdp-> - Host_Control); - if (skb == NULL) { - DBG_PRINT(ERR_DBG, "%s: The skb is ", - dev->name); - DBG_PRINT(ERR_DBG, "Null in Rx Intr\n"); - goto no_rx; - } - val64 = RXD_GET_BUFFER0_SIZE(rxdp->Control_2); - val16 = (u16) (val64 >> 48); - cksum = RXD_GET_L4_CKSUM(rxdp->Control_1); - pci_unmap_single(nic->pdev, (dma_addr_t) - rxdp->Buffer0_ptr, - dev->mtu + - HEADER_ETHERNET_II_802_3_SIZE + - HEADER_802_2_SIZE + - HEADER_SNAP_SIZE, - PCI_DMA_FROMDEVICE); - rx_osm_handler(nic, val16, rxdp, i); - pkt_cnt++; - get_info.offset++; - get_info.offset %= (MAX_RXDS_PER_BLOCK + 1); - rxdp = - nic->rx_blocks[i][get_block].block_virt_addr + - get_info.offset; - mac_control->rx_curr_get_info[i].offset = - get_info.offset; + rx_intr_handler(&mac_control->rings[i]); + pkt_cnt = org_pkts_to_process - nic->pkts_to_process; + if (!nic->pkts_to_process) { + /* Quota for the current iteration has been met */ + goto no_rx; } -#else - get_offset = (get_block * (MAX_RXDS_PER_BLOCK + 1)) + - get_info.offset; - put_offset = (put_block * (MAX_RXDS_PER_BLOCK + 1)) + - put_info.offset; - while (((!(rxdp->Control_1 & RXD_OWN_XENA)) && - !(rxdp->Control_2 & BIT(0))) && - (((get_offset + 1) % ring_bufs) != put_offset)) { - if (--pkts_to_process < 0) { - goto no_rx; - } - skb = (struct sk_buff *) ((unsigned long) - rxdp->Host_Control); - if (skb == NULL) { - DBG_PRINT(ERR_DBG, "%s: The skb is ", - dev->name); - DBG_PRINT(ERR_DBG, "Null in Rx Intr\n"); - goto no_rx; - } - - pci_unmap_single(nic->pdev, (dma_addr_t) - rxdp->Buffer0_ptr, - BUF0_LEN, PCI_DMA_FROMDEVICE); - pci_unmap_single(nic->pdev, (dma_addr_t) - rxdp->Buffer1_ptr, - BUF1_LEN, PCI_DMA_FROMDEVICE); - pci_unmap_single(nic->pdev, (dma_addr_t) - rxdp->Buffer2_ptr, - dev->mtu + BUF0_LEN + 4, - 
PCI_DMA_FROMDEVICE); - ba = &nic->ba[i][get_block][get_info.offset]; - - rx_osm_handler(nic, rxdp, i, ba); - - get_info.offset++; - mac_control->rx_curr_get_info[i].offset = - get_info.offset; - rxdp = - nic->rx_blocks[i][get_block].block_virt_addr + - get_info.offset; - - if (get_info.offset && - (!(get_info.offset % MAX_RXDS_PER_BLOCK))) { - get_info.offset = 0; - mac_control->rx_curr_get_info[i]. - offset = get_info.offset; - get_block++; - get_block %= nic->block_count[i]; - mac_control->rx_curr_get_info[i]. - block_index = get_block; - rxdp = - nic->rx_blocks[i][get_block]. - block_virt_addr; - } - get_offset = - (get_block * (MAX_RXDS_PER_BLOCK + 1)) + - get_info.offset; - pkt_cnt++; - } -#endif } if (!pkt_cnt) pkt_cnt = 1; @@ -2009,7 +1900,7 @@ static int s2io_poll(struct net_device *dev, int *budget) en_dis_able_nic_intrs(nic, RX_TRAFFIC_INTR, ENABLE_INTRS); return 0; - no_rx: +no_rx: dev->quota -= pkt_cnt; *budget -= pkt_cnt; @@ -2022,277 +1913,213 @@ static int s2io_poll(struct net_device *dev, int *budget) } return 1; } -#else -/** +#endif + +/** * rx_intr_handler - Rx interrupt handler * @nic: device private variable. - * Description: - * If the interrupt is because of a received frame or if the + * Description: + * If the interrupt is because of a received frame or if the * receive ring contains fresh as yet un-processed frames,this function is - * called. It picks out the RxD at which place the last Rx processing had - * stopped and sends the skb to the OSM's Rx handler and then increments + * called. It picks out the RxD at which place the last Rx processing had + * stopped and sends the skb to the OSM's Rx handler and then increments * the offset. * Return Value: * NONE. 
*/ - -static void rx_intr_handler(struct s2io_nic *nic) +static void rx_intr_handler(ring_info_t *ring_data) { + nic_t *nic = ring_data->nic; struct net_device *dev = (struct net_device *) nic->dev; - XENA_dev_config_t *bar0 = (XENA_dev_config_t *) nic->bar0; + XENA_dev_config_t __iomem *bar0 = nic->bar0; + int get_block, get_offset, put_block, put_offset, ring_bufs; rx_curr_get_info_t get_info, put_info; RxD_t *rxdp; struct sk_buff *skb; -#ifndef CONFIG_2BUFF_MODE - u16 val16, cksum; -#endif - register u64 val64 = 0; - int get_block, get_offset, put_block, put_offset, ring_bufs; - int i, pkt_cnt = 0; - mac_info_t *mac_control; - struct config_param *config; -#ifdef CONFIG_2BUFF_MODE - buffAdd_t *ba; +#ifndef CONFIG_S2IO_NAPI + int pkt_cnt = 0; #endif + register u64 val64; - mac_control = &nic->mac_control; - config = &nic->config; - - /* - * rx_traffic_int reg is an R1 register, hence we read and write back - * the samevalue in the register to clear it. + /* + * rx_traffic_int reg is an R1 register, hence we read and write + * back the same value in the register to clear it */ - val64 = readq(&bar0->rx_traffic_int); - writeq(val64, &bar0->rx_traffic_int); + val64 = readq(&bar0->rx_traffic_int); + writeq(val64, &bar0->rx_traffic_int); - for (i = 0; i < config->rx_ring_num; i++) { - get_info = mac_control->rx_curr_get_info[i]; - get_block = get_info.block_index; - put_info = mac_control->rx_curr_put_info[i]; - put_block = put_info.block_index; - ring_bufs = config->rx_cfg[i].num_rxd; - rxdp = nic->rx_blocks[i][get_block].block_virt_addr + + get_info = ring_data->rx_curr_get_info; + get_block = get_info.block_index; + put_info = ring_data->rx_curr_put_info; + put_block = put_info.block_index; + ring_bufs = get_info.ring_len+1; + rxdp = ring_data->rx_blocks[get_block].block_virt_addr + get_info.offset; -#ifndef CONFIG_2BUFF_MODE - get_offset = (get_block * (MAX_RXDS_PER_BLOCK + 1)) + - get_info.offset; - spin_lock(&nic->put_lock); - put_offset = nic->put_pos[i]; -
spin_unlock(&nic->put_lock); - while ((!(rxdp->Control_1 & RXD_OWN_XENA)) && - (((get_offset + 1) % ring_bufs) != put_offset)) { - if (rxdp->Control_1 == END_OF_BLOCK) { - rxdp = (RxD_t *) ((unsigned long) - rxdp->Control_2); - get_info.offset++; - get_info.offset %= - (MAX_RXDS_PER_BLOCK + 1); - get_block++; - get_block %= nic->block_count[i]; - mac_control->rx_curr_get_info[i]. - offset = get_info.offset; - mac_control->rx_curr_get_info[i]. - block_index = get_block; - continue; - } - get_offset = - (get_block * (MAX_RXDS_PER_BLOCK + 1)) + - get_info.offset; - skb = (struct sk_buff *) ((unsigned long) - rxdp->Host_Control); - if (skb == NULL) { - DBG_PRINT(ERR_DBG, "%s: The skb is ", - dev->name); - DBG_PRINT(ERR_DBG, "Null in Rx Intr\n"); - return; - } - val64 = RXD_GET_BUFFER0_SIZE(rxdp->Control_2); - val16 = (u16) (val64 >> 48); - cksum = RXD_GET_L4_CKSUM(rxdp->Control_1); - pci_unmap_single(nic->pdev, (dma_addr_t) - rxdp->Buffer0_ptr, - dev->mtu + - HEADER_ETHERNET_II_802_3_SIZE + - HEADER_802_2_SIZE + - HEADER_SNAP_SIZE, - PCI_DMA_FROMDEVICE); - rx_osm_handler(nic, val16, rxdp, i); - get_info.offset++; - get_info.offset %= (MAX_RXDS_PER_BLOCK + 1); - rxdp = - nic->rx_blocks[i][get_block].block_virt_addr + - get_info.offset; - mac_control->rx_curr_get_info[i].offset = - get_info.offset; - pkt_cnt++; - if ((indicate_max_pkts) - && (pkt_cnt > indicate_max_pkts)) - break; - } + get_offset = (get_block * (MAX_RXDS_PER_BLOCK + 1)) + + get_info.offset; +#ifndef CONFIG_S2IO_NAPI + spin_lock(&nic->put_lock); + put_offset = ring_data->put_pos; + spin_unlock(&nic->put_lock); #else - get_offset = (get_block * (MAX_RXDS_PER_BLOCK + 1)) + - get_info.offset; - spin_lock(&nic->put_lock); - put_offset = nic->put_pos[i]; - spin_unlock(&nic->put_lock); - while (((!(rxdp->Control_1 & RXD_OWN_XENA)) && - !(rxdp->Control_2 & BIT(0))) && - (((get_offset + 1) % ring_bufs) != put_offset)) { - skb = (struct sk_buff *) ((unsigned long) - rxdp->Host_Control); - if (skb == NULL) { - 
DBG_PRINT(ERR_DBG, "%s: The skb is ", - dev->name); - DBG_PRINT(ERR_DBG, "Null in Rx Intr\n"); - return; - } - - pci_unmap_single(nic->pdev, (dma_addr_t) - rxdp->Buffer0_ptr, - BUF0_LEN, PCI_DMA_FROMDEVICE); - pci_unmap_single(nic->pdev, (dma_addr_t) - rxdp->Buffer1_ptr, - BUF1_LEN, PCI_DMA_FROMDEVICE); - pci_unmap_single(nic->pdev, (dma_addr_t) - rxdp->Buffer2_ptr, - dev->mtu + BUF0_LEN + 4, - PCI_DMA_FROMDEVICE); - ba = &nic->ba[i][get_block][get_info.offset]; - - rx_osm_handler(nic, rxdp, i, ba); - - get_info.offset++; - mac_control->rx_curr_get_info[i].offset = - get_info.offset; - rxdp = - nic->rx_blocks[i][get_block].block_virt_addr + - get_info.offset; - - if (get_info.offset && - (!(get_info.offset % MAX_RXDS_PER_BLOCK))) { - get_info.offset = 0; - mac_control->rx_curr_get_info[i]. - offset = get_info.offset; - get_block++; - get_block %= nic->block_count[i]; - mac_control->rx_curr_get_info[i]. - block_index = get_block; - rxdp = - nic->rx_blocks[i][get_block]. - block_virt_addr; - } - get_offset = - (get_block * (MAX_RXDS_PER_BLOCK + 1)) + - get_info.offset; - pkt_cnt++; - if ((indicate_max_pkts) - && (pkt_cnt > indicate_max_pkts)) - break; - } + put_offset = (put_block * (MAX_RXDS_PER_BLOCK + 1)) + + put_info.offset; #endif + while ((!(rxdp->Control_1 & RXD_OWN_XENA)) && +#ifdef CONFIG_2BUFF_MODE + (!(rxdp->Control_2 & BIT(0))) && +#endif + (((get_offset + 1) % ring_bufs) != put_offset)) { + skb = (struct sk_buff *) ((unsigned long)rxdp->Host_Control); + if (skb == NULL) { + DBG_PRINT(ERR_DBG, "%s: The skb is ", + dev->name); + DBG_PRINT(ERR_DBG, "Null in Rx Intr\n"); + return; + } +#ifndef CONFIG_2BUFF_MODE + pci_unmap_single(nic->pdev, (dma_addr_t) + rxdp->Buffer0_ptr, + dev->mtu + + HEADER_ETHERNET_II_802_3_SIZE + + HEADER_802_2_SIZE + + HEADER_SNAP_SIZE, + PCI_DMA_FROMDEVICE); +#else + pci_unmap_single(nic->pdev, (dma_addr_t) + rxdp->Buffer0_ptr, + BUF0_LEN, PCI_DMA_FROMDEVICE); + pci_unmap_single(nic->pdev, (dma_addr_t) + rxdp->Buffer1_ptr, + BUF1_LEN,
PCI_DMA_FROMDEVICE); + pci_unmap_single(nic->pdev, (dma_addr_t) + rxdp->Buffer2_ptr, + dev->mtu + BUF0_LEN + 4, + PCI_DMA_FROMDEVICE); +#endif + rx_osm_handler(ring_data, rxdp); + get_info.offset++; + ring_data->rx_curr_get_info.offset = + get_info.offset; + rxdp = ring_data->rx_blocks[get_block].block_virt_addr + + get_info.offset; + if (get_info.offset && + (!(get_info.offset % MAX_RXDS_PER_BLOCK))) { + get_info.offset = 0; + ring_data->rx_curr_get_info.offset + = get_info.offset; + get_block++; + get_block %= ring_data->block_count; + ring_data->rx_curr_get_info.block_index + = get_block; + rxdp = ring_data->rx_blocks[get_block].block_virt_addr; + } + + get_offset = (get_block * (MAX_RXDS_PER_BLOCK + 1)) + + get_info.offset; +#ifdef CONFIG_S2IO_NAPI + nic->pkts_to_process -= 1; + if (!nic->pkts_to_process) + break; +#else + pkt_cnt++; if ((indicate_max_pkts) && (pkt_cnt > indicate_max_pkts)) break; +#endif } } -#endif -/** + +/** * tx_intr_handler - Transmit interrupt handler * @nic : device private variable - * Description: - * If an interrupt was raised to indicate DMA complete of the - * Tx packet, this function is called. It identifies the last TxD - * whose buffer was freed and frees all skbs whose data have already + * Description: + * If an interrupt was raised to indicate DMA complete of the + * Tx packet, this function is called. It identifies the last TxD + * whose buffer was freed and frees all skbs whose data have already * DMA'ed into the NICs internal memory. 
* Return Value: * NONE */ -static void tx_intr_handler(struct s2io_nic *nic) +static void tx_intr_handler(fifo_info_t *fifo_data) { + nic_t *nic = fifo_data->nic; XENA_dev_config_t __iomem *bar0 = nic->bar0; struct net_device *dev = (struct net_device *) nic->dev; tx_curr_get_info_t get_info, put_info; struct sk_buff *skb; TxD_t *txdlp; - register u64 val64 = 0; - int i; u16 j, frg_cnt; - mac_info_t *mac_control; - struct config_param *config; + register u64 val64 = 0; - mac_control = &nic->mac_control; - config = &nic->config; - - /* - * tx_traffic_int reg is an R1 register, hence we read and write - * back the samevalue in the register to clear it. + /* + * tx_traffic_int reg is an R1 register, hence we read and write + * back the same value in the register to clear it */ val64 = readq(&bar0->tx_traffic_int); writeq(val64, &bar0->tx_traffic_int); - for (i = 0; i < config->tx_fifo_num; i++) { - get_info = mac_control->tx_curr_get_info[i]; - put_info = mac_control->tx_curr_put_info[i]; - txdlp = (TxD_t *) nic->list_info[i][get_info.offset]. 
- list_virt_addr; - while ((!(txdlp->Control_1 & TXD_LIST_OWN_XENA)) && - (get_info.offset != put_info.offset) && - (txdlp->Host_Control)) { - /* Check for TxD errors */ - if (txdlp->Control_1 & TXD_T_CODE) { - unsigned long long err; - err = txdlp->Control_1 & TXD_T_CODE; - DBG_PRINT(ERR_DBG, "***TxD error %llx\n", - err); - } - - skb = (struct sk_buff *) ((unsigned long) - txdlp->Host_Control); - if (skb == NULL) { - DBG_PRINT(ERR_DBG, "%s: Null skb ", - dev->name); - DBG_PRINT(ERR_DBG, "in Tx Free Intr\n"); - return; - } - nic->tx_pkt_count++; - - frg_cnt = skb_shinfo(skb)->nr_frags; - - /* For unfragmented skb */ - pci_unmap_single(nic->pdev, (dma_addr_t) - txdlp->Buffer_Pointer, - skb->len - skb->data_len, - PCI_DMA_TODEVICE); - if (frg_cnt) { - TxD_t *temp = txdlp; - txdlp++; - for (j = 0; j < frg_cnt; j++, txdlp++) { - skb_frag_t *frag = - &skb_shinfo(skb)->frags[j]; - pci_unmap_page(nic->pdev, - (dma_addr_t) - txdlp-> - Buffer_Pointer, - frag->size, - PCI_DMA_TODEVICE); - } - txdlp = temp; - } - memset(txdlp, 0, - (sizeof(TxD_t) * config->max_txds)); - - /* Updating the statistics block */ - nic->stats.tx_packets++; - nic->stats.tx_bytes += skb->len; - dev_kfree_skb_irq(skb); - - get_info.offset++; - get_info.offset %= get_info.fifo_len + 1; - txdlp = (TxD_t *) nic->list_info[i] - [get_info.offset].list_virt_addr; - mac_control->tx_curr_get_info[i].offset = - get_info.offset; + get_info = fifo_data->tx_curr_get_info; + put_info = fifo_data->tx_curr_put_info; + txdlp = (TxD_t *) fifo_data->list_info[get_info.offset]. 
+ list_virt_addr; + while ((!(txdlp->Control_1 & TXD_LIST_OWN_XENA)) && + (get_info.offset != put_info.offset) && + (txdlp->Host_Control)) { + /* Check for TxD errors */ + if (txdlp->Control_1 & TXD_T_CODE) { + unsigned long long err; + err = txdlp->Control_1 & TXD_T_CODE; + DBG_PRINT(ERR_DBG, "***TxD error %llx\n", + err); } + + skb = (struct sk_buff *) ((unsigned long) + txdlp->Host_Control); + if (skb == NULL) { + DBG_PRINT(ERR_DBG, "%s: Null skb ", + __FUNCTION__); + DBG_PRINT(ERR_DBG, "in Tx Free Intr\n"); + return; + } + + frg_cnt = skb_shinfo(skb)->nr_frags; + nic->tx_pkt_count++; + + pci_unmap_single(nic->pdev, (dma_addr_t) + txdlp->Buffer_Pointer, + skb->len - skb->data_len, + PCI_DMA_TODEVICE); + if (frg_cnt) { + TxD_t *temp; + temp = txdlp; + txdlp++; + for (j = 0; j < frg_cnt; j++, txdlp++) { + skb_frag_t *frag = + &skb_shinfo(skb)->frags[j]; + pci_unmap_page(nic->pdev, + (dma_addr_t) + txdlp-> + Buffer_Pointer, + frag->size, + PCI_DMA_TODEVICE); + } + txdlp = temp; + } + memset(txdlp, 0, + (sizeof(TxD_t) * fifo_data->max_txds)); + + /* Updating the statistics block */ + nic->stats.tx_packets++; + nic->stats.tx_bytes += skb->len; + dev_kfree_skb_irq(skb); + + get_info.offset++; + get_info.offset %= get_info.fifo_len + 1; + txdlp = (TxD_t *) fifo_data->list_info + [get_info.offset].list_virt_addr; + fifo_data->tx_curr_get_info.offset = + get_info.offset; } spin_lock(&nic->tx_lock); @@ -2301,13 +2128,13 @@ static void tx_intr_handler(struct s2io_nic *nic) spin_unlock(&nic->tx_lock); } -/** +/** * alarm_intr_handler - Alarm Interrrupt handler * @nic: device private variable - * Description: If the interrupt was neither because of Rx packet or Tx + * Description: If the interrupt was neither because of Rx packet or Tx * complete, this function is called. 
If the interrupt was to indicate - * a loss of link, the OSM link status handler is invoked for any other - * alarm interrupt the block that raised the interrupt is displayed + * a loss of link, the OSM link status handler is invoked for any other + * alarm interrupt the block that raised the interrupt is displayed * and a H/W reset is issued. * Return Value: * NONE @@ -2338,7 +2165,7 @@ static void alarm_intr_handler(struct s2io_nic *nic) /* * Also as mentioned in the latest Errata sheets if the PCC_FB_ECC * Error occurs, the adapter will be recycled by disabling the - * adapter enable bit and enabling it again after the device + * adapter enable bit and enabling it again after the device * becomes Quiescent. */ val64 = readq(&bar0->pcc_err_reg); @@ -2354,18 +2181,18 @@ static void alarm_intr_handler(struct s2io_nic *nic) /* Other type of interrupts are not being handled now, TODO */ } -/** +/** * wait_for_cmd_complete - waits for a command to complete. - * @sp : private member of the device structure, which is a pointer to the + * @sp : private member of the device structure, which is a pointer to the * s2io_nic structure. - * Description: Function that waits for a command to Write into RMAC - * ADDR DATA registers to be completed and returns either success or - * error depending on whether the command was complete or not. + * Description: Function that waits for a command to Write into RMAC + * ADDR DATA registers to be completed and returns either success or + * error depending on whether the command was complete or not. * Return value: * SUCCESS on success and FAILURE on failure. */ -static int wait_for_cmd_complete(nic_t * sp) +int wait_for_cmd_complete(nic_t * sp) { XENA_dev_config_t __iomem *bar0 = sp->bar0; int ret = FAILURE, cnt = 0; @@ -2385,17 +2212,17 @@ static int wait_for_cmd_complete(nic_t * sp) return ret; } -/** - * s2io_reset - Resets the card. +/** + * s2io_reset - Resets the card. * @sp : private member of the device structure. 
* Description: Function to Reset the card. This function then also - * restores the previously saved PCI configuration space registers as + * restores the previously saved PCI configuration space registers as * the card reset also resets the configuration space. * Return value: * void. */ -static void s2io_reset(nic_t * sp) +void s2io_reset(nic_t * sp) { XENA_dev_config_t __iomem *bar0 = sp->bar0; u64 val64; @@ -2404,10 +2231,10 @@ static void s2io_reset(nic_t * sp) val64 = SW_RESET_ALL; writeq(val64, &bar0->sw_reset); - /* - * At this stage, if the PCI write is indeed completed, the - * card is reset and so is the PCI Config space of the device. - * So a read cannot be issued at this stage on any of the + /* + * At this stage, if the PCI write is indeed completed, the + * card is reset and so is the PCI Config space of the device. + * So a read cannot be issued at this stage on any of the * registers to ensure the write into "sw_reset" register * has gone through. * Question: Is there any system call that will explicitly force @@ -2420,10 +2247,17 @@ static void s2io_reset(nic_t * sp) /* Restore the PCI state saved during initializarion. 
*/ pci_restore_state(sp->pdev); + s2io_init_pci(sp); msleep(250); + /* Set swapper to enable I/O register access */ + s2io_set_swapper(sp); + + /* Reset device statistics maintained by OS */ + memset(&sp->stats, 0, sizeof (struct net_device_stats)); + /* SXE-002: Configure link and activity LED to turn it off */ subid = sp->pdev->subsystem_device; if ((subid & 0xFF) >= 0x07) { @@ -2431,29 +2265,29 @@ static void s2io_reset(nic_t * sp) val64 |= 0x0000800000000000ULL; writeq(val64, &bar0->gpio_control); val64 = 0x0411040400000000ULL; - writeq(val64, (void __iomem *) bar0 + 0x2700); + writeq(val64, (void __iomem *) ((u8 *) bar0 + 0x2700)); } sp->device_enabled_once = FALSE; } /** - * s2io_set_swapper - to set the swapper controle on the card - * @sp : private member of the device structure, + * s2io_set_swapper - to set the swapper controle on the card + * @sp : private member of the device structure, * pointer to the s2io_nic structure. - * Description: Function to set the swapper control on the card + * Description: Function to set the swapper control on the card * correctly depending on the 'endianness' of the system. * Return value: * SUCCESS on success and FAILURE on failure. */ -static int s2io_set_swapper(nic_t * sp) +int s2io_set_swapper(nic_t * sp) { struct net_device *dev = sp->dev; XENA_dev_config_t __iomem *bar0 = sp->bar0; u64 val64, valt, valr; - /* + /* * Set proper endian settings and verify the same by reading * the PIF Feed-back register. 
*/ @@ -2505,8 +2339,9 @@ static int s2io_set_swapper(nic_t * sp) i++; } if(i == 4) { + unsigned long long x = val64; DBG_PRINT(ERR_DBG, "Write failed, Xmsi_addr "); - DBG_PRINT(ERR_DBG, "reads:0x%llx\n",val64); + DBG_PRINT(ERR_DBG, "reads:0x%llx\n", x); return FAILURE; } } @@ -2514,8 +2349,8 @@ static int s2io_set_swapper(nic_t * sp) val64 &= 0xFFFF000000000000ULL; #ifdef __BIG_ENDIAN - /* - * The device by default set to a big endian format, so a + /* + * The device by default set to a big endian format, so a * big endian driver need not set anything. */ val64 |= (SWAPPER_CTRL_TXP_FE | @@ -2531,9 +2366,9 @@ static int s2io_set_swapper(nic_t * sp) SWAPPER_CTRL_STATS_FE | SWAPPER_CTRL_STATS_SE); writeq(val64, &bar0->swapper_ctrl); #else - /* + /* * Initially we enable all bits to make it accessible by the - * driver, then we selectively enable only those bits that + * driver, then we selectively enable only those bits that * we want to set. */ val64 |= (SWAPPER_CTRL_TXP_FE | @@ -2555,8 +2390,8 @@ static int s2io_set_swapper(nic_t * sp) #endif val64 = readq(&bar0->swapper_ctrl); - /* - * Verifying if endian settings are accurate by reading a + /* + * Verifying if endian settings are accurate by reading a * feedback register. */ val64 = readq(&bar0->pif_rd_swapper_fb); @@ -2576,25 +2411,25 @@ static int s2io_set_swapper(nic_t * sp) * Functions defined below concern the OS part of the driver * * ********************************************************* */ -/** +/** * s2io_open - open entry point of the driver * @dev : pointer to the device structure. * Description: * This function is the open entry point of the driver. It mainly calls a * function to allocate Rx buffers and inserts them into the buffer - * descriptors and then enables the Rx part of the NIC. + * descriptors and then enables the Rx part of the NIC. * Return value: * 0 on success and an appropriate (-)ve integer as defined in errno.h * file on failure. 
*/ -static int s2io_open(struct net_device *dev) +int s2io_open(struct net_device *dev) { nic_t *sp = dev->priv; int err = 0; - /* - * Make sure you have link off by default every time + /* + * Make sure you have link off by default every time * Nic is initialized */ netif_carrier_off(dev); @@ -2604,27 +2439,34 @@ static int s2io_open(struct net_device *dev) if (s2io_card_up(sp)) { DBG_PRINT(ERR_DBG, "%s: H/W initialization failed\n", dev->name); - return -ENODEV; + err = -ENODEV; + goto hw_init_failed; } /* After proper initialization of H/W, register ISR */ - err = request_irq((int) sp->irq, s2io_isr, SA_SHIRQ, + err = request_irq((int) sp->pdev->irq, s2io_isr, SA_SHIRQ, sp->name, dev); if (err) { - s2io_reset(sp); DBG_PRINT(ERR_DBG, "%s: ISR registration failed\n", dev->name); - return err; + goto isr_registration_failed; } if (s2io_set_mac_addr(dev, dev->dev_addr) == FAILURE) { DBG_PRINT(ERR_DBG, "Set Mac Address Failed\n"); - s2io_reset(sp); - return -ENODEV; + err = -ENODEV; + goto setting_mac_address_failed; } netif_start_queue(dev); return 0; + +setting_mac_address_failed: + free_irq(sp->pdev->irq, dev); +isr_registration_failed: + s2io_reset(sp); +hw_init_failed: + return err; } /** @@ -2640,16 +2482,15 @@ static int s2io_open(struct net_device *dev) * file on failure. */ -static int s2io_close(struct net_device *dev) +int s2io_close(struct net_device *dev) { nic_t *sp = dev->priv; - flush_scheduled_work(); netif_stop_queue(dev); /* Reset card, kill tasklet and free Tx and Rx buffers. */ s2io_card_down(sp); - free_irq(dev->irq, dev); + free_irq(sp->pdev->irq, dev); sp->device_close_flag = TRUE; /* Device is shut down. */ return 0; } @@ -2667,7 +2508,7 @@ static int s2io_close(struct net_device *dev) * 0 on success & 1 on failure. 
*/ -static int s2io_xmit(struct sk_buff *skb, struct net_device *dev) +int s2io_xmit(struct sk_buff *skb, struct net_device *dev) { nic_t *sp = dev->priv; u16 frg_cnt, frg_len, i, queue, queue_len, put_off, get_off; @@ -2685,22 +2526,24 @@ static int s2io_xmit(struct sk_buff *skb, struct net_device *dev) mac_control = &sp->mac_control; config = &sp->config; - DBG_PRINT(TX_DBG, "%s: In S2IO Tx routine\n", dev->name); + DBG_PRINT(TX_DBG, "%s: In Neterion Tx routine\n", dev->name); spin_lock_irqsave(&sp->tx_lock, flags); - if (atomic_read(&sp->card_state) == CARD_DOWN) { - DBG_PRINT(ERR_DBG, "%s: Card going down for reset\n", + DBG_PRINT(TX_DBG, "%s: Card going down for reset\n", dev->name); spin_unlock_irqrestore(&sp->tx_lock, flags); - return 1; + dev_kfree_skb(skb); + return 0; } queue = 0; - put_off = (u16) mac_control->tx_curr_put_info[queue].offset; - get_off = (u16) mac_control->tx_curr_get_info[queue].offset; - txdp = (TxD_t *) sp->list_info[queue][put_off].list_virt_addr; - queue_len = mac_control->tx_curr_put_info[queue].fifo_len + 1; + put_off = (u16) mac_control->fifos[queue].tx_curr_put_info.offset; + get_off = (u16) mac_control->fifos[queue].tx_curr_get_info.offset; + txdp = (TxD_t *) mac_control->fifos[queue].list_info[put_off]. 
+ list_virt_addr; + + queue_len = mac_control->fifos[queue].tx_curr_put_info.fifo_len + 1; /* Avoid "put" pointer going beyond "get" pointer */ if (txdp->Host_Control || (((put_off + 1) % queue_len) == get_off)) { DBG_PRINT(ERR_DBG, "Error in xmit, No free TXDs.\n"); @@ -2720,9 +2563,9 @@ static int s2io_xmit(struct sk_buff *skb, struct net_device *dev) frg_cnt = skb_shinfo(skb)->nr_frags; frg_len = skb->len - skb->data_len; - txdp->Host_Control = (unsigned long) skb; txdp->Buffer_Pointer = pci_map_single (sp->pdev, skb->data, frg_len, PCI_DMA_TODEVICE); + txdp->Host_Control = (unsigned long) skb; if (skb->ip_summed == CHECKSUM_HW) { txdp->Control_2 |= (TXD_TX_CKO_IPV4_EN | TXD_TX_CKO_TCP_EN | @@ -2747,11 +2590,12 @@ static int s2io_xmit(struct sk_buff *skb, struct net_device *dev) txdp->Control_1 |= TXD_GATHER_CODE_LAST; tx_fifo = mac_control->tx_FIFO_start[queue]; - val64 = sp->list_info[queue][put_off].list_phy_addr; + val64 = mac_control->fifos[queue].list_info[put_off].list_phy_addr; writeq(val64, &tx_fifo->TxDL_Pointer); val64 = (TX_FIFO_LAST_TXD_NUM(frg_cnt) | TX_FIFO_FIRST_LIST | TX_FIFO_LAST_LIST); + #ifdef NETIF_F_TSO if (mss) val64 |= TX_FIFO_SPECIAL_FUNC; @@ -2762,8 +2606,8 @@ static int s2io_xmit(struct sk_buff *skb, struct net_device *dev) val64 = readq(&bar0->general_int_status); put_off++; - put_off %= mac_control->tx_curr_put_info[queue].fifo_len + 1; - mac_control->tx_curr_put_info[queue].offset = put_off; + put_off %= mac_control->fifos[queue].tx_curr_put_info.fifo_len + 1; + mac_control->fifos[queue].tx_curr_put_info.offset = put_off; /* Avoid "put" pointer going beyond "get" pointer */ if (((put_off + 1) % queue_len) == get_off) { @@ -2784,13 +2628,13 @@ static int s2io_xmit(struct sk_buff *skb, struct net_device *dev) * @irq: the irq of the device. * @dev_id: a void pointer to the dev structure of the NIC. * @pt_regs: pointer to the registers pushed on the stack. - * Description: This function is the ISR handler of the device. 
It - * identifies the reason for the interrupt and calls the relevant - * service routines. As a contongency measure, this ISR allocates the + * Description: This function is the ISR handler of the device. It + * identifies the reason for the interrupt and calls the relevant + * service routines. As a contongency measure, this ISR allocates the * recv buffers, if their numbers are below the panic value which is * presently set to 25% of the original number of rcv buffers allocated. * Return value: - * IRQ_HANDLED: will be returned if IRQ was handled by this routine + * IRQ_HANDLED: will be returned if IRQ was handled by this routine * IRQ_NONE: will be returned if interrupt is not from our device */ static irqreturn_t s2io_isr(int irq, void *dev_id, struct pt_regs *regs) @@ -2798,9 +2642,7 @@ static irqreturn_t s2io_isr(int irq, void *dev_id, struct pt_regs *regs) struct net_device *dev = (struct net_device *) dev_id; nic_t *sp = dev->priv; XENA_dev_config_t __iomem *bar0 = sp->bar0; -#ifndef CONFIG_S2IO_NAPI - int i, ret; -#endif + int i; u64 reason = 0; mac_info_t *mac_control; struct config_param *config; @@ -2808,13 +2650,13 @@ static irqreturn_t s2io_isr(int irq, void *dev_id, struct pt_regs *regs) mac_control = &sp->mac_control; config = &sp->config; - /* + /* * Identify the cause for interrupt and call the appropriate * interrupt handler. Causes for the interrupt could be; * 1. Rx of packet. * 2. Tx complete. * 3. Link down. - * 4. Error in any functional blocks of the NIC. + * 4. Error in any functional blocks of the NIC. 
*/ reason = readq(&bar0->general_int_status); @@ -2823,12 +2665,6 @@ static irqreturn_t s2io_isr(int irq, void *dev_id, struct pt_regs *regs) return IRQ_NONE; } - /* If Intr is because of Tx Traffic */ - if (reason & GEN_INTR_TXTRAFFIC) { - tx_intr_handler(sp); - } - - /* If Intr is because of an error */ if (reason & (GEN_ERROR_INTR)) alarm_intr_handler(sp); @@ -2843,17 +2679,26 @@ static irqreturn_t s2io_isr(int irq, void *dev_id, struct pt_regs *regs) #else /* If Intr is because of Rx Traffic */ if (reason & GEN_INTR_RXTRAFFIC) { - rx_intr_handler(sp); + for (i = 0; i < config->rx_ring_num; i++) { + rx_intr_handler(&mac_control->rings[i]); + } } #endif - /* - * If the Rx buffer count is below the panic threshold then - * reallocate the buffers from the interrupt handler itself, + /* If Intr is because of Tx Traffic */ + if (reason & GEN_INTR_TXTRAFFIC) { + for (i = 0; i < config->tx_fifo_num; i++) + tx_intr_handler(&mac_control->fifos[i]); + } + + /* + * If the Rx buffer count is below the panic threshold then + * reallocate the buffers from the interrupt handler itself, * else schedule a tasklet to reallocate the buffers. */ #ifndef CONFIG_S2IO_NAPI for (i = 0; i < config->rx_ring_num; i++) { + int ret; int rxb_size = atomic_read(&sp->rx_bufs_left[i]); int level = rx_buffer_level(sp, rxb_size, i); @@ -2878,29 +2723,33 @@ static irqreturn_t s2io_isr(int irq, void *dev_id, struct pt_regs *regs) } /** - * s2io_get_stats - Updates the device statistics structure. + * s2io_get_stats - Updates the device statistics structure. * @dev : pointer to the device structure. * Description: - * This function updates the device statistics structure in the s2io_nic + * This function updates the device statistics structure in the s2io_nic * structure and returns a pointer to the same. * Return value: * pointer to the updated net_device_stats structure. 
*/ -static struct net_device_stats *s2io_get_stats(struct net_device *dev) +struct net_device_stats *s2io_get_stats(struct net_device *dev) { nic_t *sp = dev->priv; mac_info_t *mac_control; struct config_param *config; + mac_control = &sp->mac_control; config = &sp->config; - sp->stats.tx_errors = mac_control->stats_info->tmac_any_err_frms; - sp->stats.rx_errors = mac_control->stats_info->rmac_drop_frms; - sp->stats.multicast = mac_control->stats_info->rmac_vld_mcst_frms; + sp->stats.tx_errors = + le32_to_cpu(mac_control->stats_info->tmac_any_err_frms); + sp->stats.rx_errors = + le32_to_cpu(mac_control->stats_info->rmac_drop_frms); + sp->stats.multicast = + le32_to_cpu(mac_control->stats_info->rmac_vld_mcst_frms); sp->stats.rx_length_errors = - mac_control->stats_info->rmac_long_frms; + le32_to_cpu(mac_control->stats_info->rmac_long_frms); return (&sp->stats); } @@ -2909,8 +2758,8 @@ static struct net_device_stats *s2io_get_stats(struct net_device *dev) * s2io_set_multicast - entry point for multicast address enable/disable. * @dev : pointer to the device structure * Description: - * This function is a driver entry point which gets called by the kernel - * whenever multicast addresses must be enabled/disabled. This also gets + * This function is a driver entry point which gets called by the kernel + * whenever multicast addresses must be enabled/disabled. This also gets * called to set/reset promiscuous mode. Depending on the deivce flag, we * determine, if multicast address must be enabled or if promiscuous mode * is to be disabled etc. 
@@ -3010,7 +2859,7 @@ static void s2io_set_multicast(struct net_device *dev) writeq(RMAC_ADDR_DATA0_MEM_ADDR(dis_addr), &bar0->rmac_addr_data0_mem); writeq(RMAC_ADDR_DATA1_MEM_MASK(0ULL), - &bar0->rmac_addr_data1_mem); + &bar0->rmac_addr_data1_mem); val64 = RMAC_ADDR_CMD_MEM_WE | RMAC_ADDR_CMD_MEM_STROBE_NEW_CMD | RMAC_ADDR_CMD_MEM_OFFSET @@ -3039,8 +2888,7 @@ static void s2io_set_multicast(struct net_device *dev) writeq(RMAC_ADDR_DATA0_MEM_ADDR(mac_addr), &bar0->rmac_addr_data0_mem); writeq(RMAC_ADDR_DATA1_MEM_MASK(0ULL), - &bar0->rmac_addr_data1_mem); - + &bar0->rmac_addr_data1_mem); val64 = RMAC_ADDR_CMD_MEM_WE | RMAC_ADDR_CMD_MEM_STROBE_NEW_CMD | RMAC_ADDR_CMD_MEM_OFFSET @@ -3059,12 +2907,12 @@ static void s2io_set_multicast(struct net_device *dev) } /** - * s2io_set_mac_addr - Programs the Xframe mac address + * s2io_set_mac_addr - Programs the Xframe mac address * @dev : pointer to the device structure. * @addr: a uchar pointer to the new mac address which is to be set. - * Description : This procedure will program the Xframe to receive + * Description : This procedure will program the Xframe to receive * frames with new Mac Address - * Return value: SUCCESS on success and an appropriate (-)ve integer + * Return value: SUCCESS on success and an appropriate (-)ve integer * as defined in errno.h file on failure. */ @@ -3075,10 +2923,10 @@ int s2io_set_mac_addr(struct net_device *dev, u8 * addr) register u64 val64, mac_addr = 0; int i; - /* + /* * Set the new MAC address as the new unicast filter and reflect this * change on the device address registered with the OS. It will be - * at offset 0. + * at offset 0. */ for (i = 0; i < ETH_ALEN; i++) { mac_addr <<= 8; @@ -3102,12 +2950,12 @@ int s2io_set_mac_addr(struct net_device *dev, u8 * addr) } /** - * s2io_ethtool_sset - Sets different link parameters. + * s2io_ethtool_sset - Sets different link parameters. * @sp : private member of the device structure, which is a pointer to the * s2io_nic structure. 
* @info: pointer to the structure with parameters given by ethtool to set * link information. * Description: - * The function sets different link parameters provided by the user onto + * The function sets different link parameters provided by the user onto * the NIC. * Return value: * 0 on success. @@ -3129,7 +2977,7 @@ static int s2io_ethtool_sset(struct net_device *dev, } /** - * s2io_ethtol_gset - Return link specific information. + * s2io_ethtol_gset - Return link specific information. * @sp : private member of the device structure, pointer to the * s2io_nic structure. * @info : pointer to the structure with parameters given by ethtool @@ -3161,8 +3009,8 @@ static int s2io_ethtool_gset(struct net_device *dev, struct ethtool_cmd *info) } /** - * s2io_ethtool_gdrvinfo - Returns driver specific information. - * @sp : private member of the device structure, which is a pointer to the + * s2io_ethtool_gdrvinfo - Returns driver specific information. + * @sp : private member of the device structure, which is a pointer to the * s2io_nic structure. * @info : pointer to the structure with parameters given by ethtool to * return driver information. @@ -3190,9 +3038,9 @@ static void s2io_ethtool_gdrvinfo(struct net_device *dev, /** * s2io_ethtool_gregs - dumps the entire space of Xfame into the buffer. - * @sp: private member of the device structure, which is a pointer to the + * @sp: private member of the device structure, which is a pointer to the * s2io_nic structure. - * @regs : pointer to the structure with parameters given by ethtool for + * @regs : pointer to the structure with parameters given by ethtool for * dumping the registers. * @reg_space: The input argumnet into which all the registers are dumped. * Description: @@ -3221,11 +3069,11 @@ static void s2io_ethtool_gregs(struct net_device *dev, /** * s2io_phy_id - timer function that alternates adapter LED. 
- * @data : address of the private member of the device structure, which + * @data : address of the private member of the device structure, which * is a pointer to the s2io_nic structure, provided as an u32. - * Description: This is actually the timer function that alternates the - * adapter LED bit of the adapter control bit to set/reset every time on - * invocation. The timer is set for 1/2 a second, hence tha NIC blinks + * Description: This is actually the timer function that alternates the + * adapter LED bit of the adapter control bit to set/reset every time on + * invocation. The timer is set for 1/2 a second, hence the NIC blinks * once every second. */ static void s2io_phy_id(unsigned long data) @@ -3253,12 +3101,12 @@ static void s2io_phy_id(unsigned long data) * s2io_ethtool_idnic - To physically identify the nic on the system. * @sp : private member of the device structure, which is a pointer to the * s2io_nic structure. - * @id : pointer to the structure with identification parameters given by + * @id : pointer to the structure with identification parameters given by * ethtool. * Description: Used to physically identify the NIC on the system. - * The Link LED will blink for a time specified by the user for + * The Link LED will blink for a time specified by the user for * identification. - * NOTE: The Link has to be Up to be able to blink the LED. Hence + * NOTE: The Link has to be Up to be able to blink the LED. Hence * identification is possible only if it's link is up.
* Return value: * int , returns 0 on success @@ -3288,9 +3136,9 @@ static int s2io_ethtool_idnic(struct net_device *dev, u32 data) } mod_timer(&sp->id_timer, jiffies); if (data) - msleep(data * 1000); + msleep_interruptible(data * HZ); else - msleep(0xFFFFFFFF); + msleep_interruptible(MAX_FLICKER_TIME); del_timer_sync(&sp->id_timer); if (CARDS_WITH_FAULTY_LINK_INDICATORS(subid)) { @@ -3303,7 +3151,8 @@ static int s2io_ethtool_idnic(struct net_device *dev, u32 data) /** * s2io_ethtool_getpause_data -Pause frame frame generation and reception. - * @sp : private member of the device structure, which is a pointer to the * s2io_nic structure. + * @sp : private member of the device structure, which is a pointer to the + * s2io_nic structure. * @ep : pointer to the structure with pause parameters given by ethtool. * Description: * Returns the Pause frame generation and reception capability of the NIC. @@ -3327,7 +3176,7 @@ static void s2io_ethtool_getpause_data(struct net_device *dev, /** * s2io_ethtool_setpause_data - set/reset pause frame generation. - * @sp : private member of the device structure, which is a pointer to the + * @sp : private member of the device structure, which is a pointer to the * s2io_nic structure. * @ep : pointer to the structure with pause parameters given by ethtool. * Description: @@ -3338,7 +3187,7 @@ static void s2io_ethtool_getpause_data(struct net_device *dev, */ static int s2io_ethtool_setpause_data(struct net_device *dev, - struct ethtool_pauseparam *ep) + struct ethtool_pauseparam *ep) { u64 val64; nic_t *sp = dev->priv; @@ -3359,13 +3208,13 @@ static int s2io_ethtool_setpause_data(struct net_device *dev, /** * read_eeprom - reads 4 bytes of data from user given offset. - * @sp : private member of the device structure, which is a pointer to the + * @sp : private member of the device structure, which is a pointer to the * s2io_nic structure. 
* @off : offset at which the data must be written * @data : Its an output parameter where the data read at the given - * offset is stored. + * offset is stored. * Description: - * Will read 4 bytes of data from the user given offset and return the + * Will read 4 bytes of data from the user given offset and return the * read data. * NOTE: Will allow to read only part of the EEPROM visible through the * I2C bus. @@ -3406,7 +3255,7 @@ static int read_eeprom(nic_t * sp, int off, u32 * data) * s2io_nic structure. * @off : offset at which the data must be written * @data : The data that is to be written - * @cnt : Number of bytes of the data that are actually to be written into + * @cnt : Number of bytes of the data that are actually to be written into * the Eeprom. (max of 3) * Description: * Actually writes the relevant part of the data value into the Eeprom @@ -3443,7 +3292,7 @@ static int write_eeprom(nic_t * sp, int off, u32 data, int cnt) /** * s2io_ethtool_geeprom - reads the value stored in the Eeprom. * @sp : private member of the device structure, which is a pointer to the * s2io_nic structure. - * @eeprom : pointer to the user level structure provided by ethtool, + * @eeprom : pointer to the user level structure provided by ethtool, * containing all relevant information. * @data_buf : user defined value to be written into Eeprom. * Description: Reads the values stored in the Eeprom at given offset @@ -3454,7 +3303,7 @@ static int write_eeprom(nic_t * sp, int off, u32 data, int cnt) */ static int s2io_ethtool_geeprom(struct net_device *dev, - struct ethtool_eeprom *eeprom, u8 * data_buf) + struct ethtool_eeprom *eeprom, u8 * data_buf) { u32 data, i, valid; nic_t *sp = dev->priv; @@ -3479,7 +3328,7 @@ static int s2io_ethtool_geeprom(struct net_device *dev, * s2io_ethtool_seeprom - tries to write the user provided value in Eeprom * @sp : private member of the device structure, which is a pointer to the * s2io_nic structure. 
- * @eeprom : pointer to the user level structure provided by ethtool, + * @eeprom : pointer to the user level structure provided by ethtool, * containing all relevant information. * @data_buf ; user defined value to be written into Eeprom. * Description: @@ -3527,8 +3376,8 @@ static int s2io_ethtool_seeprom(struct net_device *dev, } /** - * s2io_register_test - reads and writes into all clock domains. - * @sp : private member of the device structure, which is a pointer to the + * s2io_register_test - reads and writes into all clock domains. + * @sp : private member of the device structure, which is a pointer to the * s2io_nic structure. * @data : variable that returns the result of each of the test conducted b * by the driver. @@ -3545,8 +3394,8 @@ static int s2io_register_test(nic_t * sp, uint64_t * data) u64 val64 = 0; int fail = 0; - val64 = readq(&bar0->pcc_enable); - if (val64 != 0xff00000000000000ULL) { + val64 = readq(&bar0->pif_rd_swapper_fb); + if (val64 != 0x123456789abcdefULL) { fail = 1; DBG_PRINT(INFO_DBG, "Read Test level 1 fails\n"); } @@ -3590,13 +3439,13 @@ static int s2io_register_test(nic_t * sp, uint64_t * data) } /** - * s2io_eeprom_test - to verify that EEprom in the xena can be programmed. + * s2io_eeprom_test - to verify that EEprom in the xena can be programmed. * @sp : private member of the device structure, which is a pointer to the * s2io_nic structure. * @data:variable that returns the result of each of the test conducted by * the driver. * Description: - * Verify that EEPROM in the xena can be programmed using I2C_CONTROL + * Verify that EEPROM in the xena can be programmed using I2C_CONTROL * register. * Return value: * 0 on success. @@ -3661,14 +3510,14 @@ static int s2io_eeprom_test(nic_t * sp, uint64_t * data) /** * s2io_bist_test - invokes the MemBist test of the card . 
- * @sp : private member of the device structure, which is a pointer to the + * @sp : private member of the device structure, which is a pointer to the * s2io_nic structure. - * @data:variable that returns the result of each of the test conducted by + * @data:variable that returns the result of each of the test conducted by * the driver. * Description: * This invokes the MemBist test of the card. We give around * 2 secs time for the Test to complete. If it's still not complete - * within this peiod, we consider that the test failed. + * within this period, we consider that the test failed. * Return value: * 0 on success and -1 on failure. */ @@ -3697,13 +3546,13 @@ static int s2io_bist_test(nic_t * sp, uint64_t * data) } /** - * s2io-link_test - verifies the link state of the nic - * @sp ; private member of the device structure, which is a pointer to the + * s2io_link_test - verifies the link state of the nic + * @sp : private member of the device structure, which is a pointer to the * s2io_nic structure. * @data: variable that returns the result of each of the test conducted by * the driver. * Description: - * The function verifies the link state of the NIC and updates the input + * The function verifies the link state of the NIC and updates the input * argument 'data' appropriately. * Return value: * 0 on success. @@ -3722,13 +3571,13 @@ static int s2io_link_test(nic_t * sp, uint64_t * data) } /** - * s2io_rldram_test - offline test for access to the RldRam chip on the NIC - * @sp - private member of the device structure, which is a pointer to the + * s2io_rldram_test - offline test for access to the RldRam chip on the NIC + * @sp - private member of the device structure, which is a pointer to the * s2io_nic structure. - * @data - variable that returns the result of each of the test + * @data - variable that returns the result of each of the test * conducted by the driver.
* Description: - * This is one of the offline test that tests the read and write + * This is one of the offline test that tests the read and write * access to the RldRam chip on the NIC. * Return value: * 0 on success. @@ -3833,7 +3682,7 @@ static int s2io_rldram_test(nic_t * sp, uint64_t * data) * s2io_nic structure. * @ethtest : pointer to a ethtool command specific structure that will be * returned to the user. - * @data : variable that returns the result of each of the test + * @data : variable that returns the result of each of the test * conducted by the driver. * Description: * This function conducts 6 tests ( 4 offline and 2 online) to determine @@ -3851,23 +3700,18 @@ static void s2io_ethtool_test(struct net_device *dev, if (ethtest->flags == ETH_TEST_FL_OFFLINE) { /* Offline Tests. */ - if (orig_state) { + if (orig_state) s2io_close(sp->dev); - s2io_set_swapper(sp); - } else - s2io_set_swapper(sp); if (s2io_register_test(sp, &data[0])) ethtest->flags |= ETH_TEST_FL_FAILED; s2io_reset(sp); - s2io_set_swapper(sp); if (s2io_rldram_test(sp, &data[3])) ethtest->flags |= ETH_TEST_FL_FAILED; s2io_reset(sp); - s2io_set_swapper(sp); if (s2io_eeprom_test(sp, &data[1])) ethtest->flags |= ETH_TEST_FL_FAILED; @@ -3951,20 +3795,19 @@ static void s2io_get_ethtool_stats(struct net_device *dev, tmp_stats[i++] = le32_to_cpu(stat_info->rmac_err_tcp); } -static int s2io_ethtool_get_regs_len(struct net_device *dev) +int s2io_ethtool_get_regs_len(struct net_device *dev) { return (XENA_REG_SPACE); } -static u32 s2io_ethtool_get_rx_csum(struct net_device * dev) +u32 s2io_ethtool_get_rx_csum(struct net_device * dev) { nic_t *sp = dev->priv; return (sp->rx_csum); } - -static int s2io_ethtool_set_rx_csum(struct net_device *dev, u32 data) +int s2io_ethtool_set_rx_csum(struct net_device *dev, u32 data) { nic_t *sp = dev->priv; @@ -3975,19 +3818,17 @@ static int s2io_ethtool_set_rx_csum(struct net_device *dev, u32 data) return 0; } - -static int s2io_get_eeprom_len(struct net_device 
*dev) +int s2io_get_eeprom_len(struct net_device *dev) { return (XENA_EEPROM_SPACE); } -static int s2io_ethtool_self_test_count(struct net_device *dev) +int s2io_ethtool_self_test_count(struct net_device *dev) { return (S2IO_TEST_LEN); } - -static void s2io_ethtool_get_strings(struct net_device *dev, - u32 stringset, u8 * data) +void s2io_ethtool_get_strings(struct net_device *dev, + u32 stringset, u8 * data) { switch (stringset) { case ETH_SS_TEST: @@ -3998,13 +3839,12 @@ static void s2io_ethtool_get_strings(struct net_device *dev, sizeof(ethtool_stats_keys)); } } - static int s2io_ethtool_get_stats_count(struct net_device *dev) { return (S2IO_STAT_LEN); } -static int s2io_ethtool_op_set_tx_csum(struct net_device *dev, u32 data) +int s2io_ethtool_op_set_tx_csum(struct net_device *dev, u32 data) { if (data) dev->features |= NETIF_F_IP_CSUM; @@ -4046,21 +3886,18 @@ static struct ethtool_ops netdev_ethtool_ops = { }; /** - * s2io_ioctl - Entry point for the Ioctl + * s2io_ioctl - Entry point for the Ioctl * @dev : Device pointer. * @ifr : An IOCTL specefic structure, that can contain a pointer to * a proprietary structure used to pass information to the driver. * @cmd : This is used to distinguish between the different commands that * can be passed to the IOCTL functions. * Description: - * This function has support for ethtool, adding multiple MAC addresses on - * the NIC and some DBG commands for the util tool. - * Return value: - * Currently the IOCTL supports no operations, hence by default this - * function returns OP NOT SUPPORTED value. + * Currently there is no special functionality supported in IOCTL, hence + * the function always returns -EOPNOTSUPP */ -static int s2io_ioctl(struct net_device *dev, struct ifreq *rq, int cmd) +int s2io_ioctl(struct net_device *dev, struct ifreq *rq, int cmd) { return -EOPNOTSUPP; } @@ -4076,7 +3913,7 @@ static int s2io_ioctl(struct net_device *dev, struct ifreq *rq, int cmd) * file on failure.
*/ -static int s2io_change_mtu(struct net_device *dev, int new_mtu) +int s2io_change_mtu(struct net_device *dev, int new_mtu) { nic_t *sp = dev->priv; XENA_dev_config_t __iomem *bar0 = sp->bar0; @@ -4084,7 +3921,7 @@ static int s2io_change_mtu(struct net_device *dev, int new_mtu) if (netif_running(dev)) { DBG_PRINT(ERR_DBG, "%s: Must be stopped to ", dev->name); - DBG_PRINT(ERR_DBG, "change its MTU \n"); + DBG_PRINT(ERR_DBG, "change its MTU\n"); return -EBUSY; } @@ -4108,9 +3945,9 @@ static int s2io_change_mtu(struct net_device *dev, int new_mtu) * @dev_adr : address of the device structure in dma_addr_t format. * Description: * This is the tasklet or the bottom half of the ISR. This is - * an extension of the ISR which is scheduled by the scheduler to be run + * an extension of the ISR which is scheduled by the scheduler to be run * when the load on the CPU is low. All low priority tasks of the ISR can - * be pushed into the tasklet. For now the tasklet is used only to + * be pushed into the tasklet. For now the tasklet is used only to * replenish the Rx buffers in the Rx buffer descriptors. * Return value: * void. @@ -4166,14 +4003,14 @@ static void s2io_set_link(unsigned long data) } subid = nic->pdev->subsystem_device; - /* - * Allow a small delay for the NICs self initiated + /* + * Allow a small delay for the NICs self initiated * cleanup to complete. */ msleep(100); val64 = readq(&bar0->adapter_status); - if (verify_xena_quiescence(val64, nic->device_enabled_once)) { + if (verify_xena_quiescence(nic, val64, nic->device_enabled_once)) { if (LINK_IS_UP(val64)) { val64 = readq(&bar0->adapter_control); val64 |= ADAPTER_CNTL_EN; @@ -4224,8 +4061,9 @@ static void s2io_card_down(nic_t * sp) register u64 val64 = 0; /* If s2io_set_link task is executing, wait till it completes. 
*/ - while (test_and_set_bit(0, &(sp->link_state))) + while (test_and_set_bit(0, &(sp->link_state))) { msleep(50); + } atomic_set(&sp->card_state, CARD_DOWN); /* disable Tx and Rx traffic on the NIC */ @@ -4237,7 +4075,7 @@ static void s2io_card_down(nic_t * sp) /* Check if the device is Quiescent and then Reset the NIC */ do { val64 = readq(&bar0->adapter_status); - if (verify_xena_quiescence(val64, sp->device_enabled_once)) { + if (verify_xena_quiescence(sp, val64, sp->device_enabled_once)) { break; } @@ -4276,8 +4114,8 @@ static int s2io_card_up(nic_t * sp) return -ENODEV; } - /* - * Initializing the Rx buffers. For now we are considering only 1 + /* + * Initializing the Rx buffers. For now we are considering only 1 * Rx ring and initializing buffers into 30 Rx blocks */ mac_control = &sp->mac_control; @@ -4315,12 +4153,12 @@ static int s2io_card_up(nic_t * sp) return 0; } -/** +/** * s2io_restart_nic - Resets the NIC. * @data : long pointer to the device private structure * Description: * This function is scheduled to be run by the s2io_tx_watchdog - * function after 0.5 secs to reset the NIC. The idea is to reduce + * function after 0.5 secs to reset the NIC. The idea is to reduce * the run time of the watch dog routine which is run holding a * spin lock. */ @@ -4338,10 +4176,11 @@ static void s2io_restart_nic(unsigned long data) netif_wake_queue(dev); DBG_PRINT(ERR_DBG, "%s: was reset by Tx watchdog timer\n", dev->name); + } -/** - * s2io_tx_watchdog - Watchdog for transmit side. +/** + * s2io_tx_watchdog - Watchdog for transmit side. * @dev : Pointer to net device structure * Description: * This function is triggered if the Tx Queue is stopped @@ -4369,7 +4208,7 @@ static void s2io_tx_watchdog(struct net_device *dev) * @len : length of the packet * @cksum : FCS checksum of the frame. * @ring_no : the ring from which this RxD was extracted. 
- * Description: + * Description: * This function is called by the Tx interrupt serivce routine to perform * some OS related operations on the SKB before passing it to the upper * layers. It mainly checks if the checksum is OK, if so adds it to the @@ -4379,71 +4218,33 @@ static void s2io_tx_watchdog(struct net_device *dev) * Return value: * SUCCESS on success and -1 on failure. */ -#ifndef CONFIG_2BUFF_MODE -static int rx_osm_handler(nic_t * sp, u16 len, RxD_t * rxdp, int ring_no) -#else -static int rx_osm_handler(nic_t * sp, RxD_t * rxdp, int ring_no, - buffAdd_t * ba) -#endif +static int rx_osm_handler(ring_info_t *ring_data, RxD_t * rxdp) { + nic_t *sp = ring_data->nic; struct net_device *dev = (struct net_device *) sp->dev; - struct sk_buff *skb = - (struct sk_buff *) ((unsigned long) rxdp->Host_Control); + struct sk_buff *skb = (struct sk_buff *) + ((unsigned long) rxdp->Host_Control); + int ring_no = ring_data->ring_no; u16 l3_csum, l4_csum; #ifdef CONFIG_2BUFF_MODE - int buf0_len, buf2_len; + int buf0_len = RXD_GET_BUFFER0_SIZE(rxdp->Control_2); + int buf2_len = RXD_GET_BUFFER2_SIZE(rxdp->Control_2); + int get_block = ring_data->rx_curr_get_info.block_index; + int get_off = ring_data->rx_curr_get_info.offset; + buffAdd_t *ba = &ring_data->ba[get_block][get_off]; unsigned char *buff; +#else + u16 len = (u16) ((RXD_GET_BUFFER0_SIZE(rxdp->Control_2)) >> 48);; #endif - - l3_csum = RXD_GET_L3_CKSUM(rxdp->Control_1); - if ((rxdp->Control_1 & TCP_OR_UDP_FRAME) && (sp->rx_csum)) { - l4_csum = RXD_GET_L4_CKSUM(rxdp->Control_1); - if ((l3_csum == L3_CKSUM_OK) && (l4_csum == L4_CKSUM_OK)) { - /* - * NIC verifies if the Checksum of the received - * frame is Ok or not and accordingly returns - * a flag in the RxD. - */ - skb->ip_summed = CHECKSUM_UNNECESSARY; - } else { - /* - * Packet with erroneous checksum, let the - * upper layers deal with it. 
- */ - skb->ip_summed = CHECKSUM_NONE; - } - } else { - skb->ip_summed = CHECKSUM_NONE; - } - + skb->dev = dev; if (rxdp->Control_1 & RXD_T_CODE) { unsigned long long err = rxdp->Control_1 & RXD_T_CODE; DBG_PRINT(ERR_DBG, "%s: Rx error Value: 0x%llx\n", dev->name, err); } -#ifdef CONFIG_2BUFF_MODE - buf0_len = RXD_GET_BUFFER0_SIZE(rxdp->Control_2); - buf2_len = RXD_GET_BUFFER2_SIZE(rxdp->Control_2); -#endif - skb->dev = dev; -#ifndef CONFIG_2BUFF_MODE - skb_put(skb, len); - skb->protocol = eth_type_trans(skb, dev); -#else - buff = skb_push(skb, buf0_len); - memcpy(buff, ba->ba_0, buf0_len); - skb_put(skb, buf2_len); - skb->protocol = eth_type_trans(skb, dev); -#endif - -#ifdef CONFIG_S2IO_NAPI - netif_receive_skb(skb); -#else - netif_rx(skb); -#endif - - dev->last_rx = jiffies; + /* Updating statistics */ + rxdp->Host_Control = 0; sp->rx_pkt_count++; sp->stats.rx_packets++; #ifndef CONFIG_2BUFF_MODE @@ -4452,8 +4253,44 @@ static int rx_osm_handler(nic_t * sp, RxD_t * rxdp, int ring_no, sp->stats.rx_bytes += buf0_len + buf2_len; #endif +#ifndef CONFIG_2BUFF_MODE + skb_put(skb, len); +#else + buff = skb_push(skb, buf0_len); + memcpy(buff, ba->ba_0, buf0_len); + skb_put(skb, buf2_len); +#endif + + if ((rxdp->Control_1 & TCP_OR_UDP_FRAME) && + (sp->rx_csum)) { + l3_csum = RXD_GET_L3_CKSUM(rxdp->Control_1); + l4_csum = RXD_GET_L4_CKSUM(rxdp->Control_1); + if ((l3_csum == L3_CKSUM_OK) && (l4_csum == L4_CKSUM_OK)) { + /* + * NIC verifies if the Checksum of the received + * frame is Ok or not and accordingly returns + * a flag in the RxD. + */ + skb->ip_summed = CHECKSUM_UNNECESSARY; + } else { + /* + * Packet with erroneous checksum, let the + * upper layers deal with it. 
+ */ + skb->ip_summed = CHECKSUM_NONE; + } + } else { + skb->ip_summed = CHECKSUM_NONE; + } + + skb->protocol = eth_type_trans(skb, dev); +#ifdef CONFIG_S2IO_NAPI + netif_receive_skb(skb); +#else + netif_rx(skb); +#endif + dev->last_rx = jiffies; atomic_dec(&sp->rx_bufs_left[ring_no]); - rxdp->Host_Control = 0; return SUCCESS; } @@ -4464,13 +4301,13 @@ static int rx_osm_handler(nic_t * sp, RxD_t * rxdp, int ring_no, * @link : inidicates whether link is UP/DOWN. * Description: * This function stops/starts the Tx queue depending on whether the link - * status of the NIC is is down or up. This is called by the Alarm - * interrupt handler whenever a link change interrupt comes up. + * status of the NIC is is down or up. This is called by the Alarm + * interrupt handler whenever a link change interrupt comes up. * Return value: * void. */ -static void s2io_link(nic_t * sp, int link) +void s2io_link(nic_t * sp, int link) { struct net_device *dev = (struct net_device *) sp->dev; @@ -4487,8 +4324,25 @@ static void s2io_link(nic_t * sp, int link) } /** - * s2io_init_pci -Initialization of PCI and PCI-X configuration registers . - * @sp : private member of the device structure, which is a pointer to the + * get_xena_rev_id - to identify revision ID of xena. + * @pdev : PCI Dev structure + * Description: + * Function to identify the Revision ID of xena. + * Return value: + * returns the revision ID of the device. + */ + +int get_xena_rev_id(struct pci_dev *pdev) +{ + u8 id = 0; + int ret; + ret = pci_read_config_byte(pdev, PCI_REVISION_ID, (u8 *) & id); + return id; +} + +/** + * s2io_init_pci -Initialization of PCI and PCI-X configuration registers . + * @sp : private member of the device structure, which is a pointer to the * s2io_nic structure. 
* Description: * This function initializes a few of the PCI and PCI-X configuration registers @@ -4499,15 +4353,15 @@ static void s2io_link(nic_t * sp, int link) static void s2io_init_pci(nic_t * sp) { - u16 pci_cmd = 0; + u16 pci_cmd = 0, pcix_cmd = 0; /* Enable Data Parity Error Recovery in PCI-X command register. */ pci_read_config_word(sp->pdev, PCIX_COMMAND_REGISTER, - &(sp->pcix_cmd)); + &(pcix_cmd)); pci_write_config_word(sp->pdev, PCIX_COMMAND_REGISTER, - (sp->pcix_cmd | 1)); + (pcix_cmd | 1)); pci_read_config_word(sp->pdev, PCIX_COMMAND_REGISTER, - &(sp->pcix_cmd)); + &(pcix_cmd)); /* Set the PErr Response bit in PCI command register. */ pci_read_config_word(sp->pdev, PCI_COMMAND, &pci_cmd); @@ -4516,34 +4370,36 @@ static void s2io_init_pci(nic_t * sp) pci_read_config_word(sp->pdev, PCI_COMMAND, &pci_cmd); /* Set MMRB count to 1024 in PCI-X Command register. */ - sp->pcix_cmd &= 0xFFF3; - pci_write_config_word(sp->pdev, PCIX_COMMAND_REGISTER, (sp->pcix_cmd | (0x1 << 2))); /* MMRBC 1K */ + pcix_cmd &= 0xFFF3; + pci_write_config_word(sp->pdev, PCIX_COMMAND_REGISTER, + (pcix_cmd | (0x1 << 2))); /* MMRBC 1K */ pci_read_config_word(sp->pdev, PCIX_COMMAND_REGISTER, - &(sp->pcix_cmd)); + &(pcix_cmd)); /* Setting Maximum outstanding splits based on system type. */ - sp->pcix_cmd &= 0xFF8F; + pcix_cmd &= 0xFF8F; + pcix_cmd |= XENA_MAX_OUTSTANDING_SPLITS(0x1); /* 2 splits. */ + pci_write_config_word(sp->pdev, PCIX_COMMAND_REGISTER, + pcix_cmd); + pci_read_config_word(sp->pdev, PCIX_COMMAND_REGISTER, + &(pcix_cmd)); - sp->pcix_cmd |= XENA_MAX_OUTSTANDING_SPLITS(0x1); /* 2 splits. */ - pci_write_config_word(sp->pdev, PCIX_COMMAND_REGISTER, - sp->pcix_cmd); - pci_read_config_word(sp->pdev, PCIX_COMMAND_REGISTER, - &(sp->pcix_cmd)); /* Forcibly disabling relaxed ordering capability of the card. 
*/ - sp->pcix_cmd &= 0xfffd; + pcix_cmd &= 0xfffd; pci_write_config_word(sp->pdev, PCIX_COMMAND_REGISTER, - sp->pcix_cmd); + pcix_cmd); pci_read_config_word(sp->pdev, PCIX_COMMAND_REGISTER, - &(sp->pcix_cmd)); + &(pcix_cmd)); } MODULE_AUTHOR("Raghavendra Koushik "); MODULE_LICENSE("GPL"); module_param(tx_fifo_num, int, 0); -module_param_array(tx_fifo_len, int, NULL, 0); module_param(rx_ring_num, int, 0); -module_param_array(rx_ring_sz, int, NULL, 0); +module_param_array(tx_fifo_len, uint, NULL, 0); +module_param_array(rx_ring_sz, uint, NULL, 0); module_param(Stats_refresh_time, int, 0); +module_param_array(rts_frm_len, uint, NULL, 0); module_param(rmac_pause_time, int, 0); module_param(mc_pause_threshold_q0q3, int, 0); module_param(mc_pause_threshold_q4q7, int, 0); @@ -4553,15 +4409,16 @@ module_param(rmac_util_period, int, 0); #ifndef CONFIG_S2IO_NAPI module_param(indicate_max_pkts, int, 0); #endif + /** - * s2io_init_nic - Initialization of the adapter . + * s2io_init_nic - Initialization of the adapter . * @pdev : structure containing the PCI related information of the device. * @pre: List of PCI devices supported by the driver listed in s2io_tbl. * Description: * The function initializes an adapter identified by the pci_dec structure. - * All OS related initialization including memory and device structure and - * initlaization of the device private variable is done. Also the swapper - * control register is initialized to enable read and write into the I/O + * All OS related initialization including memory and device structure and + * initlaization of the device private variable is done. Also the swapper + * control register is initialized to enable read and write into the I/O * registers of the device. * Return value: * returns 0 on success and negative on failure. 
@@ -4572,7 +4429,6 @@ s2io_init_nic(struct pci_dev *pdev, const struct pci_device_id *pre) { nic_t *sp; struct net_device *dev; - char *dev_name = "S2IO 10GE NIC"; int i, j, ret; int dma_flag = FALSE; u32 mac_up, mac_down; @@ -4582,9 +4438,9 @@ s2io_init_nic(struct pci_dev *pdev, const struct pci_device_id *pre) mac_info_t *mac_control; struct config_param *config; - - DBG_PRINT(ERR_DBG, "Loading S2IO driver with %s\n", - s2io_driver_version); +#ifdef CONFIG_S2IO_NAPI + DBG_PRINT(ERR_DBG, "NAPI support has been enabled\n"); +#endif if ((ret = pci_enable_device(pdev))) { DBG_PRINT(ERR_DBG, @@ -4595,7 +4451,6 @@ s2io_init_nic(struct pci_dev *pdev, const struct pci_device_id *pre) if (!pci_set_dma_mask(pdev, DMA_64BIT_MASK)) { DBG_PRINT(INIT_DBG, "s2io_init_nic: Using 64bit DMA\n"); dma_flag = TRUE; - if (pci_set_consistent_dma_mask (pdev, DMA_64BIT_MASK)) { DBG_PRINT(ERR_DBG, @@ -4635,21 +4490,17 @@ s2io_init_nic(struct pci_dev *pdev, const struct pci_device_id *pre) memset(sp, 0, sizeof(nic_t)); sp->dev = dev; sp->pdev = pdev; - sp->vendor_id = pdev->vendor; - sp->device_id = pdev->device; sp->high_dma_flag = dma_flag; - sp->irq = pdev->irq; sp->device_enabled_once = FALSE; - strcpy(sp->name, dev_name); /* Initialize some PCI/PCI-X fields of the NIC. */ s2io_init_pci(sp); - /* + /* * Setting the device configuration parameters. - * Most of these parameters can be specified by the user during - * module insertion as they are module loadable parameters. If - * these parameters are not not specified during load time, they + * Most of these parameters can be specified by the user during + * module insertion as they are module loadable parameters. If + * these parameters are not not specified during load time, they * are initialized with default values. 
*/ mac_control = &sp->mac_control; @@ -4663,6 +4514,10 @@ s2io_init_nic(struct pci_dev *pdev, const struct pci_device_id *pre) config->tx_cfg[i].fifo_priority = i; } + /* mapping the QoS priority to the configured fifos */ + for (i = 0; i < MAX_TX_FIFOS; i++) + config->fifo_mapping[i] = fifo_map[config->tx_fifo_num][i]; + config->tx_intr_type = TXD_INT_TYPE_UTILZ; for (i = 0; i < config->tx_fifo_num; i++) { config->tx_cfg[i].f_no_snoop = @@ -4743,13 +4598,14 @@ s2io_init_nic(struct pci_dev *pdev, const struct pci_device_id *pre) dev->do_ioctl = &s2io_ioctl; dev->change_mtu = &s2io_change_mtu; SET_ETHTOOL_OPS(dev, &netdev_ethtool_ops); + /* * will use eth_mac_addr() for dev->set_mac_address * mac address will be set every time dev->open() is called */ -#ifdef CONFIG_S2IO_NAPI +#if defined(CONFIG_S2IO_NAPI) dev->poll = s2io_poll; - dev->weight = 90; + dev->weight = 32; #endif dev->features |= NETIF_F_SG | NETIF_F_IP_CSUM; @@ -4776,22 +4632,14 @@ s2io_init_nic(struct pci_dev *pdev, const struct pci_device_id *pre) goto set_swap_failed; } - /* Fix for all "FFs" MAC address problems observed on Alpha platforms */ + /* + * Fix for all "FFs" MAC address problems observed on + * Alpha platforms + */ fix_mac_address(sp); s2io_reset(sp); /* - * Setting swapper control on the NIC, so the MAC address can be read. - */ - if (s2io_set_swapper(sp)) { - DBG_PRINT(ERR_DBG, - "%s: S2IO: swapper settings are wrong\n", - dev->name); - ret = -EAGAIN; - goto set_swap_failed; - } - - /* * MAC address initialization. * For now only one mac address will be read and used. 
*/ @@ -4828,23 +4676,22 @@ s2io_init_nic(struct pci_dev *pdev, const struct pci_device_id *pre) memcpy(dev->dev_addr, sp->def_mac_addr, ETH_ALEN); /* - * Initialize the tasklet status and link state flags + * Initialize the tasklet status and link state flags * and the card statte parameter */ atomic_set(&(sp->card_state), 0); sp->tasklet_status = 0; sp->link_state = 0; - /* Initialize spinlocks */ spin_lock_init(&sp->tx_lock); #ifndef CONFIG_S2IO_NAPI spin_lock_init(&sp->put_lock); #endif - /* - * SXE-002: Configure link and activity LED to init state - * on driver load. + /* + * SXE-002: Configure link and activity LED to init state + * on driver load. */ subid = sp->pdev->subsystem_device; if ((subid & 0xFF) >= 0x07) { @@ -4864,9 +4711,9 @@ s2io_init_nic(struct pci_dev *pdev, const struct pci_device_id *pre) goto register_failed; } - /* - * Make Link state as off at this point, when the Link change - * interrupt comes the state will be automatically changed to + /* + * Make Link state as off at this point, when the Link change + * interrupt comes the state will be automatically changed to * the right state. */ netif_carrier_off(dev); @@ -4891,11 +4738,11 @@ s2io_init_nic(struct pci_dev *pdev, const struct pci_device_id *pre) } /** - * s2io_rem_nic - Free the PCI device + * s2io_rem_nic - Free the PCI device * @pdev: structure containing the PCI related information of the device. - * Description: This function is called by the Pci subsystem to release a + * Description: This function is called by the Pci subsystem to release a * PCI device and free up all resource held up by the device. This could - * be in response to a Hot plug event or when the driver is to be removed + * be in response to a Hot plug event or when the driver is to be removed * from memory. 
*/ @@ -4919,7 +4766,6 @@ static void __devexit s2io_rem_nic(struct pci_dev *pdev) pci_disable_device(pdev); pci_release_regions(pdev); pci_set_drvdata(pdev, NULL); - free_netdev(dev); } @@ -4935,11 +4781,11 @@ int __init s2io_starter(void) } /** - * s2io_closer - Cleanup routine for the driver + * s2io_closer - Cleanup routine for the driver * Description: This function is the cleanup routine for the driver. It unregist * ers the driver. */ -static void s2io_closer(void) +void s2io_closer(void) { pci_unregister_driver(&s2io_driver); DBG_PRINT(INIT_DBG, "cleanup done\n"); diff --git a/drivers/net/s2io.h b/drivers/net/s2io.h index 1711c8c3dc99..4d2fc7a40434 100644 --- a/drivers/net/s2io.h +++ b/drivers/net/s2io.h @@ -31,6 +31,9 @@ #define SUCCESS 0 #define FAILURE -1 +/* Maximum time to flicker LED when asked to identify NIC using ethtool */ +#define MAX_FLICKER_TIME 60000 /* 60 Secs */ + /* Maximum outstanding splits to be configured into xena. */ typedef enum xena_max_outstanding_splits { XENA_ONE_SPLIT_TRANSACTION = 0, @@ -45,10 +48,10 @@ typedef enum xena_max_outstanding_splits { #define XENA_MAX_OUTSTANDING_SPLITS(n) (n << 4) /* OS concerned variables and constants */ -#define WATCH_DOG_TIMEOUT 5*HZ -#define EFILL 0x1234 -#define ALIGN_SIZE 127 -#define PCIX_COMMAND_REGISTER 0x62 +#define WATCH_DOG_TIMEOUT 15*HZ +#define EFILL 0x1234 +#define ALIGN_SIZE 127 +#define PCIX_COMMAND_REGISTER 0x62 /* * Debug related variables. @@ -61,7 +64,7 @@ typedef enum xena_max_outstanding_splits { #define INTR_DBG 4 /* Global variable that defines the present debug level of the driver. */ -static int debug_level = ERR_DBG; /* Default level. */ +int debug_level = ERR_DBG; /* Default level. */ /* DEBUG message print. */ #define DBG_PRINT(dbg_level, args...) 
if(!(debug_level> 48) @@ -382,7 +408,7 @@ typedef struct _RxD_t { #endif } RxD_t; -/* Structure that represents the Rx descriptor block which contains +/* Structure that represents the Rx descriptor block which contains * 128 Rx descriptors. */ #ifndef CONFIG_2BUFF_MODE @@ -392,11 +418,11 @@ typedef struct _RxD_block { u64 reserved_0; #define END_OF_BLOCK 0xFEFFFFFFFFFFFFFFULL - u64 reserved_1; /* 0xFEFFFFFFFFFFFFFF to mark last + u64 reserved_1; /* 0xFEFFFFFFFFFFFFFF to mark last * Rxd in this blk */ u64 reserved_2_pNext_RxD_block; /* Logical ptr to next */ u64 pNext_RxD_Blk_physical; /* Buff0_ptr.In a 32 bit arch - * the upper 32 bits should + * the upper 32 bits should * be 0 */ } RxD_block_t; #else @@ -405,13 +431,13 @@ typedef struct _RxD_block { RxD_t rxd[MAX_RXDS_PER_BLOCK]; #define END_OF_BLOCK 0xFEFFFFFFFFFFFFFFULL - u64 reserved_1; /* 0xFEFFFFFFFFFFFFFF to mark last Rxd + u64 reserved_1; /* 0xFEFFFFFFFFFFFFFF to mark last Rxd * in this blk */ u64 pNext_RxD_Blk_physical; /* Phy ponter to next blk. */ } RxD_block_t; #define SIZE_OF_BLOCK 4096 -/* Structure to hold virtual addresses of Buf0 and Buf1 in +/* Structure to hold virtual addresses of Buf0 and Buf1 in * 2buf mode. */ typedef struct bufAdd { void *ba_0_org; @@ -423,8 +449,8 @@ typedef struct bufAdd { /* Structure which stores all the MAC control parameters */ -/* This structure stores the offset of the RxD in the ring - * from which the Rx Interrupt processor can start picking +/* This structure stores the offset of the RxD in the ring + * from which the Rx Interrupt processor can start picking * up the RxDs for processing. */ typedef struct _rx_curr_get_info_t { @@ -436,7 +462,7 @@ typedef struct _rx_curr_get_info_t { typedef rx_curr_get_info_t rx_curr_put_info_t; /* This structure stores the offset of the TxDl in the FIFO - * from which the Tx Interrupt processor can start picking + * from which the Tx Interrupt processor can start picking * up the TxDLs for send complete interrupt processing. 
*/ typedef struct { @@ -446,32 +472,96 @@ typedef struct { typedef tx_curr_get_info_t tx_curr_put_info_t; +/* Structure that holds the Phy and virt addresses of the Blocks */ +typedef struct rx_block_info { + RxD_t *block_virt_addr; + dma_addr_t block_dma_addr; +} rx_block_info_t; + +/* pre declaration of the nic structure */ +typedef struct s2io_nic nic_t; + +/* Ring specific structure */ +typedef struct ring_info { + /* The ring number */ + int ring_no; + + /* + * Place holders for the virtual and physical addresses of + * all the Rx Blocks + */ + rx_block_info_t rx_blocks[MAX_RX_BLOCKS_PER_RING]; + int block_count; + int pkt_cnt; + + /* + * Put pointer info which indictes which RxD has to be replenished + * with a new buffer. + */ + rx_curr_put_info_t rx_curr_put_info; + + /* + * Get pointer info which indictes which is the last RxD that was + * processed by the driver. + */ + rx_curr_get_info_t rx_curr_get_info; + +#ifndef CONFIG_S2IO_NAPI + /* Index to the absolute position of the put pointer of Rx ring */ + int put_pos; +#endif + +#ifdef CONFIG_2BUFF_MODE + /* Buffer Address store. */ + buffAdd_t **ba; +#endif + nic_t *nic; +} ring_info_t; + +/* Fifo specific structure */ +typedef struct fifo_info { + /* FIFO number */ + int fifo_no; + + /* Maximum TxDs per TxDL */ + int max_txds; + + /* Place holder of all the TX List's Phy and Virt addresses. */ + list_info_hold_t *list_info; + + /* + * Current offset within the tx FIFO where driver would write + * new Tx frame + */ + tx_curr_put_info_t tx_curr_put_info; + + /* + * Current offset within tx FIFO from where the driver would start freeing + * the buffers + */ + tx_curr_get_info_t tx_curr_get_info; + + nic_t *nic; +}fifo_info_t; + /* Infomation related to the Tx and Rx FIFOs and Rings of Xena * is maintained in this structure. */ typedef struct mac_info { -/* rx side stuff */ - /* Put pointer info which indictes which RxD has to be replenished - * with a new buffer. 
- */ - rx_curr_put_info_t rx_curr_put_info[MAX_RX_RINGS]; - - /* Get pointer info which indictes which is the last RxD that was - * processed by the driver. - */ - rx_curr_get_info_t rx_curr_get_info[MAX_RX_RINGS]; - - u16 rmac_pause_time; - u16 mc_pause_threshold_q0q3; - u16 mc_pause_threshold_q4q7; - /* tx side stuff */ /* logical pointer of start of each Tx FIFO */ TxFIFO_element_t __iomem *tx_FIFO_start[MAX_TX_FIFOS]; -/* Current offset within tx_FIFO_start, where driver would write new Tx frame*/ - tx_curr_put_info_t tx_curr_put_info[MAX_TX_FIFOS]; - tx_curr_get_info_t tx_curr_get_info[MAX_TX_FIFOS]; + /* Fifo specific structure */ + fifo_info_t fifos[MAX_TX_FIFOS]; + +/* rx side stuff */ + /* Ring specific structure */ + ring_info_t rings[MAX_RX_RINGS]; + + u16 rmac_pause_time; + u16 mc_pause_threshold_q0q3; + u16 mc_pause_threshold_q4q7; void *stats_mem; /* orignal pointer to allocated mem */ dma_addr_t stats_mem_phy; /* Physical address of the stat block */ @@ -485,12 +575,6 @@ typedef struct { int usage_cnt; } usr_addr_t; -/* Structure that holds the Phy and virt addresses of the Blocks */ -typedef struct rx_block_info { - RxD_t *block_virt_addr; - dma_addr_t block_dma_addr; -} rx_block_info_t; - /* Default Tunable parameters of the NIC. */ #define DEFAULT_FIFO_LEN 4096 #define SMALL_RXD_CNT 30 * (MAX_RXDS_PER_BLOCK+1) @@ -499,7 +583,20 @@ typedef struct rx_block_info { #define LARGE_BLK_CNT 100 /* Structure representing one instance of the NIC */ -typedef struct s2io_nic { +struct s2io_nic { +#ifdef CONFIG_S2IO_NAPI + /* + * Count of packets to be processed in a given iteration, it will be indicated + * by the quota field of the device structure when NAPI is enabled. 
+ */ + int pkts_to_process; +#endif + struct net_device *dev; + mac_info_t mac_control; + struct config_param config; + struct pci_dev *pdev; + void __iomem *bar0; + void __iomem *bar1; #define MAX_MAC_SUPPORTED 16 #define MAX_SUPPORTED_MULTICASTS MAX_MAC_SUPPORTED @@ -507,33 +604,17 @@ typedef struct s2io_nic { macaddr_t pre_mac_addr[MAX_MAC_SUPPORTED]; struct net_device_stats stats; - void __iomem *bar0; - void __iomem *bar1; - struct config_param config; - mac_info_t mac_control; int high_dma_flag; int device_close_flag; int device_enabled_once; - char name[32]; + char name[50]; struct tasklet_struct task; volatile unsigned long tasklet_status; - struct timer_list timer; - struct net_device *dev; - struct pci_dev *pdev; - u16 vendor_id; - u16 device_id; - u16 ccmd; - u32 cbar0_1; - u32 cbar0_2; - u32 cbar1_1; - u32 cbar1_2; - u32 cirq; - u8 cache_line; - u32 rom_expansion; - u16 pcix_cmd; - u32 irq; + /* Space to back up the PCI config space */ + u32 config_space[256 / sizeof(u32)]; + atomic_t rx_bufs_left[MAX_RX_RINGS]; spinlock_t tx_lock; @@ -558,27 +639,11 @@ typedef struct s2io_nic { u16 tx_err_count; u16 rx_err_count; -#ifndef CONFIG_S2IO_NAPI - /* Index to the absolute position of the put pointer of Rx ring. */ - int put_pos[MAX_RX_RINGS]; -#endif - - /* - * Place holders for the virtual and physical addresses of - * all the Rx Blocks - */ - rx_block_info_t rx_blocks[MAX_RX_RINGS][MAX_RX_BLOCKS_PER_RING]; - int block_count[MAX_RX_RINGS]; - int pkt_cnt[MAX_RX_RINGS]; - - /* Place holder of all the TX List's Phy and Virt addresses. */ - list_info_hold_t *list_info[MAX_TX_FIFOS]; - /* Id timer, used to blink NIC to physically identify NIC. */ struct timer_list id_timer; /* Restart timer, used to restart NIC if the device is stuck and - * a schedule task that will set the correct Link state once the + * a schedule task that will set the correct Link state once the * NIC's PHY has stabilized after a state change. 
*/ #ifdef INIT_TQUEUE @@ -589,12 +654,12 @@ typedef struct s2io_nic { struct work_struct set_link_task; #endif - /* Flag that can be used to turn on or turn off the Rx checksum + /* Flag that can be used to turn on or turn off the Rx checksum * offload feature. */ int rx_csum; - /* after blink, the adapter must be restored with original + /* after blink, the adapter must be restored with original * values. */ u64 adapt_ctrl_org; @@ -604,16 +669,12 @@ typedef struct s2io_nic { #define LINK_DOWN 1 #define LINK_UP 2 -#ifdef CONFIG_2BUFF_MODE - /* Buffer Address store. */ - buffAdd_t **ba[MAX_RX_RINGS]; -#endif int task_flag; #define CARD_DOWN 1 #define CARD_UP 2 atomic_t card_state; volatile unsigned long link_state; -} nic_t; +}; #define RESET_ERROR 1; #define CMD_ERROR 2; @@ -622,9 +683,10 @@ typedef struct s2io_nic { #ifndef readq static inline u64 readq(void __iomem *addr) { - u64 ret = readl(addr + 4); - ret <<= 32; - ret |= readl(addr); + u64 ret = 0; + ret = readl(addr + 4); + (u64) ret <<= 32; + (u64) ret |= readl(addr); return ret; } @@ -637,10 +699,10 @@ static inline void writeq(u64 val, void __iomem *addr) writel((u32) (val >> 32), (addr + 4)); } -/* In 32 bit modes, some registers have to be written in a +/* In 32 bit modes, some registers have to be written in a * particular order to expect correct hardware operation. The - * macro SPECIAL_REG_WRITE is used to perform such ordered - * writes. Defines UF (Upper First) and LF (Lower First) will + * macro SPECIAL_REG_WRITE is used to perform such ordered + * writes. Defines UF (Upper First) and LF (Lower First) will * be used to specify the required write order. */ #define UF 1 @@ -716,6 +778,7 @@ static inline void SPECIAL_REG_WRITE(u64 val, void __iomem *addr, int order) #define PCC_FB_ECC_ERR vBIT(0xff, 16, 8) /* Interrupt to indicate PCC_FB_ECC Error. */ +#define RXD_GET_VLAN_TAG(Control_2) (u16)(Control_2 & MASK_VLAN_TAG) /* * Prototype declaration. 
*/ @@ -725,36 +788,29 @@ static void __devexit s2io_rem_nic(struct pci_dev *pdev); static int init_shared_mem(struct s2io_nic *sp); static void free_shared_mem(struct s2io_nic *sp); static int init_nic(struct s2io_nic *nic); -#ifndef CONFIG_S2IO_NAPI -static void rx_intr_handler(struct s2io_nic *sp); -#endif -static void tx_intr_handler(struct s2io_nic *sp); +static void rx_intr_handler(ring_info_t *ring_data); +static void tx_intr_handler(fifo_info_t *fifo_data); static void alarm_intr_handler(struct s2io_nic *sp); static int s2io_starter(void); -static void s2io_closer(void); +void s2io_closer(void); static void s2io_tx_watchdog(struct net_device *dev); static void s2io_tasklet(unsigned long dev_addr); static void s2io_set_multicast(struct net_device *dev); -#ifndef CONFIG_2BUFF_MODE -static int rx_osm_handler(nic_t * sp, u16 len, RxD_t * rxdp, int ring_no); -#else -static int rx_osm_handler(nic_t * sp, RxD_t * rxdp, int ring_no, - buffAdd_t * ba); -#endif -static void s2io_link(nic_t * sp, int link); -static void s2io_reset(nic_t * sp); -#ifdef CONFIG_S2IO_NAPI +static int rx_osm_handler(ring_info_t *ring_data, RxD_t * rxdp); +void s2io_link(nic_t * sp, int link); +void s2io_reset(nic_t * sp); +#if defined(CONFIG_S2IO_NAPI) static int s2io_poll(struct net_device *dev, int *budget); #endif static void s2io_init_pci(nic_t * sp); -static int s2io_set_mac_addr(struct net_device *dev, u8 * addr); +int s2io_set_mac_addr(struct net_device *dev, u8 * addr); static irqreturn_t s2io_isr(int irq, void *dev_id, struct pt_regs *regs); -static int verify_xena_quiescence(u64 val64, int flag); +static int verify_xena_quiescence(nic_t *sp, u64 val64, int flag); static struct ethtool_ops netdev_ethtool_ops; static void s2io_set_link(unsigned long data); -static int s2io_set_swapper(nic_t * sp); -static void s2io_card_down(nic_t * nic); -static int s2io_card_up(nic_t * nic); - +int s2io_set_swapper(nic_t * sp); +static void s2io_card_down(nic_t *nic); +static int 
s2io_card_up(nic_t *nic); +int get_xena_rev_id(struct pci_dev *pdev); #endif /* _S2IO_H */ From 5e25b9ddb6683fe225a2266b53d73c57381a0c18 Mon Sep 17 00:00:00 2001 From: "raghavendra.koushik@neterion.com" Date: Wed, 3 Aug 2005 12:27:09 -0700 Subject: [PATCH 066/584] [PATCH] S2io: Hardware fixes Hi, The patch below addresses a few h/w-specific issues. 1. Check for an additional ownership bit on the Rx path before starting Rx processing. 2. Enable only 4 PCCs (Per Context Controllers) for Xframe I revisions less than 4. 3. Program Rx and Tx round robin registers depending on no. of rings/FIFOs. 4. Tx continuous interrupts are now controlled by a loadable parameter. 5. Reset the card if we get double-bit ECC errors. 6. The soft reset of XGXS that was done to force a link state change has been eliminated. 7. After a reset, clear "parity error detected" bit, PCI-X ECC status register, and PCI_STATUS bit in tx_pic_int register. 8. The error in the implementation of disabling allmulticast has been rectified. 9. Leave the PCI-X parameters (MMRBC, OST, etc.) at their BIOS/system defaults.
Signed-off-by: Ravinandan Arakali Signed-off-by: Raghavendra Koushik Signed-off-by: Jeff Garzik --- drivers/net/s2io-regs.h | 7 + drivers/net/s2io.c | 410 ++++++++++++++++++++++++++++++++-------- drivers/net/s2io.h | 4 + 3 files changed, 341 insertions(+), 80 deletions(-) diff --git a/drivers/net/s2io-regs.h b/drivers/net/s2io-regs.h index 8746740e6efd..826deb0eb03a 100644 --- a/drivers/net/s2io-regs.h +++ b/drivers/net/s2io-regs.h @@ -62,6 +62,7 @@ typedef struct _XENA_dev_config { #define ADAPTER_STATUS_RMAC_REMOTE_FAULT BIT(6) #define ADAPTER_STATUS_RMAC_LOCAL_FAULT BIT(7) #define ADAPTER_STATUS_RMAC_PCC_IDLE vBIT(0xFF,8,8) +#define ADAPTER_STATUS_RMAC_PCC_FOUR_IDLE vBIT(0x0F,8,8) #define ADAPTER_STATUS_RC_PRC_QUIESCENT vBIT(0xFF,16,8) #define ADAPTER_STATUS_MC_DRAM_READY BIT(24) #define ADAPTER_STATUS_MC_QUEUES_READY BIT(25) @@ -245,6 +246,7 @@ typedef struct _XENA_dev_config { #define STAT_TRSF_PER(n) TBD #define PER_SEC 0x208d5 #define SET_UPDT_PERIOD(n) vBIT((PER_SEC*n),32,32) +#define SET_UPDT_CLICKS(val) vBIT(val, 32, 32) u64 stat_addr; @@ -289,6 +291,7 @@ typedef struct _XENA_dev_config { u64 pcc_err_reg; #define PCC_FB_ECC_DB_ERR vBIT(0xFF, 16, 8) +#define PCC_ENABLE_FOUR vBIT(0x0F,0,8) u64 pcc_err_mask; u64 pcc_err_alarm; @@ -690,6 +693,10 @@ typedef struct _XENA_dev_config { #define MC_ERR_REG_MIRI_CRI_ERR_0 BIT(22) #define MC_ERR_REG_MIRI_CRI_ERR_1 BIT(23) #define MC_ERR_REG_SM_ERR BIT(31) +#define MC_ERR_REG_ECC_ALL_SNG (BIT(6) | \ + BIT(7) | BIT(17) | BIT(19)) +#define MC_ERR_REG_ECC_ALL_DBL (BIT(14) | \ + BIT(15) | BIT(18) | BIT(20)) u64 mc_err_mask; u64 mc_err_alarm; diff --git a/drivers/net/s2io.c b/drivers/net/s2io.c index 0721e78dd8b0..e2144fc7df9a 100644 --- a/drivers/net/s2io.c +++ b/drivers/net/s2io.c @@ -68,6 +68,16 @@ static char s2io_driver_name[] = "Neterion"; static char s2io_driver_version[] = "Version 1.7.7"; +static inline int RXD_IS_UP2DT(RxD_t *rxdp) +{ + int ret; + + ret = ((!(rxdp->Control_1 & RXD_OWN_XENA)) && + 
(GET_RXD_MARKER(rxdp->Control_2) != THE_RXD_MARK)); + + return ret; +} + /* * Cards with following subsystem_id have a link state indication * problem, 600B, 600C, 600D, 640B, 640C and 640D. @@ -230,6 +240,7 @@ static unsigned int rx_ring_sz[MAX_RX_RINGS] = static unsigned int Stats_refresh_time = 4; static unsigned int rts_frm_len[MAX_RX_RINGS] = {[0 ...(MAX_RX_RINGS - 1)] = 0 }; +static unsigned int use_continuous_tx_intrs = 1; static unsigned int rmac_pause_time = 65535; static unsigned int mc_pause_threshold_q0q3 = 187; static unsigned int mc_pause_threshold_q4q7 = 187; @@ -638,7 +649,7 @@ static int init_nic(struct s2io_nic *nic) mac_control = &nic->mac_control; config = &nic->config; - /* to set the swapper control on the card */ + /* to set the swapper controle on the card */ if(s2io_set_swapper(nic)) { DBG_PRINT(ERR_DBG,"ERROR: Setting Swapper failed\n"); return -1; @@ -756,6 +767,13 @@ static int init_nic(struct s2io_nic *nic) val64 |= BIT(0); /* To enable the FIFO partition. */ writeq(val64, &bar0->tx_fifo_partition_0); + /* + * Disable 4 PCCs for Xena1, 2 and 3 as per H/W bug + * SXE-008 TRANSMIT DMA ARBITRATION ISSUE. + */ + if (get_xena_rev_id(nic->pdev) < 4) + writeq(PCC_ENABLE_FOUR, &bar0->pcc_enable); + val64 = readq(&bar0->tx_fifo_partition_0); DBG_PRINT(INIT_DBG, "Fifo partition at: 0x%p is: 0x%llx\n", &bar0->tx_fifo_partition_0, (unsigned long long) val64); @@ -823,37 +841,250 @@ static int init_nic(struct s2io_nic *nic) } writeq(val64, &bar0->rx_queue_cfg); - /* Initializing the Tx round robin registers to 0 - * filling tx and rx round robin registers as per - * the number of FIFOs and Rings is still TODO - */ - writeq(0, &bar0->tx_w_round_robin_0); - writeq(0, &bar0->tx_w_round_robin_1); - writeq(0, &bar0->tx_w_round_robin_2); - writeq(0, &bar0->tx_w_round_robin_3); - writeq(0, &bar0->tx_w_round_robin_4); - /* - * TODO - * Disable Rx steering. Hard coding all packets to be steered to - * Queue 0 for now. 
+ * Filling Tx round robin registers + * as per the number of FIFOs */ - val64 = 0x8080808080808080ULL; - writeq(val64, &bar0->rts_qos_steering); + switch (config->tx_fifo_num) { + case 1: + val64 = 0x0000000000000000ULL; + writeq(val64, &bar0->tx_w_round_robin_0); + writeq(val64, &bar0->tx_w_round_robin_1); + writeq(val64, &bar0->tx_w_round_robin_2); + writeq(val64, &bar0->tx_w_round_robin_3); + writeq(val64, &bar0->tx_w_round_robin_4); + break; + case 2: + val64 = 0x0000010000010000ULL; + writeq(val64, &bar0->tx_w_round_robin_0); + val64 = 0x0100000100000100ULL; + writeq(val64, &bar0->tx_w_round_robin_1); + val64 = 0x0001000001000001ULL; + writeq(val64, &bar0->tx_w_round_robin_2); + val64 = 0x0000010000010000ULL; + writeq(val64, &bar0->tx_w_round_robin_3); + val64 = 0x0100000000000000ULL; + writeq(val64, &bar0->tx_w_round_robin_4); + break; + case 3: + val64 = 0x0001000102000001ULL; + writeq(val64, &bar0->tx_w_round_robin_0); + val64 = 0x0001020000010001ULL; + writeq(val64, &bar0->tx_w_round_robin_1); + val64 = 0x0200000100010200ULL; + writeq(val64, &bar0->tx_w_round_robin_2); + val64 = 0x0001000102000001ULL; + writeq(val64, &bar0->tx_w_round_robin_3); + val64 = 0x0001020000000000ULL; + writeq(val64, &bar0->tx_w_round_robin_4); + break; + case 4: + val64 = 0x0001020300010200ULL; + writeq(val64, &bar0->tx_w_round_robin_0); + val64 = 0x0100000102030001ULL; + writeq(val64, &bar0->tx_w_round_robin_1); + val64 = 0x0200010000010203ULL; + writeq(val64, &bar0->tx_w_round_robin_2); + val64 = 0x0001020001000001ULL; + writeq(val64, &bar0->tx_w_round_robin_3); + val64 = 0x0203000100000000ULL; + writeq(val64, &bar0->tx_w_round_robin_4); + break; + case 5: + val64 = 0x0001000203000102ULL; + writeq(val64, &bar0->tx_w_round_robin_0); + val64 = 0x0001020001030004ULL; + writeq(val64, &bar0->tx_w_round_robin_1); + val64 = 0x0001000203000102ULL; + writeq(val64, &bar0->tx_w_round_robin_2); + val64 = 0x0001020001030004ULL; + writeq(val64, &bar0->tx_w_round_robin_3); + val64 = 
0x0001000000000000ULL; + writeq(val64, &bar0->tx_w_round_robin_4); + break; + case 6: + val64 = 0x0001020304000102ULL; + writeq(val64, &bar0->tx_w_round_robin_0); + val64 = 0x0304050001020001ULL; + writeq(val64, &bar0->tx_w_round_robin_1); + val64 = 0x0203000100000102ULL; + writeq(val64, &bar0->tx_w_round_robin_2); + val64 = 0x0304000102030405ULL; + writeq(val64, &bar0->tx_w_round_robin_3); + val64 = 0x0001000200000000ULL; + writeq(val64, &bar0->tx_w_round_robin_4); + break; + case 7: + val64 = 0x0001020001020300ULL; + writeq(val64, &bar0->tx_w_round_robin_0); + val64 = 0x0102030400010203ULL; + writeq(val64, &bar0->tx_w_round_robin_1); + val64 = 0x0405060001020001ULL; + writeq(val64, &bar0->tx_w_round_robin_2); + val64 = 0x0304050000010200ULL; + writeq(val64, &bar0->tx_w_round_robin_3); + val64 = 0x0102030000000000ULL; + writeq(val64, &bar0->tx_w_round_robin_4); + break; + case 8: + val64 = 0x0001020300040105ULL; + writeq(val64, &bar0->tx_w_round_robin_0); + val64 = 0x0200030106000204ULL; + writeq(val64, &bar0->tx_w_round_robin_1); + val64 = 0x0103000502010007ULL; + writeq(val64, &bar0->tx_w_round_robin_2); + val64 = 0x0304010002060500ULL; + writeq(val64, &bar0->tx_w_round_robin_3); + val64 = 0x0103020400000000ULL; + writeq(val64, &bar0->tx_w_round_robin_4); + break; + } + + /* Filling the Rx round robin registers as per the + * number of Rings and steering based on QoS. 
+ */ + switch (config->rx_ring_num) { + case 1: + val64 = 0x8080808080808080ULL; + writeq(val64, &bar0->rts_qos_steering); + break; + case 2: + val64 = 0x0000010000010000ULL; + writeq(val64, &bar0->rx_w_round_robin_0); + val64 = 0x0100000100000100ULL; + writeq(val64, &bar0->rx_w_round_robin_1); + val64 = 0x0001000001000001ULL; + writeq(val64, &bar0->rx_w_round_robin_2); + val64 = 0x0000010000010000ULL; + writeq(val64, &bar0->rx_w_round_robin_3); + val64 = 0x0100000000000000ULL; + writeq(val64, &bar0->rx_w_round_robin_4); + + val64 = 0x8080808040404040ULL; + writeq(val64, &bar0->rts_qos_steering); + break; + case 3: + val64 = 0x0001000102000001ULL; + writeq(val64, &bar0->rx_w_round_robin_0); + val64 = 0x0001020000010001ULL; + writeq(val64, &bar0->rx_w_round_robin_1); + val64 = 0x0200000100010200ULL; + writeq(val64, &bar0->rx_w_round_robin_2); + val64 = 0x0001000102000001ULL; + writeq(val64, &bar0->rx_w_round_robin_3); + val64 = 0x0001020000000000ULL; + writeq(val64, &bar0->rx_w_round_robin_4); + + val64 = 0x8080804040402020ULL; + writeq(val64, &bar0->rts_qos_steering); + break; + case 4: + val64 = 0x0001020300010200ULL; + writeq(val64, &bar0->rx_w_round_robin_0); + val64 = 0x0100000102030001ULL; + writeq(val64, &bar0->rx_w_round_robin_1); + val64 = 0x0200010000010203ULL; + writeq(val64, &bar0->rx_w_round_robin_2); + val64 = 0x0001020001000001ULL; + writeq(val64, &bar0->rx_w_round_robin_3); + val64 = 0x0203000100000000ULL; + writeq(val64, &bar0->rx_w_round_robin_4); + + val64 = 0x8080404020201010ULL; + writeq(val64, &bar0->rts_qos_steering); + break; + case 5: + val64 = 0x0001000203000102ULL; + writeq(val64, &bar0->rx_w_round_robin_0); + val64 = 0x0001020001030004ULL; + writeq(val64, &bar0->rx_w_round_robin_1); + val64 = 0x0001000203000102ULL; + writeq(val64, &bar0->rx_w_round_robin_2); + val64 = 0x0001020001030004ULL; + writeq(val64, &bar0->rx_w_round_robin_3); + val64 = 0x0001000000000000ULL; + writeq(val64, &bar0->rx_w_round_robin_4); + + val64 = 
0x8080404020201008ULL; + writeq(val64, &bar0->rts_qos_steering); + break; + case 6: + val64 = 0x0001020304000102ULL; + writeq(val64, &bar0->rx_w_round_robin_0); + val64 = 0x0304050001020001ULL; + writeq(val64, &bar0->rx_w_round_robin_1); + val64 = 0x0203000100000102ULL; + writeq(val64, &bar0->rx_w_round_robin_2); + val64 = 0x0304000102030405ULL; + writeq(val64, &bar0->rx_w_round_robin_3); + val64 = 0x0001000200000000ULL; + writeq(val64, &bar0->rx_w_round_robin_4); + + val64 = 0x8080404020100804ULL; + writeq(val64, &bar0->rts_qos_steering); + break; + case 7: + val64 = 0x0001020001020300ULL; + writeq(val64, &bar0->rx_w_round_robin_0); + val64 = 0x0102030400010203ULL; + writeq(val64, &bar0->rx_w_round_robin_1); + val64 = 0x0405060001020001ULL; + writeq(val64, &bar0->rx_w_round_robin_2); + val64 = 0x0304050000010200ULL; + writeq(val64, &bar0->rx_w_round_robin_3); + val64 = 0x0102030000000000ULL; + writeq(val64, &bar0->rx_w_round_robin_4); + + val64 = 0x8080402010080402ULL; + writeq(val64, &bar0->rts_qos_steering); + break; + case 8: + val64 = 0x0001020300040105ULL; + writeq(val64, &bar0->rx_w_round_robin_0); + val64 = 0x0200030106000204ULL; + writeq(val64, &bar0->rx_w_round_robin_1); + val64 = 0x0103000502010007ULL; + writeq(val64, &bar0->rx_w_round_robin_2); + val64 = 0x0304010002060500ULL; + writeq(val64, &bar0->rx_w_round_robin_3); + val64 = 0x0103020400000000ULL; + writeq(val64, &bar0->rx_w_round_robin_4); + + val64 = 0x8040201008040201ULL; + writeq(val64, &bar0->rts_qos_steering); + break; + } /* UDP Fix */ val64 = 0; for (i = 0; i < 8; i++) writeq(val64, &bar0->rts_frm_len_n[i]); - /* Set the default rts frame length for ring0 */ - writeq(MAC_RTS_FRM_LEN_SET(dev->mtu+22), - &bar0->rts_frm_len_n[0]); + /* Set the default rts frame length for the rings configured */ + val64 = MAC_RTS_FRM_LEN_SET(dev->mtu+22); + for (i = 0 ; i < config->rx_ring_num ; i++) + writeq(val64, &bar0->rts_frm_len_n[i]); + + /* Set the frame length for the configured rings + * desired by 
the user + */ + for (i = 0; i < config->rx_ring_num; i++) { + /* If rts_frm_len[i] == 0 then it is assumed that user not + * specified frame length steering. + * If the user provides the frame length then program + * the rts_frm_len register for those values or else + * leave it as it is. + */ + if (rts_frm_len[i] != 0) { + writeq(MAC_RTS_FRM_LEN_SET(rts_frm_len[i]), + &bar0->rts_frm_len_n[i]); + } + } /* Program statistics memory */ writeq(mac_control->stats_mem_phy, &bar0->stat_addr); val64 = SET_UPDT_PERIOD(Stats_refresh_time) | - STAT_CFG_STAT_RO | STAT_CFG_STAT_EN; + STAT_CFG_STAT_RO | STAT_CFG_STAT_EN; writeq(val64, &bar0->stat_cfg); /* @@ -877,13 +1108,14 @@ static int init_nic(struct s2io_nic *nic) val64 = TTI_DATA1_MEM_TX_TIMER_VAL(0x2078) | TTI_DATA1_MEM_TX_URNG_A(0xA) | TTI_DATA1_MEM_TX_URNG_B(0x10) | - TTI_DATA1_MEM_TX_URNG_C(0x30) | TTI_DATA1_MEM_TX_TIMER_AC_EN | - TTI_DATA1_MEM_TX_TIMER_CI_EN; + TTI_DATA1_MEM_TX_URNG_C(0x30) | TTI_DATA1_MEM_TX_TIMER_AC_EN; + if (use_continuous_tx_intrs) + val64 |= TTI_DATA1_MEM_TX_TIMER_CI_EN; writeq(val64, &bar0->tti_data1_mem); val64 = TTI_DATA2_MEM_TX_UFC_A(0x10) | TTI_DATA2_MEM_TX_UFC_B(0x20) | - TTI_DATA2_MEM_TX_UFC_C(0x40) | TTI_DATA2_MEM_TX_UFC_D(0x80); + TTI_DATA2_MEM_TX_UFC_C(0x70) | TTI_DATA2_MEM_TX_UFC_D(0x80); writeq(val64, &bar0->tti_data2_mem); val64 = TTI_CMD_MEM_WE | TTI_CMD_MEM_STROBE_NEW_CMD; @@ -927,10 +1159,11 @@ static int init_nic(struct s2io_nic *nic) writeq(val64, &bar0->rti_command_mem); /* - * Once the operation completes, the Strobe bit of the command - * register will be reset. We poll for this particular condition - * We wait for a maximum of 500ms for the operation to complete, - * if it's not complete by then we return error. + * Once the operation completes, the Strobe bit of the + * command register will be reset. We poll for this + * particular condition. We wait for a maximum of 500ms + * for the operation to complete, if it's not complete + * by then we return error. 
*/ time = 0; while (TRUE) { @@ -1185,10 +1418,10 @@ static void en_dis_able_nic_intrs(struct s2io_nic *nic, u16 mask, int flag) temp64 &= ~((u64) val64); writeq(temp64, &bar0->general_int_mask); /* - * All MC block error interrupts are disabled for now. - * TODO + * Enable all MC Intrs. */ - writeq(DISABLE_ALL_INTRS, &bar0->mc_int_mask); + writeq(0x0, &bar0->mc_int_mask); + writeq(0x0, &bar0->mc_err_mask); } else if (flag == DISABLE_INTRS) { /* * Disable MC Intrs in the general intr mask register @@ -1247,23 +1480,41 @@ static void en_dis_able_nic_intrs(struct s2io_nic *nic, u16 mask, int flag) } } -static int check_prc_pcc_state(u64 val64, int flag) +static int check_prc_pcc_state(u64 val64, int flag, int rev_id) { int ret = 0; if (flag == FALSE) { - if (!(val64 & ADAPTER_STATUS_RMAC_PCC_IDLE) && - ((val64 & ADAPTER_STATUS_RC_PRC_QUIESCENT) == - ADAPTER_STATUS_RC_PRC_QUIESCENT)) { - ret = 1; + if (rev_id >= 4) { + if (!(val64 & ADAPTER_STATUS_RMAC_PCC_IDLE) && + ((val64 & ADAPTER_STATUS_RC_PRC_QUIESCENT) == + ADAPTER_STATUS_RC_PRC_QUIESCENT)) { + ret = 1; + } + } else { + if (!(val64 & ADAPTER_STATUS_RMAC_PCC_FOUR_IDLE) && + ((val64 & ADAPTER_STATUS_RC_PRC_QUIESCENT) == + ADAPTER_STATUS_RC_PRC_QUIESCENT)) { + ret = 1; + } } } else { - if (((val64 & ADAPTER_STATUS_RMAC_PCC_IDLE) == - ADAPTER_STATUS_RMAC_PCC_IDLE) && - (!(val64 & ADAPTER_STATUS_RC_PRC_QUIESCENT) || - ((val64 & ADAPTER_STATUS_RC_PRC_QUIESCENT) == - ADAPTER_STATUS_RC_PRC_QUIESCENT))) { - ret = 1; + if (rev_id >= 4) { + if (((val64 & ADAPTER_STATUS_RMAC_PCC_IDLE) == + ADAPTER_STATUS_RMAC_PCC_IDLE) && + (!(val64 & ADAPTER_STATUS_RC_PRC_QUIESCENT) || + ((val64 & ADAPTER_STATUS_RC_PRC_QUIESCENT) == + ADAPTER_STATUS_RC_PRC_QUIESCENT))) { + ret = 1; + } + } else { + if (((val64 & ADAPTER_STATUS_RMAC_PCC_FOUR_IDLE) == + ADAPTER_STATUS_RMAC_PCC_FOUR_IDLE) && + (!(val64 & ADAPTER_STATUS_RC_PRC_QUIESCENT) || + ((val64 & ADAPTER_STATUS_RC_PRC_QUIESCENT) == + ADAPTER_STATUS_RC_PRC_QUIESCENT))) { + ret = 1; + } } 
} @@ -1286,6 +1537,7 @@ static int verify_xena_quiescence(nic_t *sp, u64 val64, int flag) { int ret = 0; u64 tmp64 = ~((u64) val64); + int rev_id = get_xena_rev_id(sp->pdev); if (! (tmp64 & @@ -1294,7 +1546,7 @@ static int verify_xena_quiescence(nic_t *sp, u64 val64, int flag) ADAPTER_STATUS_PIC_QUIESCENT | ADAPTER_STATUS_MC_DRAM_READY | ADAPTER_STATUS_MC_QUEUES_READY | ADAPTER_STATUS_M_PLL_LOCK | ADAPTER_STATUS_P_PLL_LOCK))) { - ret = check_prc_pcc_state(val64, flag); + ret = check_prc_pcc_state(val64, flag, rev_id); } return ret; @@ -1407,7 +1659,7 @@ static int start_nic(struct s2io_nic *nic) /* Enable select interrupts */ interruptible = TX_TRAFFIC_INTR | RX_TRAFFIC_INTR | TX_MAC_INTR | - RX_MAC_INTR; + RX_MAC_INTR | MC_INTR; en_dis_able_nic_intrs(nic, interruptible, ENABLE_INTRS); /* @@ -1439,21 +1691,6 @@ static int start_nic(struct s2io_nic *nic) */ schedule_work(&nic->set_link_task); - /* - * Here we are performing soft reset on XGXS to - * force link down. Since link is already up, we will get - * link state change interrupt after this reset - */ - SPECIAL_REG_WRITE(0x80010515001E0000ULL, &bar0->dtx_control, UF); - val64 = readq(&bar0->dtx_control); - udelay(50); - SPECIAL_REG_WRITE(0x80010515001E00E0ULL, &bar0->dtx_control, UF); - val64 = readq(&bar0->dtx_control); - udelay(50); - SPECIAL_REG_WRITE(0x80070515001F00E4ULL, &bar0->dtx_control, UF); - val64 = readq(&bar0->dtx_control); - udelay(50); - return SUCCESS; } @@ -1524,7 +1761,7 @@ static void stop_nic(struct s2io_nic *nic) /* Disable all interrupts */ interruptible = TX_TRAFFIC_INTR | RX_TRAFFIC_INTR | TX_MAC_INTR | - RX_MAC_INTR; + RX_MAC_INTR | MC_INTR; en_dis_able_nic_intrs(nic, interruptible, DISABLE_INTRS); /* Disable PRCs */ @@ -1737,6 +1974,7 @@ int fill_rx_buffers(struct s2io_nic *nic, int ring_no) off++; mac_control->rings[ring_no].rx_curr_put_info.offset = off; #endif + rxdp->Control_2 |= SET_RXD_MARKER; atomic_inc(&nic->rx_bufs_left[ring_no]); alloc_tab++; @@ -1965,11 +2203,8 @@ static 
void rx_intr_handler(ring_info_t *ring_data) put_offset = (put_block * (MAX_RXDS_PER_BLOCK + 1)) + put_info.offset; #endif - while ((!(rxdp->Control_1 & RXD_OWN_XENA)) && -#ifdef CONFIG_2BUFF_MODE - (!rxdp->Control_2 & BIT(0)) && -#endif - (((get_offset + 1) % ring_bufs) != put_offset)) { + while (RXD_IS_UP2DT(rxdp) && + (((get_offset + 1) % ring_bufs) != put_offset)) { skb = (struct sk_buff *) ((unsigned long)rxdp->Host_Control); if (skb == NULL) { DBG_PRINT(ERR_DBG, "%s: The skb is ", @@ -2153,6 +2388,21 @@ static void alarm_intr_handler(struct s2io_nic *nic) schedule_work(&nic->set_link_task); } + /* Handling Ecc errors */ + val64 = readq(&bar0->mc_err_reg); + writeq(val64, &bar0->mc_err_reg); + if (val64 & (MC_ERR_REG_ECC_ALL_SNG | MC_ERR_REG_ECC_ALL_DBL)) { + if (val64 & MC_ERR_REG_ECC_ALL_DBL) { + DBG_PRINT(ERR_DBG, "%s: Device indicates ", + dev->name); + DBG_PRINT(ERR_DBG, "double ECC error!!\n"); + netif_stop_queue(dev); + schedule_work(&nic->rst_timer_task); + } else { + /* Device can recover from Single ECC errors */ + } + } + /* In case of a serious error, the device will be Reset. 
*/ val64 = readq(&bar0->serr_source); if (val64 & SERR_SOURCE_ANY) { @@ -2226,7 +2476,7 @@ void s2io_reset(nic_t * sp) { XENA_dev_config_t __iomem *bar0 = sp->bar0; u64 val64; - u16 subid; + u16 subid, pci_cmd; val64 = SW_RESET_ALL; writeq(val64, &bar0->sw_reset); @@ -2255,6 +2505,18 @@ void s2io_reset(nic_t * sp) /* Set swapper to enable I/O register access */ s2io_set_swapper(sp); + /* Clear certain PCI/PCI-X fields after reset */ + pci_read_config_word(sp->pdev, PCI_COMMAND, &pci_cmd); + pci_cmd &= 0x7FFF; /* Clear parity err detect bit */ + pci_write_config_word(sp->pdev, PCI_COMMAND, pci_cmd); + + val64 = readq(&bar0->txpic_int_reg); + val64 &= ~BIT(62); /* Clearing PCI_STATUS error reflected here */ + writeq(val64, &bar0->txpic_int_reg); + + /* Clearing PCIX Ecc status register */ + pci_write_config_dword(sp->pdev, 0x68, 0); + /* Reset device statistics maintained by OS */ memset(&sp->stats, 0, sizeof (struct net_device_stats)); @@ -2797,6 +3059,8 @@ static void s2io_set_multicast(struct net_device *dev) /* Disable all Multicast addresses */ writeq(RMAC_ADDR_DATA0_MEM_ADDR(dis_addr), &bar0->rmac_addr_data0_mem); + writeq(RMAC_ADDR_DATA1_MEM_MASK(0x0), + &bar0->rmac_addr_data1_mem); val64 = RMAC_ADDR_CMD_MEM_WE | RMAC_ADDR_CMD_MEM_STROBE_NEW_CMD | RMAC_ADDR_CMD_MEM_OFFSET(sp->all_multi_pos); @@ -4369,21 +4633,6 @@ static void s2io_init_pci(nic_t * sp) (pci_cmd | PCI_COMMAND_PARITY)); pci_read_config_word(sp->pdev, PCI_COMMAND, &pci_cmd); - /* Set MMRB count to 1024 in PCI-X Command register. */ - pcix_cmd &= 0xFFF3; - pci_write_config_word(sp->pdev, PCIX_COMMAND_REGISTER, - (pcix_cmd | (0x1 << 2))); /* MMRBC 1K */ - pci_read_config_word(sp->pdev, PCIX_COMMAND_REGISTER, - &(pcix_cmd)); - - /* Setting Maximum outstanding splits based on system type. */ - pcix_cmd &= 0xFF8F; - pcix_cmd |= XENA_MAX_OUTSTANDING_SPLITS(0x1); /* 2 splits. 
*/ - pci_write_config_word(sp->pdev, PCIX_COMMAND_REGISTER, - pcix_cmd); - pci_read_config_word(sp->pdev, PCIX_COMMAND_REGISTER, - &(pcix_cmd)); - /* Forcibly disabling relaxed ordering capability of the card. */ pcix_cmd &= 0xfffd; pci_write_config_word(sp->pdev, PCIX_COMMAND_REGISTER, @@ -4400,6 +4649,7 @@ module_param_array(tx_fifo_len, uint, NULL, 0); module_param_array(rx_ring_sz, uint, NULL, 0); module_param(Stats_refresh_time, int, 0); module_param_array(rts_frm_len, uint, NULL, 0); +module_param(use_continuous_tx_intrs, int, 1); module_param(rmac_pause_time, int, 0); module_param(mc_pause_threshold_q0q3, int, 0); module_param(mc_pause_threshold_q4q7, int, 0); diff --git a/drivers/net/s2io.h b/drivers/net/s2io.h index 4d2fc7a40434..92db59a0fb11 100644 --- a/drivers/net/s2io.h +++ b/drivers/net/s2io.h @@ -372,6 +372,10 @@ typedef struct _RxD_t { #define RXD_GET_L4_CKSUM(val) ((u16)(val) & 0xFFFF) u64 Control_2; +#define THE_RXD_MARK 0x3 +#define SET_RXD_MARKER vBIT(THE_RXD_MARK, 0, 2) +#define GET_RXD_MARKER(ctrl) ((ctrl & SET_RXD_MARKER) >> 62) + #ifndef CONFIG_2BUFF_MODE #define MASK_BUFFER0_SIZE vBIT(0x3FFF,2,14) #define SET_BUFFER0_SIZE(val) vBIT(val,2,14) From 7ba013ac029513eb4b70cfcd4b86e37c5f16c483 Mon Sep 17 00:00:00 2001 From: "raghavendra.koushik@neterion.com" Date: Wed, 3 Aug 2005 12:29:20 -0700 Subject: [PATCH 067/584] [PATCH] S2io: Software fixes Hi, Below patch includes fixes for few purely software bugs identified since last release. 1. Keep track and display(as part of ethtool command output) the no. of single-bit and double-bit ECC errors. 2. Handle race condition between intr handler and "interface down" routine. 3. Initial link state setting modified so that the link state displayed after "interface Up" is correct. 4. Fix for "Incorrect Tx packet count when TSO is enabled". 5. Disable periodic DMA of statistics and schedule one-shot DMA only when required. 
Signed-off-by: Ravinandan Arakali Signed-off-by: Raghavendra Koushik Signed-off-by: Jeff Garzik --- drivers/net/s2io.c | 97 +++++++++++++++++++++++++++++++++++++++------- drivers/net/s2io.h | 5 +++ 2 files changed, 89 insertions(+), 13 deletions(-) diff --git a/drivers/net/s2io.c b/drivers/net/s2io.c index e2144fc7df9a..e24c5e544734 100644 --- a/drivers/net/s2io.c +++ b/drivers/net/s2io.c @@ -158,6 +158,9 @@ static char ethtool_stats_keys[][ETH_GSTRING_LEN] = { {"rmac_pause_cnt"}, {"rmac_accepted_ip"}, {"rmac_err_tcp"}, + {"\n DRIVER STATISTICS"}, + {"single_bit_ecc_errs"}, + {"double_bit_ecc_errs"}, }; #define S2IO_STAT_LEN sizeof(ethtool_stats_keys)/ ETH_GSTRING_LEN @@ -237,7 +240,6 @@ static unsigned int tx_fifo_len[MAX_TX_FIFOS] = static unsigned int rx_ring_num = 1; static unsigned int rx_ring_sz[MAX_RX_RINGS] = {[0 ...(MAX_RX_RINGS - 1)] = 0 }; -static unsigned int Stats_refresh_time = 4; static unsigned int rts_frm_len[MAX_RX_RINGS] = {[0 ...(MAX_RX_RINGS - 1)] = 0 }; static unsigned int use_continuous_tx_intrs = 1; @@ -1083,9 +1085,6 @@ static int init_nic(struct s2io_nic *nic) /* Program statistics memory */ writeq(mac_control->stats_mem_phy, &bar0->stat_addr); - val64 = SET_UPDT_PERIOD(Stats_refresh_time) | - STAT_CFG_STAT_RO | STAT_CFG_STAT_EN; - writeq(val64, &bar0->stat_cfg); /* * Initializing the sampling rate for the device to calculate the @@ -2101,6 +2100,7 @@ static int s2io_poll(struct net_device *dev, int *budget) u64 val64; int i; + atomic_inc(&nic->isr_cnt); mac_control = &nic->mac_control; config = &nic->config; @@ -2136,6 +2136,7 @@ static int s2io_poll(struct net_device *dev, int *budget) } /* Re enable the Rx interrupts. 
*/ en_dis_able_nic_intrs(nic, RX_TRAFFIC_INTR, ENABLE_INTRS); + atomic_dec(&nic->isr_cnt); return 0; no_rx: @@ -2149,6 +2150,7 @@ no_rx: break; } } + atomic_dec(&nic->isr_cnt); return 1; } #endif @@ -2179,6 +2181,13 @@ static void rx_intr_handler(ring_info_t *ring_data) #endif register u64 val64; + spin_lock(&nic->rx_lock); + if (atomic_read(&nic->card_state) == CARD_DOWN) { + DBG_PRINT(ERR_DBG, "%s: %s going down for reset\n", + __FUNCTION__, dev->name); + spin_unlock(&nic->rx_lock); + } + /* * rx_traffic_int reg is an R1 register, hence we read and write * back the same value in the register to clear it @@ -2210,6 +2219,7 @@ static void rx_intr_handler(ring_info_t *ring_data) DBG_PRINT(ERR_DBG, "%s: The skb is ", dev->name); DBG_PRINT(ERR_DBG, "Null in Rx Intr\n"); + spin_unlock(&nic->rx_lock); return; } #ifndef CONFIG_2BUFF_MODE @@ -2262,6 +2272,7 @@ static void rx_intr_handler(ring_info_t *ring_data) break; #endif } + spin_unlock(&nic->rx_lock); } /** @@ -2345,7 +2356,6 @@ static void tx_intr_handler(fifo_info_t *fifo_data) (sizeof(TxD_t) * fifo_data->max_txds)); /* Updating the statistics block */ - nic->stats.tx_packets++; nic->stats.tx_bytes += skb->len; dev_kfree_skb_irq(skb); @@ -2393,13 +2403,16 @@ static void alarm_intr_handler(struct s2io_nic *nic) writeq(val64, &bar0->mc_err_reg); if (val64 & (MC_ERR_REG_ECC_ALL_SNG | MC_ERR_REG_ECC_ALL_DBL)) { if (val64 & MC_ERR_REG_ECC_ALL_DBL) { + nic->mac_control.stats_info->sw_stat. + double_ecc_errs++; DBG_PRINT(ERR_DBG, "%s: Device indicates ", dev->name); DBG_PRINT(ERR_DBG, "double ECC error!!\n"); netif_stop_queue(dev); schedule_work(&nic->rst_timer_task); } else { - /* Device can recover from Single ECC errors */ + nic->mac_control.stats_info->sw_stat. 
+ single_ecc_errs++; } } @@ -2695,7 +2708,7 @@ int s2io_open(struct net_device *dev) * Nic is initialized */ netif_carrier_off(dev); - sp->last_link_state = LINK_DOWN; + sp->last_link_state = 0; /* Unkown link state */ /* Initialize H/W and enable interrupts */ if (s2io_card_up(sp)) { @@ -2909,6 +2922,7 @@ static irqreturn_t s2io_isr(int irq, void *dev_id, struct pt_regs *regs) mac_info_t *mac_control; struct config_param *config; + atomic_inc(&sp->isr_cnt); mac_control = &sp->mac_control; config = &sp->config; @@ -2924,6 +2938,7 @@ static irqreturn_t s2io_isr(int irq, void *dev_id, struct pt_regs *regs) if (!reason) { /* The interrupt was not raised by Xena. */ + atomic_dec(&sp->isr_cnt); return IRQ_NONE; } @@ -2972,6 +2987,7 @@ static irqreturn_t s2io_isr(int irq, void *dev_id, struct pt_regs *regs) dev->name); DBG_PRINT(ERR_DBG, " in ISR!!\n"); clear_bit(0, (&sp->tasklet_status)); + atomic_dec(&sp->isr_cnt); return IRQ_HANDLED; } clear_bit(0, (&sp->tasklet_status)); @@ -2981,9 +2997,36 @@ static irqreturn_t s2io_isr(int irq, void *dev_id, struct pt_regs *regs) } #endif + atomic_dec(&sp->isr_cnt); return IRQ_HANDLED; } +/** + * s2io_updt_stats - + */ +static void s2io_updt_stats(nic_t *sp) +{ + XENA_dev_config_t __iomem *bar0 = sp->bar0; + u64 val64; + int cnt = 0; + + if (atomic_read(&sp->card_state) == CARD_UP) { + /* Apprx 30us on a 133 MHz bus */ + val64 = SET_UPDT_CLICKS(10) | + STAT_CFG_ONE_SHOT_EN | STAT_CFG_STAT_EN; + writeq(val64, &bar0->stat_cfg); + do { + udelay(100); + val64 = readq(&bar0->stat_cfg); + if (!(val64 & BIT(0))) + break; + cnt++; + if (cnt == 5) + break; /* Updt failed */ + } while(1); + } +} + /** * s2io_get_stats - Updates the device statistics structure. * @dev : pointer to the device structure. 
@@ -3004,6 +3047,11 @@ struct net_device_stats *s2io_get_stats(struct net_device *dev) mac_control = &sp->mac_control; config = &sp->config; + /* Configure Stats for immediate updt */ + s2io_updt_stats(sp); + + sp->stats.tx_packets = + le32_to_cpu(mac_control->stats_info->tmac_frms); sp->stats.tx_errors = le32_to_cpu(mac_control->stats_info->tmac_any_err_frms); sp->stats.rx_errors = @@ -4018,6 +4066,7 @@ static void s2io_get_ethtool_stats(struct net_device *dev, nic_t *sp = dev->priv; StatInfo_t *stat_info = sp->mac_control.stats_info; + s2io_updt_stats(sp); tmp_stats[i++] = le32_to_cpu(stat_info->tmac_frms); tmp_stats[i++] = le32_to_cpu(stat_info->tmac_data_octets); tmp_stats[i++] = le64_to_cpu(stat_info->tmac_drop_frms); @@ -4057,6 +4106,9 @@ static void s2io_get_ethtool_stats(struct net_device *dev, tmp_stats[i++] = le32_to_cpu(stat_info->rmac_pause_cnt); tmp_stats[i++] = le32_to_cpu(stat_info->rmac_accepted_ip); tmp_stats[i++] = le32_to_cpu(stat_info->rmac_err_tcp); + tmp_stats[i++] = 0; + tmp_stats[i++] = stat_info->sw_stat.single_ecc_errs; + tmp_stats[i++] = stat_info->sw_stat.double_ecc_errs; } int s2io_ethtool_get_regs_len(struct net_device *dev) @@ -4353,14 +4405,27 @@ static void s2io_card_down(nic_t * sp) break; } } while (1); - spin_lock_irqsave(&sp->tx_lock, flags); s2io_reset(sp); - /* Free all unused Tx and Rx buffers */ - free_tx_buffers(sp); - free_rx_buffers(sp); + /* Waiting till all Interrupt handlers are complete */ + cnt = 0; + do { + msleep(10); + if (!atomic_read(&sp->isr_cnt)) + break; + cnt++; + } while(cnt < 5); + spin_lock_irqsave(&sp->tx_lock, flags); + /* Free all Tx buffers */ + free_tx_buffers(sp); spin_unlock_irqrestore(&sp->tx_lock, flags); + + /* Free all Rx buffers */ + spin_lock_irqsave(&sp->rx_lock, flags); + free_rx_buffers(sp); + spin_unlock_irqrestore(&sp->rx_lock, flags); + clear_bit(0, &(sp->link_state)); } @@ -4647,7 +4712,6 @@ module_param(tx_fifo_num, int, 0); module_param(rx_ring_num, int, 0); 
module_param_array(tx_fifo_len, uint, NULL, 0); module_param_array(rx_ring_sz, uint, NULL, 0); -module_param(Stats_refresh_time, int, 0); module_param_array(rts_frm_len, uint, NULL, 0); module_param(use_continuous_tx_intrs, int, 1); module_param(rmac_pause_time, int, 0); @@ -4804,6 +4868,9 @@ s2io_init_nic(struct pci_dev *pdev, const struct pci_device_id *pre) for (i = 0; i < config->rx_ring_num; i++) atomic_set(&sp->rx_bufs_left[i], 0); + /* Initialize the number of ISRs currently running */ + atomic_set(&sp->isr_cnt, 0); + /* initialize the shared memory used by the NIC and the host */ if (init_shared_mem(sp)) { DBG_PRINT(ERR_DBG, "%s: Memory allocation failed\n", @@ -4938,6 +5005,7 @@ s2io_init_nic(struct pci_dev *pdev, const struct pci_device_id *pre) #ifndef CONFIG_S2IO_NAPI spin_lock_init(&sp->put_lock); #endif + spin_lock_init(&sp->rx_lock); /* * SXE-002: Configure link and activity LED to init state @@ -4961,13 +5029,16 @@ s2io_init_nic(struct pci_dev *pdev, const struct pci_device_id *pre) goto register_failed; } + /* Initialize device name */ + strcpy(sp->name, dev->name); + strcat(sp->name, ": Neterion Xframe I 10GbE adapter"); + /* * Make Link state as off at this point, when the Link change * interrupt comes the state will be automatically changed to * the right state. 
*/ netif_carrier_off(dev); - sp->last_link_state = LINK_DOWN; return 0; diff --git a/drivers/net/s2io.h b/drivers/net/s2io.h index 92db59a0fb11..69dd0e51dda0 100644 --- a/drivers/net/s2io.h +++ b/drivers/net/s2io.h @@ -195,6 +195,9 @@ typedef struct stat_block { u32 rxd_rd_cnt; u32 rxf_wr_cnt; u32 txf_rd_cnt; + +/* Software statistics maintained by driver */ + swStat_t sw_stat; } StatInfo_t; /* @@ -678,6 +681,8 @@ struct s2io_nic { #define CARD_UP 2 atomic_t card_state; volatile unsigned long link_state; + spinlock_t rx_lock; + atomic_t isr_cnt; }; #define RESET_ERROR 1; From 1ddc50d40a19b3524d302d1d6bfd52ac7bc6b6f7 Mon Sep 17 00:00:00 2001 From: "raghavendra.koushik@neterion.com" Date: Wed, 3 Aug 2005 12:30:43 -0700 Subject: [PATCH 068/584] [PATCH] S2io: Removed memory leaks Hi, This patch fixes certain memory leaks discovered in free_tx_buffers() and rx_osm_handler() Signed-off-by: Ravinandan Arakali Signed-off-by: Raghavendra Koushik Signed-off-by: Jeff Garzik --- drivers/net/s2io.c | 33 ++++++++++++++++++++++++++++++--- 1 file changed, 30 insertions(+), 3 deletions(-) diff --git a/drivers/net/s2io.c b/drivers/net/s2io.c index e24c5e544734..6668b99025c8 100644 --- a/drivers/net/s2io.c +++ b/drivers/net/s2io.c @@ -1709,7 +1709,7 @@ static void free_tx_buffers(struct s2io_nic *nic) int i, j; mac_info_t *mac_control; struct config_param *config; - int cnt = 0; + int cnt = 0, frg_cnt; mac_control = &nic->mac_control; config = &nic->config; @@ -1722,11 +1722,33 @@ static void free_tx_buffers(struct s2io_nic *nic) (struct sk_buff *) ((unsigned long) txdp-> Host_Control); if (skb == NULL) { - memset(txdp, 0, sizeof(TxD_t)); + memset(txdp, 0, sizeof(TxD_t) * + config->max_txds); continue; } + frg_cnt = skb_shinfo(skb)->nr_frags; + pci_unmap_single(nic->pdev, (dma_addr_t) + txdp->Buffer_Pointer, + skb->len - skb->data_len, + PCI_DMA_TODEVICE); + if (frg_cnt) { + TxD_t *temp; + temp = txdp; + txdp++; + for (j = 0; j < frg_cnt; j++, txdp++) { + skb_frag_t *frag = + 
&skb_shinfo(skb)->frags[j]; + pci_unmap_page(nic->pdev, + (dma_addr_t) + txdp-> + Buffer_Pointer, + frag->size, + PCI_DMA_TODEVICE); + } + txdp = temp; + } dev_kfree_skb(skb); - memset(txdp, 0, sizeof(TxD_t)); + memset(txdp, 0, sizeof(TxD_t) * config->max_txds); cnt++; } DBG_PRINT(INTR_DBG, @@ -4570,6 +4592,11 @@ static int rx_osm_handler(ring_info_t *ring_data, RxD_t * rxdp) unsigned long long err = rxdp->Control_1 & RXD_T_CODE; DBG_PRINT(ERR_DBG, "%s: Rx error Value: 0x%llx\n", dev->name, err); + dev_kfree_skb(skb); + sp->stats.rx_crc_errors++; + atomic_dec(&sp->rx_bufs_left[ring_no]); + rxdp->Host_Control = 0; + return 0; } /* Updating statistics */ From fe113638328995b69d8797e6466b29661b1602d1 Mon Sep 17 00:00:00 2001 From: "raghavendra.koushik@neterion.com" Date: Wed, 3 Aug 2005 12:32:00 -0700 Subject: [PATCH 069/584] [PATCH] S2io: Performance improvements Hi, This patch relates to mostly performance related changes. 1. Fixed incorrect computation of PANIC level in rx_buffer_level(). 2. Removed unnecessary PIOs(read/write of tx_traffic_int and rx_traffic_int) from interrupt handler and removed read of general_int_status register from xmit routine. 3. Enable two-buffer mode(for Rx path) automatically for SGI systems. This improves Rx performance dramatically on SGI systems. 
Signed-off-by: Ravinandan Arakali Signed-off-by: Raghavendra Koushik Signed-off-by: Jeff Garzik --- drivers/net/s2io.c | 46 +++++++++++++++++++--------------------------- drivers/net/s2io.h | 5 +++++ 2 files changed, 24 insertions(+), 27 deletions(-) diff --git a/drivers/net/s2io.c b/drivers/net/s2io.c index 6668b99025c8..28d6d3746c80 100644 --- a/drivers/net/s2io.c +++ b/drivers/net/s2io.c @@ -100,8 +100,7 @@ static inline int rx_buffer_level(nic_t * sp, int rxb_size, int ring) mac_control = &sp->mac_control; if ((mac_control->rings[ring].pkt_cnt - rxb_size) > 16) { level = LOW; - if ((mac_control->rings[ring].pkt_cnt - rxb_size) < - MAX_RXDS_PER_BLOCK) { + if (rxb_size <= MAX_RXDS_PER_BLOCK) { level = PANIC; } } @@ -2193,7 +2192,6 @@ static void rx_intr_handler(ring_info_t *ring_data) { nic_t *nic = ring_data->nic; struct net_device *dev = (struct net_device *) nic->dev; - XENA_dev_config_t __iomem *bar0 = nic->bar0; int get_block, get_offset, put_block, put_offset, ring_bufs; rx_curr_get_info_t get_info, put_info; RxD_t *rxdp; @@ -2201,8 +2199,6 @@ static void rx_intr_handler(ring_info_t *ring_data) #ifndef CONFIG_S2IO_NAPI int pkt_cnt = 0; #endif - register u64 val64; - spin_lock(&nic->rx_lock); if (atomic_read(&nic->card_state) == CARD_DOWN) { DBG_PRINT(ERR_DBG, "%s: %s going down for reset\n", @@ -2210,13 +2206,6 @@ static void rx_intr_handler(ring_info_t *ring_data) spin_unlock(&nic->rx_lock); } - /* - * rx_traffic_int reg is an R1 register, hence we read and write - * back the same value in the register to clear it - */ - val64 = readq(&bar0->tx_traffic_int); - writeq(val64, &bar0->tx_traffic_int); - get_info = ring_data->rx_curr_get_info; get_block = get_info.block_index; put_info = ring_data->rx_curr_put_info; @@ -2312,20 +2301,11 @@ static void rx_intr_handler(ring_info_t *ring_data) static void tx_intr_handler(fifo_info_t *fifo_data) { nic_t *nic = fifo_data->nic; - XENA_dev_config_t __iomem *bar0 = nic->bar0; struct net_device *dev = (struct net_device 
*) nic->dev; tx_curr_get_info_t get_info, put_info; struct sk_buff *skb; TxD_t *txdlp; u16 j, frg_cnt; - register u64 val64 = 0; - - /* - * tx_traffic_int reg is an R1 register, hence we read and write - * back the same value in the register to clear it - */ - val64 = readq(&bar0->tx_traffic_int); - writeq(val64, &bar0->tx_traffic_int); get_info = fifo_data->tx_curr_get_info; put_info = fifo_data->tx_curr_put_info; @@ -2818,7 +2798,6 @@ int s2io_xmit(struct sk_buff *skb, struct net_device *dev) #endif mac_info_t *mac_control; struct config_param *config; - XENA_dev_config_t __iomem *bar0 = sp->bar0; mac_control = &sp->mac_control; config = &sp->config; @@ -2870,7 +2849,6 @@ int s2io_xmit(struct sk_buff *skb, struct net_device *dev) } txdp->Control_2 |= config->tx_intr_type; - txdp->Control_1 |= (TXD_BUFFER0_SIZE(frg_len) | TXD_GATHER_CODE_FIRST); txdp->Control_1 |= TXD_LIST_OWN_XENA; @@ -2890,6 +2868,8 @@ int s2io_xmit(struct sk_buff *skb, struct net_device *dev) val64 = mac_control->fifos[queue].list_info[put_off].list_phy_addr; writeq(val64, &tx_fifo->TxDL_Pointer); + wmb(); + val64 = (TX_FIFO_LAST_TXD_NUM(frg_cnt) | TX_FIFO_FIRST_LIST | TX_FIFO_LAST_LIST); @@ -2899,9 +2879,6 @@ int s2io_xmit(struct sk_buff *skb, struct net_device *dev) #endif writeq(val64, &tx_fifo->List_Control); - /* Perform a PCI read to flush previous writes */ - val64 = readq(&bar0->general_int_status); - put_off++; put_off %= mac_control->fifos[queue].tx_curr_put_info.fifo_len + 1; mac_control->fifos[queue].tx_curr_put_info.offset = put_off; @@ -2940,7 +2917,7 @@ static irqreturn_t s2io_isr(int irq, void *dev_id, struct pt_regs *regs) nic_t *sp = dev->priv; XENA_dev_config_t __iomem *bar0 = sp->bar0; int i; - u64 reason = 0; + u64 reason = 0, val64; mac_info_t *mac_control; struct config_param *config; @@ -2978,6 +2955,13 @@ static irqreturn_t s2io_isr(int irq, void *dev_id, struct pt_regs *regs) #else /* If Intr is because of Rx Traffic */ if (reason & GEN_INTR_RXTRAFFIC) { + /* + * 
rx_traffic_int reg is an R1 register, writing all 1's + * will ensure that the actual interrupt causing bit get's + * cleared and hence a read can be avoided. + */ + val64 = 0xFFFFFFFFFFFFFFFFULL; + writeq(val64, &bar0->rx_traffic_int); for (i = 0; i < config->rx_ring_num; i++) { rx_intr_handler(&mac_control->rings[i]); } @@ -2986,6 +2970,14 @@ static irqreturn_t s2io_isr(int irq, void *dev_id, struct pt_regs *regs) /* If Intr is because of Tx Traffic */ if (reason & GEN_INTR_TXTRAFFIC) { + /* + * tx_traffic_int reg is an R1 register, writing all 1's + * will ensure that the actual interrupt causing bit get's + * cleared and hence a read can be avoided. + */ + val64 = 0xFFFFFFFFFFFFFFFFULL; + writeq(val64, &bar0->tx_traffic_int); + for (i = 0; i < config->tx_fifo_num; i++) tx_intr_handler(&mac_control->fifos[i]); } diff --git a/drivers/net/s2io.h b/drivers/net/s2io.h index 69dd0e51dda0..ce9bf6d5ee00 100644 --- a/drivers/net/s2io.h +++ b/drivers/net/s2io.h @@ -13,6 +13,11 @@ #ifndef _S2IO_H #define _S2IO_H +/* Enable 2 buffer mode by default for SGI system */ +#ifdef CONFIG_IA64_SGI_SN2 +#define CONFIG_2BUFF_MODE +#endif + #define TBD 0 #define BIT(loc) (0x8000000000000000ULL >> (loc)) #define vBIT(val, loc, sz) (((u64)val) << (64-loc-sz)) From d8892c6ee39614bc6d282dbef0ff9fa461a6467c Mon Sep 17 00:00:00 2001 From: "raghavendra.koushik@neterion.com" Date: Wed, 3 Aug 2005 12:33:12 -0700 Subject: [PATCH 070/584] [PATCH] S2io: Support for runtime MTU change Hi, Patch below supports MTU change on-the-fly(without bringing interface down) Signed-off-by: Ravinandan Arakali Signed-off-by: Raghavendra Koushik Signed-off-by: Jeff Garzik --- drivers/net/s2io.c | 28 ++++++++++++++++------------ 1 file changed, 16 insertions(+), 12 deletions(-) diff --git a/drivers/net/s2io.c b/drivers/net/s2io.c index 28d6d3746c80..aff1fb74e14b 100644 --- a/drivers/net/s2io.c +++ b/drivers/net/s2io.c @@ -2849,6 +2849,7 @@ int s2io_xmit(struct sk_buff *skb, struct net_device *dev) } 
txdp->Control_2 |= config->tx_intr_type; + txdp->Control_1 |= (TXD_BUFFER0_SIZE(frg_len) | TXD_GATHER_CODE_FIRST); txdp->Control_1 |= TXD_LIST_OWN_XENA; @@ -4246,14 +4247,6 @@ int s2io_ioctl(struct net_device *dev, struct ifreq *rq, int cmd) int s2io_change_mtu(struct net_device *dev, int new_mtu) { nic_t *sp = dev->priv; - XENA_dev_config_t __iomem *bar0 = sp->bar0; - register u64 val64; - - if (netif_running(dev)) { - DBG_PRINT(ERR_DBG, "%s: Must be stopped to ", dev->name); - DBG_PRINT(ERR_DBG, "change its MTU\n"); - return -EBUSY; - } if ((new_mtu < MIN_MTU) || (new_mtu > S2IO_JUMBO_SIZE)) { DBG_PRINT(ERR_DBG, "%s: MTU size is invalid.\n", @@ -4261,11 +4254,22 @@ int s2io_change_mtu(struct net_device *dev, int new_mtu) return -EPERM; } - /* Set the new MTU into the PYLD register of the NIC */ - val64 = new_mtu; - writeq(vBIT(val64, 2, 14), &bar0->rmac_max_pyld_len); - dev->mtu = new_mtu; + if (netif_running(dev)) { + s2io_card_down(sp); + netif_stop_queue(dev); + if (s2io_card_up(sp)) { + DBG_PRINT(ERR_DBG, "%s: Device bring up failed\n", + __FUNCTION__); + } + if (netif_queue_stopped(dev)) + netif_wake_queue(dev); + } else { /* Device is down */ + XENA_dev_config_t __iomem *bar0 = sp->bar0; + u64 val64 = new_mtu; + + writeq(vBIT(val64, 2, 14), &bar0->rmac_max_pyld_len); + } return 0; } From 25fff88eb7dbc63e03f1766e130515900d440dbb Mon Sep 17 00:00:00 2001 From: "raghavendra.koushik@neterion.com" Date: Wed, 3 Aug 2005 12:34:11 -0700 Subject: [PATCH 071/584] [PATCH] S2io: Timer based slowpath handling Hi, This patch implements the slow-path handling functions(link state change, hardware errors) as a timer. It is not handled in interrupt handler as was done previously. 
Signed-off-by: Ravinandan Arakali Signed-off-by: Raghavendra Koushik Signed-off-by: Jeff Garzik --- drivers/net/s2io.c | 22 +++++++++++++++++++--- drivers/net/s2io.h | 4 ++++ 2 files changed, 23 insertions(+), 3 deletions(-) diff --git a/drivers/net/s2io.c b/drivers/net/s2io.c index aff1fb74e14b..ee498d248d38 100644 --- a/drivers/net/s2io.c +++ b/drivers/net/s2io.c @@ -168,6 +168,12 @@ static char ethtool_stats_keys[][ETH_GSTRING_LEN] = { #define S2IO_TEST_LEN sizeof(s2io_gstrings) / ETH_GSTRING_LEN #define S2IO_STRINGS_LEN S2IO_TEST_LEN * ETH_GSTRING_LEN +#define S2IO_TIMER_CONF(timer, handle, arg, exp) \ + init_timer(&timer); \ + timer.function = handle; \ + timer.data = (unsigned long) arg; \ + mod_timer(&timer, (jiffies + exp)) \ + /* * Constants to be programmed into the Xena's registers, to configure * the XAUI. @@ -2741,6 +2747,7 @@ int s2io_open(struct net_device *dev) setting_mac_address_failed: free_irq(sp->pdev->irq, dev); isr_registration_failed: + del_timer_sync(&sp->alarm_timer); s2io_reset(sp); hw_init_failed: return err; @@ -2898,6 +2905,15 @@ int s2io_xmit(struct sk_buff *skb, struct net_device *dev) return 0; } +static void +s2io_alarm_handle(unsigned long data) +{ + nic_t *sp = (nic_t *)data; + + alarm_intr_handler(sp); + mod_timer(&sp->alarm_timer, jiffies + HZ / 2); +} + /** * s2io_isr - ISR handler of the device . * @irq: the irq of the device. @@ -2942,9 +2958,6 @@ static irqreturn_t s2io_isr(int irq, void *dev_id, struct pt_regs *regs) return IRQ_NONE; } - if (reason & (GEN_ERROR_INTR)) - alarm_intr_handler(sp); - #ifdef CONFIG_S2IO_NAPI if (reason & GEN_INTR_RXTRAFFIC) { if (netif_rx_schedule_prep(dev)) { @@ -4394,6 +4407,7 @@ static void s2io_card_down(nic_t * sp) unsigned long flags; register u64 val64 = 0; + del_timer_sync(&sp->alarm_timer); /* If s2io_set_link task is executing, wait till it completes. 
*/ while (test_and_set_bit(0, &(sp->link_state))) { msleep(50); @@ -4496,6 +4510,8 @@ static int s2io_card_up(nic_t * sp) return -ENODEV; } + S2IO_TIMER_CONF(sp->alarm_timer, s2io_alarm_handle, sp, (HZ/2)); + atomic_set(&sp->card_state, CARD_UP); return 0; } diff --git a/drivers/net/s2io.h b/drivers/net/s2io.h index ce9bf6d5ee00..263fe7a1b903 100644 --- a/drivers/net/s2io.h +++ b/drivers/net/s2io.h @@ -624,6 +624,9 @@ struct s2io_nic { struct tasklet_struct task; volatile unsigned long tasklet_status; + /* Timer that handles I/O errors/exceptions */ + struct timer_list alarm_timer; + /* Space to back up the PCI config space */ u32 config_space[256 / sizeof(u32)]; @@ -819,6 +822,7 @@ static int s2io_poll(struct net_device *dev, int *budget); #endif static void s2io_init_pci(nic_t * sp); int s2io_set_mac_addr(struct net_device *dev, u8 * addr); +static void s2io_alarm_handle(unsigned long data); static irqreturn_t s2io_isr(int irq, void *dev_id, struct pt_regs *regs); static int verify_xena_quiescence(nic_t *sp, u64 val64, int flag); static struct ethtool_ops netdev_ethtool_ops; From be3a6b02eb68a4d47397b771b6e4aa1f7f0f7ffb Mon Sep 17 00:00:00 2001 From: "raghavendra.koushik@neterion.com" Date: Wed, 3 Aug 2005 12:35:55 -0700 Subject: [PATCH 072/584] [PATCH] S2io: VLAN support Hi, Patch below adds VLAN support to the driver. 
Signed-off-by: Ravinandan Arakali Signed-off-by: Raghavendra Koushik Signed-off-by: Jeff Garzik --- drivers/net/s2io.c | 59 ++++++++++++++++++++++++++++++++++++++++++++-- drivers/net/s2io.h | 2 ++ 2 files changed, 59 insertions(+), 2 deletions(-) diff --git a/drivers/net/s2io.c b/drivers/net/s2io.c index ee498d248d38..db3e394c740b 100644 --- a/drivers/net/s2io.c +++ b/drivers/net/s2io.c @@ -55,6 +55,7 @@ #include #include #include +#include #include #include @@ -174,6 +175,30 @@ static char ethtool_stats_keys[][ETH_GSTRING_LEN] = { timer.data = (unsigned long) arg; \ mod_timer(&timer, (jiffies + exp)) \ +/* Add the vlan */ +static void s2io_vlan_rx_register(struct net_device *dev, + struct vlan_group *grp) +{ + nic_t *nic = dev->priv; + unsigned long flags; + + spin_lock_irqsave(&nic->tx_lock, flags); + nic->vlgrp = grp; + spin_unlock_irqrestore(&nic->tx_lock, flags); +} + +/* Unregister the vlan */ +static void s2io_vlan_rx_kill_vid(struct net_device *dev, unsigned long vid) +{ + nic_t *nic = dev->priv; + unsigned long flags; + + spin_lock_irqsave(&nic->tx_lock, flags); + if (nic->vlgrp) + nic->vlgrp->vlan_devices[vid] = NULL; + spin_unlock_irqrestore(&nic->tx_lock, flags); +} + /* * Constants to be programmed into the Xena's registers, to configure * the XAUI. 
@@ -2803,6 +2828,8 @@ int s2io_xmit(struct sk_buff *skb, struct net_device *dev) #ifdef NETIF_F_TSO int mss; #endif + u16 vlan_tag = 0; + int vlan_priority = 0; mac_info_t *mac_control; struct config_param *config; @@ -2821,6 +2848,13 @@ int s2io_xmit(struct sk_buff *skb, struct net_device *dev) queue = 0; + /* Get Fifo number to Transmit based on vlan priority */ + if (sp->vlgrp && vlan_tx_tag_present(skb)) { + vlan_tag = vlan_tx_tag_get(skb); + vlan_priority = vlan_tag >> 13; + queue = config->fifo_mapping[vlan_priority]; + } + put_off = (u16) mac_control->fifos[queue].tx_curr_put_info.offset; get_off = (u16) mac_control->fifos[queue].tx_curr_get_info.offset; txdp = (TxD_t *) mac_control->fifos[queue].list_info[put_off]. @@ -2857,6 +2891,11 @@ int s2io_xmit(struct sk_buff *skb, struct net_device *dev) txdp->Control_2 |= config->tx_intr_type; + if (sp->vlgrp && vlan_tx_tag_present(skb)) { + txdp->Control_2 |= TXD_VLAN_ENABLE; + txdp->Control_2 |= TXD_VLAN_TAG(vlan_tag); + } + txdp->Control_1 |= (TXD_BUFFER0_SIZE(frg_len) | TXD_GATHER_CODE_FIRST); txdp->Control_1 |= TXD_LIST_OWN_XENA; @@ -4653,10 +4692,23 @@ static int rx_osm_handler(ring_info_t *ring_data, RxD_t * rxdp) skb->protocol = eth_type_trans(skb, dev); #ifdef CONFIG_S2IO_NAPI - netif_receive_skb(skb); + if (sp->vlgrp && RXD_GET_VLAN_TAG(rxdp->Control_2)) { + /* Queueing the vlan frame to the upper layer */ + vlan_hwaccel_receive_skb(skb, sp->vlgrp, + RXD_GET_VLAN_TAG(rxdp->Control_2)); + } else { + netif_receive_skb(skb); + } #else - netif_rx(skb); + if (sp->vlgrp && RXD_GET_VLAN_TAG(rxdp->Control_2)) { + /* Queueing the vlan frame to the upper layer */ + vlan_hwaccel_rx(skb, sp->vlgrp, + RXD_GET_VLAN_TAG(rxdp->Control_2)); + } else { + netif_rx(skb); + } #endif + dev->last_rx = jiffies; atomic_dec(&sp->rx_bufs_left[ring_no]); return SUCCESS; @@ -4954,6 +5006,9 @@ s2io_init_nic(struct pci_dev *pdev, const struct pci_device_id *pre) dev->do_ioctl = &s2io_ioctl; dev->change_mtu = &s2io_change_mtu; 
SET_ETHTOOL_OPS(dev, &netdev_ethtool_ops); + dev->features |= NETIF_F_HW_VLAN_TX | NETIF_F_HW_VLAN_RX; + dev->vlan_rx_register = s2io_vlan_rx_register; + dev->vlan_rx_kill_vid = (void *)s2io_vlan_rx_kill_vid; /* * will use eth_mac_addr() for dev->set_mac_address diff --git a/drivers/net/s2io.h b/drivers/net/s2io.h index 263fe7a1b903..b924ef21814a 100644 --- a/drivers/net/s2io.h +++ b/drivers/net/s2io.h @@ -689,6 +689,8 @@ struct s2io_nic { #define CARD_UP 2 atomic_t card_state; volatile unsigned long link_state; + struct vlan_group *vlgrp; + spinlock_t rx_lock; atomic_t isr_cnt; }; From 541ae68f6ddf1c27aa6879935ce541f110484202 Mon Sep 17 00:00:00 2001 From: "raghavendra.koushik@neterion.com" Date: Wed, 3 Aug 2005 12:36:55 -0700 Subject: [PATCH 073/584] [PATCH] S2io: Support for Xframe II NIC Hi, This patch provides basic support for the Xframe II adapter. Includes the following changes: 1. New values to program XAUI interface. 2. Print the PCI/PCI-X mode (bus frequency, width). 3. Remove EOI from reset during initialization. 4. Enable all 8 PCCs if Xframe II adapter. 5. Programs the RLDRAM size depending on the device. (Note: RLDRAM size on XFRAME-I is 64Mb whereas on XFRAME-II it's 32 Mb). 6. Enable extended (64-bit) statistics counters. 7. Program timer interrupt duration based on PCI/PCI-X clock speed. 8. Not required to save/restore PCI config space before/after reset. 
Signed-off-by: Ravinandan Arakali Signed-off-by: Raghavendra Koushik Signed-off-by: Jeff Garzik --- drivers/net/s2io-regs.h | 53 +++-- drivers/net/s2io.c | 462 +++++++++++++++++++++++++++++++--------- drivers/net/s2io.h | 64 ++++++ 3 files changed, 471 insertions(+), 108 deletions(-) diff --git a/drivers/net/s2io-regs.h b/drivers/net/s2io-regs.h index 826deb0eb03a..159d87648f64 100644 --- a/drivers/net/s2io-regs.h +++ b/drivers/net/s2io-regs.h @@ -91,7 +91,21 @@ typedef struct _XENA_dev_config { SERR_SOURCE_MC | \ SERR_SOURCE_XGXS) - u8 unused_0[0x800 - 0x120]; + u64 pci_mode; +#define GET_PCI_MODE(val) ((val & vBIT(0xF, 0, 4)) >> 60) +#define PCI_MODE_PCI_33 0 +#define PCI_MODE_PCI_66 0x1 +#define PCI_MODE_PCIX_M1_66 0x2 +#define PCI_MODE_PCIX_M1_100 0x3 +#define PCI_MODE_PCIX_M1_133 0x4 +#define PCI_MODE_PCIX_M2_66 0x5 +#define PCI_MODE_PCIX_M2_100 0x6 +#define PCI_MODE_PCIX_M2_133 0x7 +#define PCI_MODE_UNSUPPORTED BIT(0) +#define PCI_MODE_32_BITS BIT(8) +#define PCI_MODE_UNKNOWN_MODE BIT(9) + + u8 unused_0[0x800 - 0x128]; /* PCI-X Controller registers */ u64 pic_int_status; @@ -223,19 +237,16 @@ typedef struct _XENA_dev_config { u64 xmsi_data; u64 rx_mat; +#define RX_MAT_SET(ring, msi) vBIT(msi, (8 * ring), 8) u8 unused6[0x8]; - u64 tx_mat0_7; - u64 tx_mat8_15; - u64 tx_mat16_23; - u64 tx_mat24_31; - u64 tx_mat32_39; - u64 tx_mat40_47; - u64 tx_mat48_55; - u64 tx_mat56_63; + u64 tx_mat0_n[0x8]; +#define TX_MAT_SET(fifo, msi) vBIT(msi, (8 * fifo), 8) - u8 unused_1[0x10]; + u8 unused_1[0x8]; + u64 stat_byte_cnt; +#define STAT_BC(n) vBIT(n,4,12) /* Automated statistics collection */ u64 stat_cfg; @@ -269,7 +280,12 @@ typedef struct _XENA_dev_config { u64 gpio_control; #define GPIO_CTRL_GPIO_0 BIT(8) - u8 unused7[0x600]; + u8 unused7_1[0x240 - 0x200]; + + u64 wreq_split_mask; +#define WREQ_SPLIT_MASK_SET_MASK(val) vBIT(val, 52, 12) + + u8 unused7_2[0x800 - 0x248]; /* TxDMA registers */ u64 txdma_int_status; @@ -470,6 +486,7 @@ typedef struct _XENA_dev_config { 
#define PRC_CTRL_NO_SNOOP (BIT(22)|BIT(23)) #define PRC_CTRL_NO_SNOOP_DESC BIT(22) #define PRC_CTRL_NO_SNOOP_BUFF BIT(23) +#define PRC_CTRL_BIMODAL_INTERRUPT BIT(37) #define PRC_CTRL_RXD_BACKOFF_INTERVAL(val) vBIT(val,40,24) u64 prc_alarm_action; @@ -742,7 +759,19 @@ typedef struct _XENA_dev_config { u64 mc_rldram_test_d1; u8 unused24[0x300 - 0x288]; u64 mc_rldram_test_d2; - u8 unused25[0x700 - 0x308]; + + u8 unused24_1[0x360 - 0x308]; + u64 mc_rldram_ctrl; +#define MC_RLDRAM_ENABLE_ODT BIT(7) + + u8 unused24_2[0x640 - 0x368]; + u64 mc_rldram_ref_per_herc; +#define MC_RLDRAM_SET_REF_PERIOD(val) vBIT(val, 0, 16) + + u8 unused24_3[0x660 - 0x648]; + u64 mc_rldram_mrs_herc; + + u8 unused25[0x700 - 0x668]; u64 mc_debug_ctrl; u8 unused26[0x3000 - 0x2f08]; diff --git a/drivers/net/s2io.c b/drivers/net/s2io.c index db3e394c740b..15e2ee9f9703 100644 --- a/drivers/net/s2io.c +++ b/drivers/net/s2io.c @@ -84,9 +84,10 @@ static inline int RXD_IS_UP2DT(RxD_t *rxdp) * problem, 600B, 600C, 600D, 640B, 640C and 640D. * macro below identifies these cards given the subsystem_id. */ -#define CARDS_WITH_FAULTY_LINK_INDICATORS(subid) \ - (((subid >= 0x600B) && (subid <= 0x600D)) || \ - ((subid >= 0x640B) && (subid <= 0x640D))) ? 1 : 0 +#define CARDS_WITH_FAULTY_LINK_INDICATORS(dev_type, subid) \ + (dev_type == XFRAME_I_DEVICE) ? \ + ((((subid >= 0x600B) && (subid <= 0x600D)) || \ + ((subid >= 0x640B) && (subid <= 0x640D))) ? 
1 : 0) : 0 #define LINK_IS_UP(val64) (!(val64 & (ADAPTER_STATUS_RMAC_REMOTE_FAULT | \ ADAPTER_STATUS_RMAC_LOCAL_FAULT))) @@ -207,7 +208,24 @@ static void s2io_vlan_rx_kill_vid(struct net_device *dev, unsigned long vid) #define SWITCH_SIGN 0xA5A5A5A5A5A5A5A5ULL #define END_SIGN 0x0 -static u64 default_mdio_cfg[] = { +static u64 herc_act_dtx_cfg[] = { + /* Set address */ + 0x80000515BA750000ULL, 0x80000515BA7500E0ULL, + /* Write data */ + 0x80000515BA750004ULL, 0x80000515BA7500E4ULL, + /* Set address */ + 0x80010515003F0000ULL, 0x80010515003F00E0ULL, + /* Write data */ + 0x80010515003F0004ULL, 0x80010515003F00E4ULL, + /* Set address */ + 0x80020515F2100000ULL, 0x80020515F21000E0ULL, + /* Write data */ + 0x80020515F2100004ULL, 0x80020515F21000E4ULL, + /* Done */ + END_SIGN +}; + +static u64 xena_mdio_cfg[] = { /* Reset PMA PLL */ 0xC001010000000000ULL, 0xC0010100000000E0ULL, 0xC0010100008000E4ULL, @@ -217,7 +235,7 @@ static u64 default_mdio_cfg[] = { END_SIGN }; -static u64 default_dtx_cfg[] = { +static u64 xena_dtx_cfg[] = { 0x8000051500000000ULL, 0x80000515000000E0ULL, 0x80000515D93500E4ULL, 0x8001051500000000ULL, 0x80010515000000E0ULL, 0x80010515001E00E4ULL, @@ -655,6 +673,87 @@ static void free_shared_mem(struct s2io_nic *nic) } } +/** + * s2io_verify_pci_mode - + */ + +static int s2io_verify_pci_mode(nic_t *nic) +{ + XENA_dev_config_t *bar0 = (XENA_dev_config_t *) nic->bar0; + register u64 val64 = 0; + int mode; + + val64 = readq(&bar0->pci_mode); + mode = (u8)GET_PCI_MODE(val64); + + if ( val64 & PCI_MODE_UNKNOWN_MODE) + return -1; /* Unknown PCI mode */ + return mode; +} + + +/** + * s2io_print_pci_mode - + */ +static int s2io_print_pci_mode(nic_t *nic) +{ + XENA_dev_config_t *bar0 = (XENA_dev_config_t *) nic->bar0; + register u64 val64 = 0; + int mode; + struct config_param *config = &nic->config; + + val64 = readq(&bar0->pci_mode); + mode = (u8)GET_PCI_MODE(val64); + + if ( val64 & PCI_MODE_UNKNOWN_MODE) + return -1; /* Unknown PCI mode */ + + if (val64 & 
PCI_MODE_32_BITS) { + DBG_PRINT(ERR_DBG, "%s: Device is on 32 bit ", nic->dev->name); + } else { + DBG_PRINT(ERR_DBG, "%s: Device is on 64 bit ", nic->dev->name); + } + + switch(mode) { + case PCI_MODE_PCI_33: + DBG_PRINT(ERR_DBG, "33MHz PCI bus\n"); + config->bus_speed = 33; + break; + case PCI_MODE_PCI_66: + DBG_PRINT(ERR_DBG, "66MHz PCI bus\n"); + config->bus_speed = 133; + break; + case PCI_MODE_PCIX_M1_66: + DBG_PRINT(ERR_DBG, "66MHz PCIX(M1) bus\n"); + config->bus_speed = 133; /* Herc doubles the clock rate */ + break; + case PCI_MODE_PCIX_M1_100: + DBG_PRINT(ERR_DBG, "100MHz PCIX(M1) bus\n"); + config->bus_speed = 200; + break; + case PCI_MODE_PCIX_M1_133: + DBG_PRINT(ERR_DBG, "133MHz PCIX(M1) bus\n"); + config->bus_speed = 266; + break; + case PCI_MODE_PCIX_M2_66: + DBG_PRINT(ERR_DBG, "133MHz PCIX(M2) bus\n"); + config->bus_speed = 133; + break; + case PCI_MODE_PCIX_M2_100: + DBG_PRINT(ERR_DBG, "200MHz PCIX(M2) bus\n"); + config->bus_speed = 200; + break; + case PCI_MODE_PCIX_M2_133: + DBG_PRINT(ERR_DBG, "266MHz PCIX(M2) bus\n"); + config->bus_speed = 266; + break; + default: + return -1; /* Unsupported bus speed */ + } + + return mode; +} + /** * init_nic - Initialization of hardware * @nic: device peivate variable @@ -687,6 +786,16 @@ static int init_nic(struct s2io_nic *nic) return -1; } + /* + * Herc requires EOI to be removed from reset before XGXS, so.. + */ + if (nic->device_type & XFRAME_II_DEVICE) { + val64 = 0xA500000000ULL; + writeq(val64, &bar0->sw_reset); + msleep(500); + val64 = readq(&bar0->sw_reset); + } + /* Remove XGXS from reset state */ val64 = 0; writeq(val64, &bar0->sw_reset); @@ -718,41 +827,51 @@ static int init_nic(struct s2io_nic *nic) * of 64 bit values into two registers in a particular * sequence. 
Hence a macro 'SWITCH_SIGN' has been defined * which will be defined in the array of configuration values - * (default_dtx_cfg & default_mdio_cfg) at appropriate places + * (xena_dtx_cfg & xena_mdio_cfg) at appropriate places * to switch writing from one regsiter to another. We continue * writing these values until we encounter the 'END_SIGN' macro. * For example, After making a series of 21 writes into * dtx_control register the 'SWITCH_SIGN' appears and hence we * start writing into mdio_control until we encounter END_SIGN. */ - while (1) { - dtx_cfg: - while (default_dtx_cfg[dtx_cnt] != END_SIGN) { - if (default_dtx_cfg[dtx_cnt] == SWITCH_SIGN) { - dtx_cnt++; - goto mdio_cfg; - } - SPECIAL_REG_WRITE(default_dtx_cfg[dtx_cnt], + if (nic->device_type & XFRAME_II_DEVICE) { + while (herc_act_dtx_cfg[dtx_cnt] != END_SIGN) { + SPECIAL_REG_WRITE(xena_dtx_cfg[dtx_cnt], &bar0->dtx_control, UF); - val64 = readq(&bar0->dtx_control); + if (dtx_cnt & 0x1) + msleep(1); /* Necessary!! */ dtx_cnt++; } - mdio_cfg: - while (default_mdio_cfg[mdio_cnt] != END_SIGN) { - if (default_mdio_cfg[mdio_cnt] == SWITCH_SIGN) { + } else { + while (1) { + dtx_cfg: + while (xena_dtx_cfg[dtx_cnt] != END_SIGN) { + if (xena_dtx_cfg[dtx_cnt] == SWITCH_SIGN) { + dtx_cnt++; + goto mdio_cfg; + } + SPECIAL_REG_WRITE(xena_dtx_cfg[dtx_cnt], + &bar0->dtx_control, UF); + val64 = readq(&bar0->dtx_control); + dtx_cnt++; + } + mdio_cfg: + while (xena_mdio_cfg[mdio_cnt] != END_SIGN) { + if (xena_mdio_cfg[mdio_cnt] == SWITCH_SIGN) { + mdio_cnt++; + goto dtx_cfg; + } + SPECIAL_REG_WRITE(xena_mdio_cfg[mdio_cnt], + &bar0->mdio_control, UF); + val64 = readq(&bar0->mdio_control); mdio_cnt++; + } + if ((xena_dtx_cfg[dtx_cnt] == END_SIGN) && + (xena_mdio_cfg[mdio_cnt] == END_SIGN)) { + break; + } else { goto dtx_cfg; } - SPECIAL_REG_WRITE(default_mdio_cfg[mdio_cnt], - &bar0->mdio_control, UF); - val64 = readq(&bar0->mdio_control); - mdio_cnt++; - } - if ((default_dtx_cfg[dtx_cnt] == END_SIGN) && - 
(default_mdio_cfg[mdio_cnt] == END_SIGN)) { - break; - } else { - goto dtx_cfg; } } @@ -803,7 +922,8 @@ static int init_nic(struct s2io_nic *nic) * Disable 4 PCCs for Xena1, 2 and 3 as per H/W bug * SXE-008 TRANSMIT DMA ARBITRATION ISSUE. */ - if (get_xena_rev_id(nic->pdev) < 4) + if ((nic->device_type == XFRAME_I_DEVICE) && + (get_xena_rev_id(nic->pdev) < 4)) writeq(PCC_ENABLE_FOUR, &bar0->pcc_enable); val64 = readq(&bar0->tx_fifo_partition_0); @@ -833,7 +953,11 @@ static int init_nic(struct s2io_nic *nic) * configured Rings. */ val64 = 0; - mem_size = 64; + if (nic->device_type & XFRAME_II_DEVICE) + mem_size = 32; + else + mem_size = 64; + for (i = 0; i < config->rx_ring_num; i++) { switch (i) { case 0: @@ -1116,6 +1240,11 @@ static int init_nic(struct s2io_nic *nic) /* Program statistics memory */ writeq(mac_control->stats_mem_phy, &bar0->stat_addr); + if (nic->device_type == XFRAME_II_DEVICE) { + val64 = STAT_BC(0x320); + writeq(val64, &bar0->stat_byte_cnt); + } + /* * Initializing the sampling rate for the device to calculate the * bandwidth utilization. @@ -1134,12 +1263,18 @@ static int init_nic(struct s2io_nic *nic) * 250 interrupts per sec. Continuous interrupts are enabled * by default. 
*/ - val64 = TTI_DATA1_MEM_TX_TIMER_VAL(0x2078) | - TTI_DATA1_MEM_TX_URNG_A(0xA) | + if (nic->device_type == XFRAME_II_DEVICE) { + int count = (nic->config.bus_speed * 125)/2; + val64 = TTI_DATA1_MEM_TX_TIMER_VAL(count); + } else { + + val64 = TTI_DATA1_MEM_TX_TIMER_VAL(0x2078); + } + val64 |= TTI_DATA1_MEM_TX_URNG_A(0xA) | TTI_DATA1_MEM_TX_URNG_B(0x10) | TTI_DATA1_MEM_TX_URNG_C(0x30) | TTI_DATA1_MEM_TX_TIMER_AC_EN; - if (use_continuous_tx_intrs) - val64 |= TTI_DATA1_MEM_TX_TIMER_CI_EN; + if (use_continuous_tx_intrs) + val64 |= TTI_DATA1_MEM_TX_TIMER_CI_EN; writeq(val64, &bar0->tti_data1_mem); val64 = TTI_DATA2_MEM_TX_UFC_A(0x10) | @@ -1171,9 +1306,19 @@ static int init_nic(struct s2io_nic *nic) time++; } + /* RTI Initialization */ - val64 = RTI_DATA1_MEM_RX_TIMER_VAL(0xFFF) | - RTI_DATA1_MEM_RX_URNG_A(0xA) | + if (nic->device_type == XFRAME_II_DEVICE) { + /* + * Programmed to generate Apprx 500 Intrs per + * second + */ + int count = (nic->config.bus_speed * 125)/4; + val64 = RTI_DATA1_MEM_RX_TIMER_VAL(count); + } else { + val64 = RTI_DATA1_MEM_RX_TIMER_VAL(0xFFF); + } + val64 |= RTI_DATA1_MEM_RX_URNG_A(0xA) | RTI_DATA1_MEM_RX_URNG_B(0x10) | RTI_DATA1_MEM_RX_URNG_C(0x30) | RTI_DATA1_MEM_RX_TIMER_AC_EN; @@ -1267,6 +1412,15 @@ static int init_nic(struct s2io_nic *nic) val64 |= PIC_CNTL_SHARED_SPLITS(shared_splits); writeq(val64, &bar0->pic_control); + /* + * Programming the Herc to split every write transaction + * that does not start on an ADB to reduce disconnects. 
+ */ + if (nic->device_type == XFRAME_II_DEVICE) { + val64 = WREQ_SPLIT_MASK_SET_MASK(255); + writeq(val64, &bar0->wreq_split_mask); + } + return SUCCESS; } @@ -1509,18 +1663,18 @@ static void en_dis_able_nic_intrs(struct s2io_nic *nic, u16 mask, int flag) } } -static int check_prc_pcc_state(u64 val64, int flag, int rev_id) +static int check_prc_pcc_state(u64 val64, int flag, int rev_id, int herc) { int ret = 0; if (flag == FALSE) { - if (rev_id >= 4) { + if ((!herc && (rev_id >= 4)) || herc) { if (!(val64 & ADAPTER_STATUS_RMAC_PCC_IDLE) && ((val64 & ADAPTER_STATUS_RC_PRC_QUIESCENT) == ADAPTER_STATUS_RC_PRC_QUIESCENT)) { ret = 1; } - } else { + }else { if (!(val64 & ADAPTER_STATUS_RMAC_PCC_FOUR_IDLE) && ((val64 & ADAPTER_STATUS_RC_PRC_QUIESCENT) == ADAPTER_STATUS_RC_PRC_QUIESCENT)) { @@ -1528,7 +1682,7 @@ static int check_prc_pcc_state(u64 val64, int flag, int rev_id) } } } else { - if (rev_id >= 4) { + if ((!herc && (rev_id >= 4)) || herc) { if (((val64 & ADAPTER_STATUS_RMAC_PCC_IDLE) == ADAPTER_STATUS_RMAC_PCC_IDLE) && (!(val64 & ADAPTER_STATUS_RC_PRC_QUIESCENT) || @@ -1564,10 +1718,11 @@ static int check_prc_pcc_state(u64 val64, int flag, int rev_id) static int verify_xena_quiescence(nic_t *sp, u64 val64, int flag) { - int ret = 0; + int ret = 0, herc; u64 tmp64 = ~((u64) val64); int rev_id = get_xena_rev_id(sp->pdev); + herc = (sp->device_type == XFRAME_II_DEVICE); if (! 
(tmp64 & (ADAPTER_STATUS_TDMA_READY | ADAPTER_STATUS_RDMA_READY | @@ -1575,7 +1730,7 @@ static int verify_xena_quiescence(nic_t *sp, u64 val64, int flag) ADAPTER_STATUS_PIC_QUIESCENT | ADAPTER_STATUS_MC_DRAM_READY | ADAPTER_STATUS_MC_QUEUES_READY | ADAPTER_STATUS_M_PLL_LOCK | ADAPTER_STATUS_P_PLL_LOCK))) { - ret = check_prc_pcc_state(val64, flag, rev_id); + ret = check_prc_pcc_state(val64, flag, rev_id, herc); } return ret; @@ -1706,7 +1861,8 @@ static int start_nic(struct s2io_nic *nic) /* SXE-002: Initialize link and activity LED */ subid = nic->pdev->subsystem_device; - if ((subid & 0xFF) >= 0x07) { + if (((subid & 0xFF) >= 0x07) && + (nic->device_type == XFRAME_I_DEVICE)) { val64 = readq(&bar0->gpio_control); val64 |= 0x0000800000000000ULL; writeq(val64, &bar0->gpio_control); @@ -2541,9 +2697,12 @@ void s2io_reset(nic_t * sp) */ msleep(250); + if (!(sp->device_type & XFRAME_II_DEVICE)) { /* Restore the PCI state saved during initializarion. */ - pci_restore_state(sp->pdev); - + pci_restore_state(sp->pdev); + } else { + pci_set_master(sp->pdev); + } s2io_init_pci(sp); msleep(250); @@ -2568,7 +2727,8 @@ void s2io_reset(nic_t * sp) /* SXE-002: Configure link and activity LED to turn it off */ subid = sp->pdev->subsystem_device; - if ((subid & 0xFF) >= 0x07) { + if (((subid & 0xFF) >= 0x07) && + (sp->device_type == XFRAME_I_DEVICE)) { val64 = readq(&bar0->gpio_control); val64 |= 0x0000800000000000ULL; writeq(val64, &bar0->gpio_control); @@ -2576,6 +2736,15 @@ void s2io_reset(nic_t * sp) writeq(val64, (void __iomem *) ((u8 *) bar0 + 0x2700)); } + /* + * Clear spurious ECC interrupts that would have occured on + * XFRAME II cards after reset. 
+ */ + if (sp->device_type == XFRAME_II_DEVICE) { + val64 = readq(&bar0->pcc_err_reg); + writeq(val64, &bar0->pcc_err_reg); + } + sp->device_enabled_once = FALSE; } @@ -3463,7 +3632,8 @@ static void s2io_phy_id(unsigned long data) u16 subid; subid = sp->pdev->subsystem_device; - if ((subid & 0xFF) >= 0x07) { + if ((sp->device_type == XFRAME_II_DEVICE) || + ((subid & 0xFF) >= 0x07)) { val64 = readq(&bar0->gpio_control); val64 ^= GPIO_CTRL_GPIO_0; writeq(val64, &bar0->gpio_control); @@ -3500,7 +3670,8 @@ static int s2io_ethtool_idnic(struct net_device *dev, u32 data) subid = sp->pdev->subsystem_device; last_gpio_ctrl_val = readq(&bar0->gpio_control); - if ((subid & 0xFF) < 0x07) { + if ((sp->device_type == XFRAME_I_DEVICE) && + ((subid & 0xFF) < 0x07)) { val64 = readq(&bar0->adapter_control); if (!(val64 & ADAPTER_CNTL_EN)) { printk(KERN_ERR @@ -3520,7 +3691,7 @@ static int s2io_ethtool_idnic(struct net_device *dev, u32 data) msleep_interruptible(MAX_FLICKER_TIME); del_timer_sync(&sp->id_timer); - if (CARDS_WITH_FAULTY_LINK_INDICATORS(subid)) { + if (CARDS_WITH_FAULTY_LINK_INDICATORS(sp->device_type, subid)) { writeq(last_gpio_ctrl_val, &bar0->gpio_control); last_gpio_ctrl_val = readq(&bar0->gpio_control); } @@ -4134,44 +4305,91 @@ static void s2io_get_ethtool_stats(struct net_device *dev, StatInfo_t *stat_info = sp->mac_control.stats_info; s2io_updt_stats(sp); - tmp_stats[i++] = le32_to_cpu(stat_info->tmac_frms); - tmp_stats[i++] = le32_to_cpu(stat_info->tmac_data_octets); + tmp_stats[i++] = + (u64)le32_to_cpu(stat_info->tmac_frms_oflow) << 32 | + le32_to_cpu(stat_info->tmac_frms); + tmp_stats[i++] = + (u64)le32_to_cpu(stat_info->tmac_data_octets_oflow) << 32 | + le32_to_cpu(stat_info->tmac_data_octets); tmp_stats[i++] = le64_to_cpu(stat_info->tmac_drop_frms); - tmp_stats[i++] = le32_to_cpu(stat_info->tmac_mcst_frms); - tmp_stats[i++] = le32_to_cpu(stat_info->tmac_bcst_frms); + tmp_stats[i++] = + (u64)le32_to_cpu(stat_info->tmac_mcst_frms_oflow) << 32 | + 
le32_to_cpu(stat_info->tmac_mcst_frms); + tmp_stats[i++] = + (u64)le32_to_cpu(stat_info->tmac_bcst_frms_oflow) << 32 | + le32_to_cpu(stat_info->tmac_bcst_frms); tmp_stats[i++] = le64_to_cpu(stat_info->tmac_pause_ctrl_frms); - tmp_stats[i++] = le32_to_cpu(stat_info->tmac_any_err_frms); + tmp_stats[i++] = + (u64)le32_to_cpu(stat_info->tmac_any_err_frms_oflow) << 32 | + le32_to_cpu(stat_info->tmac_any_err_frms); tmp_stats[i++] = le64_to_cpu(stat_info->tmac_vld_ip_octets); - tmp_stats[i++] = le32_to_cpu(stat_info->tmac_vld_ip); - tmp_stats[i++] = le32_to_cpu(stat_info->tmac_drop_ip); - tmp_stats[i++] = le32_to_cpu(stat_info->tmac_icmp); - tmp_stats[i++] = le32_to_cpu(stat_info->tmac_rst_tcp); + tmp_stats[i++] = + (u64)le32_to_cpu(stat_info->tmac_vld_ip_oflow) << 32 | + le32_to_cpu(stat_info->tmac_vld_ip); + tmp_stats[i++] = + (u64)le32_to_cpu(stat_info->tmac_drop_ip_oflow) << 32 | + le32_to_cpu(stat_info->tmac_drop_ip); + tmp_stats[i++] = + (u64)le32_to_cpu(stat_info->tmac_icmp_oflow) << 32 | + le32_to_cpu(stat_info->tmac_icmp); + tmp_stats[i++] = + (u64)le32_to_cpu(stat_info->tmac_rst_tcp_oflow) << 32 | + le32_to_cpu(stat_info->tmac_rst_tcp); tmp_stats[i++] = le64_to_cpu(stat_info->tmac_tcp); - tmp_stats[i++] = le32_to_cpu(stat_info->tmac_udp); - tmp_stats[i++] = le32_to_cpu(stat_info->rmac_vld_frms); - tmp_stats[i++] = le32_to_cpu(stat_info->rmac_data_octets); + tmp_stats[i++] = (u64)le32_to_cpu(stat_info->tmac_udp_oflow) << 32 | + le32_to_cpu(stat_info->tmac_udp); + tmp_stats[i++] = + (u64)le32_to_cpu(stat_info->rmac_vld_frms_oflow) << 32 | + le32_to_cpu(stat_info->rmac_vld_frms); + tmp_stats[i++] = + (u64)le32_to_cpu(stat_info->rmac_data_octets_oflow) << 32 | + le32_to_cpu(stat_info->rmac_data_octets); tmp_stats[i++] = le64_to_cpu(stat_info->rmac_fcs_err_frms); tmp_stats[i++] = le64_to_cpu(stat_info->rmac_drop_frms); - tmp_stats[i++] = le32_to_cpu(stat_info->rmac_vld_mcst_frms); - tmp_stats[i++] = le32_to_cpu(stat_info->rmac_vld_bcst_frms); + tmp_stats[i++] = + 
(u64)le32_to_cpu(stat_info->rmac_vld_mcst_frms_oflow) << 32 | + le32_to_cpu(stat_info->rmac_vld_mcst_frms); + tmp_stats[i++] = + (u64)le32_to_cpu(stat_info->rmac_vld_bcst_frms_oflow) << 32 | + le32_to_cpu(stat_info->rmac_vld_bcst_frms); tmp_stats[i++] = le32_to_cpu(stat_info->rmac_in_rng_len_err_frms); tmp_stats[i++] = le64_to_cpu(stat_info->rmac_long_frms); tmp_stats[i++] = le64_to_cpu(stat_info->rmac_pause_ctrl_frms); - tmp_stats[i++] = le32_to_cpu(stat_info->rmac_discarded_frms); - tmp_stats[i++] = le32_to_cpu(stat_info->rmac_usized_frms); - tmp_stats[i++] = le32_to_cpu(stat_info->rmac_osized_frms); - tmp_stats[i++] = le32_to_cpu(stat_info->rmac_frag_frms); - tmp_stats[i++] = le32_to_cpu(stat_info->rmac_jabber_frms); - tmp_stats[i++] = le32_to_cpu(stat_info->rmac_ip); + tmp_stats[i++] = + (u64)le32_to_cpu(stat_info->rmac_discarded_frms_oflow) << 32 | + le32_to_cpu(stat_info->rmac_discarded_frms); + tmp_stats[i++] = + (u64)le32_to_cpu(stat_info->rmac_usized_frms_oflow) << 32 | + le32_to_cpu(stat_info->rmac_usized_frms); + tmp_stats[i++] = + (u64)le32_to_cpu(stat_info->rmac_osized_frms_oflow) << 32 | + le32_to_cpu(stat_info->rmac_osized_frms); + tmp_stats[i++] = + (u64)le32_to_cpu(stat_info->rmac_frag_frms_oflow) << 32 | + le32_to_cpu(stat_info->rmac_frag_frms); + tmp_stats[i++] = + (u64)le32_to_cpu(stat_info->rmac_jabber_frms_oflow) << 32 | + le32_to_cpu(stat_info->rmac_jabber_frms); + tmp_stats[i++] = (u64)le32_to_cpu(stat_info->rmac_ip_oflow) << 32 | + le32_to_cpu(stat_info->rmac_ip); tmp_stats[i++] = le64_to_cpu(stat_info->rmac_ip_octets); tmp_stats[i++] = le32_to_cpu(stat_info->rmac_hdr_err_ip); - tmp_stats[i++] = le32_to_cpu(stat_info->rmac_drop_ip); - tmp_stats[i++] = le32_to_cpu(stat_info->rmac_icmp); + tmp_stats[i++] = (u64)le32_to_cpu(stat_info->rmac_drop_ip_oflow) << 32 | + le32_to_cpu(stat_info->rmac_drop_ip); + tmp_stats[i++] = (u64)le32_to_cpu(stat_info->rmac_icmp_oflow) << 32 | + le32_to_cpu(stat_info->rmac_icmp); tmp_stats[i++] = 
le64_to_cpu(stat_info->rmac_tcp); - tmp_stats[i++] = le32_to_cpu(stat_info->rmac_udp); - tmp_stats[i++] = le32_to_cpu(stat_info->rmac_err_drp_udp); - tmp_stats[i++] = le32_to_cpu(stat_info->rmac_pause_cnt); - tmp_stats[i++] = le32_to_cpu(stat_info->rmac_accepted_ip); + tmp_stats[i++] = (u64)le32_to_cpu(stat_info->rmac_udp_oflow) << 32 | + le32_to_cpu(stat_info->rmac_udp); + tmp_stats[i++] = + (u64)le32_to_cpu(stat_info->rmac_err_drp_udp_oflow) << 32 | + le32_to_cpu(stat_info->rmac_err_drp_udp); + tmp_stats[i++] = + (u64)le32_to_cpu(stat_info->rmac_pause_cnt_oflow) << 32 | + le32_to_cpu(stat_info->rmac_pause_cnt); + tmp_stats[i++] = + (u64)le32_to_cpu(stat_info->rmac_accepted_ip_oflow) << 32 | + le32_to_cpu(stat_info->rmac_accepted_ip); tmp_stats[i++] = le32_to_cpu(stat_info->rmac_err_tcp); tmp_stats[i++] = 0; tmp_stats[i++] = stat_info->sw_stat.single_ecc_errs; @@ -4401,7 +4619,8 @@ static void s2io_set_link(unsigned long data) val64 = readq(&bar0->adapter_control); val64 |= ADAPTER_CNTL_EN; writeq(val64, &bar0->adapter_control); - if (CARDS_WITH_FAULTY_LINK_INDICATORS(subid)) { + if (CARDS_WITH_FAULTY_LINK_INDICATORS(nic->device_type, + subid)) { val64 = readq(&bar0->gpio_control); val64 |= GPIO_CTRL_GPIO_0; writeq(val64, &bar0->gpio_control); @@ -4423,7 +4642,8 @@ static void s2io_set_link(unsigned long data) } s2io_link(nic, LINK_UP); } else { - if (CARDS_WITH_FAULTY_LINK_INDICATORS(subid)) { + if (CARDS_WITH_FAULTY_LINK_INDICATORS(nic->device_type, + subid)) { val64 = readq(&bar0->gpio_control); val64 &= ~GPIO_CTRL_GPIO_0; writeq(val64, &bar0->gpio_control); @@ -4708,7 +4928,6 @@ static int rx_osm_handler(ring_info_t *ring_data, RxD_t * rxdp) netif_rx(skb); } #endif - dev->last_rx = jiffies; atomic_dec(&sp->rx_bufs_left[ring_no]); return SUCCESS; @@ -4842,6 +5061,7 @@ s2io_init_nic(struct pci_dev *pdev, const struct pci_device_id *pre) u16 subid; mac_info_t *mac_control; struct config_param *config; + int mode; #ifdef CONFIG_S2IO_NAPI DBG_PRINT(ERR_DBG, "NAPI 
support has been enabled\n"); @@ -4898,6 +5118,12 @@ s2io_init_nic(struct pci_dev *pdev, const struct pci_device_id *pre) sp->high_dma_flag = dma_flag; sp->device_enabled_once = FALSE; + if ((pdev->device == PCI_DEVICE_ID_HERC_WIN) || + (pdev->device == PCI_DEVICE_ID_HERC_UNI)) + sp->device_type = XFRAME_II_DEVICE; + else + sp->device_type = XFRAME_I_DEVICE; + /* Initialize some PCI/PCI-X fields of the NIC. */ s2io_init_pci(sp); @@ -5033,7 +5259,9 @@ s2io_init_nic(struct pci_dev *pdev, const struct pci_device_id *pre) INIT_WORK(&sp->set_link_task, (void (*)(void *)) s2io_set_link, sp); - pci_save_state(sp->pdev); + if (!(sp->device_type & XFRAME_II_DEVICE)) { + pci_save_state(sp->pdev); + } /* Setting swapper control on the NIC, for proper reset operation */ if (s2io_set_swapper(sp)) { @@ -5043,12 +5271,26 @@ s2io_init_nic(struct pci_dev *pdev, const struct pci_device_id *pre) goto set_swap_failed; } - /* - * Fix for all "FFs" MAC address problems observed on - * Alpha platforms - */ - fix_mac_address(sp); - s2io_reset(sp); + /* Verify if the Herc works on the slot its placed into */ + if (sp->device_type & XFRAME_II_DEVICE) { + mode = s2io_verify_pci_mode(sp); + if (mode < 0) { + DBG_PRINT(ERR_DBG, "%s: ", __FUNCTION__); + DBG_PRINT(ERR_DBG, " Unsupported PCI bus mode\n"); + ret = -EBADSLT; + goto set_swap_failed; + } + } + + /* Not needed for Herc */ + if (sp->device_type & XFRAME_I_DEVICE) { + /* + * Fix for all "FFs" MAC address problems observed on + * Alpha platforms + */ + fix_mac_address(sp); + s2io_reset(sp); + } /* * MAC address initialization. 
@@ -5073,22 +5315,13 @@ s2io_init_nic(struct pci_dev *pdev, const struct pci_device_id *pre) sp->def_mac_addr[0].mac_addr[5] = (u8) (mac_down >> 16); sp->def_mac_addr[0].mac_addr[4] = (u8) (mac_down >> 24); - DBG_PRINT(INIT_DBG, - "DEFAULT MAC ADDR:0x%02x-%02x-%02x-%02x-%02x-%02x\n", - sp->def_mac_addr[0].mac_addr[0], - sp->def_mac_addr[0].mac_addr[1], - sp->def_mac_addr[0].mac_addr[2], - sp->def_mac_addr[0].mac_addr[3], - sp->def_mac_addr[0].mac_addr[4], - sp->def_mac_addr[0].mac_addr[5]); - /* Set the factory defined MAC address initially */ dev->addr_len = ETH_ALEN; memcpy(dev->dev_addr, sp->def_mac_addr, ETH_ALEN); /* * Initialize the tasklet status and link state flags - * and the card statte parameter + * and the card state parameter */ atomic_set(&(sp->card_state), 0); sp->tasklet_status = 0; @@ -5123,9 +5356,46 @@ s2io_init_nic(struct pci_dev *pdev, const struct pci_device_id *pre) goto register_failed; } + if (sp->device_type & XFRAME_II_DEVICE) { + DBG_PRINT(ERR_DBG, "%s: Neterion Xframe II 10GbE adapter ", + dev->name); + DBG_PRINT(ERR_DBG, "(rev %d), Driver %s\n", + get_xena_rev_id(sp->pdev), + s2io_driver_version); + DBG_PRINT(ERR_DBG, "MAC ADDR: %02x:%02x:%02x:%02x:%02x:%02x\n", + sp->def_mac_addr[0].mac_addr[0], + sp->def_mac_addr[0].mac_addr[1], + sp->def_mac_addr[0].mac_addr[2], + sp->def_mac_addr[0].mac_addr[3], + sp->def_mac_addr[0].mac_addr[4], + sp->def_mac_addr[0].mac_addr[5]); + int mode = s2io_print_pci_mode(sp); + if (mode < 0) { + DBG_PRINT(ERR_DBG, " Unsupported PCI bus mode "); + ret = -EBADSLT; + goto set_swap_failed; + } + } else { + DBG_PRINT(ERR_DBG, "%s: Neterion Xframe I 10GbE adapter ", + dev->name); + DBG_PRINT(ERR_DBG, "(rev %d), Driver %s\n", + get_xena_rev_id(sp->pdev), + s2io_driver_version); + DBG_PRINT(ERR_DBG, "MAC ADDR: %02x:%02x:%02x:%02x:%02x:%02x\n", + sp->def_mac_addr[0].mac_addr[0], + sp->def_mac_addr[0].mac_addr[1], + sp->def_mac_addr[0].mac_addr[2], + sp->def_mac_addr[0].mac_addr[3], + 
sp->def_mac_addr[0].mac_addr[4], + sp->def_mac_addr[0].mac_addr[5]); + } + /* Initialize device name */ strcpy(sp->name, dev->name); - strcat(sp->name, ": Neterion Xframe I 10GbE adapter"); + if (sp->device_type & XFRAME_II_DEVICE) + strcat(sp->name, ": Neterion Xframe II 10GbE adapter"); + else + strcat(sp->name, ": Neterion Xframe I 10GbE adapter"); /* * Make Link state as off at this point, when the Link change diff --git a/drivers/net/s2io.h b/drivers/net/s2io.h index b924ef21814a..df8cfd0475be 100644 --- a/drivers/net/s2io.h +++ b/drivers/net/s2io.h @@ -201,6 +201,67 @@ typedef struct stat_block { u32 rxf_wr_cnt; u32 txf_rd_cnt; +/* Tx MAC statistics overflow counters. */ + u32 tmac_data_octets_oflow; + u32 tmac_frms_oflow; + u32 tmac_bcst_frms_oflow; + u32 tmac_mcst_frms_oflow; + u32 tmac_ucst_frms_oflow; + u32 tmac_ttl_octets_oflow; + u32 tmac_any_err_frms_oflow; + u32 tmac_nucst_frms_oflow; + u64 tmac_vlan_frms; + u32 tmac_drop_ip_oflow; + u32 tmac_vld_ip_oflow; + u32 tmac_rst_tcp_oflow; + u32 tmac_icmp_oflow; + u32 tpa_unknown_protocol; + u32 tmac_udp_oflow; + u32 reserved_10; + u32 tpa_parse_failure; + +/* Rx MAC Statistics overflow counters. 
*/ + u32 rmac_data_octets_oflow; + u32 rmac_vld_frms_oflow; + u32 rmac_vld_bcst_frms_oflow; + u32 rmac_vld_mcst_frms_oflow; + u32 rmac_accepted_ucst_frms_oflow; + u32 rmac_ttl_octets_oflow; + u32 rmac_discarded_frms_oflow; + u32 rmac_accepted_nucst_frms_oflow; + u32 rmac_usized_frms_oflow; + u32 rmac_drop_events_oflow; + u32 rmac_frag_frms_oflow; + u32 rmac_osized_frms_oflow; + u32 rmac_ip_oflow; + u32 rmac_jabber_frms_oflow; + u32 rmac_icmp_oflow; + u32 rmac_drop_ip_oflow; + u32 rmac_err_drp_udp_oflow; + u32 rmac_udp_oflow; + u32 reserved_11; + u32 rmac_pause_cnt_oflow; + u64 rmac_ttl_1519_4095_frms; + u64 rmac_ttl_4096_8191_frms; + u64 rmac_ttl_8192_max_frms; + u64 rmac_ttl_gt_max_frms; + u64 rmac_osized_alt_frms; + u64 rmac_jabber_alt_frms; + u64 rmac_gt_max_alt_frms; + u64 rmac_vlan_frms; + u32 rmac_len_discard; + u32 rmac_fcs_discard; + u32 rmac_pf_discard; + u32 rmac_da_discard; + u32 rmac_red_discard; + u32 rmac_rts_discard; + u32 reserved_12; + u32 rmac_ingm_full_discard; + u32 reserved_13; + u32 rmac_accepted_ip_oflow; + u32 reserved_14; + u32 link_fault_cnt; + /* Software statistics maintained by driver */ swStat_t sw_stat; } StatInfo_t; @@ -690,6 +751,9 @@ struct s2io_nic { atomic_t card_state; volatile unsigned long link_state; struct vlan_group *vlgrp; +#define XFRAME_I_DEVICE 1 +#define XFRAME_II_DEVICE 2 + u8 device_type; spinlock_t rx_lock; atomic_t isr_cnt; From b6e3f9828b9dc188cfe80364365cc68bf45df949 Mon Sep 17 00:00:00 2001 From: "raghavendra.koushik@neterion.com" Date: Wed, 3 Aug 2005 12:38:01 -0700 Subject: [PATCH 074/584] [PATCH] S2io: Support for bimodal interrupts Hi, This is a patch to provide bimodal interrupt moderation support for Xframe II adapter. Basically, in this moderation scheme, the adapter raises a traffic interrupt if the no. of packets transmitted and/or received reaches a programmable threshold. 
Signed-off-by: Ravinandan Arakali Signed-off-by: Raghavendra Koushik Signed-off-by: Jeff Garzik --- drivers/net/s2io.c | 126 +++++++++++++++++++++++++++++++-------------- drivers/net/s2io.h | 3 +- 2 files changed, 87 insertions(+), 42 deletions(-) diff --git a/drivers/net/s2io.c b/drivers/net/s2io.c index 15e2ee9f9703..f430ffe7d6f8 100644 --- a/drivers/net/s2io.c +++ b/drivers/net/s2io.c @@ -297,6 +297,7 @@ static unsigned int mc_pause_threshold_q4q7 = 187; static unsigned int shared_splits; static unsigned int tmac_util_period = 5; static unsigned int rmac_util_period = 5; +static unsigned int bimodal = 0; #ifndef CONFIG_S2IO_NAPI static unsigned int indicate_max_pkts; #endif @@ -1306,52 +1307,86 @@ static int init_nic(struct s2io_nic *nic) time++; } + if (nic->config.bimodal) { + int k = 0; + for (k = 0; k < config->rx_ring_num; k++) { + val64 = TTI_CMD_MEM_WE | TTI_CMD_MEM_STROBE_NEW_CMD; + val64 |= TTI_CMD_MEM_OFFSET(0x38+k); + writeq(val64, &bar0->tti_command_mem); - /* RTI Initialization */ - if (nic->device_type == XFRAME_II_DEVICE) { /* - * Programmed to generate Apprx 500 Intrs per - * second - */ - int count = (nic->config.bus_speed * 125)/4; - val64 = RTI_DATA1_MEM_RX_TIMER_VAL(count); + * Once the operation completes, the Strobe bit of the command + * register will be reset. We poll for this particular condition + * We wait for a maximum of 500ms for the operation to complete, + * if it's not complete by then we return error. 
+ */ + time = 0; + while (TRUE) { + val64 = readq(&bar0->tti_command_mem); + if (!(val64 & TTI_CMD_MEM_STROBE_NEW_CMD)) { + break; + } + if (time > 10) { + DBG_PRINT(ERR_DBG, + "%s: TTI init Failed\n", + dev->name); + return -1; + } + time++; + msleep(50); + } + } } else { - val64 = RTI_DATA1_MEM_RX_TIMER_VAL(0xFFF); - } - val64 |= RTI_DATA1_MEM_RX_URNG_A(0xA) | - RTI_DATA1_MEM_RX_URNG_B(0x10) | - RTI_DATA1_MEM_RX_URNG_C(0x30) | RTI_DATA1_MEM_RX_TIMER_AC_EN; - writeq(val64, &bar0->rti_data1_mem); - - val64 = RTI_DATA2_MEM_RX_UFC_A(0x1) | - RTI_DATA2_MEM_RX_UFC_B(0x2) | - RTI_DATA2_MEM_RX_UFC_C(0x40) | RTI_DATA2_MEM_RX_UFC_D(0x80); - writeq(val64, &bar0->rti_data2_mem); - - val64 = RTI_CMD_MEM_WE | RTI_CMD_MEM_STROBE_NEW_CMD; - writeq(val64, &bar0->rti_command_mem); - - /* - * Once the operation completes, the Strobe bit of the - * command register will be reset. We poll for this - * particular condition. We wait for a maximum of 500ms - * for the operation to complete, if it's not complete - * by then we return error. 
- */ - time = 0; - while (TRUE) { - val64 = readq(&bar0->rti_command_mem); - if (!(val64 & RTI_CMD_MEM_STROBE_NEW_CMD)) { - break; + /* RTI Initialization */ + if (nic->device_type == XFRAME_II_DEVICE) { + /* + * Programmed to generate Apprx 500 Intrs per + * second + */ + int count = (nic->config.bus_speed * 125)/4; + val64 = RTI_DATA1_MEM_RX_TIMER_VAL(count); + } else { + val64 = RTI_DATA1_MEM_RX_TIMER_VAL(0xFFF); } - if (time > 10) { - DBG_PRINT(ERR_DBG, "%s: RTI init Failed\n", - dev->name); - return -1; + val64 |= RTI_DATA1_MEM_RX_URNG_A(0xA) | + RTI_DATA1_MEM_RX_URNG_B(0x10) | + RTI_DATA1_MEM_RX_URNG_C(0x30) | RTI_DATA1_MEM_RX_TIMER_AC_EN; + + writeq(val64, &bar0->rti_data1_mem); + + val64 = RTI_DATA2_MEM_RX_UFC_A(0x1) | + RTI_DATA2_MEM_RX_UFC_B(0x2) | + RTI_DATA2_MEM_RX_UFC_C(0x40) | RTI_DATA2_MEM_RX_UFC_D(0x80); + writeq(val64, &bar0->rti_data2_mem); + + for (i = 0; i < config->rx_ring_num; i++) { + val64 = RTI_CMD_MEM_WE | RTI_CMD_MEM_STROBE_NEW_CMD + | RTI_CMD_MEM_OFFSET(i); + writeq(val64, &bar0->rti_command_mem); + + /* + * Once the operation completes, the Strobe bit of the + * command register will be reset. We poll for this + * particular condition. We wait for a maximum of 500ms + * for the operation to complete, if it's not complete + * by then we return error. 
+ */ + time = 0; + while (TRUE) { + val64 = readq(&bar0->rti_command_mem); + if (!(val64 & RTI_CMD_MEM_STROBE_NEW_CMD)) { + break; + } + if (time > 10) { + DBG_PRINT(ERR_DBG, "%s: RTI init Failed\n", + dev->name); + return -1; + } + time++; + msleep(50); + } } - time++; - msleep(50); } /* @@ -1789,6 +1824,8 @@ static int start_nic(struct s2io_nic *nic) &bar0->prc_rxd0_n[i]); val64 = readq(&bar0->prc_ctrl_n[i]); + if (nic->config.bimodal) + val64 |= PRC_CTRL_BIMODAL_INTERRUPT; #ifndef CONFIG_2BUFF_MODE val64 |= PRC_CTRL_RC_ENABLED; #else @@ -5030,6 +5067,7 @@ module_param(mc_pause_threshold_q4q7, int, 0); module_param(shared_splits, int, 0); module_param(tmac_util_period, int, 0); module_param(rmac_util_period, int, 0); +module_param(bimodal, bool, 0); #ifndef CONFIG_S2IO_NAPI module_param(indicate_max_pkts, int, 0); #endif @@ -5397,6 +5435,14 @@ s2io_init_nic(struct pci_dev *pdev, const struct pci_device_id *pre) else strcat(sp->name, ": Neterion Xframe I 10GbE adapter"); + /* Initialize bimodal Interrupts */ + sp->config.bimodal = bimodal; + if (!(sp->device_type & XFRAME_II_DEVICE) && bimodal) { + sp->config.bimodal = 0; + DBG_PRINT(ERR_DBG,"%s:Bimodal intr not supported by Xframe I\n", + dev->name); + } + /* * Make Link state as off at this point, when the Link change * interrupt comes the state will be automatically changed to diff --git a/drivers/net/s2io.h b/drivers/net/s2io.h index df8cfd0475be..946314503daa 100644 --- a/drivers/net/s2io.h +++ b/drivers/net/s2io.h @@ -261,8 +261,6 @@ typedef struct stat_block { u32 rmac_accepted_ip_oflow; u32 reserved_14; u32 link_fault_cnt; - -/* Software statistics maintained by driver */ swStat_t sw_stat; } StatInfo_t; @@ -349,6 +347,7 @@ struct config_param { #define MAX_RX_BLOCKS_PER_RING 150 rx_ring_config_t rx_cfg[MAX_RX_RINGS]; /*Per-Rx Ring config */ + u8 bimodal; /*Flag for setting bimodal interrupts*/ #define HEADER_ETHERNET_II_802_3_SIZE 14 #define HEADER_802_2_SIZE 3 From a371a07de9bce837ea4e84569a2b390a42e360ef 
Mon Sep 17 00:00:00 2001 From: "raghavendra.koushik@neterion.com" Date: Wed, 3 Aug 2005 12:38:59 -0700 Subject: [PATCH 075/584] [PATCH] S2io: New link handling scheme for Xframe II Hi, The below patch implements a new "Link state change handling" scheme supported by the Xframe II adapter. It also bumps up the driver version to 2.0.2.0. Signed-off-by: Ravinandan Arakali Signed-off-by: Raghavendra Koushik Signed-off-by: Jeff Garzik --- drivers/net/s2io-regs.h | 8 ++- drivers/net/s2io.c | 147 +++++++++++++++++++++++++++++++--------- 2 files changed, 121 insertions(+), 34 deletions(-) diff --git a/drivers/net/s2io-regs.h b/drivers/net/s2io-regs.h index 159d87648f64..2234a8f05eb2 100644 --- a/drivers/net/s2io-regs.h +++ b/drivers/net/s2io-regs.h @@ -167,7 +167,11 @@ typedef struct _XENA_dev_config { u8 unused4[0x08]; u64 gpio_int_reg; +#define GPIO_INT_REG_LINK_DOWN BIT(1) +#define GPIO_INT_REG_LINK_UP BIT(2) u64 gpio_int_mask; +#define GPIO_INT_MASK_LINK_DOWN BIT(1) +#define GPIO_INT_MASK_LINK_UP BIT(2) u64 gpio_alarms; u8 unused5[0x38]; @@ -279,8 +283,10 @@ typedef struct _XENA_dev_config { u64 gpio_control; #define GPIO_CTRL_GPIO_0 BIT(8) + u64 misc_control; +#define MISC_LINK_STABILITY_PRD(val) vBIT(val,29,3) - u8 unused7_1[0x240 - 0x200]; + u8 unused7_1[0x240 - 0x208]; u64 wreq_split_mask; #define WREQ_SPLIT_MASK_SET_MASK(val) vBIT(val, 52, 12) diff --git a/drivers/net/s2io.c b/drivers/net/s2io.c index f430ffe7d6f8..e7c428561e3f 100644 --- a/drivers/net/s2io.c +++ b/drivers/net/s2io.c @@ -67,7 +67,7 @@ /* S2io Driver name & version. 
*/ static char s2io_driver_name[] = "Neterion"; -static char s2io_driver_version[] = "Version 1.7.7"; +static char s2io_driver_version[] = "Version 2.0.2.0"; static inline int RXD_IS_UP2DT(RxD_t *rxdp) { @@ -1456,8 +1456,28 @@ static int init_nic(struct s2io_nic *nic) writeq(val64, &bar0->wreq_split_mask); } + /* Setting Link stability period to 64 ms */ + if (nic->device_type == XFRAME_II_DEVICE) { + val64 = MISC_LINK_STABILITY_PRD(3); + writeq(val64, &bar0->misc_control); + } + return SUCCESS; } +#define LINK_UP_DOWN_INTERRUPT 1 +#define MAC_RMAC_ERR_TIMER 2 + +#if defined(CONFIG_MSI_MODE) || defined(CONFIG_MSIX_MODE) +#define s2io_link_fault_indication(x) MAC_RMAC_ERR_TIMER +#else +int s2io_link_fault_indication(nic_t *nic) +{ + if (nic->device_type == XFRAME_II_DEVICE) + return LINK_UP_DOWN_INTERRUPT; + else + return MAC_RMAC_ERR_TIMER; +} +#endif /** * en_dis_able_nic_intrs - Enable or Disable the interrupts @@ -1485,11 +1505,22 @@ static void en_dis_able_nic_intrs(struct s2io_nic *nic, u16 mask, int flag) temp64 &= ~((u64) val64); writeq(temp64, &bar0->general_int_mask); /* - * Disabled all PCIX, Flash, MDIO, IIC and GPIO + * If Hercules adapter enable GPIO otherwise + * disabled all PCIX, Flash, MDIO, IIC and GPIO * interrupts for now. * TODO */ - writeq(DISABLE_ALL_INTRS, &bar0->pic_int_mask); + if (s2io_link_fault_indication(nic) == + LINK_UP_DOWN_INTERRUPT ) { + temp64 = readq(&bar0->pic_int_mask); + temp64 &= ~((u64) PIC_INT_GPIO); + writeq(temp64, &bar0->pic_int_mask); + temp64 = readq(&bar0->gpio_int_mask); + temp64 &= ~((u64) GPIO_INT_MASK_LINK_UP); + writeq(temp64, &bar0->gpio_int_mask); + } else { + writeq(DISABLE_ALL_INTRS, &bar0->pic_int_mask); + } /* * No MSI Support is available presently, so TTI and * RTI interrupts are also disabled. 
@@ -1580,17 +1611,8 @@ static void en_dis_able_nic_intrs(struct s2io_nic *nic, u16 mask, int flag) writeq(temp64, &bar0->general_int_mask); /* * All MAC block error interrupts are disabled for now - * except the link status change interrupt. * TODO */ - val64 = MAC_INT_STATUS_RMAC_INT; - temp64 = readq(&bar0->mac_int_mask); - temp64 &= ~((u64) val64); - writeq(temp64, &bar0->mac_int_mask); - - val64 = readq(&bar0->mac_rmac_err_mask); - val64 &= ~((u64) RMAC_LINK_STATE_CHANGE_INT); - writeq(val64, &bar0->mac_rmac_err_mask); } else if (flag == DISABLE_INTRS) { /* * Disable MAC Intrs in the general intr mask register @@ -1879,8 +1901,10 @@ static int start_nic(struct s2io_nic *nic) } /* Enable select interrupts */ - interruptible = TX_TRAFFIC_INTR | RX_TRAFFIC_INTR | TX_MAC_INTR | - RX_MAC_INTR | MC_INTR; + interruptible = TX_TRAFFIC_INTR | RX_TRAFFIC_INTR | MC_INTR; + interruptible |= TX_PIC_INTR | RX_PIC_INTR; + interruptible |= TX_MAC_INTR | RX_MAC_INTR; + en_dis_able_nic_intrs(nic, interruptible, ENABLE_INTRS); /* @@ -2004,8 +2028,9 @@ static void stop_nic(struct s2io_nic *nic) config = &nic->config; /* Disable all interrupts */ - interruptible = TX_TRAFFIC_INTR | RX_TRAFFIC_INTR | TX_MAC_INTR | - RX_MAC_INTR | MC_INTR; + interruptible = TX_TRAFFIC_INTR | RX_TRAFFIC_INTR | MC_INTR; + interruptible |= TX_PIC_INTR | RX_PIC_INTR; + interruptible |= TX_MAC_INTR | RX_MAC_INTR; en_dis_able_nic_intrs(nic, interruptible, DISABLE_INTRS); /* Disable PRCs */ @@ -2618,10 +2643,12 @@ static void alarm_intr_handler(struct s2io_nic *nic) register u64 val64 = 0, err_reg = 0; /* Handling link status change error Intr */ - err_reg = readq(&bar0->mac_rmac_err_reg); - writeq(err_reg, &bar0->mac_rmac_err_reg); - if (err_reg & RMAC_LINK_STATE_CHANGE_INT) { - schedule_work(&nic->set_link_task); + if (s2io_link_fault_indication(nic) == MAC_RMAC_ERR_TIMER) { + err_reg = readq(&bar0->mac_rmac_err_reg); + writeq(err_reg, &bar0->mac_rmac_err_reg); + if (err_reg & RMAC_LINK_STATE_CHANGE_INT) 
{ + schedule_work(&nic->set_link_task); + } } /* Handling Ecc errors */ @@ -2947,7 +2974,7 @@ int s2io_open(struct net_device *dev) * Nic is initialized */ netif_carrier_off(dev); - sp->last_link_state = 0; /* Unkown link state */ + sp->last_link_state = LINK_DOWN; /* Initialize H/W and enable interrupts */ if (s2io_card_up(sp)) { @@ -3159,6 +3186,53 @@ s2io_alarm_handle(unsigned long data) mod_timer(&sp->alarm_timer, jiffies + HZ / 2); } +static void s2io_txpic_intr_handle(nic_t *sp) +{ + XENA_dev_config_t *bar0 = (XENA_dev_config_t *) sp->bar0; + u64 val64; + + val64 = readq(&bar0->pic_int_status); + if (val64 & PIC_INT_GPIO) { + val64 = readq(&bar0->gpio_int_reg); + if ((val64 & GPIO_INT_REG_LINK_DOWN) && + (val64 & GPIO_INT_REG_LINK_UP)) { + val64 |= GPIO_INT_REG_LINK_DOWN; + val64 |= GPIO_INT_REG_LINK_UP; + writeq(val64, &bar0->gpio_int_reg); + goto masking; + } + + if (((sp->last_link_state == LINK_UP) && + (val64 & GPIO_INT_REG_LINK_DOWN)) || + ((sp->last_link_state == LINK_DOWN) && + (val64 & GPIO_INT_REG_LINK_UP))) { + val64 = readq(&bar0->gpio_int_mask); + val64 |= GPIO_INT_MASK_LINK_DOWN; + val64 |= GPIO_INT_MASK_LINK_UP; + writeq(val64, &bar0->gpio_int_mask); + s2io_set_link((unsigned long)sp); + } +masking: + if (sp->last_link_state == LINK_UP) { + /*enable down interrupt */ + val64 = readq(&bar0->gpio_int_mask); + /* unmasks link down intr */ + val64 &= ~GPIO_INT_MASK_LINK_DOWN; + /* masks link up intr */ + val64 |= GPIO_INT_MASK_LINK_UP; + writeq(val64, &bar0->gpio_int_mask); + } else { + /*enable UP Interrupt */ + val64 = readq(&bar0->gpio_int_mask); + /* unmasks link up interrupt */ + val64 &= ~GPIO_INT_MASK_LINK_UP; + /* masks link down interrupt */ + val64 |= GPIO_INT_MASK_LINK_DOWN; + writeq(val64, &bar0->gpio_int_mask); + } + } +} + /** * s2io_isr - ISR handler of the device . * @irq: the irq of the device. 
@@ -3241,6 +3315,8 @@ static irqreturn_t s2io_isr(int irq, void *dev_id, struct pt_regs *regs) tx_intr_handler(&mac_control->fifos[i]); } + if (reason & GEN_INTR_TXPIC) + s2io_txpic_intr_handle(sp); /* * If the Rx buffer count is below the panic threshold then * reallocate the buffers from the interrupt handler itself, @@ -4644,11 +4720,13 @@ static void s2io_set_link(unsigned long data) } subid = nic->pdev->subsystem_device; - /* - * Allow a small delay for the NICs self initiated - * cleanup to complete. - */ - msleep(100); + if (s2io_link_fault_indication(nic) == MAC_RMAC_ERR_TIMER) { + /* + * Allow a small delay for the NICs self initiated + * cleanup to complete. + */ + msleep(100); + } val64 = readq(&bar0->adapter_status); if (verify_xena_quiescence(nic, val64, nic->device_enabled_once)) { @@ -4666,13 +4744,16 @@ static void s2io_set_link(unsigned long data) val64 |= ADAPTER_LED_ON; writeq(val64, &bar0->adapter_control); } - val64 = readq(&bar0->adapter_status); - if (!LINK_IS_UP(val64)) { - DBG_PRINT(ERR_DBG, "%s:", dev->name); - DBG_PRINT(ERR_DBG, " Link down"); - DBG_PRINT(ERR_DBG, "after "); - DBG_PRINT(ERR_DBG, "enabling "); - DBG_PRINT(ERR_DBG, "device \n"); + if (s2io_link_fault_indication(nic) == + MAC_RMAC_ERR_TIMER) { + val64 = readq(&bar0->adapter_status); + if (!LINK_IS_UP(val64)) { + DBG_PRINT(ERR_DBG, "%s:", dev->name); + DBG_PRINT(ERR_DBG, " Link down"); + DBG_PRINT(ERR_DBG, "after "); + DBG_PRINT(ERR_DBG, "enabling "); + DBG_PRINT(ERR_DBG, "device \n"); + } } if (nic->device_enabled_once == FALSE) { nic->device_enabled_once = TRUE; From 0b1f7ebe455ba4f1f46e7024150eeddbbf08addc Mon Sep 17 00:00:00 2001 From: "raghavendra.koushik@neterion.com" Date: Wed, 3 Aug 2005 12:39:56 -0700 Subject: [PATCH 076/584] [PATCH] S2io: Miscellaneous fixes Hi, The last patch in this series fixes the following issues found during testing. 1. Ensure we don't pass zero sized buffers to the card(which can lockup) 2. 
Restore the PCI-X parameters(in case of Xframe I adapter) after a reset. 3. Make sure total size of all FIFOs does not exceed 8192. Signed-off-by: Ravinandan Arakali Signed-off-by: Raghavendra Koushik Signed-off-by: Jeff Garzik --- drivers/net/s2io.c | 46 ++++++++++++++++++++++++++++++++++------------ 1 file changed, 34 insertions(+), 12 deletions(-) diff --git a/drivers/net/s2io.c b/drivers/net/s2io.c index e7c428561e3f..abf910e40334 100644 --- a/drivers/net/s2io.c +++ b/drivers/net/s2io.c @@ -365,10 +365,9 @@ static int init_shared_mem(struct s2io_nic *nic) size += config->tx_cfg[i].fifo_len; } if (size > MAX_AVAILABLE_TXDS) { - DBG_PRINT(ERR_DBG, "%s: Total number of Tx FIFOs ", - dev->name); - DBG_PRINT(ERR_DBG, "exceeds the maximum value "); - DBG_PRINT(ERR_DBG, "that can be used\n"); + DBG_PRINT(ERR_DBG, "%s: Requested TxDs too high, ", + __FUNCTION__); + DBG_PRINT(ERR_DBG, "Requested: %d, max supported: 8192\n", size); return FAILURE; } @@ -611,8 +610,9 @@ static void free_shared_mem(struct s2io_nic *nic) lst_per_page); for (j = 0; j < page_num; j++) { int mem_blks = (j * lst_per_page); - if (!mac_control->fifos[i].list_info[mem_blks]. - list_virt_addr) + if ((!mac_control->fifos[i].list_info) || + (!mac_control->fifos[i].list_info[mem_blks]. + list_virt_addr)) break; pci_free_consistent(nic->pdev, PAGE_SIZE, mac_control->fifos[i]. 
@@ -2594,6 +2594,8 @@ static void tx_intr_handler(fifo_info_t *fifo_data) for (j = 0; j < frg_cnt; j++, txdlp++) { skb_frag_t *frag = &skb_shinfo(skb)->frags[j]; + if (!txdlp->Buffer_Pointer) + break; pci_unmap_page(nic->pdev, (dma_addr_t) txdlp-> @@ -2744,6 +2746,10 @@ void s2io_reset(nic_t * sp) u64 val64; u16 subid, pci_cmd; + /* Back up the PCI-X CMD reg, dont want to lose MMRBC, OST settings */ + if (sp->device_type == XFRAME_I_DEVICE) + pci_read_config_word(sp->pdev, PCIX_COMMAND_REGISTER, &(pci_cmd)); + val64 = SW_RESET_ALL; writeq(val64, &bar0->sw_reset); @@ -2762,8 +2768,10 @@ void s2io_reset(nic_t * sp) msleep(250); if (!(sp->device_type & XFRAME_II_DEVICE)) { - /* Restore the PCI state saved during initializarion. */ + /* Restore the PCI state saved during initializarion. */ pci_restore_state(sp->pdev); + pci_write_config_word(sp->pdev, PCIX_COMMAND_REGISTER, + pci_cmd); } else { pci_set_master(sp->pdev); } @@ -2974,7 +2982,7 @@ int s2io_open(struct net_device *dev) * Nic is initialized */ netif_carrier_off(dev); - sp->last_link_state = LINK_DOWN; + sp->last_link_state = 0; /* Initialize H/W and enable interrupts */ if (s2io_card_up(sp)) { @@ -3102,6 +3110,15 @@ int s2io_xmit(struct sk_buff *skb, struct net_device *dev) spin_unlock_irqrestore(&sp->tx_lock, flags); return 0; } + + /* A buffer with no data will be dropped */ + if (!skb->len) { + DBG_PRINT(TX_DBG, "%s:Buffer has no data..\n", dev->name); + dev_kfree_skb(skb); + spin_unlock_irqrestore(&sp->tx_lock, flags); + return 0; + } + #ifdef NETIF_F_TSO mss = skb_shinfo(skb)->tso_size; if (mss) { @@ -3136,6 +3153,9 @@ int s2io_xmit(struct sk_buff *skb, struct net_device *dev) /* For fragmented SKB. 
*/ for (i = 0; i < frg_cnt; i++) { skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; + /* A '0' length fragment will be ignored */ + if (!frag->size) + continue; txdp++; txdp->Buffer_Pointer = (u64) pci_map_page (sp->pdev, frag->page, frag->page_offset, @@ -5257,7 +5277,8 @@ s2io_init_nic(struct pci_dev *pdev, const struct pci_device_id *pre) config = &sp->config; /* Tx side parameters. */ - tx_fifo_len[0] = DEFAULT_FIFO_LEN; /* Default value. */ + if (tx_fifo_len[0] == 0) + tx_fifo_len[0] = DEFAULT_FIFO_LEN; /* Default value. */ config->tx_fifo_num = tx_fifo_num; for (i = 0; i < MAX_TX_FIFOS; i++) { config->tx_cfg[i].fifo_len = tx_fifo_len[i]; @@ -5280,7 +5301,8 @@ s2io_init_nic(struct pci_dev *pdev, const struct pci_device_id *pre) config->max_txds = MAX_SKB_FRAGS; /* Rx side parameters. */ - rx_ring_sz[0] = SMALL_BLK_CNT; /* Default value. */ + if (rx_ring_sz[0] == 0) + rx_ring_sz[0] = SMALL_BLK_CNT; /* Default value. */ config->rx_ring_num = rx_ring_num; for (i = 0; i < MAX_RX_RINGS; i++) { config->rx_cfg[i].num_rxd = rx_ring_sz[i] * @@ -5310,7 +5332,7 @@ s2io_init_nic(struct pci_dev *pdev, const struct pci_device_id *pre) /* initialize the shared memory used by the NIC and the host */ if (init_shared_mem(sp)) { DBG_PRINT(ERR_DBG, "%s: Memory allocation failed\n", - dev->name); + __FUNCTION__); ret = -ENOMEM; goto mem_alloc_failed; } @@ -5488,7 +5510,7 @@ s2io_init_nic(struct pci_dev *pdev, const struct pci_device_id *pre) sp->def_mac_addr[0].mac_addr[3], sp->def_mac_addr[0].mac_addr[4], sp->def_mac_addr[0].mac_addr[5]); - int mode = s2io_print_pci_mode(sp); + mode = s2io_print_pci_mode(sp); if (mode < 0) { DBG_PRINT(ERR_DBG, " Unsupported PCI bus mode "); ret = -EBADSLT; From 303bcb4b675d7284a1097dd1c18c995c0179883a Mon Sep 17 00:00:00 2001 From: "raghavendra.koushik@neterion.com" Date: Wed, 3 Aug 2005 12:41:38 -0700 Subject: [PATCH 077/584] [PATCH] S2io: Errors found during review Hi, This is a patch to incorporate comments from earlier 12 patches. 
It also fixes a few issues we found during this time. Following is a list of changes in this patch. Item 1 incorporates earlier comments. Issues addressed in items 2 to 4 were discovered recently. 1. wmb() call in s2io_xmit() replaced with mmiowb(). 2. The dtx_control register was earlier programmed incorrectly for Xframe II adapter. 3. As suggested by hardware team, after a reset, in case of Xframe II adapter, we clear certain spurious errors by clearing PCI-X ECC status register, "detected parity error" bit in PCI_STATUS register and PCI_STATUS bit in txpic_int register. 4. On IBM PPC platforms, we found that in the Rx buffer replenish function, two memory writes (one to the descriptor length and another to the ownership) were getting reordered. This was causing the adapter to see the ownership transferred to it before the length was updated. One solution was to add a wmb() but since this would turn out to be expensive on some platforms if called for every descriptor, we set the ownership bit and other fields of '2' to 'N' Rx descriptors followed by a wmb() and then set the ownership of first descriptor ('1'). Here the value 'N' is configurable by making it a module loadable parameter (rxsync_frequency). (NOTE: This parameter is the exponent of a power of 2, i.e. N = 1 << rxsync_frequency). 5. Bumped up the driver version no. to 2.0.2.1 Signed-off-by: Ravinandan Arakali Signed-off-by: Raghavendra Koushik Signed-off-by: Jeff Garzik --- drivers/net/s2io.c | 54 ++++++++++++++++++++++++++++++++++------------ drivers/net/s2io.h | 5 ----- 2 files changed, 40 insertions(+), 19 deletions(-) diff --git a/drivers/net/s2io.c b/drivers/net/s2io.c index abf910e40334..e083351e3f42 100644 --- a/drivers/net/s2io.c +++ b/drivers/net/s2io.c @@ -67,7 +67,7 @@ /* S2io Driver name & version. 
*/ static char s2io_driver_name[] = "Neterion"; -static char s2io_driver_version[] = "Version 2.0.2.0"; +static char s2io_driver_version[] = "Version 2.0.2.1"; static inline int RXD_IS_UP2DT(RxD_t *rxdp) { @@ -301,6 +301,8 @@ static unsigned int bimodal = 0; #ifndef CONFIG_S2IO_NAPI static unsigned int indicate_max_pkts; #endif +/* Frequency of Rx desc syncs expressed as power of 2 */ +static unsigned int rxsync_frequency = 3; /* * S2IO device table. @@ -837,7 +839,7 @@ static int init_nic(struct s2io_nic *nic) */ if (nic->device_type & XFRAME_II_DEVICE) { while (herc_act_dtx_cfg[dtx_cnt] != END_SIGN) { - SPECIAL_REG_WRITE(xena_dtx_cfg[dtx_cnt], + SPECIAL_REG_WRITE(herc_act_dtx_cfg[dtx_cnt], &bar0->dtx_control, UF); if (dtx_cnt & 0x1) msleep(1); /* Necessary!! */ @@ -2083,6 +2085,7 @@ int fill_rx_buffers(struct s2io_nic *nic, int ring_no) #ifndef CONFIG_S2IO_NAPI unsigned long flags; #endif + RxD_t *first_rxdp = NULL; mac_control = &nic->mac_control; config = &nic->config; @@ -2202,6 +2205,10 @@ int fill_rx_buffers(struct s2io_nic *nic, int ring_no) if (!skb) { DBG_PRINT(ERR_DBG, "%s: Out of ", dev->name); DBG_PRINT(ERR_DBG, "memory to allocate SKBs\n"); + if (first_rxdp) { + wmb(); + first_rxdp->Control_1 |= RXD_OWN_XENA; + } return -ENOMEM; } #ifndef CONFIG_2BUFF_MODE @@ -2212,7 +2219,8 @@ int fill_rx_buffers(struct s2io_nic *nic, int ring_no) rxdp->Control_2 &= (~MASK_BUFFER0_SIZE); rxdp->Control_2 |= SET_BUFFER0_SIZE(size); rxdp->Host_Control = (unsigned long) (skb); - rxdp->Control_1 |= RXD_OWN_XENA; + if (alloc_tab & ((1 << rxsync_frequency) - 1)) + rxdp->Control_1 |= RXD_OWN_XENA; off++; off %= (MAX_RXDS_PER_BLOCK + 1); mac_control->rings[ring_no].rx_curr_put_info.offset = off; @@ -2239,17 +2247,34 @@ int fill_rx_buffers(struct s2io_nic *nic, int ring_no) rxdp->Control_2 |= SET_BUFFER1_SIZE(1); /* dummy. */ rxdp->Control_2 |= BIT(0); /* Set Buffer_Empty bit. 
*/ rxdp->Host_Control = (u64) ((unsigned long) (skb)); - rxdp->Control_1 |= RXD_OWN_XENA; + if (alloc_tab & ((1 << rxsync_frequency) - 1)) + rxdp->Control_1 |= RXD_OWN_XENA; off++; mac_control->rings[ring_no].rx_curr_put_info.offset = off; #endif rxdp->Control_2 |= SET_RXD_MARKER; + if (!(alloc_tab & ((1 << rxsync_frequency) - 1))) { + if (first_rxdp) { + wmb(); + first_rxdp->Control_1 |= RXD_OWN_XENA; + } + first_rxdp = rxdp; + } atomic_inc(&nic->rx_bufs_left[ring_no]); alloc_tab++; } end: + /* Transfer ownership of first descriptor to adapter just before + * exiting. Before that, use memory barrier so that ownership + * and other fields are seen by adapter correctly. + */ + if (first_rxdp) { + wmb(); + first_rxdp->Control_1 |= RXD_OWN_XENA; + } + return SUCCESS; } @@ -2783,16 +2808,16 @@ void s2io_reset(nic_t * sp) s2io_set_swapper(sp); /* Clear certain PCI/PCI-X fields after reset */ - pci_read_config_word(sp->pdev, PCI_COMMAND, &pci_cmd); - pci_cmd &= 0x7FFF; /* Clear parity err detect bit */ - pci_write_config_word(sp->pdev, PCI_COMMAND, pci_cmd); + if (sp->device_type == XFRAME_II_DEVICE) { + /* Clear parity err detect bit */ + pci_write_config_word(sp->pdev, PCI_STATUS, 0x8000); - val64 = readq(&bar0->txpic_int_reg); - val64 &= ~BIT(62); /* Clearing PCI_STATUS error reflected here */ - writeq(val64, &bar0->txpic_int_reg); + /* Clearing PCIX Ecc status register */ + pci_write_config_dword(sp->pdev, 0x68, 0x7C); - /* Clearing PCIX Ecc status register */ - pci_write_config_dword(sp->pdev, 0x68, 0); + /* Clearing PCI_STATUS error reflected here */ + writeq(BIT(62), &bar0->txpic_int_reg); + } /* Reset device statistics maintained by OS */ memset(&sp->stats, 0, sizeof (struct net_device_stats)); @@ -3168,8 +3193,6 @@ int s2io_xmit(struct sk_buff *skb, struct net_device *dev) val64 = mac_control->fifos[queue].list_info[put_off].list_phy_addr; writeq(val64, &tx_fifo->TxDL_Pointer); - wmb(); - val64 = (TX_FIFO_LAST_TXD_NUM(frg_cnt) | TX_FIFO_FIRST_LIST | 
TX_FIFO_LAST_LIST); @@ -3179,6 +3202,8 @@ int s2io_xmit(struct sk_buff *skb, struct net_device *dev) #endif writeq(val64, &tx_fifo->List_Control); + mmiowb(); + put_off++; put_off %= mac_control->fifos[queue].tx_curr_put_info.fifo_len + 1; mac_control->fifos[queue].tx_curr_put_info.offset = put_off; @@ -5172,6 +5197,7 @@ module_param(bimodal, bool, 0); #ifndef CONFIG_S2IO_NAPI module_param(indicate_max_pkts, int, 0); #endif +module_param(rxsync_frequency, int, 0); /** * s2io_init_nic - Initialization of the adapter . diff --git a/drivers/net/s2io.h b/drivers/net/s2io.h index 946314503daa..5d9270730ca2 100644 --- a/drivers/net/s2io.h +++ b/drivers/net/s2io.h @@ -13,11 +13,6 @@ #ifndef _S2IO_H #define _S2IO_H -/* Enable 2 buffer mode by default for SGI system */ -#ifdef CONFIG_IA64_SGI_SN2 -#define CONFIG_2BUFF_MODE -#endif - #define TBD 0 #define BIT(loc) (0x8000000000000000ULL >> (loc)) #define vBIT(val, loc, sz) (((u64)val) << (64-loc-sz)) From 67c4f3fa25502ce7ed82fb0307e09cf36f1f81da Mon Sep 17 00:00:00 2001 From: Jeff Garzik Date: Thu, 11 Aug 2005 02:07:25 -0400 Subject: [PATCH 078/584] Fix numerous minor problems with new phy subsystem. Includes fixes for problems noted by Adrian Bunk, Andrew Morton, and one other person lost in the annals of history (and email folders). 
--- drivers/net/phy/Kconfig | 12 +- drivers/net/phy/Makefile | 12 +- drivers/net/phy/mdio_bus.c | 4 +- drivers/net/phy/phy.c | 9 +- drivers/net/phy/phy.c.orig | 860 ----------------------------------- drivers/net/phy/phy_device.c | 48 +- include/linux/phy.h | 1 - 7 files changed, 53 insertions(+), 893 deletions(-) delete mode 100644 drivers/net/phy/phy.c.orig diff --git a/drivers/net/phy/Kconfig b/drivers/net/phy/Kconfig index 8b5db2343cc3..c2f1bf1d02d2 100644 --- a/drivers/net/phy/Kconfig +++ b/drivers/net/phy/Kconfig @@ -5,7 +5,7 @@ menu "PHY device support" config PHYLIB - bool "PHY Device support and infrastructure" + tristate "PHY Device support and infrastructure" depends on NET_ETHERNET help Ethernet controllers are usually attached to PHY @@ -24,31 +24,31 @@ comment "MII PHY device drivers" depends on PHYLIB config MARVELL_PHY - bool "Drivers for Marvell PHYs" + tristate "Drivers for Marvell PHYs" depends on PHYLIB ---help--- Currently has a driver for the 88E1011S config DAVICOM_PHY - bool "Drivers for Davicom PHYs" + tristate "Drivers for Davicom PHYs" depends on PHYLIB ---help--- Currently supports dm9161e and dm9131 config QSEMI_PHY - bool "Drivers for Quality Semiconductor PHYs" + tristate "Drivers for Quality Semiconductor PHYs" depends on PHYLIB ---help--- Currently supports the qs6612 config LXT_PHY - bool "Drivers for the Intel LXT PHYs" + tristate "Drivers for the Intel LXT PHYs" depends on PHYLIB ---help--- Currently supports the lxt970, lxt971 config CICADA_PHY - bool "Drivers for the Cicada PHYs" + tristate "Drivers for the Cicada PHYs" depends on PHYLIB ---help--- Currently supports the cis8204 diff --git a/drivers/net/phy/Makefile b/drivers/net/phy/Makefile index 1af05de6ced0..fb7cb385a659 100644 --- a/drivers/net/phy/Makefile +++ b/drivers/net/phy/Makefile @@ -1,9 +1,9 @@ # Makefile for Linux PHY drivers -obj-$(CONFIG_PHYLIB) += phy.o phy_device.o mdio_bus.o +libphy-objs := phy.o phy_device.o mdio_bus.o -obj-$(CONFIG_MARVELL_PHY) += 
marvell.o -obj-$(CONFIG_DAVICOM_PHY) += davicom.o -obj-$(CONFIG_CICADA_PHY) += cicada.o -obj-$(CONFIG_LXT_PHY) += lxt.o -obj-$(CONFIG_QSEMI_PHY) += qsemi.o +obj-$(CONFIG_MARVELL_PHY) += libphy.o marvell.o +obj-$(CONFIG_DAVICOM_PHY) += libphy.o davicom.o +obj-$(CONFIG_CICADA_PHY) += libphy.o cicada.o +obj-$(CONFIG_LXT_PHY) += libphy.o lxt.o +obj-$(CONFIG_QSEMI_PHY) += libphy.o qsemi.o diff --git a/drivers/net/phy/mdio_bus.c b/drivers/net/phy/mdio_bus.c index e75103ba6f86..5fbea6acfe80 100644 --- a/drivers/net/phy/mdio_bus.c +++ b/drivers/net/phy/mdio_bus.c @@ -165,9 +165,9 @@ struct bus_type mdio_bus_type = { .resume = mdio_bus_resume, }; -static int __init mdio_bus_init(void) +int __init mdio_bus_init(void) { return bus_register(&mdio_bus_type); } -subsys_initcall(mdio_bus_init); + diff --git a/drivers/net/phy/phy.c b/drivers/net/phy/phy.c index e2c6896b92d2..934065dd6371 100644 --- a/drivers/net/phy/phy.c +++ b/drivers/net/phy/phy.c @@ -39,7 +39,6 @@ #include #include -static void phy_change(void *data); static void phy_timer(unsigned long data); /* Convenience function to print out the current phy status @@ -464,7 +463,6 @@ void phy_stop_machine(struct phy_device *phydev) phydev->adjust_state = NULL; } -#ifdef CONFIG_PHYCONTROL /* phy_error: * * Moves the PHY to the HALTED state in response to a read @@ -479,6 +477,10 @@ void phy_error(struct phy_device *phydev) spin_unlock(&phydev->lock); } +#ifdef CONFIG_PHYCONTROL + +static void phy_change(void *data); + /* phy_interrupt * * description: When a PHY interrupt occurs, the handler disables @@ -672,6 +674,8 @@ void phy_start(struct phy_device *phydev) EXPORT_SYMBOL(phy_stop); EXPORT_SYMBOL(phy_start); +#endif /* CONFIG_PHYCONTROL */ + /* PHY timer which handles the state machine */ static void phy_timer(unsigned long data) { @@ -859,4 +863,3 @@ static void phy_timer(unsigned long data) mod_timer(&phydev->phy_timer, jiffies + PHY_STATE_TIME * HZ); } -#endif /* CONFIG_PHYCONTROL */ diff --git 
a/drivers/net/phy/phy.c.orig b/drivers/net/phy/phy.c.orig deleted file mode 100644 index 6af17cec9ace..000000000000 --- a/drivers/net/phy/phy.c.orig +++ /dev/null @@ -1,860 +0,0 @@ -/* - * drivers/net/phy/phy.c - * - * Framework for configuring and reading PHY devices - * Based on code in sungem_phy.c and gianfar_phy.c - * - * Author: Andy Fleming - * - * Copyright (c) 2004 Freescale Semiconductor, Inc. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the - * Free Software Foundation; either version 2 of the License, or (at your - * option) any later version. - * - */ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include - -static void phy_change(void *data); -static void phy_timer(unsigned long data); - -/* Convenience function to print out the current phy status - */ -void phy_print_status(struct phy_device *phydev) -{ - pr_info("%s: Link is %s", phydev->dev.bus_id, - phydev->link ? "Up" : "Down"); - if (phydev->link) - printk(" - %d/%s", phydev->speed, - DUPLEX_FULL == phydev->duplex ? - "Full" : "Half"); - - printk("\n"); -} -EXPORT_SYMBOL(phy_print_status); - - -/* Convenience functions for reading/writing a given PHY - * register. They MUST NOT be called from interrupt context, - * because the bus read/write functions may wait for an interrupt - * to conclude the operation. 
*/ -int phy_read(struct phy_device *phydev, u16 regnum) -{ - int retval; - struct mii_bus *bus = phydev->bus; - - spin_lock_bh(&bus->mdio_lock); - retval = bus->read(bus, phydev->addr, regnum); - spin_unlock_bh(&bus->mdio_lock); - - return retval; -} -EXPORT_SYMBOL(phy_read); - -int phy_write(struct phy_device *phydev, u16 regnum, u16 val) -{ - int err; - struct mii_bus *bus = phydev->bus; - - spin_lock_bh(&bus->mdio_lock); - err = bus->write(bus, phydev->addr, regnum, val); - spin_unlock_bh(&bus->mdio_lock); - - return err; -} -EXPORT_SYMBOL(phy_write); - - -int phy_clear_interrupt(struct phy_device *phydev) -{ - int err = 0; - - if (phydev->drv->ack_interrupt) - err = phydev->drv->ack_interrupt(phydev); - - return err; -} - - -int phy_config_interrupt(struct phy_device *phydev, u32 interrupts) -{ - int err = 0; - - phydev->interrupts = interrupts; - if (phydev->drv->config_intr) - err = phydev->drv->config_intr(phydev); - - return err; -} - - -/* phy_aneg_done - * - * description: Reads the status register and returns 0 either if - * auto-negotiation is incomplete, or if there was an error. - * Returns BMSR_ANEGCOMPLETE if auto-negotiation is done. - */ -static inline int phy_aneg_done(struct phy_device *phydev) -{ - int retval; - - retval = phy_read(phydev, MII_BMSR); - - return (retval < 0) ? retval : (retval & BMSR_ANEGCOMPLETE); -} - -/* phy_start_aneg - * - * description: Calls the PHY driver's config_aneg, and then - * sets the PHY state to PHY_AN if auto-negotiation is enabled, - * and to PHY_FORCING if auto-negotiation is disabled. Unless - * the PHY is currently HALTED. 
- */ -int phy_start_aneg(struct phy_device *phydev) -{ - int err; - - spin_lock(&phydev->lock); - - if (AUTONEG_DISABLE == phydev->autoneg) - phy_sanitize_settings(phydev); - - err = phydev->drv->config_aneg(phydev); - - if (err < 0) - goto out_unlock; - - if (phydev->state != PHY_HALTED) { - if (AUTONEG_ENABLE == phydev->autoneg) { - phydev->state = PHY_AN; - phydev->link_timeout = PHY_AN_TIMEOUT; - } else { - phydev->state = PHY_FORCING; - phydev->link_timeout = PHY_FORCE_TIMEOUT; - } - } - -out_unlock: - spin_unlock(&phydev->lock); - return err; -} -EXPORT_SYMBOL(phy_start_aneg); - - -/* A structure for mapping a particular speed and duplex - * combination to a particular SUPPORTED and ADVERTISED value */ -struct phy_setting { - int speed; - int duplex; - u32 setting; -}; - -/* A mapping of all SUPPORTED settings to speed/duplex */ -static struct phy_setting settings[] = { - { - .speed = 10000, - .duplex = DUPLEX_FULL, - .setting = SUPPORTED_10000baseT_Full, - }, - { - .speed = SPEED_1000, - .duplex = DUPLEX_FULL, - .setting = SUPPORTED_1000baseT_Full, - }, - { - .speed = SPEED_1000, - .duplex = DUPLEX_HALF, - .setting = SUPPORTED_1000baseT_Half, - }, - { - .speed = SPEED_100, - .duplex = DUPLEX_FULL, - .setting = SUPPORTED_100baseT_Full, - }, - { - .speed = SPEED_100, - .duplex = DUPLEX_HALF, - .setting = SUPPORTED_100baseT_Half, - }, - { - .speed = SPEED_10, - .duplex = DUPLEX_FULL, - .setting = SUPPORTED_10baseT_Full, - }, - { - .speed = SPEED_10, - .duplex = DUPLEX_HALF, - .setting = SUPPORTED_10baseT_Half, - }, -}; - -#define MAX_NUM_SETTINGS (sizeof(settings)/sizeof(struct phy_setting)) - -/* phy_find_setting - * - * description: Searches the settings array for the setting which - * matches the desired speed and duplex, and returns the index - * of that setting. Returns the index of the last setting if - * none of the others match. 
- */ -static inline int phy_find_setting(int speed, int duplex) -{ - int idx = 0; - - while (idx < ARRAY_SIZE(settings) && - (settings[idx].speed != speed || - settings[idx].duplex != duplex)) - idx++; - - return idx < MAX_NUM_SETTINGS ? idx : MAX_NUM_SETTINGS - 1; -} - -/* phy_find_valid - * idx: The first index in settings[] to search - * features: A mask of the valid settings - * - * description: Returns the index of the first valid setting less - * than or equal to the one pointed to by idx, as determined by - * the mask in features. Returns the index of the last setting - * if nothing else matches. - */ -static inline int phy_find_valid(int idx, u32 features) -{ - while (idx < MAX_NUM_SETTINGS && !(settings[idx].setting & features)) - idx++; - - return idx < MAX_NUM_SETTINGS ? idx : MAX_NUM_SETTINGS - 1; -} - -/* phy_sanitize_settings - * - * description: Make sure the PHY is set to supported speeds and - * duplexes. Drop down by one in this order: 1000/FULL, - * 1000/HALF, 100/FULL, 100/HALF, 10/FULL, 10/HALF - */ -void phy_sanitize_settings(struct phy_device *phydev) -{ - u32 features = phydev->supported; - int idx; - - /* Sanitize settings based on PHY capabilities */ - if ((features & SUPPORTED_Autoneg) == 0) - phydev->autoneg = 0; - - idx = phy_find_valid(phy_find_setting(phydev->speed, phydev->duplex), - features); - - phydev->speed = settings[idx].speed; - phydev->duplex = settings[idx].duplex; -} -EXPORT_SYMBOL(phy_sanitize_settings); - -/* phy_force_reduction - * - * description: Reduces the speed/duplex settings by - * one notch. The order is so: - * 1000/FULL, 1000/HALF, 100/FULL, 100/HALF, - * 10/FULL, 10/HALF. The function bottoms out at 10/HALF. 
- */ -static void phy_force_reduction(struct phy_device *phydev) -{ - int idx; - - idx = phy_find_setting(phydev->speed, phydev->duplex); - - idx++; - - idx = phy_find_valid(idx, phydev->supported); - - phydev->speed = settings[idx].speed; - phydev->duplex = settings[idx].duplex; - - pr_info("Trying %d/%s\n", phydev->speed, - DUPLEX_FULL == phydev->duplex ? - "FULL" : "HALF"); -} - -/* phy_ethtool_sset: - * A generic ethtool sset function. Handles all the details - * - * A few notes about parameter checking: - * - We don't set port or transceiver, so we don't care what they - * were set to. - * - phy_start_aneg() will make sure forced settings are sane, and - * choose the next best ones from the ones selected, so we don't - * care if ethtool tries to give us bad values - */ -int phy_ethtool_sset(struct phy_device *phydev, struct ethtool_cmd *cmd) -{ - if (cmd->phy_address != phydev->addr) - return -EINVAL; - - /* We make sure that we don't pass unsupported - * values in to the PHY */ - cmd->advertising &= phydev->supported; - - /* Verify the settings we care about. 
*/ - if (cmd->autoneg != AUTONEG_ENABLE && cmd->autoneg != AUTONEG_DISABLE) - return -EINVAL; - - if (cmd->autoneg == AUTONEG_ENABLE && cmd->advertising == 0) - return -EINVAL; - - if (cmd->autoneg == AUTONEG_DISABLE - && ((cmd->speed != SPEED_1000 - && cmd->speed != SPEED_100 - && cmd->speed != SPEED_10) - || (cmd->duplex != DUPLEX_HALF - && cmd->duplex != DUPLEX_FULL))) - return -EINVAL; - - phydev->autoneg = cmd->autoneg; - - phydev->speed = cmd->speed; - - phydev->advertising = cmd->advertising; - - if (AUTONEG_ENABLE == cmd->autoneg) - phydev->advertising |= ADVERTISED_Autoneg; - else - phydev->advertising &= ~ADVERTISED_Autoneg; - - phydev->duplex = cmd->duplex; - - /* Restart the PHY */ - phy_start_aneg(phydev); - - return 0; -} - -int phy_ethtool_gset(struct phy_device *phydev, struct ethtool_cmd *cmd) -{ - cmd->supported = phydev->supported; - - cmd->advertising = phydev->advertising; - - cmd->speed = phydev->speed; - cmd->duplex = phydev->duplex; - cmd->port = PORT_MII; - cmd->phy_address = phydev->addr; - cmd->transceiver = XCVR_EXTERNAL; - cmd->autoneg = phydev->autoneg; - - return 0; -} - - -/* Note that this function is currently incompatible with the - * PHYCONTROL layer. It changes registers without regard to - * current state. 
Use at own risk - */ -int phy_mii_ioctl(struct phy_device *phydev, - struct mii_ioctl_data *mii_data, int cmd) -{ - u16 val = mii_data->val_in; - - switch (cmd) { - case SIOCGMIIPHY: - mii_data->phy_id = phydev->addr; - break; - case SIOCGMIIREG: - mii_data->val_out = phy_read(phydev, mii_data->reg_num); - break; - - case SIOCSMIIREG: - if (!capable(CAP_NET_ADMIN)) - return -EPERM; - - if (mii_data->phy_id == phydev->addr) { - switch(mii_data->reg_num) { - case MII_BMCR: - if (val & (BMCR_RESET|BMCR_ANENABLE)) - phydev->autoneg = AUTONEG_DISABLE; - else - phydev->autoneg = AUTONEG_ENABLE; - if ((!phydev->autoneg) && (val & BMCR_FULLDPLX)) - phydev->duplex = DUPLEX_FULL; - else - phydev->duplex = DUPLEX_HALF; - break; - case MII_ADVERTISE: - phydev->advertising = val; - break; - default: - /* do nothing */ - break; - } - } - - phy_write(phydev, mii_data->reg_num, val); - - if (mii_data->reg_num == MII_BMCR - && val & BMCR_RESET - && phydev->drv->config_init) - phydev->drv->config_init(phydev); - break; - } - - return 0; -} - -/* phy_start_machine: - * - * description: The PHY infrastructure can run a state machine - * which tracks whether the PHY is starting up, negotiating, - * etc. This function starts the timer which tracks the state - * of the PHY. If you want to be notified when the state - * changes, pass in the callback, otherwise, pass NULL. If you - * want to maintain your own state machine, do not call this - * function. */ -void phy_start_machine(struct phy_device *phydev, - void (*handler)(struct net_device *)) -{ - phydev->adjust_state = handler; - - init_timer(&phydev->phy_timer); - phydev->phy_timer.function = &phy_timer; - phydev->phy_timer.data = (unsigned long) phydev; - mod_timer(&phydev->phy_timer, jiffies + HZ); -} - -/* phy_stop_machine - * - * description: Stops the state machine timer, sets the state to - * UP (unless it wasn't up yet), and then frees the interrupt, - * if it is in use. This function must be called BEFORE - * phy_detach. 
- */ -void phy_stop_machine(struct phy_device *phydev) -{ - del_timer_sync(&phydev->phy_timer); - - spin_lock(&phydev->lock); - if (phydev->state > PHY_UP) - phydev->state = PHY_UP; - spin_unlock(&phydev->lock); - - if (phydev->irq != PHY_POLL) - phy_stop_interrupts(phydev); - - phydev->adjust_state = NULL; -} - -#ifdef CONFIG_PHYCONTROL -/* phy_error: - * - * Moves the PHY to the HALTED state in response to a read - * or write error, and tells the controller the link is down. - * Must not be called from interrupt context, or while the - * phydev->lock is held. - */ -void phy_error(struct phy_device *phydev) -{ - spin_lock(&phydev->lock); - phydev->state = PHY_HALTED; - spin_unlock(&phydev->lock); -} - -/* phy_interrupt - * - * description: When a PHY interrupt occurs, the handler disables - * interrupts, and schedules a work task to clear the interrupt. - */ -static irqreturn_t phy_interrupt(int irq, void *phy_dat, struct pt_regs *regs) -{ - struct phy_device *phydev = phy_dat; - - /* The MDIO bus is not allowed to be written in interrupt - * context, so we need to disable the irq here. A work - * queue will write the PHY to disable and clear the - * interrupt, and then reenable the irq line. 
*/ - disable_irq_nosync(irq); - - schedule_work(&phydev->phy_queue); - - return IRQ_HANDLED; -} - -/* Enable the interrupts from the PHY side */ -int phy_enable_interrupts(struct phy_device *phydev) -{ - int err; - - err = phy_clear_interrupt(phydev); - - if (err < 0) - return err; - - err = phy_config_interrupt(phydev, PHY_INTERRUPT_ENABLED); - - return err; -} - -/* Disable the PHY interrupts from the PHY side */ -int phy_disable_interrupts(struct phy_device *phydev) -{ - int err; - - /* Disable PHY interrupts */ - err = phy_config_interrupt(phydev, PHY_INTERRUPT_DISABLED); - - if (err) - goto phy_err; - - /* Clear the interrupt */ - err = phy_clear_interrupt(phydev); - - if (err) - goto phy_err; - - return 0; - -phy_err: - phy_error(phydev); - - return err; -} - -/* phy_start_interrupts - * - * description: Request the interrupt for the given PHY. If - * this fails, then we set irq to PHY_POLL. - * Otherwise, we enable the interrupts in the PHY. - * Returns 0 on success. - * This should only be called with a valid IRQ number. 
- */ -int phy_start_interrupts(struct phy_device *phydev) -{ - int err = 0; - - INIT_WORK(&phydev->phy_queue, phy_change, phydev); - - if (request_irq(phydev->irq, phy_interrupt, - SA_SHIRQ, - "phy_interrupt", - phydev) < 0) { - printk(KERN_WARNING "%s: Can't get IRQ %d (PHY)\n", - phydev->bus->name, - phydev->irq); - phydev->irq = PHY_POLL; - return 0; - } - - err = phy_enable_interrupts(phydev); - - return err; -} -EXPORT_SYMBOL(phy_start_interrupts); - -int phy_stop_interrupts(struct phy_device *phydev) -{ - int err; - - err = phy_disable_interrupts(phydev); - - if (err) - phy_error(phydev); - - free_irq(phydev->irq, phydev); - - return err; -} -EXPORT_SYMBOL(phy_stop_interrupts); - - -/* Scheduled by the phy_interrupt/timer to handle PHY changes */ -static void phy_change(void *data) -{ - int err; - struct phy_device *phydev = data; - - err = phy_disable_interrupts(phydev); - - if (err) - goto phy_err; - - spin_lock(&phydev->lock); - if ((PHY_RUNNING == phydev->state) || (PHY_NOLINK == phydev->state)) - phydev->state = PHY_CHANGELINK; - spin_unlock(&phydev->lock); - - enable_irq(phydev->irq); - - /* Reenable interrupts */ - err = phy_config_interrupt(phydev, PHY_INTERRUPT_ENABLED); - - if (err) - goto irq_enable_err; - - return; - -irq_enable_err: - disable_irq(phydev->irq); -phy_err: - phy_error(phydev); -} - -/* Bring down the PHY link, and stop checking the status. */ -void phy_stop(struct phy_device *phydev) -{ - spin_lock(&phydev->lock); - - if (PHY_HALTED == phydev->state) - goto out_unlock; - - if (phydev->irq != PHY_POLL) { - /* Clear any pending interrupts */ - phy_clear_interrupt(phydev); - - /* Disable PHY Interrupts */ - phy_config_interrupt(phydev, PHY_INTERRUPT_DISABLED); - } - - phydev->state = PHY_HALTED; - -out_unlock: - spin_unlock(&phydev->lock); -} - - -/* phy_start - * - * description: Indicates the attached device's readiness to - * handle PHY-related work. 
Used during startup to start the - * PHY, and after a call to phy_stop() to resume operation. - * Also used to indicate the MDIO bus has cleared an error - * condition. - */ -void phy_start(struct phy_device *phydev) -{ - spin_lock(&phydev->lock); - - switch (phydev->state) { - case PHY_STARTING: - phydev->state = PHY_PENDING; - break; - case PHY_READY: - phydev->state = PHY_UP; - break; - case PHY_HALTED: - phydev->state = PHY_RESUMING; - default: - break; - } - spin_unlock(&phydev->lock); -} -EXPORT_SYMBOL(phy_stop); -EXPORT_SYMBOL(phy_start); - -/* PHY timer which handles the state machine */ -static void phy_timer(unsigned long data) -{ - struct phy_device *phydev = (struct phy_device *)data; - int needs_aneg = 0; - int err = 0; - - spin_lock(&phydev->lock); - - if (phydev->adjust_state) - phydev->adjust_state(phydev->attached_dev); - - switch(phydev->state) { - case PHY_DOWN: - case PHY_STARTING: - case PHY_READY: - case PHY_PENDING: - break; - case PHY_UP: - needs_aneg = 1; - - phydev->link_timeout = PHY_AN_TIMEOUT; - - break; - case PHY_AN: - /* Check if negotiation is done. 
Break - * if there's an error */ - err = phy_aneg_done(phydev); - if (err < 0) - break; - - /* If auto-negotiation is done, we change to - * either RUNNING, or NOLINK */ - if (err > 0) { - err = phy_read_status(phydev); - - if (err) - break; - - if (phydev->link) { - phydev->state = PHY_RUNNING; - netif_carrier_on(phydev->attached_dev); - } else { - phydev->state = PHY_NOLINK; - netif_carrier_off(phydev->attached_dev); - } - - phydev->adjust_link(phydev->attached_dev); - - } else if (0 == phydev->link_timeout--) { - /* The counter expired, so either we - * switch to forced mode, or the - * magic_aneg bit exists, and we try aneg - * again */ - if (!(phydev->drv->flags & PHY_HAS_MAGICANEG)) { - int idx; - - /* We'll start from the - * fastest speed, and work - * our way down */ - idx = phy_find_valid(0, - phydev->supported); - - phydev->speed = settings[idx].speed; - phydev->duplex = settings[idx].duplex; - - phydev->autoneg = AUTONEG_DISABLE; - phydev->state = PHY_FORCING; - phydev->link_timeout = - PHY_FORCE_TIMEOUT; - - pr_info("Trying %d/%s\n", - phydev->speed, - DUPLEX_FULL == - phydev->duplex ? 
- "FULL" : "HALF"); - } - - needs_aneg = 1; - } - break; - case PHY_NOLINK: - err = phy_read_status(phydev); - - if (err) - break; - - if (phydev->link) { - phydev->state = PHY_RUNNING; - netif_carrier_on(phydev->attached_dev); - phydev->adjust_link(phydev->attached_dev); - } - break; - case PHY_FORCING: - err = phy_read_status(phydev); - - if (err) - break; - - if (phydev->link) { - phydev->state = PHY_RUNNING; - netif_carrier_on(phydev->attached_dev); - } else { - if (0 == phydev->link_timeout--) { - phy_force_reduction(phydev); - needs_aneg = 1; - } - } - - phydev->adjust_link(phydev->attached_dev); - break; - case PHY_RUNNING: - /* Only register a CHANGE if we are - * polling */ - if (PHY_POLL == phydev->irq) - phydev->state = PHY_CHANGELINK; - break; - case PHY_CHANGELINK: - err = phy_read_status(phydev); - - if (err) - break; - - if (phydev->link) { - phydev->state = PHY_RUNNING; - netif_carrier_on(phydev->attached_dev); - } else { - phydev->state = PHY_NOLINK; - netif_carrier_off(phydev->attached_dev); - } - - phydev->adjust_link(phydev->attached_dev); - - if (PHY_POLL != phydev->irq) - err = phy_config_interrupt(phydev, - PHY_INTERRUPT_ENABLED); - break; - case PHY_HALTED: - if (phydev->link) { - phydev->link = 0; - netif_carrier_off(phydev->attached_dev); - phydev->adjust_link(phydev->attached_dev); - } - break; - case PHY_RESUMING: - - err = phy_clear_interrupt(phydev); - - if (err) - break; - - err = phy_config_interrupt(phydev, - PHY_INTERRUPT_ENABLED); - - if (err) - break; - - if (AUTONEG_ENABLE == phydev->autoneg) { - err = phy_aneg_done(phydev); - if (err < 0) - break; - - /* err > 0 if AN is done. 
- * Otherwise, it's 0, and we're - * still waiting for AN */ - if (err > 0) { - phydev->state = PHY_RUNNING; - } else { - phydev->state = PHY_AN; - phydev->link_timeout = PHY_AN_TIMEOUT; - } - } else - phydev->state = PHY_RUNNING; - break; - } - - spin_unlock(&phydev->lock); - - if (needs_aneg) - err = phy_start_aneg(phydev); - - if (err < 0) - phy_error(phydev); - - mod_timer(&phydev->phy_timer, jiffies + PHY_STATE_TIME * HZ); -} - -#endif /* CONFIG_PHYCONTROL */ diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c index f0595af4c837..c11138330fed 100644 --- a/drivers/net/phy/phy_device.c +++ b/drivers/net/phy/phy_device.c @@ -39,6 +39,19 @@ #include #include +static int genphy_config_init(struct phy_device *phydev); + +static struct phy_driver genphy_driver = { + .phy_id = 0xffffffff, + .phy_id_mask = 0xffffffff, + .name = "Generic PHY", + .config_init = genphy_config_init, + .features = 0, + .config_aneg = genphy_config_aneg, + .read_status = genphy_read_status, + .driver = {.owner = THIS_MODULE, }, +}; + /* get_phy_device * * description: Reads the ID registers of the PHY at addr on the @@ -656,27 +669,32 @@ void phy_driver_unregister(struct phy_driver *drv) } EXPORT_SYMBOL(phy_driver_unregister); -static struct phy_driver genphy_driver = { - .phy_id = 0xffffffff, - .phy_id_mask = 0xffffffff, - .name = "Generic PHY", - .config_init = genphy_config_init, - .features = 0, - .config_aneg = genphy_config_aneg, - .read_status = genphy_read_status, - .driver = {.owner = THIS_MODULE, }, -}; -static int __init genphy_init(void) +static int __init phy_init(void) { - return phy_driver_register(&genphy_driver); + int rc; + extern int mdio_bus_init(void); + rc = phy_driver_register(&genphy_driver); + if (rc) + goto out; + + rc = mdio_bus_init(); + if (rc) + goto out_unreg; + + return 0; + +out_unreg: + phy_driver_unregister(&genphy_driver); +out: + return rc; } -static void __exit genphy_exit(void) +static void __exit phy_exit(void) { 
phy_driver_unregister(&genphy_driver); } -module_init(genphy_init); -module_exit(genphy_exit); +module_init(phy_init); +module_exit(phy_exit); diff --git a/include/linux/phy.h b/include/linux/phy.h index 3404804dc22d..72cb67b66e0c 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -374,5 +374,4 @@ int phy_start_interrupts(struct phy_device *phydev); void phy_print_status(struct phy_device *phydev); extern struct bus_type mdio_bus_type; -extern struct phy_driver genphy_driver; #endif /* __PHY_H */ From 2bf69b5fe90b3246ab50064c5a690a363e8c53e2 Mon Sep 17 00:00:00 2001 From: Jeff Garzik Date: Thu, 11 Aug 2005 02:47:54 -0400 Subject: [PATCH 079/584] phy subsystem: more cleanups - unexport symbols never used outside of home module - remove dead code - remove CONFIG_PHYCONTROL, make it unconditionally enabled --- drivers/net/phy/Kconfig | 8 -- drivers/net/phy/mdio_bus.c | 74 ------------- drivers/net/phy/phy.c | 197 ++--------------------------------- drivers/net/phy/phy_device.c | 130 +---------------------- include/linux/phy.h | 17 --- 5 files changed, 12 insertions(+), 414 deletions(-) diff --git a/drivers/net/phy/Kconfig b/drivers/net/phy/Kconfig index c2f1bf1d02d2..6450bd71deb4 100644 --- a/drivers/net/phy/Kconfig +++ b/drivers/net/phy/Kconfig @@ -12,14 +12,6 @@ config PHYLIB devices. This option provides infrastructure for managing PHY devices. -config PHYCONTROL - bool "Support for automatically handling PHY state changes" - depends on PHYLIB - help - Adds code to perform all the work for keeping PHY link - state (speed/duplex/etc) up-to-date. Also handles - interrupts. 
- comment "MII PHY device drivers" depends on PHYLIB diff --git a/drivers/net/phy/mdio_bus.c b/drivers/net/phy/mdio_bus.c index 5fbea6acfe80..d5a05be28818 100644 --- a/drivers/net/phy/mdio_bus.c +++ b/drivers/net/phy/mdio_bus.c @@ -38,80 +38,6 @@ #include #include -/* mdiobus_register - * - * description: Called by a bus driver to bring up all the PHYs - * on a given bus, and attach them to the bus - */ -int mdiobus_register(struct mii_bus *bus) -{ - int i; - int err = 0; - - spin_lock_init(&bus->mdio_lock); - - if (NULL == bus || NULL == bus->name || - NULL == bus->read || - NULL == bus->write) - return -EINVAL; - - if (bus->reset) - bus->reset(bus); - - for (i = 0; i < PHY_MAX_ADDR; i++) { - struct phy_device *phydev; - - phydev = get_phy_device(bus, i); - - if (IS_ERR(phydev)) - return PTR_ERR(phydev); - - /* There's a PHY at this address - * We need to set: - * 1) IRQ - * 2) bus_id - * 3) parent - * 4) bus - * 5) mii_bus - * And, we need to register it */ - if (phydev) { - phydev->irq = bus->irq[i]; - - phydev->dev.parent = bus->dev; - phydev->dev.bus = &mdio_bus_type; - sprintf(phydev->dev.bus_id, "phy%d:%d", bus->id, i); - - phydev->bus = bus; - - err = device_register(&phydev->dev); - - if (err) - printk(KERN_ERR "phy %d failed to register\n", - i); - } - - bus->phy_map[i] = phydev; - } - - pr_info("%s: probed\n", bus->name); - - return err; -} -EXPORT_SYMBOL(mdiobus_register); - -void mdiobus_unregister(struct mii_bus *bus) -{ - int i; - - for (i = 0; i < PHY_MAX_ADDR; i++) { - if (bus->phy_map[i]) { - device_unregister(&bus->phy_map[i]->dev); - kfree(bus->phy_map[i]); - } - } -} -EXPORT_SYMBOL(mdiobus_unregister); - /* mdio_bus_match * * description: Given a PHY device, and a PHY driver, return 1 if diff --git a/drivers/net/phy/phy.c b/drivers/net/phy/phy.c index 934065dd6371..d3e43631b89b 100644 --- a/drivers/net/phy/phy.c +++ b/drivers/net/phy/phy.c @@ -40,21 +40,9 @@ #include static void phy_timer(unsigned long data); - -/* Convenience function to print 
out the current phy status - */ -void phy_print_status(struct phy_device *phydev) -{ - pr_info("%s: Link is %s", phydev->dev.bus_id, - phydev->link ? "Up" : "Down"); - if (phydev->link) - printk(" - %d/%s", phydev->speed, - DUPLEX_FULL == phydev->duplex ? - "Full" : "Half"); - - printk("\n"); -} -EXPORT_SYMBOL(phy_print_status); +static int phy_disable_interrupts(struct phy_device *phydev); +static void phy_sanitize_settings(struct phy_device *phydev); +static int phy_stop_interrupts(struct phy_device *phydev); /* Convenience functions for reading/writing a given PHY @@ -133,7 +121,7 @@ static inline int phy_aneg_done(struct phy_device *phydev) * and to PHY_FORCING if auto-negotiation is disabled. Unless * the PHY is currently HALTED. */ -int phy_start_aneg(struct phy_device *phydev) +static int phy_start_aneg(struct phy_device *phydev) { int err; @@ -161,8 +149,6 @@ out_unlock: spin_unlock(&phydev->lock); return err; } -EXPORT_SYMBOL(phy_start_aneg); - /* A structure for mapping a particular speed and duplex * combination to a particular SUPPORTED and ADVERTISED value */ @@ -255,7 +241,7 @@ static inline int phy_find_valid(int idx, u32 features) * duplexes. Drop down by one in this order: 1000/FULL, * 1000/HALF, 100/FULL, 100/HALF, 10/FULL, 10/HALF */ -void phy_sanitize_settings(struct phy_device *phydev) +static void phy_sanitize_settings(struct phy_device *phydev) { u32 features = phydev->supported; int idx; @@ -270,7 +256,6 @@ void phy_sanitize_settings(struct phy_device *phydev) phydev->speed = settings[idx].speed; phydev->duplex = settings[idx].duplex; } -EXPORT_SYMBOL(phy_sanitize_settings); /* phy_force_reduction * @@ -477,48 +462,22 @@ void phy_error(struct phy_device *phydev) spin_unlock(&phydev->lock); } -#ifdef CONFIG_PHYCONTROL - -static void phy_change(void *data); - -/* phy_interrupt - * - * description: When a PHY interrupt occurs, the handler disables - * interrupts, and schedules a work task to clear the interrupt. 
- */ -static irqreturn_t phy_interrupt(int irq, void *phy_dat, struct pt_regs *regs) -{ - struct phy_device *phydev = phy_dat; - - /* The MDIO bus is not allowed to be written in interrupt - * context, so we need to disable the irq here. A work - * queue will write the PHY to disable and clear the - * interrupt, and then reenable the irq line. */ - disable_irq_nosync(irq); - - schedule_work(&phydev->phy_queue); - - return IRQ_HANDLED; -} - -/* Enable the interrupts from the PHY side */ -int phy_enable_interrupts(struct phy_device *phydev) +static int phy_stop_interrupts(struct phy_device *phydev) { int err; - err = phy_clear_interrupt(phydev); + err = phy_disable_interrupts(phydev); - if (err < 0) - return err; + if (err) + phy_error(phydev); - err = phy_config_interrupt(phydev, PHY_INTERRUPT_ENABLED); + free_irq(phydev->irq, phydev); return err; } -EXPORT_SYMBOL(phy_enable_interrupts); /* Disable the PHY interrupts from the PHY side */ -int phy_disable_interrupts(struct phy_device *phydev) +static int phy_disable_interrupts(struct phy_device *phydev) { int err; @@ -541,140 +500,6 @@ phy_err: return err; } -EXPORT_SYMBOL(phy_disable_interrupts); - -/* phy_start_interrupts - * - * description: Request the interrupt for the given PHY. If - * this fails, then we set irq to PHY_POLL. - * Otherwise, we enable the interrupts in the PHY. - * Returns 0 on success. - * This should only be called with a valid IRQ number. 
- */ -int phy_start_interrupts(struct phy_device *phydev) -{ - int err = 0; - - INIT_WORK(&phydev->phy_queue, phy_change, phydev); - - if (request_irq(phydev->irq, phy_interrupt, - SA_SHIRQ, - "phy_interrupt", - phydev) < 0) { - printk(KERN_WARNING "%s: Can't get IRQ %d (PHY)\n", - phydev->bus->name, - phydev->irq); - phydev->irq = PHY_POLL; - return 0; - } - - err = phy_enable_interrupts(phydev); - - return err; -} -EXPORT_SYMBOL(phy_start_interrupts); - -int phy_stop_interrupts(struct phy_device *phydev) -{ - int err; - - err = phy_disable_interrupts(phydev); - - if (err) - phy_error(phydev); - - free_irq(phydev->irq, phydev); - - return err; -} -EXPORT_SYMBOL(phy_stop_interrupts); - - -/* Scheduled by the phy_interrupt/timer to handle PHY changes */ -static void phy_change(void *data) -{ - int err; - struct phy_device *phydev = data; - - err = phy_disable_interrupts(phydev); - - if (err) - goto phy_err; - - spin_lock(&phydev->lock); - if ((PHY_RUNNING == phydev->state) || (PHY_NOLINK == phydev->state)) - phydev->state = PHY_CHANGELINK; - spin_unlock(&phydev->lock); - - enable_irq(phydev->irq); - - /* Reenable interrupts */ - err = phy_config_interrupt(phydev, PHY_INTERRUPT_ENABLED); - - if (err) - goto irq_enable_err; - - return; - -irq_enable_err: - disable_irq(phydev->irq); -phy_err: - phy_error(phydev); -} - -/* Bring down the PHY link, and stop checking the status. */ -void phy_stop(struct phy_device *phydev) -{ - spin_lock(&phydev->lock); - - if (PHY_HALTED == phydev->state) - goto out_unlock; - - if (phydev->irq != PHY_POLL) { - /* Clear any pending interrupts */ - phy_clear_interrupt(phydev); - - /* Disable PHY Interrupts */ - phy_config_interrupt(phydev, PHY_INTERRUPT_DISABLED); - } - - phydev->state = PHY_HALTED; - -out_unlock: - spin_unlock(&phydev->lock); -} - - -/* phy_start - * - * description: Indicates the attached device's readiness to - * handle PHY-related work. 
Used during startup to start the - * PHY, and after a call to phy_stop() to resume operation. - * Also used to indicate the MDIO bus has cleared an error - * condition. - */ -void phy_start(struct phy_device *phydev) -{ - spin_lock(&phydev->lock); - - switch (phydev->state) { - case PHY_STARTING: - phydev->state = PHY_PENDING; - break; - case PHY_READY: - phydev->state = PHY_UP; - break; - case PHY_HALTED: - phydev->state = PHY_RESUMING; - default: - break; - } - spin_unlock(&phydev->lock); -} -EXPORT_SYMBOL(phy_stop); -EXPORT_SYMBOL(phy_start); - -#endif /* CONFIG_PHYCONTROL */ /* PHY timer which handles the state machine */ static void phy_timer(unsigned long data) diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c index c11138330fed..c44d54f6310a 100644 --- a/drivers/net/phy/phy_device.c +++ b/drivers/net/phy/phy_device.c @@ -124,133 +124,6 @@ void phy_prepare_link(struct phy_device *phydev, phydev->adjust_link = handler; } -#ifdef CONFIG_PHYCONTROL -/* phy_connect: - * - * description: Convenience function for connecting ethernet - * devices to PHY devices. The default behavior is for - * the PHY infrastructure to handle everything, and only notify - * the connected driver when the link status changes. If you - * don't want, or can't use the provided functionality, you may - * choose to call only the subset of functions which provide - * the desired functionality. 
- */ -struct phy_device * phy_connect(struct net_device *dev, const char *phy_id, - void (*handler)(struct net_device *), u32 flags) -{ - struct phy_device *phydev; - - phydev = phy_attach(dev, phy_id, flags); - - if (IS_ERR(phydev)) - return phydev; - - phy_prepare_link(phydev, handler); - - phy_start_machine(phydev, NULL); - - if (phydev->irq > 0) - phy_start_interrupts(phydev); - - return phydev; -} -EXPORT_SYMBOL(phy_connect); - -void phy_disconnect(struct phy_device *phydev) -{ - if (phydev->irq > 0) - phy_stop_interrupts(phydev); - - phy_stop_machine(phydev); - - phydev->adjust_link = NULL; - - phy_detach(phydev); -} -EXPORT_SYMBOL(phy_disconnect); - -#endif /* CONFIG_PHYCONTROL */ - -/* phy_attach: - * - * description: Called by drivers to attach to a particular PHY - * device. The phy_device is found, and properly hooked up - * to the phy_driver. If no driver is attached, then the - * genphy_driver is used. The phy_device is given a ptr to - * the attaching device, and given a callback for link status - * change. The phy_device is returned to the attaching - * driver. - */ -static int phy_compare_id(struct device *dev, void *data) -{ - return strcmp((char *)data, dev->bus_id) ? 0 : 1; -} - -struct phy_device *phy_attach(struct net_device *dev, - const char *phy_id, u32 flags) -{ - struct bus_type *bus = &mdio_bus_type; - struct phy_device *phydev; - struct device *d; - - /* Search the list of PHY devices on the mdio bus for the - * PHY with the requested name */ - d = bus_find_device(bus, NULL, (void *)phy_id, phy_compare_id); - - if (d) { - phydev = to_phy_device(d); - } else { - printk(KERN_ERR "%s not found\n", phy_id); - return ERR_PTR(-ENODEV); - } - - /* Assume that if there is no driver, that it doesn't - * exist, and we should use the genphy driver. 
*/ - if (NULL == d->driver) { - int err; - down_write(&d->bus->subsys.rwsem); - d->driver = &genphy_driver.driver; - - err = d->driver->probe(d); - - if (err < 0) - return ERR_PTR(err); - - device_bind_driver(d); - up_write(&d->bus->subsys.rwsem); - } - - if (phydev->attached_dev) { - printk(KERN_ERR "%s: %s already attached\n", - dev->name, phy_id); - return ERR_PTR(-EBUSY); - } - - phydev->attached_dev = dev; - - phydev->dev_flags = flags; - - return phydev; -} -EXPORT_SYMBOL(phy_attach); - -void phy_detach(struct phy_device *phydev) -{ - phydev->attached_dev = NULL; - - /* If the device had no specific driver before (i.e. - it - * was using the generic driver), we unbind the device - * from the generic driver so that there's a chance a - * real driver could be loaded */ - if (phydev->dev.driver == &genphy_driver.driver) { - down_write(&phydev->dev.bus->subsys.rwsem); - device_release_driver(&phydev->dev); - up_write(&phydev->dev.bus->subsys.rwsem); - } -} -EXPORT_SYMBOL(phy_detach); - - /* Generic PHY support and helper functions */ /* genphy_config_advert @@ -259,7 +132,7 @@ EXPORT_SYMBOL(phy_detach); * after sanitizing the values to make sure we only advertise * what is supported */ -int genphy_config_advert(struct phy_device *phydev) +static int genphy_config_advert(struct phy_device *phydev) { u32 advertise; int adv; @@ -317,7 +190,6 @@ int genphy_config_advert(struct phy_device *phydev) return adv; } -EXPORT_SYMBOL(genphy_config_advert); /* genphy_setup_forced * diff --git a/include/linux/phy.h b/include/linux/phy.h index 72cb67b66e0c..4f2b5effc16b 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -334,26 +334,11 @@ int phy_write(struct phy_device *phydev, u16 regnum, u16 val); struct phy_device* get_phy_device(struct mii_bus *bus, int addr); int phy_clear_interrupt(struct phy_device *phydev); int phy_config_interrupt(struct phy_device *phydev, u32 interrupts); -struct phy_device * phy_attach(struct net_device *dev, - const char *phy_id, u32 
flags); -struct phy_device * phy_connect(struct net_device *dev, const char *phy_id, - void (*handler)(struct net_device *), u32 flags); -void phy_disconnect(struct phy_device *phydev); -void phy_detach(struct phy_device *phydev); -void phy_start(struct phy_device *phydev); -void phy_stop(struct phy_device *phydev); -int phy_start_aneg(struct phy_device *phydev); - -int mdiobus_register(struct mii_bus *bus); -void mdiobus_unregister(struct mii_bus *bus); -void phy_sanitize_settings(struct phy_device *phydev); -int phy_stop_interrupts(struct phy_device *phydev); static inline int phy_read_status(struct phy_device *phydev) { return phydev->drv->read_status(phydev); } -int genphy_config_advert(struct phy_device *phydev); int genphy_setup_forced(struct phy_device *phydev); int genphy_restart_aneg(struct phy_device *phydev); int genphy_config_aneg(struct phy_device *phydev); @@ -370,8 +355,6 @@ int phy_ethtool_sset(struct phy_device *phydev, struct ethtool_cmd *cmd); int phy_ethtool_gset(struct phy_device *phydev, struct ethtool_cmd *cmd); int phy_mii_ioctl(struct phy_device *phydev, struct mii_ioctl_data *mii_data, int cmd); -int phy_start_interrupts(struct phy_device *phydev); -void phy_print_status(struct phy_device *phydev); extern struct bus_type mdio_bus_type; #endif /* __PHY_H */ From 972dcafb6d743a6c7611a2e4681ed814e30d6230 Mon Sep 17 00:00:00 2001 From: Douglas Gilbert Date: Thu, 11 Aug 2005 03:35:53 -0400 Subject: [PATCH 080/584] [libata scsi] add START STOP UNIT translation --- drivers/scsi/libata-scsi.c | 56 ++++++++++++++++++++++++++++++++++++++ include/linux/ata.h | 2 ++ 2 files changed, 58 insertions(+) diff --git a/drivers/scsi/libata-scsi.c b/drivers/scsi/libata-scsi.c index 6a75ec2187fd..f58311b8c050 100644 --- a/drivers/scsi/libata-scsi.c +++ b/drivers/scsi/libata-scsi.c @@ -391,6 +391,60 @@ int ata_scsi_error(struct Scsi_Host *host) return 0; } +/** + * ata_scsi_start_stop_xlat - Translate SCSI START STOP UNIT command + * @qc: Storage for translated 
ATA taskfile + * @scsicmd: SCSI command to translate + * + * Sets up an ATA taskfile to issue STANDBY (to stop) or READ VERIFY + * (to start). Perhaps these commands should be preceded by + * CHECK POWER MODE to see what power mode the device is already in. + * [See SAT revision 5 at www.t10.org] + * + * LOCKING: + * spin_lock_irqsave(host_set lock) + * + * RETURNS: + * Zero on success, non-zero on error. + */ + +static unsigned int ata_scsi_start_stop_xlat(struct ata_queued_cmd *qc, + u8 *scsicmd) +{ + struct ata_taskfile *tf = &qc->tf; + + tf->flags |= ATA_TFLAG_DEVICE | ATA_TFLAG_ISADDR; + tf->protocol = ATA_PROT_NODATA; + if (scsicmd[1] & 0x1) { + ; /* ignore IMMED bit, violates sat-r05 */ + } + if (scsicmd[4] & 0x2) + return 1; /* LOEJ bit set not supported */ + if (((scsicmd[4] >> 4) & 0xf) != 0) + return 1; /* power conditions not supported */ + if (scsicmd[4] & 0x1) { + tf->nsect = 1; /* 1 sector, lba=0 */ + tf->lbah = 0x0; + tf->lbam = 0x0; + tf->lbal = 0x0; + tf->device |= ATA_LBA; + tf->command = ATA_CMD_VERIFY; /* READ VERIFY */ + } else { + tf->nsect = 0; /* time period value (0 implies now) */ + tf->command = ATA_CMD_STANDBY; + /* Consider: ATA STANDBY IMMEDIATE command */ + } + /* + * Standby and Idle condition timers could be implemented but that + * would require libata to implement the Power condition mode page + * and allow the user to change it. Changing mode pages requires + * MODE SELECT to be implemented. 
+ */ + + return 0; +} + + /** * ata_scsi_flush_xlat - Translate SCSI SYNCHRONIZE CACHE command * @qc: Storage for translated ATA taskfile @@ -1435,6 +1489,8 @@ static inline ata_xlat_func_t ata_get_xlat_func(struct ata_device *dev, u8 cmd) case VERIFY: case VERIFY_16: return ata_scsi_verify_xlat; + case START_STOP: + return ata_scsi_start_stop_xlat; } return NULL; diff --git a/include/linux/ata.h b/include/linux/ata.h index ca5fcadf9981..9d25e9886d60 100644 --- a/include/linux/ata.h +++ b/include/linux/ata.h @@ -108,6 +108,8 @@ enum { /* ATA device commands */ ATA_CMD_CHK_POWER = 0xE5, /* check power mode */ + ATA_CMD_STANDBY = 0xE2, /* place in standby power mode */ + ATA_CMD_IDLE = 0xE3, /* place in idle power mode */ ATA_CMD_EDD = 0x90, /* execute device diagnostic */ ATA_CMD_FLUSH = 0xE7, ATA_CMD_FLUSH_EXT = 0xEA, From ea8f400c98ec9ae0604bc5a6721174ef68635815 Mon Sep 17 00:00:00 2001 From: Peer Chen Date: Thu, 11 Aug 2005 15:09:23 -0400 Subject: [PATCH 081/584] [netdrvr tulip] Remove ULi-specific code from generic tulip driver It has a separate driver now, 'uli526x'. 
--- drivers/net/tulip/media.c | 36 ---------------------------------- drivers/net/tulip/timer.c | 1 - drivers/net/tulip/tulip.h | 8 ++------ drivers/net/tulip/tulip_core.c | 34 +++++++++++--------------------- 4 files changed, 13 insertions(+), 66 deletions(-) diff --git a/drivers/net/tulip/media.c b/drivers/net/tulip/media.c index e26c31f944bf..f53396fe79c9 100644 --- a/drivers/net/tulip/media.c +++ b/drivers/net/tulip/media.c @@ -81,25 +81,6 @@ int tulip_mdio_read(struct net_device *dev, int phy_id, int location) return retval & 0xffff; } - if(tp->chip_id == ULI526X && tp->revision >= 0x40) { - int value; - int i = 1000; - - value = ioread32(ioaddr + CSR9); - iowrite32(value & 0xFFEFFFFF, ioaddr + CSR9); - - value = (phy_id << 21) | (location << 16) | 0x08000000; - iowrite32(value, ioaddr + CSR10); - - while(--i > 0) { - mdio_delay(); - if(ioread32(ioaddr + CSR10) & 0x10000000) - break; - } - retval = ioread32(ioaddr + CSR10); - spin_unlock_irqrestore(&tp->mii_lock, flags); - return retval & 0xFFFF; - } /* Establish sync by sending at least 32 logic ones. */ for (i = 32; i >= 0; i--) { iowrite32(MDIO_ENB | MDIO_DATA_WRITE1, mdio_addr); @@ -159,23 +140,6 @@ void tulip_mdio_write(struct net_device *dev, int phy_id, int location, int val) spin_unlock_irqrestore(&tp->mii_lock, flags); return; } - if (tp->chip_id == ULI526X && tp->revision >= 0x40) { - int value; - int i = 1000; - - value = ioread32(ioaddr + CSR9); - iowrite32(value & 0xFFEFFFFF, ioaddr + CSR9); - - value = (phy_id << 21) | (location << 16) | 0x04000000 | (val & 0xFFFF); - iowrite32(value, ioaddr + CSR10); - - while(--i > 0) { - if (ioread32(ioaddr + CSR10) & 0x10000000) - break; - } - spin_unlock_irqrestore(&tp->mii_lock, flags); - return; - } /* Establish sync by sending 32 logic ones. 
*/ for (i = 32; i >= 0; i--) { diff --git a/drivers/net/tulip/timer.c b/drivers/net/tulip/timer.c index 691568283553..e058a9fbfe88 100644 --- a/drivers/net/tulip/timer.c +++ b/drivers/net/tulip/timer.c @@ -39,7 +39,6 @@ void tulip_timer(unsigned long data) case MX98713: case COMPEX9881: case DM910X: - case ULI526X: default: { struct medialeaf *mleaf; unsigned char *p; diff --git a/drivers/net/tulip/tulip.h b/drivers/net/tulip/tulip.h index 20346d847d9e..05d2d96f7be2 100644 --- a/drivers/net/tulip/tulip.h +++ b/drivers/net/tulip/tulip.h @@ -88,7 +88,6 @@ enum chips { I21145, DM910X, CONEXANT, - ULI526X }; @@ -482,11 +481,8 @@ static inline void tulip_stop_rxtx(struct tulip_private *tp) static inline void tulip_restart_rxtx(struct tulip_private *tp) { - if(!(tp->chip_id == ULI526X && - (tp->revision == 0x40 || tp->revision == 0x50))) { - tulip_stop_rxtx(tp); - udelay(5); - } + tulip_stop_rxtx(tp); + udelay(5); tulip_start_rxtx(tp); } diff --git a/drivers/net/tulip/tulip_core.c b/drivers/net/tulip/tulip_core.c index d45d8f56e5b4..05da5bea564c 100644 --- a/drivers/net/tulip/tulip_core.c +++ b/drivers/net/tulip/tulip_core.c @@ -199,9 +199,6 @@ struct tulip_chip_table tulip_tbl[] = { { "Conexant LANfinity", 256, 0x0001ebef, HAS_MII | HAS_ACPI, tulip_timer }, - /* ULi526X */ - { "ULi M5261/M5263", 128, 0x0001ebef, - HAS_MII | HAS_MEDIA_TABLE | CSR12_IN_SROM | HAS_ACPI, tulip_timer }, }; @@ -239,8 +236,6 @@ static struct pci_device_id tulip_pci_tbl[] = { { 0x1737, 0xAB09, PCI_ANY_ID, PCI_ANY_ID, 0, 0, COMET }, { 0x1737, 0xAB08, PCI_ANY_ID, PCI_ANY_ID, 0, 0, COMET }, { 0x17B3, 0xAB08, PCI_ANY_ID, PCI_ANY_ID, 0, 0, COMET }, - { 0x10b9, 0x5261, PCI_ANY_ID, PCI_ANY_ID, 0, 0, ULI526X }, /* ALi 1563 integrated ethernet */ - { 0x10b9, 0x5263, PCI_ANY_ID, PCI_ANY_ID, 0, 0, ULI526X }, /* ALi 1563 integrated ethernet */ { 0x10b7, 0x9300, PCI_ANY_ID, PCI_ANY_ID, 0, 0, COMET }, /* 3Com 3CSOHO100B-TX */ { 0x14ea, 0xab08, PCI_ANY_ID, PCI_ANY_ID, 0, 0, COMET }, /* Planex FNW-3602-TX */ { 
} /* terminate list */ @@ -522,7 +517,7 @@ static void tulip_tx_timeout(struct net_device *dev) dev->name); } else if (tp->chip_id == DC21140 || tp->chip_id == DC21142 || tp->chip_id == MX98713 || tp->chip_id == COMPEX9881 - || tp->chip_id == DM910X || tp->chip_id == ULI526X) { + || tp->chip_id == DM910X) { printk(KERN_WARNING "%s: 21140 transmit timed out, status %8.8x, " "SIA %8.8x %8.8x %8.8x %8.8x, resetting...\n", dev->name, ioread32(ioaddr + CSR5), ioread32(ioaddr + CSR12), @@ -1103,18 +1098,16 @@ static void set_rx_mode(struct net_device *dev) entry = tp->cur_tx++ % TX_RING_SIZE; if (entry != 0) { - /* Avoid a chip errata by prefixing a dummy entry. Don't do - this on the ULI526X as it triggers a different problem */ - if (!(tp->chip_id == ULI526X && (tp->revision == 0x40 || tp->revision == 0x50))) { - tp->tx_buffers[entry].skb = NULL; - tp->tx_buffers[entry].mapping = 0; - tp->tx_ring[entry].length = - (entry == TX_RING_SIZE-1) ? cpu_to_le32(DESC_RING_WRAP) : 0; - tp->tx_ring[entry].buffer1 = 0; - /* Must set DescOwned later to avoid race with chip */ - dummy = entry; - entry = tp->cur_tx++ % TX_RING_SIZE; - } + /* Avoid a chip errata by prefixing a dummy entry. */ + tp->tx_buffers[entry].skb = NULL; + tp->tx_buffers[entry].mapping = 0; + tp->tx_ring[entry].length = + (entry == TX_RING_SIZE-1) ? 
cpu_to_le32(DESC_RING_WRAP) : 0; + tp->tx_ring[entry].buffer1 = 0; + /* Must set DescOwned later to avoid race with chip */ + dummy = entry; + entry = tp->cur_tx++ % TX_RING_SIZE; + } tp->tx_buffers[entry].skb = NULL; @@ -1235,10 +1228,6 @@ static int tulip_uli_dm_quirk(struct pci_dev *pdev) { if (pdev->vendor == 0x1282 && pdev->device == 0x9102) return 1; - if (pdev->vendor == 0x10b9 && pdev->device == 0x5261) - return 1; - if (pdev->vendor == 0x10b9 && pdev->device == 0x5263) - return 1; return 0; } @@ -1680,7 +1669,6 @@ static int __devinit tulip_init_one (struct pci_dev *pdev, switch (chip_idx) { case DC21140: case DM910X: - case ULI526X: default: if (tp->mtable) iowrite32(tp->mtable->csr12dir | 0x100, ioaddr + CSR12); From 9c15d24f2420c2155eccd32d7ab909a9c0e63c2b Mon Sep 17 00:00:00 2001 From: Malli Chilakala Date: Thu, 11 Aug 2005 13:58:02 -0700 Subject: [PATCH 082/584] [PATCH] ixgb: Set RXDCTL:PTHRESH/HTHRESH to zero Set RXDCTL:PTHRESH/HTHRESH to zero Signed-off-by: Mallikarjuna R Chilakala Signed-off-by: Ganesh Venkatesan Signed-off-by: John Ronciak Signed-off-by: Jeff Garzik --- drivers/net/ixgb/ixgb_main.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/net/ixgb/ixgb_main.c b/drivers/net/ixgb/ixgb_main.c index 097b90ccf575..492783be205e 100644 --- a/drivers/net/ixgb/ixgb_main.c +++ b/drivers/net/ixgb/ixgb_main.c @@ -145,10 +145,12 @@ MODULE_LICENSE("GPL"); MODULE_VERSION(DRV_VERSION); /* some defines for controlling descriptor fetches in h/w */ -#define RXDCTL_PTHRESH_DEFAULT 128 /* chip considers prefech below this */ -#define RXDCTL_HTHRESH_DEFAULT 16 /* chip will only prefetch if tail is - pushed this many descriptors from head */ #define RXDCTL_WTHRESH_DEFAULT 16 /* chip writes back at this many or RXT0 */ +#define RXDCTL_PTHRESH_DEFAULT 0 /* chip considers prefech below + * this */ +#define RXDCTL_HTHRESH_DEFAULT 0 /* chip will only prefetch if tail + * is pushed this many descriptors + * from head */ /** * 
ixgb_init_module - Driver Registration Routine From 51b54b512cd26c4477ccd57b8d3736b99ccef7a0 Mon Sep 17 00:00:00 2001 From: Malli Chilakala Date: Thu, 11 Aug 2005 13:58:23 -0700 Subject: [PATCH 083/584] [PATCH] ixgb: Fix unnecessary link state messages Fix unnecessary link state messages Signed-off-by: Mallikarjuna R Chilakala Signed-off-by: Ganesh Venkatesan Signed-off-by: John Ronciak Signed-off-by: Jeff Garzik --- drivers/net/ixgb/ixgb_ethtool.c | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/drivers/net/ixgb/ixgb_ethtool.c b/drivers/net/ixgb/ixgb_ethtool.c index 3fa113854eeb..94bc3d41cfa3 100644 --- a/drivers/net/ixgb/ixgb_ethtool.c +++ b/drivers/net/ixgb/ixgb_ethtool.c @@ -130,6 +130,12 @@ ixgb_set_settings(struct net_device *netdev, struct ethtool_cmd *ecmd) ixgb_down(adapter, TRUE); ixgb_reset(adapter); ixgb_up(adapter); + /* be optimistic about our link, since we were up before */ + adapter->link_speed = 10000; + adapter->link_duplex = FULL_DUPLEX; + netif_carrier_on(netdev); + netif_wake_queue(netdev); + } else ixgb_reset(adapter); @@ -177,6 +183,11 @@ ixgb_set_pauseparam(struct net_device *netdev, if(netif_running(adapter->netdev)) { ixgb_down(adapter, TRUE); ixgb_up(adapter); + /* be optimistic about our link, since we were up before */ + adapter->link_speed = 10000; + adapter->link_duplex = FULL_DUPLEX; + netif_carrier_on(netdev); + netif_wake_queue(netdev); } else ixgb_reset(adapter); @@ -199,6 +210,11 @@ ixgb_set_rx_csum(struct net_device *netdev, uint32_t data) if(netif_running(netdev)) { ixgb_down(adapter,TRUE); ixgb_up(adapter); + /* be optimistic about our link, since we were up before */ + adapter->link_speed = 10000; + adapter->link_duplex = FULL_DUPLEX; + netif_carrier_on(netdev); + netif_wake_queue(netdev); } else ixgb_reset(adapter); return 0; @@ -573,6 +589,11 @@ ixgb_set_ringparam(struct net_device *netdev, adapter->tx_ring = tx_new; if((err = ixgb_up(adapter))) return err; + /* be optimistic about our link, since 
we were up before */ + adapter->link_speed = 10000; + adapter->link_duplex = FULL_DUPLEX; + netif_carrier_on(netdev); + netif_wake_queue(netdev); } return 0; From 8908c6cd1d6889850148aeb50bb14301959adaa7 Mon Sep 17 00:00:00 2001 From: Malli Chilakala Date: Thu, 11 Aug 2005 13:58:40 -0700 Subject: [PATCH 084/584] [PATCH] ixgb: Use netdev_priv() instead of netdev->priv Use netdev_priv() instead of netdev->priv Signed-off-by: Mallikarjuna R Chilakala Signed-off-by: Ganesh Venkatesan Signed-off-by: John Ronciak Signed-off-by: Jeff Garzik --- drivers/net/ixgb/ixgb_ethtool.c | 30 ++++++++++++++++-------------- drivers/net/ixgb/ixgb_main.c | 32 ++++++++++++++++---------------- 2 files changed, 32 insertions(+), 30 deletions(-) diff --git a/drivers/net/ixgb/ixgb_ethtool.c b/drivers/net/ixgb/ixgb_ethtool.c index 94bc3d41cfa3..c80fa0007904 100644 --- a/drivers/net/ixgb/ixgb_ethtool.c +++ b/drivers/net/ixgb/ixgb_ethtool.c @@ -98,7 +98,7 @@ static struct ixgb_stats ixgb_gstrings_stats[] = { static int ixgb_get_settings(struct net_device *netdev, struct ethtool_cmd *ecmd) { - struct ixgb_adapter *adapter = netdev->priv; + struct ixgb_adapter *adapter = netdev_priv(netdev); ecmd->supported = (SUPPORTED_10000baseT_Full | SUPPORTED_FIBRE); ecmd->advertising = (SUPPORTED_10000baseT_Full | SUPPORTED_FIBRE); @@ -120,7 +120,7 @@ ixgb_get_settings(struct net_device *netdev, struct ethtool_cmd *ecmd) static int ixgb_set_settings(struct net_device *netdev, struct ethtool_cmd *ecmd) { - struct ixgb_adapter *adapter = netdev->priv; + struct ixgb_adapter *adapter = netdev_priv(netdev); if(ecmd->autoneg == AUTONEG_ENABLE || ecmd->speed + ecmd->duplex != SPEED_10000 + DUPLEX_FULL) @@ -146,7 +146,7 @@ static void ixgb_get_pauseparam(struct net_device *netdev, struct ethtool_pauseparam *pause) { - struct ixgb_adapter *adapter = netdev->priv; + struct ixgb_adapter *adapter = netdev_priv(netdev); struct ixgb_hw *hw = &adapter->hw; pause->autoneg = AUTONEG_DISABLE; @@ -165,7 +165,7 @@ static int 
ixgb_set_pauseparam(struct net_device *netdev, struct ethtool_pauseparam *pause) { - struct ixgb_adapter *adapter = netdev->priv; + struct ixgb_adapter *adapter = netdev_priv(netdev); struct ixgb_hw *hw = &adapter->hw; if(pause->autoneg == AUTONEG_ENABLE) @@ -197,14 +197,16 @@ ixgb_set_pauseparam(struct net_device *netdev, static uint32_t ixgb_get_rx_csum(struct net_device *netdev) { - struct ixgb_adapter *adapter = netdev->priv; + struct ixgb_adapter *adapter = netdev_priv(netdev); + return adapter->rx_csum; } static int ixgb_set_rx_csum(struct net_device *netdev, uint32_t data) { - struct ixgb_adapter *adapter = netdev->priv; + struct ixgb_adapter *adapter = netdev_priv(netdev); + adapter->rx_csum = data; if(netif_running(netdev)) { @@ -262,7 +264,7 @@ static void ixgb_get_regs(struct net_device *netdev, struct ethtool_regs *regs, void *p) { - struct ixgb_adapter *adapter = netdev->priv; + struct ixgb_adapter *adapter = netdev_priv(netdev); struct ixgb_hw *hw = &adapter->hw; uint32_t *reg = p; uint32_t *reg_start = reg; @@ -407,7 +409,7 @@ static int ixgb_get_eeprom(struct net_device *netdev, struct ethtool_eeprom *eeprom, uint8_t *bytes) { - struct ixgb_adapter *adapter = netdev->priv; + struct ixgb_adapter *adapter = netdev_priv(netdev); struct ixgb_hw *hw = &adapter->hw; uint16_t *eeprom_buff; int i, max_len, first_word, last_word; @@ -455,7 +457,7 @@ static int ixgb_set_eeprom(struct net_device *netdev, struct ethtool_eeprom *eeprom, uint8_t *bytes) { - struct ixgb_adapter *adapter = netdev->priv; + struct ixgb_adapter *adapter = netdev_priv(netdev); struct ixgb_hw *hw = &adapter->hw; uint16_t *eeprom_buff; void *ptr; @@ -513,7 +515,7 @@ static void ixgb_get_drvinfo(struct net_device *netdev, struct ethtool_drvinfo *drvinfo) { - struct ixgb_adapter *adapter = netdev->priv; + struct ixgb_adapter *adapter = netdev_priv(netdev); strncpy(drvinfo->driver, ixgb_driver_name, 32); strncpy(drvinfo->version, ixgb_driver_version, 32); @@ -528,7 +530,7 @@ static void 
ixgb_get_ringparam(struct net_device *netdev, struct ethtool_ringparam *ring) { - struct ixgb_adapter *adapter = netdev->priv; + struct ixgb_adapter *adapter = netdev_priv(netdev); struct ixgb_desc_ring *txdr = &adapter->tx_ring; struct ixgb_desc_ring *rxdr = &adapter->rx_ring; @@ -546,7 +548,7 @@ static int ixgb_set_ringparam(struct net_device *netdev, struct ethtool_ringparam *ring) { - struct ixgb_adapter *adapter = netdev->priv; + struct ixgb_adapter *adapter = netdev_priv(netdev); struct ixgb_desc_ring *txdr = &adapter->tx_ring; struct ixgb_desc_ring *rxdr = &adapter->rx_ring; struct ixgb_desc_ring tx_old, tx_new, rx_old, rx_new; @@ -628,7 +630,7 @@ ixgb_led_blink_callback(unsigned long data) static int ixgb_phys_id(struct net_device *netdev, uint32_t data) { - struct ixgb_adapter *adapter = netdev->priv; + struct ixgb_adapter *adapter = netdev_priv(netdev); if(!data || data > (uint32_t)(MAX_SCHEDULE_TIMEOUT / HZ)) data = (uint32_t)(MAX_SCHEDULE_TIMEOUT / HZ); @@ -664,7 +666,7 @@ static void ixgb_get_ethtool_stats(struct net_device *netdev, struct ethtool_stats *stats, uint64_t *data) { - struct ixgb_adapter *adapter = netdev->priv; + struct ixgb_adapter *adapter = netdev_priv(netdev); int i; ixgb_update_stats(adapter); diff --git a/drivers/net/ixgb/ixgb_main.c b/drivers/net/ixgb/ixgb_main.c index 492783be205e..d1fc431cb1c9 100644 --- a/drivers/net/ixgb/ixgb_main.c +++ b/drivers/net/ixgb/ixgb_main.c @@ -378,7 +378,7 @@ ixgb_probe(struct pci_dev *pdev, SET_NETDEV_DEV(netdev, &pdev->dev); pci_set_drvdata(pdev, netdev); - adapter = netdev->priv; + adapter = netdev_priv(netdev); adapter->netdev = netdev; adapter->pdev = pdev; adapter->hw.back = adapter; @@ -514,7 +514,7 @@ static void __devexit ixgb_remove(struct pci_dev *pdev) { struct net_device *netdev = pci_get_drvdata(pdev); - struct ixgb_adapter *adapter = netdev->priv; + struct ixgb_adapter *adapter = netdev_priv(netdev); unregister_netdev(netdev); @@ -585,7 +585,7 @@ ixgb_sw_init(struct ixgb_adapter 
*adapter) static int ixgb_open(struct net_device *netdev) { - struct ixgb_adapter *adapter = netdev->priv; + struct ixgb_adapter *adapter = netdev_priv(netdev); int err; /* allocate transmit descriptors */ @@ -628,7 +628,7 @@ err_setup_tx: static int ixgb_close(struct net_device *netdev) { - struct ixgb_adapter *adapter = netdev->priv; + struct ixgb_adapter *adapter = netdev_priv(netdev); ixgb_down(adapter, TRUE); @@ -1019,7 +1019,7 @@ ixgb_clean_rx_ring(struct ixgb_adapter *adapter) static int ixgb_set_mac(struct net_device *netdev, void *p) { - struct ixgb_adapter *adapter = netdev->priv; + struct ixgb_adapter *adapter = netdev_priv(netdev); struct sockaddr *addr = p; if(!is_valid_ether_addr(addr->sa_data)) @@ -1045,7 +1045,7 @@ ixgb_set_mac(struct net_device *netdev, void *p) static void ixgb_set_multi(struct net_device *netdev) { - struct ixgb_adapter *adapter = netdev->priv; + struct ixgb_adapter *adapter = netdev_priv(netdev); struct ixgb_hw *hw = &adapter->hw; struct dev_mc_list *mc_ptr; uint32_t rctl; @@ -1373,7 +1373,7 @@ ixgb_tx_queue(struct ixgb_adapter *adapter, int count, int vlan_id,int tx_flags) static int ixgb_xmit_frame(struct sk_buff *skb, struct net_device *netdev) { - struct ixgb_adapter *adapter = netdev->priv; + struct ixgb_adapter *adapter = netdev_priv(netdev); unsigned int first; unsigned int tx_flags = 0; unsigned long flags; @@ -1427,7 +1427,7 @@ ixgb_xmit_frame(struct sk_buff *skb, struct net_device *netdev) static void ixgb_tx_timeout(struct net_device *netdev) { - struct ixgb_adapter *adapter = netdev->priv; + struct ixgb_adapter *adapter = netdev_priv(netdev); /* Do the reset outside of interrupt context */ schedule_work(&adapter->tx_timeout_task); @@ -1436,7 +1436,7 @@ ixgb_tx_timeout(struct net_device *netdev) static void ixgb_tx_timeout_task(struct net_device *netdev) { - struct ixgb_adapter *adapter = netdev->priv; + struct ixgb_adapter *adapter = netdev_priv(netdev); ixgb_down(adapter, TRUE); ixgb_up(adapter); @@ -1453,7 +1453,7 
@@ ixgb_tx_timeout_task(struct net_device *netdev) static struct net_device_stats * ixgb_get_stats(struct net_device *netdev) { - struct ixgb_adapter *adapter = netdev->priv; + struct ixgb_adapter *adapter = netdev_priv(netdev); return &adapter->net_stats; } @@ -1469,7 +1469,7 @@ ixgb_get_stats(struct net_device *netdev) static int ixgb_change_mtu(struct net_device *netdev, int new_mtu) { - struct ixgb_adapter *adapter = netdev->priv; + struct ixgb_adapter *adapter = netdev_priv(netdev); int max_frame = new_mtu + ENET_HEADER_SIZE + ENET_FCS_LENGTH; int old_max_frame = netdev->mtu + ENET_HEADER_SIZE + ENET_FCS_LENGTH; @@ -1643,7 +1643,7 @@ static irqreturn_t ixgb_intr(int irq, void *data, struct pt_regs *regs) { struct net_device *netdev = data; - struct ixgb_adapter *adapter = netdev->priv; + struct ixgb_adapter *adapter = netdev_priv(netdev); struct ixgb_hw *hw = &adapter->hw; uint32_t icr = IXGB_READ_REG(hw, ICR); #ifndef CONFIG_IXGB_NAPI @@ -1690,7 +1690,7 @@ ixgb_intr(int irq, void *data, struct pt_regs *regs) static int ixgb_clean(struct net_device *netdev, int *budget) { - struct ixgb_adapter *adapter = netdev->priv; + struct ixgb_adapter *adapter = netdev_priv(netdev); int work_to_do = min(*budget, netdev->quota); int tx_cleaned; int work_done = 0; @@ -2019,7 +2019,7 @@ ixgb_alloc_rx_buffers(struct ixgb_adapter *adapter) static void ixgb_vlan_rx_register(struct net_device *netdev, struct vlan_group *grp) { - struct ixgb_adapter *adapter = netdev->priv; + struct ixgb_adapter *adapter = netdev_priv(netdev); uint32_t ctrl, rctl; ixgb_irq_disable(adapter); @@ -2057,7 +2057,7 @@ ixgb_vlan_rx_register(struct net_device *netdev, struct vlan_group *grp) static void ixgb_vlan_rx_add_vid(struct net_device *netdev, uint16_t vid) { - struct ixgb_adapter *adapter = netdev->priv; + struct ixgb_adapter *adapter = netdev_priv(netdev); uint32_t vfta, index; /* add VID to filter table */ @@ -2071,7 +2071,7 @@ ixgb_vlan_rx_add_vid(struct net_device *netdev, uint16_t vid) 
static void ixgb_vlan_rx_kill_vid(struct net_device *netdev, uint16_t vid) { - struct ixgb_adapter *adapter = netdev->priv; + struct ixgb_adapter *adapter = netdev_priv(netdev); uint32_t vfta, index; ixgb_irq_disable(adapter); From 7b89178d1d803c854dfd6f4e81633109a1238884 Mon Sep 17 00:00:00 2001 From: Malli Chilakala Date: Thu, 11 Aug 2005 13:58:55 -0700 Subject: [PATCH 085/584] [PATCH] ixgb: Fix Broadcast/Multicast packets received statistics Fix Broadcast/Multicast packets received statistics Signed-off-by: Mallikarjuna R Chilakala Signed-off-by: Ganesh Venkatesan Signed-off-by: John Ronciak Signed-off-by: Jeff Garzik --- drivers/net/ixgb/ixgb_main.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ixgb/ixgb_main.c b/drivers/net/ixgb/ixgb_main.c index d1fc431cb1c9..d7a0f4e36118 100644 --- a/drivers/net/ixgb/ixgb_main.c +++ b/drivers/net/ixgb/ixgb_main.c @@ -1524,7 +1524,8 @@ ixgb_update_stats(struct ixgb_adapter *adapter) multi |= ((u64)IXGB_READ_REG(&adapter->hw, MPRCH) << 32); /* fix up multicast stats by removing broadcasts */ - multi -= bcast; + if(multi >= bcast) + multi -= bcast; adapter->stats.mprcl += (multi & 0xFFFFFFFF); adapter->stats.mprch += (multi >> 32); From 9ef2eec39383f8fe2bd7c9fac4dfdd4fdf7173e6 Mon Sep 17 00:00:00 2001 From: Malli Chilakala Date: Thu, 11 Aug 2005 13:59:07 -0700 Subject: [PATCH 086/584] [PATCH] ixgb: Fix data output by ethtool -d Fix data output by ethtool -d Signed-off-by: Mallikarjuna R Chilakala Signed-off-by: Ganesh Venkatesan Signed-off-by: John Ronciak Signed-off-by: Jeff Garzik --- drivers/net/ixgb/ixgb_ethtool.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ixgb/ixgb_ethtool.c b/drivers/net/ixgb/ixgb_ethtool.c index c80fa0007904..44a07a9dcbc1 100644 --- a/drivers/net/ixgb/ixgb_ethtool.c +++ b/drivers/net/ixgb/ixgb_ethtool.c @@ -301,7 +301,8 @@ ixgb_get_regs(struct net_device *netdev, *reg++ = IXGB_READ_REG(hw, RAIDC); /* 19 */ *reg++ = IXGB_READ_REG(hw, 
RXCSUM); /* 20 */ - for (i = 0; i < IXGB_RAR_ENTRIES; i++) { + /* there are 16 RAR entries in hardware, we only use 3 */ + for(i = 0; i < 16; i++) { *reg++ = IXGB_READ_REG_ARRAY(hw, RAL, (i << 1)); /*21,...,51 */ *reg++ = IXGB_READ_REG_ARRAY(hw, RAH, (i << 1)); /*22,...,52 */ } From db0bacaa8313e00bb571e2d1102dc9f567353a24 Mon Sep 17 00:00:00 2001 From: Malli Chilakala Date: Thu, 11 Aug 2005 13:59:20 -0700 Subject: [PATCH 087/584] [PATCH] ixgb: Ethtool cleanup patch from Stephen Hemminger Ethtool cleanup patch from Stephen Hemminger * use ADVERTISED_xxx fields when setting advertised fields Signed-off-by: Mallikarjuna R Chilakala Signed-off-by: Ganesh Venkatesan Signed-off-by: John Ronciak Signed-off-by: Jeff Garzik --- drivers/net/ixgb/ixgb_ethtool.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ixgb/ixgb_ethtool.c b/drivers/net/ixgb/ixgb_ethtool.c index 44a07a9dcbc1..762e3a0e92b0 100644 --- a/drivers/net/ixgb/ixgb_ethtool.c +++ b/drivers/net/ixgb/ixgb_ethtool.c @@ -101,7 +101,7 @@ ixgb_get_settings(struct net_device *netdev, struct ethtool_cmd *ecmd) struct ixgb_adapter *adapter = netdev_priv(netdev); ecmd->supported = (SUPPORTED_10000baseT_Full | SUPPORTED_FIBRE); - ecmd->advertising = (SUPPORTED_10000baseT_Full | SUPPORTED_FIBRE); + ecmd->advertising = (ADVERTISED_10000baseT_Full | ADVERTISED_FIBRE); ecmd->port = PORT_FIBRE; ecmd->transceiver = XCVR_EXTERNAL; From fcb01756e8e95e8d4e423377bc435e8856194328 Mon Sep 17 00:00:00 2001 From: Malli Chilakala Date: Thu, 11 Aug 2005 13:59:31 -0700 Subject: [PATCH 088/584] [PATCH] ixgb: Remove unused functions Remove unused functions, render some variable static instead of global - based on patch from Adrian Bunk Signed-off-by: Mallikarjuna R Chilakala Signed-off-by: Ganesh Venkatesan Signed-off-by: John Ronciak Signed-off-by: Jeff Garzik --- drivers/net/ixgb/ixgb_ee.c | 170 +------------------------------------ drivers/net/ixgb/ixgb_hw.h | 9 -- 2 files changed, 1 insertion(+), 178 
deletions(-) diff --git a/drivers/net/ixgb/ixgb_ee.c b/drivers/net/ixgb/ixgb_ee.c index 3aae110c5560..661a46b95a61 100644 --- a/drivers/net/ixgb/ixgb_ee.c +++ b/drivers/net/ixgb/ixgb_ee.c @@ -565,24 +565,6 @@ ixgb_get_ee_mac_addr(struct ixgb_hw *hw, } } -/****************************************************************************** - * return the compatibility flags from EEPROM - * - * hw - Struct containing variables accessed by shared code - * - * Returns: - * compatibility flags if EEPROM contents are valid, 0 otherwise - ******************************************************************************/ -uint16_t -ixgb_get_ee_compatibility(struct ixgb_hw *hw) -{ - struct ixgb_ee_map_type *ee_map = (struct ixgb_ee_map_type *)hw->eeprom; - - if(ixgb_check_and_get_eeprom_data(hw) == TRUE) - return (le16_to_cpu(ee_map->compatibility)); - - return(0); -} /****************************************************************************** * return the Printed Board Assembly number from EEPROM @@ -602,81 +584,6 @@ ixgb_get_ee_pba_number(struct ixgb_hw *hw) return(0); } -/****************************************************************************** - * return the Initialization Control Word 1 from EEPROM - * - * hw - Struct containing variables accessed by shared code - * - * Returns: - * Initialization Control Word 1 if EEPROM contents are valid, 0 otherwise - ******************************************************************************/ -uint16_t -ixgb_get_ee_init_ctrl_reg_1(struct ixgb_hw *hw) -{ - struct ixgb_ee_map_type *ee_map = (struct ixgb_ee_map_type *)hw->eeprom; - - if(ixgb_check_and_get_eeprom_data(hw) == TRUE) - return (le16_to_cpu(ee_map->init_ctrl_reg_1)); - - return(0); -} - -/****************************************************************************** - * return the Initialization Control Word 2 from EEPROM - * - * hw - Struct containing variables accessed by shared code - * - * Returns: - * Initialization Control Word 2 if EEPROM contents are valid, 0 
otherwise - ******************************************************************************/ -uint16_t -ixgb_get_ee_init_ctrl_reg_2(struct ixgb_hw *hw) -{ - struct ixgb_ee_map_type *ee_map = (struct ixgb_ee_map_type *)hw->eeprom; - - if(ixgb_check_and_get_eeprom_data(hw) == TRUE) - return (le16_to_cpu(ee_map->init_ctrl_reg_2)); - - return(0); -} - -/****************************************************************************** - * return the Subsystem Id from EEPROM - * - * hw - Struct containing variables accessed by shared code - * - * Returns: - * Subsystem Id if EEPROM contents are valid, 0 otherwise - ******************************************************************************/ -uint16_t -ixgb_get_ee_subsystem_id(struct ixgb_hw *hw) -{ - struct ixgb_ee_map_type *ee_map = (struct ixgb_ee_map_type *)hw->eeprom; - - if(ixgb_check_and_get_eeprom_data(hw) == TRUE) - return (le16_to_cpu(ee_map->subsystem_id)); - - return(0); -} - -/****************************************************************************** - * return the Sub Vendor Id from EEPROM - * - * hw - Struct containing variables accessed by shared code - * - * Returns: - * Sub Vendor Id if EEPROM contents are valid, 0 otherwise - ******************************************************************************/ -uint16_t -ixgb_get_ee_subvendor_id(struct ixgb_hw *hw) -{ - struct ixgb_ee_map_type *ee_map = (struct ixgb_ee_map_type *)hw->eeprom; - - if(ixgb_check_and_get_eeprom_data(hw) == TRUE) - return (le16_to_cpu(ee_map->subvendor_id)); - - return(0); -} /****************************************************************************** * return the Device Id from EEPROM @@ -694,81 +601,6 @@ ixgb_get_ee_device_id(struct ixgb_hw *hw) if(ixgb_check_and_get_eeprom_data(hw) == TRUE) return (le16_to_cpu(ee_map->device_id)); - return(0); + return (0); } -/****************************************************************************** - * return the Vendor Id from EEPROM - * - * hw - Struct containing variables 
accessed by shared code - * - * Returns: - * Device Id if EEPROM contents are valid, 0 otherwise - ******************************************************************************/ -uint16_t -ixgb_get_ee_vendor_id(struct ixgb_hw *hw) -{ - struct ixgb_ee_map_type *ee_map = (struct ixgb_ee_map_type *)hw->eeprom; - - if(ixgb_check_and_get_eeprom_data(hw) == TRUE) - return (le16_to_cpu(ee_map->vendor_id)); - - return(0); -} - -/****************************************************************************** - * return the Software Defined Pins Register from EEPROM - * - * hw - Struct containing variables accessed by shared code - * - * Returns: - * SDP Register if EEPROM contents are valid, 0 otherwise - ******************************************************************************/ -uint16_t -ixgb_get_ee_swdpins_reg(struct ixgb_hw *hw) -{ - struct ixgb_ee_map_type *ee_map = (struct ixgb_ee_map_type *)hw->eeprom; - - if(ixgb_check_and_get_eeprom_data(hw) == TRUE) - return (le16_to_cpu(ee_map->swdpins_reg)); - - return(0); -} - -/****************************************************************************** - * return the D3 Power Management Bits from EEPROM - * - * hw - Struct containing variables accessed by shared code - * - * Returns: - * D3 Power Management Bits if EEPROM contents are valid, 0 otherwise - ******************************************************************************/ -uint8_t -ixgb_get_ee_d3_power(struct ixgb_hw *hw) -{ - struct ixgb_ee_map_type *ee_map = (struct ixgb_ee_map_type *)hw->eeprom; - - if(ixgb_check_and_get_eeprom_data(hw) == TRUE) - return (le16_to_cpu(ee_map->d3_power)); - - return(0); -} - -/****************************************************************************** - * return the D0 Power Management Bits from EEPROM - * - * hw - Struct containing variables accessed by shared code - * - * Returns: - * D0 Power Management Bits if EEPROM contents are valid, 0 otherwise - 
******************************************************************************/ -uint8_t -ixgb_get_ee_d0_power(struct ixgb_hw *hw) -{ - struct ixgb_ee_map_type *ee_map = (struct ixgb_ee_map_type *)hw->eeprom; - - if(ixgb_check_and_get_eeprom_data(hw) == TRUE) - return (le16_to_cpu(ee_map->d0_power)); - - return(0); -} diff --git a/drivers/net/ixgb/ixgb_hw.h b/drivers/net/ixgb/ixgb_hw.h index 97898efe7cc8..8bcf31ed10c2 100644 --- a/drivers/net/ixgb/ixgb_hw.h +++ b/drivers/net/ixgb/ixgb_hw.h @@ -822,17 +822,8 @@ extern void ixgb_clear_vfta(struct ixgb_hw *hw); /* Access functions to eeprom data */ void ixgb_get_ee_mac_addr(struct ixgb_hw *hw, uint8_t *mac_addr); -uint16_t ixgb_get_ee_compatibility(struct ixgb_hw *hw); uint32_t ixgb_get_ee_pba_number(struct ixgb_hw *hw); -uint16_t ixgb_get_ee_init_ctrl_reg_1(struct ixgb_hw *hw); -uint16_t ixgb_get_ee_init_ctrl_reg_2(struct ixgb_hw *hw); -uint16_t ixgb_get_ee_subsystem_id(struct ixgb_hw *hw); -uint16_t ixgb_get_ee_subvendor_id(struct ixgb_hw *hw); uint16_t ixgb_get_ee_device_id(struct ixgb_hw *hw); -uint16_t ixgb_get_ee_vendor_id(struct ixgb_hw *hw); -uint16_t ixgb_get_ee_swdpins_reg(struct ixgb_hw *hw); -uint8_t ixgb_get_ee_d3_power(struct ixgb_hw *hw); -uint8_t ixgb_get_ee_d0_power(struct ixgb_hw *hw); boolean_t ixgb_get_eeprom_data(struct ixgb_hw *hw); uint16_t ixgb_get_eeprom_word(struct ixgb_hw *hw, uint16_t index); From b40a1f06c062d5fb2dc11fcb826d97b28918524f Mon Sep 17 00:00:00 2001 From: Malli Chilakala Date: Thu, 11 Aug 2005 13:59:44 -0700 Subject: [PATCH 089/584] [PATCH] ixgb: Redefined buffer_info-dma to be dma_addr_t instead of uint64 Redefined buffer_info-dma to be dma_addr_t instead of uint64 Signed-off-by: Mallikarjuna R Chilakala Signed-off-by: Ganesh Venkatesan Signed-off-by: John Ronciak Signed-off-by: Jeff Garzik --- drivers/net/ixgb/ixgb.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ixgb/ixgb.h b/drivers/net/ixgb/ixgb.h index f8d3385c7842..c83271b38621 100644 --- 
a/drivers/net/ixgb/ixgb.h +++ b/drivers/net/ixgb/ixgb.h @@ -119,7 +119,7 @@ struct ixgb_adapter; * so a DMA handle can be stored along with the buffer */ struct ixgb_buffer { struct sk_buff *skb; - uint64_t dma; + dma_addr_t dma; unsigned long time_stamp; uint16_t length; uint16_t next_to_watch; From ab707da7cf0a1a1d27c6021356cfb3692cf1bd26 Mon Sep 17 00:00:00 2001 From: Malli Chilakala Date: Thu, 11 Aug 2005 13:59:59 -0700 Subject: [PATCH 090/584] [PATCH] ixgb: Driver version, white space, comments Driver version, white space, comments & added Module_version Patch from linville Signed-off-by: Mallikarjuna R Chilakala Signed-off-by: Ganesh Venkatesan Signed-off-by: John Ronciak Signed-off-by: Jeff Garzik --- drivers/net/ixgb/ixgb_ethtool.c | 3 ++- drivers/net/ixgb/ixgb_main.c | 10 +++++++--- 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/drivers/net/ixgb/ixgb_ethtool.c b/drivers/net/ixgb/ixgb_ethtool.c index 762e3a0e92b0..9d026ed77ddd 100644 --- a/drivers/net/ixgb/ixgb_ethtool.c +++ b/drivers/net/ixgb/ixgb_ethtool.c @@ -271,7 +271,8 @@ ixgb_get_regs(struct net_device *netdev, uint8_t i; /* the 1 (one) below indicates an attempt at versioning, if the - * interface in ethtool or the driver this 1 should be incremented */ + * interface in ethtool or the driver changes, this 1 should be + * incremented */ regs->version = (1<<24) | hw->revision_id << 16 | hw->device_id; /* General Registers */ diff --git a/drivers/net/ixgb/ixgb_main.c b/drivers/net/ixgb/ixgb_main.c index d7a0f4e36118..5c555373adbe 100644 --- a/drivers/net/ixgb/ixgb_main.c +++ b/drivers/net/ixgb/ixgb_main.c @@ -29,6 +29,11 @@ #include "ixgb.h" /* Change Log + * 1.0.96 04/19/05 + * - Make needlessly global code static -- bunk@stusta.de + * - ethtool cleanup -- shemminger@osdl.org + * - Support for MODULE_VERSION -- linville@tuxdriver.com + * - add skb_header_cloned check to the tso path -- herbert@apana.org.au * 1.0.88 01/05/05 * - include fix to the condition that determines when to quit 
NAPI - Robert Olsson * - use netif_poll_{disable/enable} to synchronize between NAPI and i/f up/down @@ -47,10 +52,9 @@ char ixgb_driver_string[] = "Intel(R) PRO/10GbE Network Driver"; #else #define DRIVERNAPI "-NAPI" #endif - -#define DRV_VERSION "1.0.95-k2"DRIVERNAPI +#define DRV_VERSION "1.0.100-k2"DRIVERNAPI char ixgb_driver_version[] = DRV_VERSION; -char ixgb_copyright[] = "Copyright (c) 1999-2005 Intel Corporation."; +static char ixgb_copyright[] = "Copyright (c) 1999-2005 Intel Corporation."; /* ixgb_pci_tbl - PCI Device ID Table * From 6ae4cfb5711b6f2878c9e384617971d98c34a7f5 Mon Sep 17 00:00:00 2001 From: Albert Lee Date: Fri, 12 Aug 2005 14:15:34 +0800 Subject: [PATCH 091/584] [PATCH] libata ata_data_xfer() fix PATCH 1/2: ata_data_xfer() fix Changes: - Modify ata_mmio_data_xfer() and ata_pio_data_xfer() to handle odd-lengthed buffer. - Add some function comments This patch does not reuse ap->pad as alignment buffer since using local variable seems good enough. Signed-off-by: Albert Lee Signed-off-by: Jeff Garzik --- drivers/scsi/libata-core.c | 111 ++++++++++++++++++++++++++++++++++++- 1 file changed, 108 insertions(+), 3 deletions(-) diff --git a/drivers/scsi/libata-core.c b/drivers/scsi/libata-core.c index 73b1f72b7e43..8f6e536d8924 100644 --- a/drivers/scsi/libata-core.c +++ b/drivers/scsi/libata-core.c @@ -2519,6 +2519,20 @@ void swap_buf_le16(u16 *buf, unsigned int buf_words) #endif /* __BIG_ENDIAN */ } +/** + * ata_mmio_data_xfer - Transfer data by MMIO + * @ap: port to read/write + * @buf: data buffer + * @buflen: buffer length + * @do_write: read/write + * + * Transfer data from/to the device data register by MMIO. + * + * LOCKING: + * Inherited from caller. 
+ * + */ + static void ata_mmio_data_xfer(struct ata_port *ap, unsigned char *buf, unsigned int buflen, int write_data) { @@ -2527,6 +2541,7 @@ static void ata_mmio_data_xfer(struct ata_port *ap, unsigned char *buf, u16 *buf16 = (u16 *) buf; void __iomem *mmio = (void __iomem *)ap->ioaddr.data_addr; + /* Transfer multiple of 2 bytes */ if (write_data) { for (i = 0; i < words; i++) writew(le16_to_cpu(buf16[i]), mmio); @@ -2534,19 +2549,76 @@ static void ata_mmio_data_xfer(struct ata_port *ap, unsigned char *buf, for (i = 0; i < words; i++) buf16[i] = cpu_to_le16(readw(mmio)); } + + /* Transfer trailing 1 byte, if any. */ + if (unlikely(buflen & 0x01)) { + u16 align_buf[1] = { 0 }; + unsigned char *trailing_buf = buf + buflen - 1; + + if (write_data) { + memcpy(align_buf, trailing_buf, 1); + writew(le16_to_cpu(align_buf[0]), mmio); + } else { + align_buf[0] = cpu_to_le16(readw(mmio)); + memcpy(trailing_buf, align_buf, 1); + } + } } +/** + * ata_pio_data_xfer - Transfer data by PIO + * @ap: port to read/write + * @buf: data buffer + * @buflen: buffer length + * @do_write: read/write + * + * Transfer data from/to the device data register by PIO. + * + * LOCKING: + * Inherited from caller. + * + */ + static void ata_pio_data_xfer(struct ata_port *ap, unsigned char *buf, unsigned int buflen, int write_data) { - unsigned int dwords = buflen >> 1; + unsigned int words = buflen >> 1; + /* Transfer multiple of 2 bytes */ if (write_data) - outsw(ap->ioaddr.data_addr, buf, dwords); + outsw(ap->ioaddr.data_addr, buf, words); else - insw(ap->ioaddr.data_addr, buf, dwords); + insw(ap->ioaddr.data_addr, buf, words); + + /* Transfer trailing 1 byte, if any. 
*/ + if (unlikely(buflen & 0x01)) { + u16 align_buf[1] = { 0 }; + unsigned char *trailing_buf = buf + buflen - 1; + + if (write_data) { + memcpy(align_buf, trailing_buf, 1); + outw(le16_to_cpu(align_buf[0]), ap->ioaddr.data_addr); + } else { + align_buf[0] = cpu_to_le16(inw(ap->ioaddr.data_addr)); + memcpy(trailing_buf, align_buf, 1); + } + } } +/** + * ata_data_xfer - Transfer data from/to the data register. + * @ap: port to read/write + * @buf: data buffer + * @buflen: buffer length + * @do_write: read/write + * + * Transfer data from/to the device data register. + * + * LOCKING: + * Inherited from caller. + * + */ + static void ata_data_xfer(struct ata_port *ap, unsigned char *buf, unsigned int buflen, int do_write) { @@ -2556,6 +2628,16 @@ static void ata_data_xfer(struct ata_port *ap, unsigned char *buf, ata_pio_data_xfer(ap, buf, buflen, do_write); } +/** + * ata_pio_sector - Transfer ATA_SECT_SIZE (512 bytes) of data. + * @qc: Command on going + * + * Transfer ATA_SECT_SIZE of data from/to the ATA device. + * + * LOCKING: + * Inherited from caller. + */ + static void ata_pio_sector(struct ata_queued_cmd *qc) { int do_write = (qc->tf.flags & ATA_TFLAG_WRITE); @@ -2594,6 +2676,18 @@ static void ata_pio_sector(struct ata_queued_cmd *qc) kunmap(page); } +/** + * __atapi_pio_bytes - Transfer data from/to the ATAPI device. + * @qc: Command on going + * @bytes: number of bytes + * + * Transfer Transfer data from/to the ATAPI device. + * + * LOCKING: + * Inherited from caller. + * + */ + static void __atapi_pio_bytes(struct ata_queued_cmd *qc, unsigned int bytes) { int do_write = (qc->tf.flags & ATA_TFLAG_WRITE); @@ -2645,6 +2739,17 @@ next_sg: } } +/** + * atapi_pio_bytes - Transfer data from/to the ATAPI device. + * @qc: Command on going + * + * Transfer Transfer data from/to the ATAPI device. + * + * LOCKING: + * Inherited from caller. 
+ * + */ + static void atapi_pio_bytes(struct ata_queued_cmd *qc) { struct ata_port *ap = qc->ap; From 563a6e1fb0af58433beec1ab418e1fafbd100b56 Mon Sep 17 00:00:00 2001 From: Albert Lee Date: Fri, 12 Aug 2005 14:17:50 +0800 Subject: [PATCH 092/584] [PATCH] libata handle the case when device returns/needs extra data PATCH 2/2: handle the case when device returns/needs extra data Description: Sometimes the device returns/needs extra data than expected. Changes: Modify __atapi_pio_bytes() to handle the case where device returns/needs extra data. - for read case, discard trailing data from the device - for write case, padding zero data to the device Signed-off-by: Albert Lee Signed-off-by: Jeff Garzik --- drivers/scsi/libata-core.c | 28 +++++++++++++++++++++++++--- 1 file changed, 25 insertions(+), 3 deletions(-) diff --git a/drivers/scsi/libata-core.c b/drivers/scsi/libata-core.c index 8f6e536d8924..9add4c521b6b 100644 --- a/drivers/scsi/libata-core.c +++ b/drivers/scsi/libata-core.c @@ -2697,10 +2697,33 @@ static void __atapi_pio_bytes(struct ata_queued_cmd *qc, unsigned int bytes) unsigned char *buf; unsigned int offset, count; - if (qc->curbytes == qc->nbytes - bytes) + if (qc->curbytes + bytes >= qc->nbytes) ap->pio_task_state = PIO_ST_LAST; next_sg: + if (unlikely(qc->cursg >= qc->n_elem)) { + /* + * The end of qc->sg is reached and the device expects + * more data to transfer. 
In order not to overrun qc->sg + * and fulfill length specified in the byte count register, + * - for read case, discard trailing data from the device + * - for write case, padding zero data to the device + */ + u16 pad_buf[1] = { 0 }; + unsigned int words = bytes >> 1; + unsigned int i; + + if (words) /* warning if bytes > 1 */ + printk(KERN_WARNING "ata%u: %u bytes trailing data\n", + ap->id, bytes); + + for (i = 0; i < words; i++) + ata_data_xfer(ap, (unsigned char*)pad_buf, 2, do_write); + + ap->pio_task_state = PIO_ST_LAST; + return; + } + sg = &qc->sg[qc->cursg]; page = sg->page; @@ -2734,9 +2757,8 @@ next_sg: kunmap(page); - if (bytes) { + if (bytes) goto next_sg; - } } /** From 414ed537995617f4cbcab65e193f26a2b2dcfa5e Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Tue, 16 Aug 2005 20:43:16 +1000 Subject: [PATCH 093/584] drm: add initial r300 3D support. This adds initial r300 3D support to the radeon DRM. From: Nicolai Haehnle, Vladimir Dergachev, and others. Signed-off-by: David Airlie --- drivers/char/drm/Makefile | 2 +- drivers/char/drm/drm_pciids.h | 13 + drivers/char/drm/r300_cmdbuf.c | 801 ++++++++++++++++++ drivers/char/drm/r300_reg.h | 1412 +++++++++++++++++++++++++++++++ drivers/char/drm/radeon_cp.c | 13 + drivers/char/drm/radeon_drm.h | 46 + drivers/char/drm/radeon_drv.h | 29 +- drivers/char/drm/radeon_state.c | 11 + 8 files changed, 2325 insertions(+), 2 deletions(-) create mode 100644 drivers/char/drm/r300_cmdbuf.c create mode 100644 drivers/char/drm/r300_reg.h diff --git a/drivers/char/drm/Makefile b/drivers/char/drm/Makefile index 1945138cb8fb..e41060c76226 100644 --- a/drivers/char/drm/Makefile +++ b/drivers/char/drm/Makefile @@ -14,7 +14,7 @@ mga-objs := mga_drv.o mga_dma.o mga_state.o mga_warp.o mga_irq.o i810-objs := i810_drv.o i810_dma.o i830-objs := i830_drv.o i830_dma.o i830_irq.o i915-objs := i915_drv.o i915_dma.o i915_irq.o i915_mem.o -radeon-objs := radeon_drv.o radeon_cp.o radeon_state.o radeon_mem.o radeon_irq.o +radeon-objs := 
radeon_drv.o radeon_cp.o radeon_state.o radeon_mem.o radeon_irq.o r300_cmdbuf.o ffb-objs := ffb_drv.o ffb_context.o sis-objs := sis_drv.o sis_ds.o sis_mm.o savage-objs := savage_drv.o savage_bci.o savage_state.o diff --git a/drivers/char/drm/drm_pciids.h b/drivers/char/drm/drm_pciids.h index 8e264f9c1a1e..1874c1fd1717 100644 --- a/drivers/char/drm/drm_pciids.h +++ b/drivers/char/drm/drm_pciids.h @@ -25,6 +25,8 @@ {0x1002, 0x4965, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_R250}, \ {0x1002, 0x4966, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_R250}, \ {0x1002, 0x4967, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_R250}, \ + {0x1002, 0x4A49, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_R420}, \ + {0x1002, 0x4A4B, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_R420}, \ {0x1002, 0x4C57, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RV200|CHIP_IS_MOBILITY}, \ {0x1002, 0x4C58, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RV200|CHIP_IS_MOBILITY}, \ {0x1002, 0x4C59, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RV100|CHIP_IS_MOBILITY}, \ @@ -33,7 +35,17 @@ {0x1002, 0x4C65, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_R250|CHIP_IS_MOBILITY}, \ {0x1002, 0x4C66, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_R250|CHIP_IS_MOBILITY}, \ {0x1002, 0x4C67, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_R250|CHIP_IS_MOBILITY}, \ + {0x1002, 0x4E44, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_R300}, \ + {0x1002, 0x4E45, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_R300}, \ + {0x1002, 0x4E46, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RV350}, \ + {0x1002, 0x4E47, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_R300}, \ + {0x1002, 0x4E48, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_R350}, \ + {0x1002, 0x4E49, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_R350}, \ + {0x1002, 0x4E4A, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RV350}, \ + {0x1002, 0x4E4B, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_R350}, \ {0x1002, 0x4E50, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RV350|CHIP_IS_MOBILITY}, \ + {0x1002, 0x4E51, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RV350|CHIP_IS_MOBILITY}, \ + {0x1002, 0x4E54, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RV350|CHIP_IS_MOBILITY}, \ {0x1002, 0x5144, PCI_ANY_ID, 
PCI_ANY_ID, 0, 0, CHIP_R100|CHIP_SINGLE_CRTC}, \ {0x1002, 0x5145, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_R100|CHIP_SINGLE_CRTC}, \ {0x1002, 0x5146, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_R100|CHIP_SINGLE_CRTC}, \ @@ -56,6 +68,7 @@ {0x1002, 0x516A, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_R200}, \ {0x1002, 0x516B, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_R200}, \ {0x1002, 0x516C, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_R200}, \ + {0x1002, 0x5460, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RV350}, \ {0x1002, 0x5834, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RS300|CHIP_IS_IGP}, \ {0x1002, 0x5835, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RS300|CHIP_IS_IGP|CHIP_IS_MOBILITY}, \ {0x1002, 0x5836, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RS300|CHIP_IS_IGP}, \ diff --git a/drivers/char/drm/r300_cmdbuf.c b/drivers/char/drm/r300_cmdbuf.c new file mode 100644 index 000000000000..623f1f460cb5 --- /dev/null +++ b/drivers/char/drm/r300_cmdbuf.c @@ -0,0 +1,801 @@ +/* r300_cmdbuf.c -- Command buffer emission for R300 -*- linux-c -*- + * + * Copyright (C) The Weather Channel, Inc. 2002. + * Copyright (C) 2004 Nicolai Haehnle. + * All Rights Reserved. + * + * The Weather Channel (TM) funded Tungsten Graphics to develop the + * initial release of the Radeon 8500 driver under the XFree86 license. + * This notice must be preserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Authors: + * Nicolai Haehnle + */ + +#include "drmP.h" +#include "drm.h" +#include "radeon_drm.h" +#include "radeon_drv.h" +#include "r300_reg.h" + + +#define R300_SIMULTANEOUS_CLIPRECTS 4 + +/* Values for R300_RE_CLIPRECT_CNTL depending on the number of cliprects + */ +static const int r300_cliprect_cntl[4] = { + 0xAAAA, + 0xEEEE, + 0xFEFE, + 0xFFFE +}; + + +/** + * Emit up to R300_SIMULTANEOUS_CLIPRECTS cliprects from the given command + * buffer, starting with index n. + */ +static int r300_emit_cliprects(drm_radeon_private_t* dev_priv, + drm_radeon_cmd_buffer_t* cmdbuf, + int n) +{ + drm_clip_rect_t box; + int nr; + int i; + RING_LOCALS; + + nr = cmdbuf->nbox - n; + if (nr > R300_SIMULTANEOUS_CLIPRECTS) + nr = R300_SIMULTANEOUS_CLIPRECTS; + + DRM_DEBUG("%i cliprects\n", nr); + + if (nr) { + BEGIN_RING(6 + nr*2); + OUT_RING( CP_PACKET0( R300_RE_CLIPRECT_TL_0, nr*2 - 1 ) ); + + for(i = 0; i < nr; ++i) { + if (DRM_COPY_FROM_USER_UNCHECKED(&box, &cmdbuf->boxes[n+i], sizeof(box))) { + DRM_ERROR("copy cliprect faulted\n"); + return DRM_ERR(EFAULT); + } + + box.x1 = (box.x1 + R300_CLIPRECT_OFFSET) & R300_CLIPRECT_MASK; + box.y1 = (box.y1 + R300_CLIPRECT_OFFSET) & R300_CLIPRECT_MASK; + box.x2 = (box.x2 + R300_CLIPRECT_OFFSET) & R300_CLIPRECT_MASK; + box.y2 = (box.y2 + R300_CLIPRECT_OFFSET) & R300_CLIPRECT_MASK; + + OUT_RING((box.x1 << R300_CLIPRECT_X_SHIFT) | + (box.y1 << R300_CLIPRECT_Y_SHIFT)); + OUT_RING((box.x2 << R300_CLIPRECT_X_SHIFT) | + (box.y2 << 
R300_CLIPRECT_Y_SHIFT)); + } + + OUT_RING_REG( R300_RE_CLIPRECT_CNTL, r300_cliprect_cntl[nr-1] ); + + /* TODO/SECURITY: Force scissors to a safe value, otherwise the + * client might be able to trample over memory. + * The impact should be very limited, but I'd rather be safe than + * sorry. + */ + OUT_RING( CP_PACKET0( R300_RE_SCISSORS_TL, 1 ) ); + OUT_RING( 0 ); + OUT_RING( R300_SCISSORS_X_MASK | R300_SCISSORS_Y_MASK ); + ADVANCE_RING(); + } else { + /* Why we allow zero cliprect rendering: + * There are some commands in a command buffer that must be submitted + * even when there are no cliprects, e.g. DMA buffer discard + * or state setting (though state setting could be avoided by + * simulating a loss of context). + * + * Now since the cmdbuf interface is so chaotic right now (and is + * bound to remain that way for a bit until things settle down), + * it is basically impossible to filter out the commands that are + * necessary and those that aren't. + * + * So I choose the safe way and don't do any filtering at all; + * instead, I simply set up the engine so that all rendering + * can't produce any fragments. 
+ */ + BEGIN_RING(2); + OUT_RING_REG( R300_RE_CLIPRECT_CNTL, 0 ); + ADVANCE_RING(); + } + + return 0; +} + +u8 r300_reg_flags[0x10000>>2]; + + +void r300_init_reg_flags(void) +{ + int i; + memset(r300_reg_flags, 0, 0x10000>>2); + #define ADD_RANGE_MARK(reg, count,mark) \ + for(i=((reg)>>2);i<((reg)>>2)+(count);i++)\ + r300_reg_flags[i]|=(mark); + + #define MARK_SAFE 1 + #define MARK_CHECK_OFFSET 2 + + #define ADD_RANGE(reg, count) ADD_RANGE_MARK(reg, count, MARK_SAFE) + + /* these match cmducs() command in r300_driver/r300/r300_cmdbuf.c */ + ADD_RANGE(R300_SE_VPORT_XSCALE, 6); + ADD_RANGE(0x2080, 1); + ADD_RANGE(R300_SE_VTE_CNTL, 2); + ADD_RANGE(0x2134, 2); + ADD_RANGE(0x2140, 1); + ADD_RANGE(R300_VAP_INPUT_CNTL_0, 2); + ADD_RANGE(0x21DC, 1); + ADD_RANGE(0x221C, 1); + ADD_RANGE(0x2220, 4); + ADD_RANGE(0x2288, 1); + ADD_RANGE(R300_VAP_OUTPUT_VTX_FMT_0, 2); + ADD_RANGE(R300_VAP_PVS_CNTL_1, 3); + ADD_RANGE(R300_GB_ENABLE, 1); + ADD_RANGE(R300_GB_MSPOS0, 5); + ADD_RANGE(R300_TX_ENABLE, 1); + ADD_RANGE(0x4200, 4); + ADD_RANGE(0x4214, 1); + ADD_RANGE(R300_RE_POINTSIZE, 1); + ADD_RANGE(0x4230, 3); + ADD_RANGE(R300_RE_LINE_CNT, 1); + ADD_RANGE(0x4238, 1); + ADD_RANGE(0x4260, 3); + ADD_RANGE(0x4274, 4); + ADD_RANGE(0x4288, 5); + ADD_RANGE(0x42A0, 1); + ADD_RANGE(R300_RE_ZBIAS_T_FACTOR, 4); + ADD_RANGE(0x42B4, 1); + ADD_RANGE(R300_RE_CULL_CNTL, 1); + ADD_RANGE(0x42C0, 2); + ADD_RANGE(R300_RS_CNTL_0, 2); + ADD_RANGE(R300_RS_INTERP_0, 8); + ADD_RANGE(R300_RS_ROUTE_0, 8); + ADD_RANGE(0x43A4, 2); + ADD_RANGE(0x43E8, 1); + ADD_RANGE(R300_PFS_CNTL_0, 3); + ADD_RANGE(R300_PFS_NODE_0, 4); + ADD_RANGE(R300_PFS_TEXI_0, 64); + ADD_RANGE(0x46A4, 5); + ADD_RANGE(R300_PFS_INSTR0_0, 64); + ADD_RANGE(R300_PFS_INSTR1_0, 64); + ADD_RANGE(R300_PFS_INSTR2_0, 64); + ADD_RANGE(R300_PFS_INSTR3_0, 64); + ADD_RANGE(0x4BC0, 1); + ADD_RANGE(0x4BC8, 3); + ADD_RANGE(R300_PP_ALPHA_TEST, 2); + ADD_RANGE(0x4BD8, 1); + ADD_RANGE(R300_PFS_PARAM_0_X, 64); + ADD_RANGE(0x4E00, 1); + ADD_RANGE(R300_RB3D_CBLEND, 
2); + ADD_RANGE(R300_RB3D_COLORMASK, 1); + ADD_RANGE(0x4E10, 3); + ADD_RANGE_MARK(R300_RB3D_COLOROFFSET0, 1, MARK_CHECK_OFFSET); /* check offset */ + ADD_RANGE(R300_RB3D_COLORPITCH0, 1); + ADD_RANGE(0x4E50, 9); + ADD_RANGE(0x4E88, 1); + ADD_RANGE(0x4EA0, 2); + ADD_RANGE(R300_RB3D_ZSTENCIL_CNTL_0, 3); + ADD_RANGE(0x4F10, 4); + ADD_RANGE_MARK(R300_RB3D_DEPTHOFFSET, 1, MARK_CHECK_OFFSET); /* check offset */ + ADD_RANGE(R300_RB3D_DEPTHPITCH, 1); + ADD_RANGE(0x4F28, 1); + ADD_RANGE(0x4F30, 2); + ADD_RANGE(0x4F44, 1); + ADD_RANGE(0x4F54, 1); + + ADD_RANGE(R300_TX_FILTER_0, 16); + ADD_RANGE(R300_TX_UNK1_0, 16); + ADD_RANGE(R300_TX_SIZE_0, 16); + ADD_RANGE(R300_TX_FORMAT_0, 16); + /* Texture offset is dangerous and needs more checking */ + ADD_RANGE_MARK(R300_TX_OFFSET_0, 16, MARK_CHECK_OFFSET); + ADD_RANGE(R300_TX_UNK4_0, 16); + ADD_RANGE(R300_TX_BORDER_COLOR_0, 16); + + /* Sporadic registers used as primitives are emitted */ + ADD_RANGE(0x4f18, 1); + ADD_RANGE(R300_RB3D_DSTCACHE_CTLSTAT, 1); + ADD_RANGE(R300_VAP_INPUT_ROUTE_0_0, 8); + ADD_RANGE(R300_VAP_INPUT_ROUTE_1_0, 8); + +} + +static __inline__ int r300_check_range(unsigned reg, int count) +{ + int i; + if(reg & ~0xffff)return -1; + for(i=(reg>>2);i<(reg>>2)+count;i++) + if(r300_reg_flags[i]!=MARK_SAFE)return 1; + return 0; +} + + /* we expect offsets passed to the framebuffer to be either within video memory or + within AGP space */ +static __inline__ int r300_check_offset(drm_radeon_private_t* dev_priv, u32 offset) +{ + /* we realy want to check against end of video aperture + but this value is not being kept. 
+ This code is correct for now (does the same thing as the + code that sets MC_FB_LOCATION) in radeon_cp.c */ + if((offset>=dev_priv->fb_location) && + (offsetgart_vm_start))return 0; + if((offset>=dev_priv->gart_vm_start) && + (offsetgart_vm_start+dev_priv->gart_size))return 0; + return 1; +} + +static __inline__ int r300_emit_carefully_checked_packet0(drm_radeon_private_t* dev_priv, + drm_radeon_cmd_buffer_t* cmdbuf, + drm_r300_cmd_header_t header) +{ + int reg; + int sz; + int i; + int values[64]; + RING_LOCALS; + + sz = header.packet0.count; + reg = (header.packet0.reghi << 8) | header.packet0.reglo; + + if((sz>64)||(sz<0)){ + DRM_ERROR("Cannot emit more than 64 values at a time (reg=%04x sz=%d)\n", reg, sz); + return DRM_ERR(EINVAL); + } + for(i=0;ibuf)[i]; + switch(r300_reg_flags[(reg>>2)+i]){ + case MARK_SAFE: + break; + case MARK_CHECK_OFFSET: + if(r300_check_offset(dev_priv, (u32)values[i])){ + DRM_ERROR("Offset failed range check (reg=%04x sz=%d)\n", reg, sz); + return DRM_ERR(EINVAL); + } + break; + default: + DRM_ERROR("Register %04x failed check as flag=%02x\n", reg+i*4, r300_reg_flags[(reg>>2)+i]); + return DRM_ERR(EINVAL); + } + } + + BEGIN_RING(1+sz); + OUT_RING( CP_PACKET0( reg, sz-1 ) ); + OUT_RING_TABLE( values, sz ); + ADVANCE_RING(); + + cmdbuf->buf += sz*4; + cmdbuf->bufsz -= sz*4; + + return 0; +} + +/** + * Emits a packet0 setting arbitrary registers. + * Called by r300_do_cp_cmdbuf. 
+ * + * Note that checks are performed on contents and addresses of the registers + */ +static __inline__ int r300_emit_packet0(drm_radeon_private_t* dev_priv, + drm_radeon_cmd_buffer_t* cmdbuf, + drm_r300_cmd_header_t header) +{ + int reg; + int sz; + RING_LOCALS; + + sz = header.packet0.count; + reg = (header.packet0.reghi << 8) | header.packet0.reglo; + + if (!sz) + return 0; + + if (sz*4 > cmdbuf->bufsz) + return DRM_ERR(EINVAL); + + if (reg+sz*4 >= 0x10000){ + DRM_ERROR("No such registers in hardware reg=%04x sz=%d\n", reg, sz); + return DRM_ERR(EINVAL); + } + + if(r300_check_range(reg, sz)){ + /* go and check everything */ + return r300_emit_carefully_checked_packet0(dev_priv, cmdbuf, header); + } + /* the rest of the data is safe to emit, whatever the values the user passed */ + + BEGIN_RING(1+sz); + OUT_RING( CP_PACKET0( reg, sz-1 ) ); + OUT_RING_TABLE( (int __user*)cmdbuf->buf, sz ); + ADVANCE_RING(); + + cmdbuf->buf += sz*4; + cmdbuf->bufsz -= sz*4; + + return 0; +} + + +/** + * Uploads user-supplied vertex program instructions or parameters onto + * the graphics card. + * Called by r300_do_cp_cmdbuf. + */ +static __inline__ int r300_emit_vpu(drm_radeon_private_t* dev_priv, + drm_radeon_cmd_buffer_t* cmdbuf, + drm_r300_cmd_header_t header) +{ + int sz; + int addr; + RING_LOCALS; + + sz = header.vpu.count; + addr = (header.vpu.adrhi << 8) | header.vpu.adrlo; + + if (!sz) + return 0; + if (sz*16 > cmdbuf->bufsz) + return DRM_ERR(EINVAL); + + BEGIN_RING(5+sz*4); + /* Wait for VAP to come to senses.. 
*/ + /* there is no need to emit it multiple times, (only once before VAP is programmed, + but this optimization is for later */ + OUT_RING_REG( R300_VAP_PVS_WAITIDLE, 0 ); + OUT_RING_REG( R300_VAP_PVS_UPLOAD_ADDRESS, addr ); + OUT_RING( CP_PACKET0_TABLE( R300_VAP_PVS_UPLOAD_DATA, sz*4 - 1 ) ); + OUT_RING_TABLE( (int __user*)cmdbuf->buf, sz*4 ); + + ADVANCE_RING(); + + cmdbuf->buf += sz*16; + cmdbuf->bufsz -= sz*16; + + return 0; +} + + +/** + * Emit a clear packet from userspace. + * Called by r300_emit_packet3. + */ +static __inline__ int r300_emit_clear(drm_radeon_private_t* dev_priv, + drm_radeon_cmd_buffer_t* cmdbuf) +{ + RING_LOCALS; + + if (8*4 > cmdbuf->bufsz) + return DRM_ERR(EINVAL); + + BEGIN_RING(10); + OUT_RING( CP_PACKET3( R200_3D_DRAW_IMMD_2, 8 ) ); + OUT_RING( R300_PRIM_TYPE_POINT|R300_PRIM_WALK_RING| + (1<buf, 8 ); + ADVANCE_RING(); + + cmdbuf->buf += 8*4; + cmdbuf->bufsz -= 8*4; + + return 0; +} + +static __inline__ int r300_emit_3d_load_vbpntr(drm_radeon_private_t* dev_priv, + drm_radeon_cmd_buffer_t* cmdbuf, + u32 header) +{ + int count, i,k; + #define MAX_ARRAY_PACKET 64 + u32 payload[MAX_ARRAY_PACKET]; + u32 narrays; + RING_LOCALS; + + count=(header>>16) & 0x3fff; + + if((count+1)>MAX_ARRAY_PACKET){ + DRM_ERROR("Too large payload in 3D_LOAD_VBPNTR (count=%d)\n", count); + return DRM_ERR(EINVAL); + } + memset(payload, 0, MAX_ARRAY_PACKET*4); + memcpy(payload, cmdbuf->buf+4, (count+1)*4); + + /* carefully check packet contents */ + + narrays=payload[0]; + k=0; + i=1; + while((kbuf += (count+2)*4; + cmdbuf->bufsz -= (count+2)*4; + + return 0; +} + +static __inline__ int r300_emit_raw_packet3(drm_radeon_private_t* dev_priv, + drm_radeon_cmd_buffer_t* cmdbuf) +{ + u32 header; + int count; + RING_LOCALS; + + if (4 > cmdbuf->bufsz) + return DRM_ERR(EINVAL); + + /* Fixme !! This simply emits a packet without much checking. + We need to be smarter. 
*/ + + /* obtain first word - actual packet3 header */ + header = *(u32 __user*)cmdbuf->buf; + + /* Is it packet 3 ? */ + if( (header>>30)!=0x3 ) { + DRM_ERROR("Not a packet3 header (0x%08x)\n", header); + return DRM_ERR(EINVAL); + } + + count=(header>>16) & 0x3fff; + + /* Check again now that we know how much data to expect */ + if ((count+2)*4 > cmdbuf->bufsz){ + DRM_ERROR("Expected packet3 of length %d but have only %d bytes left\n", + (count+2)*4, cmdbuf->bufsz); + return DRM_ERR(EINVAL); + } + + /* Is it a packet type we know about ? */ + switch(header & 0xff00){ + case RADEON_3D_LOAD_VBPNTR: /* load vertex array pointers */ + return r300_emit_3d_load_vbpntr(dev_priv, cmdbuf, header); + + case RADEON_CP_3D_DRAW_IMMD_2: /* triggers drawing using in-packet vertex data */ + case RADEON_CP_3D_DRAW_VBUF_2: /* triggers drawing of vertex buffers setup elsewhere */ + case RADEON_CP_3D_DRAW_INDX_2: /* triggers drawing using indices to vertex buffer */ + case RADEON_CP_INDX_BUFFER: /* DRAW_INDX_2 without INDX_BUFFER seems to lock up the gpu */ + case RADEON_WAIT_FOR_IDLE: + case RADEON_CP_NOP: + /* these packets are safe */ + break; + default: + DRM_ERROR("Unknown packet3 header (0x%08x)\n", header); + return DRM_ERR(EINVAL); + } + + + BEGIN_RING(count+2); + OUT_RING(header); + OUT_RING_TABLE( (int __user*)(cmdbuf->buf+4), count+1); + ADVANCE_RING(); + + cmdbuf->buf += (count+2)*4; + cmdbuf->bufsz -= (count+2)*4; + + return 0; +} + + +/** + * Emit a rendering packet3 from userspace. + * Called by r300_do_cp_cmdbuf. + */ +static __inline__ int r300_emit_packet3(drm_radeon_private_t* dev_priv, + drm_radeon_cmd_buffer_t* cmdbuf, + drm_r300_cmd_header_t header) +{ + int n; + int ret; + char __user* orig_buf = cmdbuf->buf; + int orig_bufsz = cmdbuf->bufsz; + + /* This is a do-while-loop so that we run the interior at least once, + * even if cmdbuf->nbox is 0. Compare r300_emit_cliprects for rationale. 
+ */ + n = 0; + do { + if (cmdbuf->nbox > R300_SIMULTANEOUS_CLIPRECTS) { + ret = r300_emit_cliprects(dev_priv, cmdbuf, n); + if (ret) + return ret; + + cmdbuf->buf = orig_buf; + cmdbuf->bufsz = orig_bufsz; + } + + switch(header.packet3.packet) { + case R300_CMD_PACKET3_CLEAR: + DRM_DEBUG("R300_CMD_PACKET3_CLEAR\n"); + ret = r300_emit_clear(dev_priv, cmdbuf); + if (ret) { + DRM_ERROR("r300_emit_clear failed\n"); + return ret; + } + break; + + case R300_CMD_PACKET3_RAW: + DRM_DEBUG("R300_CMD_PACKET3_RAW\n"); + ret = r300_emit_raw_packet3(dev_priv, cmdbuf); + if (ret) { + DRM_ERROR("r300_emit_raw_packet3 failed\n"); + return ret; + } + break; + + default: + DRM_ERROR("bad packet3 type %i at %p\n", + header.packet3.packet, + cmdbuf->buf - sizeof(header)); + return DRM_ERR(EINVAL); + } + + n += R300_SIMULTANEOUS_CLIPRECTS; + } while(n < cmdbuf->nbox); + + return 0; +} + +/* Some of the R300 chips seem to be extremely touchy about the two registers + * that are configured in r300_pacify. + * Among the worst offenders seems to be the R300 ND (0x4E44): When userspace + * sends a command buffer that contains only state setting commands and a + * vertex program/parameter upload sequence, this will eventually lead to a + * lockup, unless the sequence is bracketed by calls to r300_pacify. + * So we should take great care to *always* call r300_pacify before + * *anything* 3D related, and again afterwards. This is what the + * call bracket in r300_do_cp_cmdbuf is for. + */ + +/** + * Emit the sequence to pacify R300. + */ +static __inline__ void r300_pacify(drm_radeon_private_t* dev_priv) +{ + RING_LOCALS; + + BEGIN_RING(6); + OUT_RING( CP_PACKET0( R300_RB3D_DSTCACHE_CTLSTAT, 0 ) ); + OUT_RING( 0xa ); + OUT_RING( CP_PACKET0( 0x4f18, 0 ) ); + OUT_RING( 0x3 ); + OUT_RING( CP_PACKET3( RADEON_CP_NOP, 0 ) ); + OUT_RING( 0x0 ); + ADVANCE_RING(); +} + + +/** + * Called by r300_do_cp_cmdbuf to update the internal buffer age and state. 
+ * The actual age emit is done by r300_do_cp_cmdbuf, which is why you must + * be careful about how this function is called. + */ +static void r300_discard_buffer(drm_device_t * dev, drm_buf_t * buf) +{ + drm_radeon_private_t *dev_priv = dev->dev_private; + drm_radeon_buf_priv_t *buf_priv = buf->dev_private; + + buf_priv->age = ++dev_priv->sarea_priv->last_dispatch; + buf->pending = 1; + buf->used = 0; +} + + +/** + * Parses and validates a user-supplied command buffer and emits appropriate + * commands on the DMA ring buffer. + * Called by the ioctl handler function radeon_cp_cmdbuf. + */ +int r300_do_cp_cmdbuf(drm_device_t* dev, + DRMFILE filp, + drm_file_t* filp_priv, + drm_radeon_cmd_buffer_t* cmdbuf) +{ + drm_radeon_private_t *dev_priv = dev->dev_private; + drm_device_dma_t *dma = dev->dma; + drm_buf_t *buf = NULL; + int emit_dispatch_age = 0; + int ret = 0; + + DRM_DEBUG("\n"); + + /* See the comment above r300_emit_begin3d for why this call must be here, + * and what the cleanup gotos are for. 
*/ + r300_pacify(dev_priv); + + if (cmdbuf->nbox <= R300_SIMULTANEOUS_CLIPRECTS) { + ret = r300_emit_cliprects(dev_priv, cmdbuf, 0); + if (ret) + goto cleanup; + } + + while(cmdbuf->bufsz >= sizeof(drm_r300_cmd_header_t)) { + int idx; + drm_r300_cmd_header_t header; + + header.u = *(unsigned int *)cmdbuf->buf; + + cmdbuf->buf += sizeof(header); + cmdbuf->bufsz -= sizeof(header); + + switch(header.header.cmd_type) { + case R300_CMD_PACKET0: + DRM_DEBUG("R300_CMD_PACKET0\n"); + ret = r300_emit_packet0(dev_priv, cmdbuf, header); + if (ret) { + DRM_ERROR("r300_emit_packet0 failed\n"); + goto cleanup; + } + break; + + case R300_CMD_VPU: + DRM_DEBUG("R300_CMD_VPU\n"); + ret = r300_emit_vpu(dev_priv, cmdbuf, header); + if (ret) { + DRM_ERROR("r300_emit_vpu failed\n"); + goto cleanup; + } + break; + + case R300_CMD_PACKET3: + DRM_DEBUG("R300_CMD_PACKET3\n"); + ret = r300_emit_packet3(dev_priv, cmdbuf, header); + if (ret) { + DRM_ERROR("r300_emit_packet3 failed\n"); + goto cleanup; + } + break; + + case R300_CMD_END3D: + DRM_DEBUG("R300_CMD_END3D\n"); + /* TODO: + Ideally userspace driver should not need to issue this call, + i.e. the drm driver should issue it automatically and prevent + lockups. + + In practice, we do not understand why this call is needed and what + it does (except for some vague guesses that it has to do with cache + coherence) and so the user space driver does it. + + Once we are sure which uses prevent lockups the code could be moved + into the kernel and the userspace driver will not + need to use this command. 
+ + Note that issuing this command does not hurt anything + except, possibly, performance */ + r300_pacify(dev_priv); + break; + + case R300_CMD_CP_DELAY: + /* simple enough, we can do it here */ + DRM_DEBUG("R300_CMD_CP_DELAY\n"); + { + int i; + RING_LOCALS; + + BEGIN_RING(header.delay.count); + for(i=0;i= dma->buf_count) { + DRM_ERROR("buffer index %d (of %d max)\n", + idx, dma->buf_count - 1); + ret = DRM_ERR(EINVAL); + goto cleanup; + } + + buf = dma->buflist[idx]; + if (buf->filp != filp || buf->pending) { + DRM_ERROR("bad buffer %p %p %d\n", + buf->filp, filp, buf->pending); + ret = DRM_ERR(EINVAL); + goto cleanup; + } + + emit_dispatch_age = 1; + r300_discard_buffer(dev, buf); + break; + + case R300_CMD_WAIT: + /* simple enough, we can do it here */ + DRM_DEBUG("R300_CMD_WAIT\n"); + if(header.wait.flags==0)break; /* nothing to do */ + + { + RING_LOCALS; + + BEGIN_RING(2); + OUT_RING( CP_PACKET0( RADEON_WAIT_UNTIL, 0 ) ); + OUT_RING( (header.wait.flags & 0xf)<<14 ); + ADVANCE_RING(); + } + break; + + default: + DRM_ERROR("bad cmd_type %i at %p\n", + header.header.cmd_type, + cmdbuf->buf - sizeof(header)); + ret = DRM_ERR(EINVAL); + goto cleanup; + } + } + + DRM_DEBUG("END\n"); + +cleanup: + r300_pacify(dev_priv); + + /* We emit the vertex buffer age here, outside the pacifier "brackets" + * for two reasons: + * (1) This may coalesce multiple age emissions into a single one and + * (2) more importantly, some chips lock up hard when scratch registers + * are written inside the pacifier bracket. 
+ */ + if (emit_dispatch_age) { + RING_LOCALS; + + /* Emit the vertex buffer age */ + BEGIN_RING(2); + RADEON_DISPATCH_AGE(dev_priv->sarea_priv->last_dispatch); + ADVANCE_RING(); + } + + COMMIT_RING(); + + return ret; +} + diff --git a/drivers/char/drm/r300_reg.h b/drivers/char/drm/r300_reg.h new file mode 100644 index 000000000000..c3e7ca3dbe3d --- /dev/null +++ b/drivers/char/drm/r300_reg.h @@ -0,0 +1,1412 @@ +/************************************************************************** + +Copyright (C) 2004-2005 Nicolai Haehnle et al. + +Permission is hereby granted, free of charge, to any person obtaining a +copy of this software and associated documentation files (the "Software"), +to deal in the Software without restriction, including without limitation +on the rights to use, copy, modify, merge, publish, distribute, sub +license, and/or sell copies of the Software, and to permit persons to whom +the Software is furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice (including the next +paragraph) shall be included in all copies or substantial portions of the +Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL +THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, +DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR +OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE +USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ +**************************************************************************/ + +#ifndef _R300_REG_H +#define _R300_REG_H + +#define R300_MC_INIT_MISC_LAT_TIMER 0x180 +# define R300_MC_MISC__MC_CPR_INIT_LAT_SHIFT 0 +# define R300_MC_MISC__MC_VF_INIT_LAT_SHIFT 4 +# define R300_MC_MISC__MC_DISP0R_INIT_LAT_SHIFT 8 +# define R300_MC_MISC__MC_DISP1R_INIT_LAT_SHIFT 12 +# define R300_MC_MISC__MC_FIXED_INIT_LAT_SHIFT 16 +# define R300_MC_MISC__MC_E2R_INIT_LAT_SHIFT 20 +# define R300_MC_MISC__MC_SAME_PAGE_PRIO_SHIFT 24 +# define R300_MC_MISC__MC_GLOBW_INIT_LAT_SHIFT 28 + + +#define R300_MC_INIT_GFX_LAT_TIMER 0x154 +# define R300_MC_MISC__MC_G3D0R_INIT_LAT_SHIFT 0 +# define R300_MC_MISC__MC_G3D1R_INIT_LAT_SHIFT 4 +# define R300_MC_MISC__MC_G3D2R_INIT_LAT_SHIFT 8 +# define R300_MC_MISC__MC_G3D3R_INIT_LAT_SHIFT 12 +# define R300_MC_MISC__MC_TX0R_INIT_LAT_SHIFT 16 +# define R300_MC_MISC__MC_TX1R_INIT_LAT_SHIFT 20 +# define R300_MC_MISC__MC_GLOBR_INIT_LAT_SHIFT 24 +# define R300_MC_MISC__MC_GLOBW_FULL_LAT_SHIFT 28 + +/* +This file contains registers and constants for the R300. They have been +found mostly by examining command buffers captured using glxtest, as well +as by extrapolating some known registers and constants from the R200. + +I am fairly certain that they are correct unless stated otherwise in comments. 
+*/ + +#define R300_SE_VPORT_XSCALE 0x1D98 +#define R300_SE_VPORT_XOFFSET 0x1D9C +#define R300_SE_VPORT_YSCALE 0x1DA0 +#define R300_SE_VPORT_YOFFSET 0x1DA4 +#define R300_SE_VPORT_ZSCALE 0x1DA8 +#define R300_SE_VPORT_ZOFFSET 0x1DAC + + +/* This register is written directly and also starts data section in many 3d CP_PACKET3's */ +#define R300_VAP_VF_CNTL 0x2084 + +# define R300_VAP_VF_CNTL__PRIM_TYPE__SHIFT 0 +# define R300_VAP_VF_CNTL__PRIM_NONE (0<<0) +# define R300_VAP_VF_CNTL__PRIM_POINTS (1<<0) +# define R300_VAP_VF_CNTL__PRIM_LINES (2<<0) +# define R300_VAP_VF_CNTL__PRIM_LINE_STRIP (3<<0) +# define R300_VAP_VF_CNTL__PRIM_TRIANGLES (4<<0) +# define R300_VAP_VF_CNTL__PRIM_TRIANGLE_FAN (5<<0) +# define R300_VAP_VF_CNTL__PRIM_TRIANGLE_STRIP (6<<0) +# define R300_VAP_VF_CNTL__PRIM_LINE_LOOP (12<<0) +# define R300_VAP_VF_CNTL__PRIM_QUADS (13<<0) +# define R300_VAP_VF_CNTL__PRIM_QUAD_STRIP (14<<0) +# define R300_VAP_VF_CNTL__PRIM_POLYGON (15<<0) + +# define R300_VAP_VF_CNTL__PRIM_WALK__SHIFT 4 + /* State based - direct writes to registers trigger vertex generation */ +# define R300_VAP_VF_CNTL__PRIM_WALK_STATE_BASED (0<<4) +# define R300_VAP_VF_CNTL__PRIM_WALK_INDICES (1<<4) +# define R300_VAP_VF_CNTL__PRIM_WALK_VERTEX_LIST (2<<4) +# define R300_VAP_VF_CNTL__PRIM_WALK_VERTEX_EMBEDDED (3<<4) + + /* I don't think I saw these three used.. 
*/ +# define R300_VAP_VF_CNTL__COLOR_ORDER__SHIFT 6 +# define R300_VAP_VF_CNTL__TCL_OUTPUT_CTL_ENA__SHIFT 9 +# define R300_VAP_VF_CNTL__PROG_STREAM_ENA__SHIFT 10 + + /* index size - when not set the indices are assumed to be 16 bit */ +# define R300_VAP_VF_CNTL__INDEX_SIZE_32bit (1<<11) + /* number of vertices */ +# define R300_VAP_VF_CNTL__NUM_VERTICES__SHIFT 16 + +/* BEGIN: Wild guesses */ +#define R300_VAP_OUTPUT_VTX_FMT_0 0x2090 +# define R300_VAP_OUTPUT_VTX_FMT_0__POS_PRESENT (1<<0) +# define R300_VAP_OUTPUT_VTX_FMT_0__COLOR_PRESENT (1<<1) +# define R300_VAP_OUTPUT_VTX_FMT_0__COLOR_1_PRESENT (1<<2) /* GUESS */ +# define R300_VAP_OUTPUT_VTX_FMT_0__COLOR_2_PRESENT (1<<3) /* GUESS */ +# define R300_VAP_OUTPUT_VTX_FMT_0__COLOR_3_PRESENT (1<<4) /* GUESS */ +# define R300_VAP_OUTPUT_VTX_FMT_0__PT_SIZE_PRESENT (1<<16) /* GUESS */ + +#define R300_VAP_OUTPUT_VTX_FMT_1 0x2094 +# define R300_VAP_OUTPUT_VTX_FMT_1__TEX_0_COMP_CNT_SHIFT 0 +# define R300_VAP_OUTPUT_VTX_FMT_1__TEX_1_COMP_CNT_SHIFT 3 +# define R300_VAP_OUTPUT_VTX_FMT_1__TEX_2_COMP_CNT_SHIFT 6 +# define R300_VAP_OUTPUT_VTX_FMT_1__TEX_3_COMP_CNT_SHIFT 9 +# define R300_VAP_OUTPUT_VTX_FMT_1__TEX_4_COMP_CNT_SHIFT 12 +# define R300_VAP_OUTPUT_VTX_FMT_1__TEX_5_COMP_CNT_SHIFT 15 +# define R300_VAP_OUTPUT_VTX_FMT_1__TEX_6_COMP_CNT_SHIFT 18 +# define R300_VAP_OUTPUT_VTX_FMT_1__TEX_7_COMP_CNT_SHIFT 21 +/* END */ + +#define R300_SE_VTE_CNTL 0x20b0 +# define R300_VPORT_X_SCALE_ENA 0x00000001 +# define R300_VPORT_X_OFFSET_ENA 0x00000002 +# define R300_VPORT_Y_SCALE_ENA 0x00000004 +# define R300_VPORT_Y_OFFSET_ENA 0x00000008 +# define R300_VPORT_Z_SCALE_ENA 0x00000010 +# define R300_VPORT_Z_OFFSET_ENA 0x00000020 +# define R300_VTX_XY_FMT 0x00000100 +# define R300_VTX_Z_FMT 0x00000200 +# define R300_VTX_W0_FMT 0x00000400 +# define R300_VTX_W0_NORMALIZE 0x00000800 +# define R300_VTX_ST_DENORMALIZED 0x00001000 + +/* BEGIN: Vertex data assembly - lots of uncertainties */ +/* gap */ +/* Where do we get our vertex data? 
+// +// Vertex data comes either from immediate mode registers or from +// vertex arrays. +// There appears to be no mixed mode (though we can force the pitch of +// vertex arrays to 0, effectively reusing the same element over and over +// again). +// +// Immediate mode is controlled by the INPUT_CNTL registers. I am not sure +// if these registers influence vertex array processing. +// +// Vertex arrays are controlled via the 3D_LOAD_VBPNTR packet3. +// +// In both cases, vertex attributes are then passed through INPUT_ROUTE. + +// Beginning with INPUT_ROUTE_0_0 is a list of WORDs that route vertex data +// into the vertex processor's input registers. +// The first word routes the first input, the second word the second, etc. +// The corresponding input is routed into the register with the given index. +// The list is ended by a word with INPUT_ROUTE_END set. +// +// Always set COMPONENTS_4 in immediate mode. */ + +#define R300_VAP_INPUT_ROUTE_0_0 0x2150 +# define R300_INPUT_ROUTE_COMPONENTS_1 (0 << 0) +# define R300_INPUT_ROUTE_COMPONENTS_2 (1 << 0) +# define R300_INPUT_ROUTE_COMPONENTS_3 (2 << 0) +# define R300_INPUT_ROUTE_COMPONENTS_4 (3 << 0) +# define R300_INPUT_ROUTE_COMPONENTS_RGBA (4 << 0) /* GUESS */ +# define R300_VAP_INPUT_ROUTE_IDX_SHIFT 8 +# define R300_VAP_INPUT_ROUTE_IDX_MASK (31 << 8) /* GUESS */ +# define R300_VAP_INPUT_ROUTE_END (1 << 13) +# define R300_INPUT_ROUTE_IMMEDIATE_MODE (0 << 14) /* GUESS */ +# define R300_INPUT_ROUTE_FLOAT (1 << 14) /* GUESS */ +# define R300_INPUT_ROUTE_UNSIGNED_BYTE (2 << 14) /* GUESS */ +# define R300_INPUT_ROUTE_FLOAT_COLOR (3 << 14) /* GUESS */ +#define R300_VAP_INPUT_ROUTE_0_1 0x2154 +#define R300_VAP_INPUT_ROUTE_0_2 0x2158 +#define R300_VAP_INPUT_ROUTE_0_3 0x215C +#define R300_VAP_INPUT_ROUTE_0_4 0x2160 +#define R300_VAP_INPUT_ROUTE_0_5 0x2164 +#define R300_VAP_INPUT_ROUTE_0_6 0x2168 +#define R300_VAP_INPUT_ROUTE_0_7 0x216C + +/* gap */ +/* Notes: +// - always set up to produce at least two attributes: 
+// if vertex program uses only position, fglrx will set normal, too +// - INPUT_CNTL_0_COLOR and INPUT_CNTL_COLOR bits are always equal */ +#define R300_VAP_INPUT_CNTL_0 0x2180 +# define R300_INPUT_CNTL_0_COLOR 0x00000001 +#define R300_VAP_INPUT_CNTL_1 0x2184 +# define R300_INPUT_CNTL_POS 0x00000001 +# define R300_INPUT_CNTL_NORMAL 0x00000002 +# define R300_INPUT_CNTL_COLOR 0x00000004 +# define R300_INPUT_CNTL_TC0 0x00000400 +# define R300_INPUT_CNTL_TC1 0x00000800 +# define R300_INPUT_CNTL_TC2 0x00001000 /* GUESS */ +# define R300_INPUT_CNTL_TC3 0x00002000 /* GUESS */ +# define R300_INPUT_CNTL_TC4 0x00004000 /* GUESS */ +# define R300_INPUT_CNTL_TC5 0x00008000 /* GUESS */ +# define R300_INPUT_CNTL_TC6 0x00010000 /* GUESS */ +# define R300_INPUT_CNTL_TC7 0x00020000 /* GUESS */ + +/* gap */ +/* Words parallel to INPUT_ROUTE_0; All words that are active in INPUT_ROUTE_0 +// are set to a swizzling bit pattern, other words are 0. +// +// In immediate mode, the pattern is always set to xyzw. In vertex array +// mode, the swizzling pattern is e.g. used to set zw components in texture +// coordinates with only two components. 
*/ +#define R300_VAP_INPUT_ROUTE_1_0 0x21E0 +# define R300_INPUT_ROUTE_SELECT_X 0 +# define R300_INPUT_ROUTE_SELECT_Y 1 +# define R300_INPUT_ROUTE_SELECT_Z 2 +# define R300_INPUT_ROUTE_SELECT_W 3 +# define R300_INPUT_ROUTE_SELECT_ZERO 4 +# define R300_INPUT_ROUTE_SELECT_ONE 5 +# define R300_INPUT_ROUTE_SELECT_MASK 7 +# define R300_INPUT_ROUTE_X_SHIFT 0 +# define R300_INPUT_ROUTE_Y_SHIFT 3 +# define R300_INPUT_ROUTE_Z_SHIFT 6 +# define R300_INPUT_ROUTE_W_SHIFT 9 +# define R300_INPUT_ROUTE_ENABLE (15 << 12) +#define R300_VAP_INPUT_ROUTE_1_1 0x21E4 +#define R300_VAP_INPUT_ROUTE_1_2 0x21E8 +#define R300_VAP_INPUT_ROUTE_1_3 0x21EC +#define R300_VAP_INPUT_ROUTE_1_4 0x21F0 +#define R300_VAP_INPUT_ROUTE_1_5 0x21F4 +#define R300_VAP_INPUT_ROUTE_1_6 0x21F8 +#define R300_VAP_INPUT_ROUTE_1_7 0x21FC + +/* END */ + +/* gap */ +/* BEGIN: Upload vertex program and data +// The programmable vertex shader unit has a memory bank of unknown size +// that can be written to in 16 byte units by writing the address into +// UPLOAD_ADDRESS, followed by data in UPLOAD_DATA (multiples of 4 DWORDs). +// +// Pointers into the memory bank are always in multiples of 16 bytes. +// +// The memory bank is divided into areas with fixed meaning. +// +// Starting at address UPLOAD_PROGRAM: Vertex program instructions. +// Native limits reported by drivers from ATI suggest size 256 (i.e. 4KB), +// whereas the difference between known addresses suggests size 512. +// +// Starting at address UPLOAD_PARAMETERS: Vertex program parameters. +// Native reported limits and the VPI layout suggest size 256, whereas +// difference between known addresses suggests size 512. +// +// At address UPLOAD_POINTSIZE is a vector (0, 0, ps, 0), where ps is the +// floating point pointsize. The exact purpose of this state is uncertain, +// as there is also the R300_RE_POINTSIZE register. +// +// Multiple vertex programs and parameter sets can be loaded at once, +// which could explain the size discrepancy. 
*/ +#define R300_VAP_PVS_UPLOAD_ADDRESS 0x2200 +# define R300_PVS_UPLOAD_PROGRAM 0x00000000 +# define R300_PVS_UPLOAD_PARAMETERS 0x00000200 +# define R300_PVS_UPLOAD_POINTSIZE 0x00000406 +/* gap */ +#define R300_VAP_PVS_UPLOAD_DATA 0x2208 +/* END */ + +/* gap */ +/* I do not know the purpose of this register. However, I do know that +// it is set to 221C_CLEAR for clear operations and to 221C_NORMAL +// for normal rendering. */ +#define R300_VAP_UNKNOWN_221C 0x221C +# define R300_221C_NORMAL 0x00000000 +# define R300_221C_CLEAR 0x0001C000 + +/* gap */ +/* Sometimes, END_OF_PKT and 0x2284=0 are the only commands sent between +// rendering commands and overwriting vertex program parameters. +// Therefore, I suspect writing zero to 0x2284 synchronizes the engine and +// avoids bugs caused by still running shaders reading bad data from memory. */ +#define R300_VAP_PVS_WAITIDLE 0x2284 /* GUESS */ + +/* Absolutely no clue what this register is about. */ +#define R300_VAP_UNKNOWN_2288 0x2288 +# define R300_2288_R300 0x00750000 /* -- nh */ +# define R300_2288_RV350 0x0000FFFF /* -- Vladimir */ + +/* gap */ +/* Addresses are relative to the vertex program instruction area of the +// memory bank. PROGRAM_END points to the last instruction of the active +// program +// +// The meaning of the two UNKNOWN fields is obviously not known. However, +// experiments so far have shown that both *must* point to an instruction +// inside the vertex program, otherwise the GPU locks up. +// fglrx usually sets CNTL_3_UNKNOWN to the end of the program and +// CNTL_1_UNKNOWN points to the instruction where the last write to position takes place. +// Most likely this is used to ignore the rest of the program in cases where a group of verts aren't visible. +// For some reason this "section" sometimes accepts other instructions that have +// no relationship with position calculations. 
+*/ +#define R300_VAP_PVS_CNTL_1 0x22D0 +# define R300_PVS_CNTL_1_PROGRAM_START_SHIFT 0 +# define R300_PVS_CNTL_1_POS_END_SHIFT 10 +# define R300_PVS_CNTL_1_PROGRAM_END_SHIFT 20 +/* Addresses are relative to the vertex program parameters area. */ +#define R300_VAP_PVS_CNTL_2 0x22D4 +# define R300_PVS_CNTL_2_PARAM_OFFSET_SHIFT 0 +# define R300_PVS_CNTL_2_PARAM_COUNT_SHIFT 16 +#define R300_VAP_PVS_CNTL_3 0x22D8 +# define R300_PVS_CNTL_3_PROGRAM_UNKNOWN_SHIFT 10 +# define R300_PVS_CNTL_3_PROGRAM_UNKNOWN2_SHIFT 0 + +/* The entire range from 0x2300 to 0x2AC inclusive seems to be used for +// immediate vertices */ +#define R300_VAP_VTX_COLOR_R 0x2464 +#define R300_VAP_VTX_COLOR_G 0x2468 +#define R300_VAP_VTX_COLOR_B 0x246C +#define R300_VAP_VTX_POS_0_X_1 0x2490 /* used for glVertex2*() */ +#define R300_VAP_VTX_POS_0_Y_1 0x2494 +#define R300_VAP_VTX_COLOR_PKD 0x249C /* RGBA */ +#define R300_VAP_VTX_POS_0_X_2 0x24A0 /* used for glVertex3*() */ +#define R300_VAP_VTX_POS_0_Y_2 0x24A4 +#define R300_VAP_VTX_POS_0_Z_2 0x24A8 +#define R300_VAP_VTX_END_OF_PKT 0x24AC /* write 0 to indicate end of packet? 
*/ + +/* gap */ + +/* These are values from r300_reg/r300_reg.h - they are known to be correct + and are here so we can use one register file instead of several + - Vladimir */ +#define R300_GB_VAP_RASTER_VTX_FMT_0 0x4000 +# define R300_GB_VAP_RASTER_VTX_FMT_0__POS_PRESENT (1<<0) +# define R300_GB_VAP_RASTER_VTX_FMT_0__COLOR_0_PRESENT (1<<1) +# define R300_GB_VAP_RASTER_VTX_FMT_0__COLOR_1_PRESENT (1<<2) +# define R300_GB_VAP_RASTER_VTX_FMT_0__COLOR_2_PRESENT (1<<3) +# define R300_GB_VAP_RASTER_VTX_FMT_0__COLOR_3_PRESENT (1<<4) +# define R300_GB_VAP_RASTER_VTX_FMT_0__COLOR_SPACE (0xf<<5) +# define R300_GB_VAP_RASTER_VTX_FMT_0__PT_SIZE_PRESENT (0x1<<16) + +#define R300_GB_VAP_RASTER_VTX_FMT_1 0x4004 + /* each of the following is 3 bits wide, specifies number + of components */ +# define R300_GB_VAP_RASTER_VTX_FMT_1__TEX_0_COMP_CNT_SHIFT 0 +# define R300_GB_VAP_RASTER_VTX_FMT_1__TEX_1_COMP_CNT_SHIFT 3 +# define R300_GB_VAP_RASTER_VTX_FMT_1__TEX_2_COMP_CNT_SHIFT 6 +# define R300_GB_VAP_RASTER_VTX_FMT_1__TEX_3_COMP_CNT_SHIFT 9 +# define R300_GB_VAP_RASTER_VTX_FMT_1__TEX_4_COMP_CNT_SHIFT 12 +# define R300_GB_VAP_RASTER_VTX_FMT_1__TEX_5_COMP_CNT_SHIFT 15 +# define R300_GB_VAP_RASTER_VTX_FMT_1__TEX_6_COMP_CNT_SHIFT 18 +# define R300_GB_VAP_RASTER_VTX_FMT_1__TEX_7_COMP_CNT_SHIFT 21 + +/* UNK30 seems to enables point to quad transformation on textures + (or something closely related to that). 
+ This bit is rather fatal at the time being due to lackings at pixel shader side */ +#define R300_GB_ENABLE 0x4008 +# define R300_GB_POINT_STUFF_ENABLE (1<<0) +# define R300_GB_LINE_STUFF_ENABLE (1<<1) +# define R300_GB_TRIANGLE_STUFF_ENABLE (1<<2) +# define R300_GB_STENCIL_AUTO_ENABLE (1<<4) +# define R300_GB_UNK30 (1<<30) + /* each of the following is 2 bits wide */ +#define R300_GB_TEX_REPLICATE 0 +#define R300_GB_TEX_ST 1 +#define R300_GB_TEX_STR 2 +# define R300_GB_TEX0_SOURCE_SHIFT 16 +# define R300_GB_TEX1_SOURCE_SHIFT 18 +# define R300_GB_TEX2_SOURCE_SHIFT 20 +# define R300_GB_TEX3_SOURCE_SHIFT 22 +# define R300_GB_TEX4_SOURCE_SHIFT 24 +# define R300_GB_TEX5_SOURCE_SHIFT 26 +# define R300_GB_TEX6_SOURCE_SHIFT 28 +# define R300_GB_TEX7_SOURCE_SHIFT 30 + +/* MSPOS - positions for multisample antialiasing (?) */ +#define R300_GB_MSPOS0 0x4010 + /* shifts - each of the fields is 4 bits */ +# define R300_GB_MSPOS0__MS_X0_SHIFT 0 +# define R300_GB_MSPOS0__MS_Y0_SHIFT 4 +# define R300_GB_MSPOS0__MS_X1_SHIFT 8 +# define R300_GB_MSPOS0__MS_Y1_SHIFT 12 +# define R300_GB_MSPOS0__MS_X2_SHIFT 16 +# define R300_GB_MSPOS0__MS_Y2_SHIFT 20 +# define R300_GB_MSPOS0__MSBD0_Y 24 +# define R300_GB_MSPOS0__MSBD0_X 28 + +#define R300_GB_MSPOS1 0x4014 +# define R300_GB_MSPOS1__MS_X3_SHIFT 0 +# define R300_GB_MSPOS1__MS_Y3_SHIFT 4 +# define R300_GB_MSPOS1__MS_X4_SHIFT 8 +# define R300_GB_MSPOS1__MS_Y4_SHIFT 12 +# define R300_GB_MSPOS1__MS_X5_SHIFT 16 +# define R300_GB_MSPOS1__MS_Y5_SHIFT 20 +# define R300_GB_MSPOS1__MSBD1 24 + + +#define R300_GB_TILE_CONFIG 0x4018 +# define R300_GB_TILE_ENABLE (1<<0) +# define R300_GB_TILE_PIPE_COUNT_RV300 0 +# define R300_GB_TILE_PIPE_COUNT_R300 (3<<1) +# define R300_GB_TILE_PIPE_COUNT_R420 (7<<1) +# define R300_GB_TILE_SIZE_8 0 +# define R300_GB_TILE_SIZE_16 (1<<4) +# define R300_GB_TILE_SIZE_32 (2<<4) +# define R300_GB_SUPER_SIZE_1 (0<<6) +# define R300_GB_SUPER_SIZE_2 (1<<6) +# define R300_GB_SUPER_SIZE_4 (2<<6) +# define R300_GB_SUPER_SIZE_8 
(3<<6) +# define R300_GB_SUPER_SIZE_16 (4<<6) +# define R300_GB_SUPER_SIZE_32 (5<<6) +# define R300_GB_SUPER_SIZE_64 (6<<6) +# define R300_GB_SUPER_SIZE_128 (7<<6) +# define R300_GB_SUPER_X_SHIFT 9 /* 3 bits wide */ +# define R300_GB_SUPER_Y_SHIFT 12 /* 3 bits wide */ +# define R300_GB_SUPER_TILE_A 0 +# define R300_GB_SUPER_TILE_B (1<<15) +# define R300_GB_SUBPIXEL_1_12 0 +# define R300_GB_SUBPIXEL_1_16 (1<<16) + +#define R300_GB_FIFO_SIZE 0x4024 + /* each of the following is 2 bits wide */ +#define R300_GB_FIFO_SIZE_32 0 +#define R300_GB_FIFO_SIZE_64 1 +#define R300_GB_FIFO_SIZE_128 2 +#define R300_GB_FIFO_SIZE_256 3 +# define R300_SC_IFIFO_SIZE_SHIFT 0 +# define R300_SC_TZFIFO_SIZE_SHIFT 2 +# define R300_SC_BFIFO_SIZE_SHIFT 4 + +# define R300_US_OFIFO_SIZE_SHIFT 12 +# define R300_US_WFIFO_SIZE_SHIFT 14 + /* the following use the same constants as above, but meaning is + is times 2 (i.e. instead of 32 words it means 64 */ +# define R300_RS_TFIFO_SIZE_SHIFT 6 +# define R300_RS_CFIFO_SIZE_SHIFT 8 +# define R300_US_RAM_SIZE_SHIFT 10 + /* watermarks, 3 bits wide */ +# define R300_RS_HIGHWATER_COL_SHIFT 16 +# define R300_RS_HIGHWATER_TEX_SHIFT 19 +# define R300_OFIFO_HIGHWATER_SHIFT 22 /* two bits only */ +# define R300_CUBE_FIFO_HIGHWATER_COL_SHIFT 24 + +#define R300_GB_SELECT 0x401C +# define R300_GB_FOG_SELECT_C0A 0 +# define R300_GB_FOG_SELECT_C1A 1 +# define R300_GB_FOG_SELECT_C2A 2 +# define R300_GB_FOG_SELECT_C3A 3 +# define R300_GB_FOG_SELECT_1_1_W 4 +# define R300_GB_FOG_SELECT_Z 5 +# define R300_GB_DEPTH_SELECT_Z 0 +# define R300_GB_DEPTH_SELECT_1_1_W (1<<3) +# define R300_GB_W_SELECT_1_W 0 +# define R300_GB_W_SELECT_1 (1<<4) + +#define R300_GB_AA_CONFIG 0x4020 +# define R300_AA_ENABLE 0x01 +# define R300_AA_SUBSAMPLES_2 0 +# define R300_AA_SUBSAMPLES_3 (1<<1) +# define R300_AA_SUBSAMPLES_4 (2<<1) +# define R300_AA_SUBSAMPLES_6 (3<<1) + +/* END */ + +/* gap */ +/* The upper enable bits are guessed, based on fglrx reported limits. 
*/ +#define R300_TX_ENABLE 0x4104 +# define R300_TX_ENABLE_0 (1 << 0) +# define R300_TX_ENABLE_1 (1 << 1) +# define R300_TX_ENABLE_2 (1 << 2) +# define R300_TX_ENABLE_3 (1 << 3) +# define R300_TX_ENABLE_4 (1 << 4) +# define R300_TX_ENABLE_5 (1 << 5) +# define R300_TX_ENABLE_6 (1 << 6) +# define R300_TX_ENABLE_7 (1 << 7) +# define R300_TX_ENABLE_8 (1 << 8) +# define R300_TX_ENABLE_9 (1 << 9) +# define R300_TX_ENABLE_10 (1 << 10) +# define R300_TX_ENABLE_11 (1 << 11) +# define R300_TX_ENABLE_12 (1 << 12) +# define R300_TX_ENABLE_13 (1 << 13) +# define R300_TX_ENABLE_14 (1 << 14) +# define R300_TX_ENABLE_15 (1 << 15) + +/* The pointsize is given in multiples of 6. The pointsize can be +// enormous: Clear() renders a single point that fills the entire +// framebuffer. */ +#define R300_RE_POINTSIZE 0x421C +# define R300_POINTSIZE_Y_SHIFT 0 +# define R300_POINTSIZE_Y_MASK (0xFFFF << 0) /* GUESS */ +# define R300_POINTSIZE_X_SHIFT 16 +# define R300_POINTSIZE_X_MASK (0xFFFF << 16) /* GUESS */ +# define R300_POINTSIZE_MAX (R300_POINTSIZE_Y_MASK / 6) + +/* The line width is given in multiples of 6. + In default mode lines are classified as vertical lines. + HO: horizontal + VE: vertical or horizontal + HO & VE: no classification +*/ +#define R300_RE_LINE_CNT 0x4234 +# define R300_LINESIZE_SHIFT 0 +# define R300_LINESIZE_MASK (0xFFFF << 0) /* GUESS */ +# define R300_LINESIZE_MAX (R300_LINESIZE_MASK / 6) +# define R300_LINE_CNT_HO (1 << 16) +# define R300_LINE_CNT_VE (1 << 17) + +/* Some sort of scale or clamp value for texcoordless textures. 
*/ +#define R300_RE_UNK4238 0x4238 + +#define R300_RE_SHADE_MODEL 0x4278 +# define R300_RE_SHADE_MODEL_SMOOTH 0x3aaaa +# define R300_RE_SHADE_MODEL_FLAT 0x39595 + +/* Dangerous */ +#define R300_RE_POLYGON_MODE 0x4288 +# define R300_PM_ENABLED (1 << 0) +# define R300_PM_FRONT_POINT (0 << 0) +# define R300_PM_BACK_POINT (0 << 0) +# define R300_PM_FRONT_LINE (1 << 4) +# define R300_PM_FRONT_FILL (1 << 5) +# define R300_PM_BACK_LINE (1 << 7) +# define R300_PM_BACK_FILL (1 << 8) + +/* Not sure why there are duplicate of factor and constant values. + My best guess so far is that there are seperate zbiases for test and write. + Ordering might be wrong. + Some of the tests indicate that fgl has a fallback implementation of zbias + via pixel shaders. */ +#define R300_RE_ZBIAS_T_FACTOR 0x42A4 +#define R300_RE_ZBIAS_T_CONSTANT 0x42A8 +#define R300_RE_ZBIAS_W_FACTOR 0x42AC +#define R300_RE_ZBIAS_W_CONSTANT 0x42B0 + +/* This register needs to be set to (1<<1) for RV350 to correctly + perform depth test (see --vb-triangles in r300_demo) + Don't know about other chips. - Vladimir + This is set to 3 when GL_POLYGON_OFFSET_FILL is on. + My guess is that there are two bits for each zbias primitive (FILL, LINE, POINT). + One to enable depth test and one for depth write. + Yet this doesnt explain why depth writes work ... + */ +#define R300_RE_OCCLUSION_CNTL 0x42B4 +# define R300_OCCLUSION_ON (1<<1) + +#define R300_RE_CULL_CNTL 0x42B8 +# define R300_CULL_FRONT (1 << 0) +# define R300_CULL_BACK (1 << 1) +# define R300_FRONT_FACE_CCW (0 << 2) +# define R300_FRONT_FACE_CW (1 << 2) + + +/* BEGIN: Rasterization / Interpolators - many guesses +// 0_UNKNOWN_18 has always been set except for clear operations. +// TC_CNT is the number of incoming texture coordinate sets (i.e. 
it depends +// on the vertex program, *not* the fragment program) */ +#define R300_RS_CNTL_0 0x4300 +# define R300_RS_CNTL_TC_CNT_SHIFT 2 +# define R300_RS_CNTL_TC_CNT_MASK (7 << 2) +# define R300_RS_CNTL_CI_CNT_SHIFT 7 /* number of color interpolators used */ +# define R300_RS_CNTL_0_UNKNOWN_18 (1 << 18) +/* Guess: RS_CNTL_1 holds the index of the highest used RS_ROUTE_n register. */ +#define R300_RS_CNTL_1 0x4304 + +/* gap */ +/* Only used for texture coordinates. +// Use the source field to route texture coordinate input from the vertex program +// to the desired interpolator. Note that the source field is relative to the +// outputs the vertex program *actually* writes. If a vertex program only writes +// texcoord[1], this will be source index 0. +// Set INTERP_USED on all interpolators that produce data used by the +// fragment program. INTERP_USED looks like a swizzling mask, but +// I haven't seen it used that way. +// +// Note: The _UNKNOWN constants are always set in their respective register. +// I don't know if this is necessary. */ +#define R300_RS_INTERP_0 0x4310 +#define R300_RS_INTERP_1 0x4314 +# define R300_RS_INTERP_1_UNKNOWN 0x40 +#define R300_RS_INTERP_2 0x4318 +# define R300_RS_INTERP_2_UNKNOWN 0x80 +#define R300_RS_INTERP_3 0x431C +# define R300_RS_INTERP_3_UNKNOWN 0xC0 +#define R300_RS_INTERP_4 0x4320 +#define R300_RS_INTERP_5 0x4324 +#define R300_RS_INTERP_6 0x4328 +#define R300_RS_INTERP_7 0x432C +# define R300_RS_INTERP_SRC_SHIFT 2 +# define R300_RS_INTERP_SRC_MASK (7 << 2) +# define R300_RS_INTERP_USED 0x00D10000 + +/* These DWORDs control how vertex data is routed into fragment program +// registers, after interpolators. 
*/ +#define R300_RS_ROUTE_0 0x4330 +#define R300_RS_ROUTE_1 0x4334 +#define R300_RS_ROUTE_2 0x4338 +#define R300_RS_ROUTE_3 0x433C /* GUESS */ +#define R300_RS_ROUTE_4 0x4340 /* GUESS */ +#define R300_RS_ROUTE_5 0x4344 /* GUESS */ +#define R300_RS_ROUTE_6 0x4348 /* GUESS */ +#define R300_RS_ROUTE_7 0x434C /* GUESS */ +# define R300_RS_ROUTE_SOURCE_INTERP_0 0 +# define R300_RS_ROUTE_SOURCE_INTERP_1 1 +# define R300_RS_ROUTE_SOURCE_INTERP_2 2 +# define R300_RS_ROUTE_SOURCE_INTERP_3 3 +# define R300_RS_ROUTE_SOURCE_INTERP_4 4 +# define R300_RS_ROUTE_SOURCE_INTERP_5 5 /* GUESS */ +# define R300_RS_ROUTE_SOURCE_INTERP_6 6 /* GUESS */ +# define R300_RS_ROUTE_SOURCE_INTERP_7 7 /* GUESS */ +# define R300_RS_ROUTE_ENABLE (1 << 3) /* GUESS */ +# define R300_RS_ROUTE_DEST_SHIFT 6 +# define R300_RS_ROUTE_DEST_MASK (31 << 6) /* GUESS */ + +/* Special handling for color: When the fragment program uses color, +// the ROUTE_0_COLOR bit is set and ROUTE_0_COLOR_DEST contains the +// color register index. */ +# define R300_RS_ROUTE_0_COLOR (1 << 14) +# define R300_RS_ROUTE_0_COLOR_DEST_SHIFT 17 +# define R300_RS_ROUTE_0_COLOR_DEST_MASK (31 << 17) /* GUESS */ +/* As above, but for secondary color */ +# define R300_RS_ROUTE_1_COLOR1 (1 << 14) +# define R300_RS_ROUTE_1_COLOR1_DEST_SHIFT 17 +# define R300_RS_ROUTE_1_COLOR1_DEST_MASK (31 << 17) +# define R300_RS_ROUTE_1_UNKNOWN11 (1 << 11) +/* END */ + +/* BEGIN: Scissors and cliprects +// There are four clipping rectangles. Their corner coordinates are inclusive. +// Every pixel is assigned a number from 0 and 15 by setting bits 0-3 depending +// on whether the pixel is inside cliprects 0-3, respectively. For example, +// if a pixel is inside cliprects 0 and 1, but outside 2 and 3, it is assigned +// the number 3 (binary 0011). +// Iff the bit corresponding to the pixel's number in RE_CLIPRECT_CNTL is set, +// the pixel is rasterized. +// +// In addition to this, there is a scissors rectangle. 
Only pixels inside the +// scissors rectangle are drawn. (coordinates are inclusive) +// +// For some reason, the top-left corner of the framebuffer is at (1440, 1440) +// for the purpose of clipping and scissors. */ +#define R300_RE_CLIPRECT_TL_0 0x43B0 +#define R300_RE_CLIPRECT_BR_0 0x43B4 +#define R300_RE_CLIPRECT_TL_1 0x43B8 +#define R300_RE_CLIPRECT_BR_1 0x43BC +#define R300_RE_CLIPRECT_TL_2 0x43C0 +#define R300_RE_CLIPRECT_BR_2 0x43C4 +#define R300_RE_CLIPRECT_TL_3 0x43C8 +#define R300_RE_CLIPRECT_BR_3 0x43CC +# define R300_CLIPRECT_OFFSET 1440 +# define R300_CLIPRECT_MASK 0x1FFF +# define R300_CLIPRECT_X_SHIFT 0 +# define R300_CLIPRECT_X_MASK (0x1FFF << 0) +# define R300_CLIPRECT_Y_SHIFT 13 +# define R300_CLIPRECT_Y_MASK (0x1FFF << 13) +#define R300_RE_CLIPRECT_CNTL 0x43D0 +# define R300_CLIP_OUT (1 << 0) +# define R300_CLIP_0 (1 << 1) +# define R300_CLIP_1 (1 << 2) +# define R300_CLIP_10 (1 << 3) +# define R300_CLIP_2 (1 << 4) +# define R300_CLIP_20 (1 << 5) +# define R300_CLIP_21 (1 << 6) +# define R300_CLIP_210 (1 << 7) +# define R300_CLIP_3 (1 << 8) +# define R300_CLIP_30 (1 << 9) +# define R300_CLIP_31 (1 << 10) +# define R300_CLIP_310 (1 << 11) +# define R300_CLIP_32 (1 << 12) +# define R300_CLIP_320 (1 << 13) +# define R300_CLIP_321 (1 << 14) +# define R300_CLIP_3210 (1 << 15) + +/* gap */ +#define R300_RE_SCISSORS_TL 0x43E0 +#define R300_RE_SCISSORS_BR 0x43E4 +# define R300_SCISSORS_OFFSET 1440 +# define R300_SCISSORS_X_SHIFT 0 +# define R300_SCISSORS_X_MASK (0x1FFF << 0) +# define R300_SCISSORS_Y_SHIFT 13 +# define R300_SCISSORS_Y_MASK (0x1FFF << 13) +/* END */ + +/* BEGIN: Texture specification +// The texture specification dwords are grouped by meaning and not by texture unit. +// This means that e.g. 
the offset for texture image unit N is found in register +// TX_OFFSET_0 + (4*N) */ +#define R300_TX_FILTER_0 0x4400 +# define R300_TX_REPEAT 0 +# define R300_TX_MIRRORED 1 +# define R300_TX_CLAMP 4 +# define R300_TX_CLAMP_TO_EDGE 2 +# define R300_TX_CLAMP_TO_BORDER 6 +# define R300_TX_WRAP_S_SHIFT 0 +# define R300_TX_WRAP_S_MASK (7 << 0) +# define R300_TX_WRAP_T_SHIFT 3 +# define R300_TX_WRAP_T_MASK (7 << 3) +# define R300_TX_WRAP_Q_SHIFT 6 +# define R300_TX_WRAP_Q_MASK (7 << 6) +# define R300_TX_MAG_FILTER_NEAREST (1 << 9) +# define R300_TX_MAG_FILTER_LINEAR (2 << 9) +# define R300_TX_MAG_FILTER_MASK (3 << 9) +# define R300_TX_MIN_FILTER_NEAREST (1 << 11) +# define R300_TX_MIN_FILTER_LINEAR (2 << 11) +# define R300_TX_MIN_FILTER_NEAREST_MIP_NEAREST (5 << 11) +# define R300_TX_MIN_FILTER_NEAREST_MIP_LINEAR (9 << 11) +# define R300_TX_MIN_FILTER_LINEAR_MIP_NEAREST (6 << 11) +# define R300_TX_MIN_FILTER_LINEAR_MIP_LINEAR (10 << 11) + +/* NOTE: NEAREST doesnt seem to exist. + Im not seting MAG_FILTER_MASK and (3 << 11) on for all + anisotropy modes because that would void selected mag filter */ +# define R300_TX_MIN_FILTER_ANISO_NEAREST ((0 << 13) /*|R300_TX_MAG_FILTER_MASK|(3<<11)*/) +# define R300_TX_MIN_FILTER_ANISO_LINEAR ((0 << 13) /*|R300_TX_MAG_FILTER_MASK|(3<<11)*/) +# define R300_TX_MIN_FILTER_ANISO_NEAREST_MIP_NEAREST ((1 << 13) /*|R300_TX_MAG_FILTER_MASK|(3<<11)*/) +# define R300_TX_MIN_FILTER_ANISO_NEAREST_MIP_LINEAR ((2 << 13) /*|R300_TX_MAG_FILTER_MASK|(3<<11)*/) +# define R300_TX_MIN_FILTER_MASK ( (15 << 11) | (3 << 13) ) +# define R300_TX_MAX_ANISO_1_TO_1 (0 << 21) +# define R300_TX_MAX_ANISO_2_TO_1 (2 << 21) +# define R300_TX_MAX_ANISO_4_TO_1 (4 << 21) +# define R300_TX_MAX_ANISO_8_TO_1 (6 << 21) +# define R300_TX_MAX_ANISO_16_TO_1 (8 << 21) +# define R300_TX_MAX_ANISO_MASK (14 << 21) + +#define R300_TX_UNK1_0 0x4440 +# define R300_LOD_BIAS_MASK 0x1fff + +#define R300_TX_SIZE_0 0x4480 +# define R300_TX_WIDTHMASK_SHIFT 0 +# define 
R300_TX_WIDTHMASK_MASK (2047 << 0) +# define R300_TX_HEIGHTMASK_SHIFT 11 +# define R300_TX_HEIGHTMASK_MASK (2047 << 11) +# define R300_TX_UNK23 (1 << 23) +# define R300_TX_SIZE_SHIFT 26 /* largest of width, height */ +# define R300_TX_SIZE_MASK (15 << 26) +#define R300_TX_FORMAT_0 0x44C0 + /* The interpretation of the format word by Wladimir van der Laan */ + /* The X, Y, Z and W refer to the layout of the components. + They are given meanings as R, G, B and Alpha by the swizzle + specification */ +# define R300_TX_FORMAT_X8 0x0 +# define R300_TX_FORMAT_X16 0x1 +# define R300_TX_FORMAT_Y4X4 0x2 +# define R300_TX_FORMAT_Y8X8 0x3 +# define R300_TX_FORMAT_Y16X16 0x4 +# define R300_TX_FORMAT_Z3Y3X2 0x5 +# define R300_TX_FORMAT_Z5Y6X5 0x6 +# define R300_TX_FORMAT_Z6Y5X5 0x7 +# define R300_TX_FORMAT_Z11Y11X10 0x8 +# define R300_TX_FORMAT_Z10Y11X11 0x9 +# define R300_TX_FORMAT_W4Z4Y4X4 0xA +# define R300_TX_FORMAT_W1Z5Y5X5 0xB +# define R300_TX_FORMAT_W8Z8Y8X8 0xC +# define R300_TX_FORMAT_W2Z10Y10X10 0xD +# define R300_TX_FORMAT_W16Z16Y16X16 0xE +# define R300_TX_FORMAT_DXT1 0xF +# define R300_TX_FORMAT_DXT3 0x10 +# define R300_TX_FORMAT_DXT5 0x11 +# define R300_TX_FORMAT_D3DMFT_CxV8U8 0x12 /* no swizzle */ +# define R300_TX_FORMAT_A8R8G8B8 0x13 /* no swizzle */ +# define R300_TX_FORMAT_B8G8_B8G8 0x14 /* no swizzle */ +# define R300_TX_FORMAT_G8R8_G8B8 0x15 /* no swizzle */ + /* 0x16 - some 16 bit green format.. ?? 
*/ +# define R300_TX_FORMAT_UNK25 (1 << 25) /* no swizzle */ + + /* gap */ + /* Floating point formats */ + /* Note - hardware supports both 16 and 32 bit floating point */ +# define R300_TX_FORMAT_FL_I16 0x18 +# define R300_TX_FORMAT_FL_I16A16 0x19 +# define R300_TX_FORMAT_FL_R16G16B16A16 0x1A +# define R300_TX_FORMAT_FL_I32 0x1B +# define R300_TX_FORMAT_FL_I32A32 0x1C +# define R300_TX_FORMAT_FL_R32G32B32A32 0x1D + /* alpha modes, convenience mostly */ + /* if you have alpha, pick constant appropriate to the + number of channels (1 for I8, 2 for I8A8, 4 for R8G8B8A8, etc */ +# define R300_TX_FORMAT_ALPHA_1CH 0x000 +# define R300_TX_FORMAT_ALPHA_2CH 0x200 +# define R300_TX_FORMAT_ALPHA_4CH 0x600 +# define R300_TX_FORMAT_ALPHA_NONE 0xA00 + /* Swizzling */ + /* constants */ +# define R300_TX_FORMAT_X 0 +# define R300_TX_FORMAT_Y 1 +# define R300_TX_FORMAT_Z 2 +# define R300_TX_FORMAT_W 3 +# define R300_TX_FORMAT_ZERO 4 +# define R300_TX_FORMAT_ONE 5 +# define R300_TX_FORMAT_CUT_Z 6 /* 2.0*Z, everything above 1.0 is set to 0.0 */ +# define R300_TX_FORMAT_CUT_W 7 /* 2.0*W, everything above 1.0 is set to 0.0 */ + +# define R300_TX_FORMAT_B_SHIFT 18 +# define R300_TX_FORMAT_G_SHIFT 15 +# define R300_TX_FORMAT_R_SHIFT 12 +# define R300_TX_FORMAT_A_SHIFT 9 + /* Convenience macro to take care of layout and swizzling */ +# define R300_EASY_TX_FORMAT(B, G, R, A, FMT) (\ + ((R300_TX_FORMAT_##B)<is_pci ) { + /* set RADEON_AGP_BASE here instead of relying on X from user space */ + RADEON_WRITE(RADEON_AGP_BASE, (unsigned int)dev->agp->base); RADEON_WRITE( RADEON_CP_RB_RPTR_ADDR, dev_priv->ring_rptr->offset - dev->agp->base @@ -1626,6 +1629,9 @@ int radeon_cp_init( DRM_IOCTL_ARGS ) DRM_COPY_FROM_USER_IOCTL( init, (drm_radeon_init_t __user *)data, sizeof(init) ); + if(init.func == RADEON_INIT_R300_CP) + r300_init_reg_flags(); + switch ( init.func ) { case RADEON_INIT_CP: case RADEON_INIT_R200_CP: @@ -2040,12 +2046,19 @@ int radeon_driver_preinit(struct drm_device *dev, unsigned 
long flags) case CHIP_RV200: case CHIP_R200: case CHIP_R300: + case CHIP_R420: dev_priv->flags |= CHIP_HAS_HIERZ; break; default: /* all other chips have no hierarchical z buffer */ break; } + + if (drm_device_is_agp(dev)) + dev_priv->flags |= CHIP_IS_AGP; + + DRM_DEBUG("%s card detected\n", + ((dev_priv->flags & CHIP_IS_AGP) ? "AGP" : "PCI")); return ret; } diff --git a/drivers/char/drm/radeon_drm.h b/drivers/char/drm/radeon_drm.h index c1e62d047989..3792798270a4 100644 --- a/drivers/char/drm/radeon_drm.h +++ b/drivers/char/drm/radeon_drm.h @@ -195,6 +195,52 @@ typedef union { #define RADEON_WAIT_2D 0x1 #define RADEON_WAIT_3D 0x2 +/* Allowed parameters for R300_CMD_PACKET3 + */ +#define R300_CMD_PACKET3_CLEAR 0 +#define R300_CMD_PACKET3_RAW 1 + +/* Commands understood by cmd_buffer ioctl for R300. + * The interface has not been stabilized, so some of these may be removed + * and eventually reordered before stabilization. + */ +#define R300_CMD_PACKET0 1 +#define R300_CMD_VPU 2 /* emit vertex program upload */ +#define R300_CMD_PACKET3 3 /* emit a packet3 */ +#define R300_CMD_END3D 4 /* emit sequence ending 3d rendering */ +#define R300_CMD_CP_DELAY 5 +#define R300_CMD_DMA_DISCARD 6 +#define R300_CMD_WAIT 7 +# define R300_WAIT_2D 0x1 +# define R300_WAIT_3D 0x2 +# define R300_WAIT_2D_CLEAN 0x3 +# define R300_WAIT_3D_CLEAN 0x4 + +typedef union { + unsigned int u; + struct { + unsigned char cmd_type, pad0, pad1, pad2; + } header; + struct { + unsigned char cmd_type, count, reglo, reghi; + } packet0; + struct { + unsigned char cmd_type, count, adrlo, adrhi; + } vpu; + struct { + unsigned char cmd_type, packet, pad0, pad1; + } packet3; + struct { + unsigned char cmd_type, packet; + unsigned short count; /* amount of packet2 to emit */ + } delay; + struct { + unsigned char cmd_type, buf_idx, pad0, pad1; + } dma; + struct { + unsigned char cmd_type, flags, pad0, pad1; + } wait; +} drm_r300_cmd_header_t; #define RADEON_FRONT 0x1 #define RADEON_BACK 0x2 diff --git 
a/drivers/char/drm/radeon_drv.h b/drivers/char/drm/radeon_drv.h index e701dffe978d..f12a963ede18 100644 --- a/drivers/char/drm/radeon_drv.h +++ b/drivers/char/drm/radeon_drv.h @@ -82,9 +82,10 @@ * - Add support for r100 cube maps * 1.16- Add R200_EMIT_PP_TRI_PERF_CNTL packet to support brilinear * texture filtering on r200 + * 1.17- Add initial support for R300 (3D). */ #define DRIVER_MAJOR 1 -#define DRIVER_MINOR 16 +#define DRIVER_MINOR 17 #define DRIVER_PATCHLEVEL 0 #define GET_RING_HEAD(dev_priv) DRM_READ32( (dev_priv)->ring_rptr, 0 ) @@ -106,7 +107,9 @@ enum radeon_family { CHIP_RV280, CHIP_R300, CHIP_RS300, + CHIP_R350, CHIP_RV350, + CHIP_R420, CHIP_LAST, }; @@ -321,6 +324,14 @@ extern int radeon_postcleanup( struct drm_device *dev ); extern long radeon_compat_ioctl(struct file *filp, unsigned int cmd, unsigned long arg); + +/* r300_cmdbuf.c */ +extern void r300_init_reg_flags(void); + +extern int r300_do_cp_cmdbuf(drm_device_t* dev, DRMFILE filp, + drm_file_t* filp_priv, + drm_radeon_cmd_buffer_t* cmdbuf); + /* Flags for stats.boxes */ #define RADEON_BOX_DMA_IDLE 0x1 @@ -358,6 +369,11 @@ extern long radeon_compat_ioctl(struct file *filp, unsigned int cmd, #define RADEON_CRTC2_OFFSET 0x0324 #define RADEON_CRTC2_OFFSET_CNTL 0x0328 +#define RADEON_MPP_TB_CONFIG 0x01c0 +#define RADEON_MEM_CNTL 0x0140 +#define RADEON_MEM_SDRAM_MODE_REG 0x0158 +#define RADEON_AGP_BASE 0x0170 + #define RADEON_RB3D_COLOROFFSET 0x1c40 #define RADEON_RB3D_COLORPITCH 0x1c48 @@ -652,16 +668,27 @@ extern long radeon_compat_ioctl(struct file *filp, unsigned int cmd, #define RADEON_CP_PACKET1 0x40000000 #define RADEON_CP_PACKET2 0x80000000 #define RADEON_CP_PACKET3 0xC0000000 +# define RADEON_CP_NOP 0x00001000 +# define RADEON_CP_NEXT_CHAR 0x00001900 +# define RADEON_CP_PLY_NEXTSCAN 0x00001D00 +# define RADEON_CP_SET_SCISSORS 0x00001E00 + /* GEN_INDX_PRIM is unsupported starting with R300 */ # define RADEON_3D_RNDR_GEN_INDX_PRIM 0x00002300 # define RADEON_WAIT_FOR_IDLE 0x00002600 # define 
RADEON_3D_DRAW_VBUF 0x00002800 # define RADEON_3D_DRAW_IMMD 0x00002900 # define RADEON_3D_DRAW_INDX 0x00002A00 +# define RADEON_CP_LOAD_PALETTE 0x00002C00 # define RADEON_3D_LOAD_VBPNTR 0x00002F00 # define RADEON_MPEG_IDCT_MACROBLOCK 0x00003000 # define RADEON_MPEG_IDCT_MACROBLOCK_REV 0x00003100 # define RADEON_3D_CLEAR_ZMASK 0x00003200 +# define RADEON_CP_INDX_BUFFER 0x00003300 +# define RADEON_CP_3D_DRAW_VBUF_2 0x00003400 +# define RADEON_CP_3D_DRAW_IMMD_2 0x00003500 +# define RADEON_CP_3D_DRAW_INDX_2 0x00003600 # define RADEON_3D_CLEAR_HIZ 0x00003700 +# define RADEON_CP_3D_CLEAR_CMASK 0x00003802 # define RADEON_CNTL_HOSTDATA_BLT 0x00009400 # define RADEON_CNTL_PAINT_MULTI 0x00009A00 # define RADEON_CNTL_BITBLT_MULTI 0x00009B00 diff --git a/drivers/char/drm/radeon_state.c b/drivers/char/drm/radeon_state.c index 1f79e249146c..d57accdd8df5 100644 --- a/drivers/char/drm/radeon_state.c +++ b/drivers/char/drm/radeon_state.c @@ -2797,6 +2797,17 @@ static int radeon_cp_cmdbuf( DRM_IOCTL_ARGS ) orig_nbox = cmdbuf.nbox; + if(dev_priv->microcode_version == UCODE_R300) { + int temp; + temp=r300_do_cp_cmdbuf(dev, filp, filp_priv, &cmdbuf); + + if (orig_bufsz != 0) + drm_free(kbuf, orig_bufsz, DRM_MEM_DRIVER); + + return temp; + } + + /* microcode_version != r300 */ while ( cmdbuf.bufsz >= sizeof(header) ) { header.i = *(int *)cmdbuf.buf; From c8b432dc0c8d635254010513ca1a3a10a77037a1 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Tue, 16 Aug 2005 20:54:18 +1000 Subject: [PATCH 094/584] drm: update pci ids for savage and via Fixup savage and via pci ids From: Dave Airlie --- drivers/char/drm/drm_pciids.h | 55 ++++++++++++++++------------------- 1 file changed, 25 insertions(+), 30 deletions(-) diff --git a/drivers/char/drm/drm_pciids.h b/drivers/char/drm/drm_pciids.h index 1874c1fd1717..58b1747cd440 100644 --- a/drivers/char/drm/drm_pciids.h +++ b/drivers/char/drm/drm_pciids.h @@ -176,9 +176,10 @@ #define viadrv_PCI_IDS \ {0x1106, 0x3022, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0}, \ 
+ {0x1106, 0x3118, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0}, \ {0x1106, 0x3122, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0}, \ {0x1106, 0x7205, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0}, \ - {0x1106, 0x7204, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0}, \ + {0x1106, 0x3108, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0}, \ {0, 0, 0} #define i810_PCI_IDS \ @@ -196,28 +197,29 @@ {0, 0, 0} #define savage_PCI_IDS \ - {0x5333, 0x8a22, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0}, \ - {0x5333, 0x8a23, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0}, \ - {0x5333, 0x8c10, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0}, \ - {0x5333, 0x8c11, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0}, \ - {0x5333, 0x8c12, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0}, \ - {0x5333, 0x8c13, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0}, \ - {0x5333, 0x8c20, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0}, \ - {0x5333, 0x8c21, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0}, \ - {0x5333, 0x8c22, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0}, \ - {0x5333, 0x8c24, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0}, \ - {0x5333, 0x8c26, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0}, \ - {0x5333, 0x8c2a, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0}, \ - {0x5333, 0x8c2b, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0}, \ - {0x5333, 0x8c2c, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0}, \ - {0x5333, 0x8c2d, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0}, \ - {0x5333, 0x8c2e, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0}, \ - {0x5333, 0x8c2f, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0}, \ - {0x5333, 0x8a25, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0}, \ - {0x5333, 0x8a26, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0}, \ - {0x5333, 0x8d01, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0}, \ - {0x5333, 0x8d02, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0}, \ - {0x5333, 0x8d04, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0}, \ + {0x5333, 0x8a20, PCI_ANY_ID, PCI_ANY_ID, 0, 0, S3_SAVAGE3D}, \ + {0x5333, 0x8a21, PCI_ANY_ID, PCI_ANY_ID, 0, 0, S3_SAVAGE3D}, \ + {0x5333, 0x8a22, PCI_ANY_ID, PCI_ANY_ID, 0, 0, S3_SAVAGE4}, \ + {0x5333, 0x8a23, PCI_ANY_ID, PCI_ANY_ID, 0, 0, S3_SAVAGE4}, \ + {0x5333, 0x8c10, PCI_ANY_ID, PCI_ANY_ID, 0, 0, S3_SAVAGE_MX}, \ + {0x5333, 0x8c11, PCI_ANY_ID, PCI_ANY_ID, 0, 0, S3_SAVAGE_MX}, \ + {0x5333, 0x8c12, PCI_ANY_ID, 
PCI_ANY_ID, 0, 0, S3_SAVAGE_MX}, \ + {0x5333, 0x8c13, PCI_ANY_ID, PCI_ANY_ID, 0, 0, S3_SAVAGE_MX}, \ + {0x5333, 0x8c22, PCI_ANY_ID, PCI_ANY_ID, 0, 0, S3_SUPERSAVAGE}, \ + {0x5333, 0x8c24, PCI_ANY_ID, PCI_ANY_ID, 0, 0, S3_SUPERSAVAGE}, \ + {0x5333, 0x8c26, PCI_ANY_ID, PCI_ANY_ID, 0, 0, S3_SUPERSAVAGE}, \ + {0x5333, 0x8c2a, PCI_ANY_ID, PCI_ANY_ID, 0, 0, S3_SUPERSAVAGE}, \ + {0x5333, 0x8c2b, PCI_ANY_ID, PCI_ANY_ID, 0, 0, S3_SUPERSAVAGE}, \ + {0x5333, 0x8c2c, PCI_ANY_ID, PCI_ANY_ID, 0, 0, S3_SUPERSAVAGE}, \ + {0x5333, 0x8c2d, PCI_ANY_ID, PCI_ANY_ID, 0, 0, S3_SUPERSAVAGE}, \ + {0x5333, 0x8c2e, PCI_ANY_ID, PCI_ANY_ID, 0, 0, S3_SUPERSAVAGE}, \ + {0x5333, 0x8c2f, PCI_ANY_ID, PCI_ANY_ID, 0, 0, S3_SUPERSAVAGE}, \ + {0x5333, 0x8a25, PCI_ANY_ID, PCI_ANY_ID, 0, 0, S3_PROSAVAGE}, \ + {0x5333, 0x8a26, PCI_ANY_ID, PCI_ANY_ID, 0, 0, S3_PROSAVAGE}, \ + {0x5333, 0x8d01, PCI_ANY_ID, PCI_ANY_ID, 0, 0, S3_TWISTER}, \ + {0x5333, 0x8d02, PCI_ANY_ID, PCI_ANY_ID, 0, 0, S3_TWISTER}, \ + {0x5333, 0x8d03, PCI_ANY_ID, PCI_ANY_ID, 0, 0, S3_PROSAVAGEDDR}, \ + {0x5333, 0x8d04, PCI_ANY_ID, PCI_ANY_ID, 0, 0, S3_PROSAVAGEDDR}, \ {0, 0, 0} #define ffb_PCI_IDS \ @@ -233,10 +235,3 @@ {0x8086, 0x2772, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0}, \ {0, 0, 0} -#define viadrv_PCI_IDS \ - {0x1106, 0x3022, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0}, \ - {0x1106, 0x3122, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0}, \ - {0x1106, 0x7205, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0}, \ - {0x1106, 0x7204, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0}, \ - {0, 0, 0} - From 050ec18a35f3106437da8e9c55e441c076c7b93e Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Tue, 16 Aug 2005 14:00:54 -0700 Subject: [PATCH 095/584] [PATCH] skge: stop bogus sensor messages Some versions of the Marvell yukon generate bogus sensor warning interrupts. The driver would flood log with these messages. Handle this situation cleanly by masking away at boot time. 
Fixes: http://bugs.gentoo.org/show_bug.cgi?id=87182 Signed-off-by: Stephen Hemminger drivers/net/skge.c | 24 ++++++++++-------------- drivers/net/skge.h | 8 ++++++-- 2 files changed, 16 insertions(+), 16 deletions(-) Signed-off-by: Jeff Garzik --- drivers/net/skge.c | 24 ++++++++++-------------- drivers/net/skge.h | 8 ++++++-- 2 files changed, 16 insertions(+), 16 deletions(-) diff --git a/drivers/net/skge.c b/drivers/net/skge.c index f15739481d62..9ff1261f07ca 100644 --- a/drivers/net/skge.c +++ b/drivers/net/skge.c @@ -2670,18 +2670,6 @@ static void skge_error_irq(struct skge_hw *hw) /* Timestamp (unused) overflow */ if (hwstatus & IS_IRQ_TIST_OV) skge_write8(hw, GMAC_TI_ST_CTRL, GMT_ST_CLR_IRQ); - - if (hwstatus & IS_IRQ_SENSOR) { - /* no sensors on 32-bit Yukon */ - if (!(skge_read16(hw, B0_CTST) & CS_BUS_SLOT_SZ)) { - printk(KERN_ERR PFX "ignoring bogus sensor interrups\n"); - skge_write32(hw, B0_HWE_IMSK, - IS_ERR_MSK & ~IS_IRQ_SENSOR); - } else - printk(KERN_WARNING PFX "sensor interrupt\n"); - } - - } if (hwstatus & IS_RAM_RD_PAR) { @@ -2712,9 +2700,10 @@ static void skge_error_irq(struct skge_hw *hw) skge_pci_clear(hw); + /* if error still set then just ignore it */ hwstatus = skge_read32(hw, B0_HWE_ISRC); if (hwstatus & IS_IRQ_STAT) { - printk(KERN_WARNING PFX "IRQ status %x: still set ignoring hardware errors\n", + pr_debug("IRQ status %x: still set ignoring hardware errors\n", hwstatus); hw->intr_mask &= ~IS_HW_ERR; } @@ -2948,12 +2937,20 @@ static int skge_reset(struct skge_hw *hw) else hw->ram_size = t8 * 4096; + hw->intr_mask = IS_HW_ERR | IS_EXT_REG; if (hw->chip_id == CHIP_ID_GENESIS) genesis_init(hw); else { /* switch power to VCC (WA for VAUX problem) */ skge_write8(hw, B0_POWER_CTRL, PC_VAUX_ENA | PC_VCC_ENA | PC_VAUX_OFF | PC_VCC_ON); + /* avoid boards with stuck Hardware error bits */ + if ((skge_read32(hw, B0_ISRC) & IS_HW_ERR) && + (skge_read32(hw, B0_HWE_ISRC) & IS_IRQ_SENSOR)) { + printk(KERN_WARNING PFX "stuck hardware sensor bit\n"); + 
hw->intr_mask &= ~IS_HW_ERR; + } + for (i = 0; i < hw->ports; i++) { skge_write16(hw, SK_REG(i, GMAC_LINK_CTRL), GMLC_RST_SET); skge_write16(hw, SK_REG(i, GMAC_LINK_CTRL), GMLC_RST_CLR); @@ -2994,7 +2991,6 @@ static int skge_reset(struct skge_hw *hw) skge_write32(hw, B2_IRQM_INI, skge_usecs2clk(hw, 100)); skge_write32(hw, B2_IRQM_CTRL, TIM_START); - hw->intr_mask = IS_HW_ERR | IS_EXT_REG; skge_write32(hw, B0_IMSK, hw->intr_mask); if (hw->chip_id != CHIP_ID_GENESIS) diff --git a/drivers/net/skge.h b/drivers/net/skge.h index b432f1bb8168..636729fcbbaa 100644 --- a/drivers/net/skge.h +++ b/drivers/net/skge.h @@ -214,8 +214,6 @@ enum { /* B2_IRQM_HWE_MSK 32 bit IRQ Moderation HW Error Mask */ enum { - IS_ERR_MSK = 0x00003fff,/* All Error bits */ - IS_IRQ_TIST_OV = 1<<13, /* Time Stamp Timer Overflow (YUKON only) */ IS_IRQ_SENSOR = 1<<12, /* IRQ from Sensor (YUKON only) */ IS_IRQ_MST_ERR = 1<<11, /* IRQ master error detected */ @@ -230,6 +228,12 @@ enum { IS_M2_PAR_ERR = 1<<2, /* MAC 2 Parity Error */ IS_R1_PAR_ERR = 1<<1, /* Queue R1 Parity Error */ IS_R2_PAR_ERR = 1<<0, /* Queue R2 Parity Error */ + + IS_ERR_MSK = IS_IRQ_MST_ERR | IS_IRQ_STAT + | IS_NO_STAT_M1 | IS_NO_STAT_M2 + | IS_RAM_RD_PAR | IS_RAM_WR_PAR + | IS_M1_PAR_ERR | IS_M2_PAR_ERR + | IS_R1_PAR_ERR | IS_R2_PAR_ERR, }; /* B2_TST_CTRL1 8 bit Test Control Register 1 */ From 5e1705ddc83f77da4b29a6d687da14e971912e41 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Tue, 16 Aug 2005 14:00:58 -0700 Subject: [PATCH 096/584] [PATCH] skge: fibre vs copper detection cleanup Cleanup the code that handles fibre vs copper detection. 
Signed-off-by: Stephen Hemminger drivers/net/skge.c | 26 ++++++++++++-------------- drivers/net/skge.h | 11 ++--------- 2 files changed, 14 insertions(+), 23 deletions(-) Signed-off-by: Jeff Garzik --- drivers/net/skge.c | 26 ++++++++++++-------------- drivers/net/skge.h | 11 ++--------- 2 files changed, 14 insertions(+), 23 deletions(-) diff --git a/drivers/net/skge.c b/drivers/net/skge.c index 9ff1261f07ca..3990829d3c46 100644 --- a/drivers/net/skge.c +++ b/drivers/net/skge.c @@ -189,7 +189,7 @@ static u32 skge_supported_modes(const struct skge_hw *hw) { u32 supported; - if (iscopper(hw)) { + if (hw->copper) { supported = SUPPORTED_10baseT_Half | SUPPORTED_10baseT_Full | SUPPORTED_100baseT_Half @@ -222,7 +222,7 @@ static int skge_get_settings(struct net_device *dev, ecmd->transceiver = XCVR_INTERNAL; ecmd->supported = skge_supported_modes(hw); - if (iscopper(hw)) { + if (hw->copper) { ecmd->port = PORT_TP; ecmd->phy_address = hw->phy_addr; } else @@ -1599,7 +1599,7 @@ static void yukon_init(struct skge_hw *hw, int port) adv = PHY_AN_CSMA; if (skge->autoneg == AUTONEG_ENABLE) { - if (iscopper(hw)) { + if (hw->copper) { if (skge->advertising & ADVERTISED_1000baseT_Full) ct1000 |= PHY_M_1000C_AFD; if (skge->advertising & ADVERTISED_1000baseT_Half) @@ -1691,7 +1691,7 @@ static void yukon_mac_init(struct skge_hw *hw, int port) /* Set hardware config mode */ reg = GPC_INT_POL_HI | GPC_DIS_FC | GPC_DIS_SLEEP | GPC_ENA_XC | GPC_ANEG_ADV_ALL_M | GPC_ENA_PAUSE; - reg |= iscopper(hw) ? GPC_HWCFG_GMII_COP : GPC_HWCFG_GMII_FIB; + reg |= hw->copper ? 
GPC_HWCFG_GMII_COP : GPC_HWCFG_GMII_FIB; /* Clear GMC reset */ skge_write32(hw, SK_REG(port, GPHY_CTRL), reg | GPC_RST_SET); @@ -2865,7 +2865,7 @@ static const char *skge_board_name(const struct skge_hw *hw) static int skge_reset(struct skge_hw *hw) { u16 ctst; - u8 t8, mac_cfg; + u8 t8, mac_cfg, pmd_type, phy_type; int i; ctst = skge_read16(hw, B0_CTST); @@ -2884,18 +2884,19 @@ static int skge_reset(struct skge_hw *hw) ctst & (CS_CLK_RUN_HOT|CS_CLK_RUN_RST|CS_CLK_RUN_ENA)); hw->chip_id = skge_read8(hw, B2_CHIP_ID); - hw->phy_type = skge_read8(hw, B2_E_1) & 0xf; - hw->pmd_type = skge_read8(hw, B2_PMD_TYP); + phy_type = skge_read8(hw, B2_E_1) & 0xf; + pmd_type = skge_read8(hw, B2_PMD_TYP); + hw->copper = (pmd_type == 'T' || pmd_type == '1'); switch (hw->chip_id) { case CHIP_ID_GENESIS: - switch (hw->phy_type) { + switch (phy_type) { case SK_PHY_BCOM: hw->phy_addr = PHY_ADDR_BCOM; break; default: printk(KERN_ERR PFX "%s: unsupported phy type 0x%x\n", - pci_name(hw->pdev), hw->phy_type); + pci_name(hw->pdev), phy_type); return -EOPNOTSUPP; } break; @@ -2903,13 +2904,10 @@ static int skge_reset(struct skge_hw *hw) case CHIP_ID_YUKON: case CHIP_ID_YUKON_LITE: case CHIP_ID_YUKON_LP: - if (hw->phy_type < SK_PHY_MARV_COPPER && hw->pmd_type != 'S') - hw->phy_type = SK_PHY_MARV_COPPER; + if (phy_type < SK_PHY_MARV_COPPER && pmd_type != 'S') + hw->copper = 1; hw->phy_addr = PHY_ADDR_MARV; - if (!iscopper(hw)) - hw->phy_type = SK_PHY_MARV_FIBER; - break; default: diff --git a/drivers/net/skge.h b/drivers/net/skge.h index 636729fcbbaa..f1680beb8e68 100644 --- a/drivers/net/skge.h +++ b/drivers/net/skge.h @@ -2460,24 +2460,17 @@ struct skge_hw { u8 chip_id; u8 chip_rev; - u8 phy_type; - u8 pmd_type; - u16 phy_addr; + u8 copper; u8 ports; u32 ram_size; u32 ram_offset; + u16 phy_addr; struct tasklet_struct ext_tasklet; spinlock_t phy_lock; }; - -static inline int iscopper(const struct skge_hw *hw) -{ - return (hw->pmd_type == 'T'); -} - enum { FLOW_MODE_NONE = 0, /* No 
Flow-Control */ FLOW_MODE_LOC_SEND = 1, /* Local station sends PAUSE */ From c59230818f7a8969c2f9d3b601745679127a4016 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Tue, 16 Aug 2005 14:01:02 -0700 Subject: [PATCH 097/584] [PATCH] skge: increase receive flush threshold default The flush threshold in the MAC chip should be increased. Found while reviewing vendor version of sk98lin driver. Signed-off-by: Stephen Hemminger Signed-off-by: Jeff Garzik --- drivers/net/skge.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/net/skge.c b/drivers/net/skge.c index 3990829d3c46..38fc66a1e14c 100644 --- a/drivers/net/skge.c +++ b/drivers/net/skge.c @@ -1780,7 +1780,12 @@ static void yukon_mac_init(struct skge_hw *hw, int port) reg &= ~GMF_RX_F_FL_ON; skge_write8(hw, SK_REG(port, RX_GMF_CTRL_T), GMF_RST_CLR); skge_write16(hw, SK_REG(port, RX_GMF_CTRL_T), reg); - skge_write16(hw, SK_REG(port, RX_GMF_FL_THR), RX_GMF_FL_THR_DEF); + /* + * because Pause Packet Truncation in GMAC is not working + * we have to increase the Flush Threshold to 64 bytes + * in order to flush pause packets in Rx FIFO on Yukon-1 + */ + skge_write16(hw, SK_REG(port, RX_GMF_FL_THR), RX_GMF_FL_THR_DEF+1); /* Configure Tx MAC FIFO */ skge_write8(hw, SK_REG(port, TX_GMF_CTRL_T), GMF_RST_CLR); From 54cfb5aa0f4859bd38706eabe0118175780a542f Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Tue, 16 Aug 2005 14:01:05 -0700 Subject: [PATCH 098/584] [PATCH] skge: turn on link status LED Turn on the link status LED when link comes up. 
Signed-off-by: Stephen Hemminger Signed-off-by: Jeff Garzik --- drivers/net/skge.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/net/skge.c b/drivers/net/skge.c index 38fc66a1e14c..48a43b84ea5f 100644 --- a/drivers/net/skge.c +++ b/drivers/net/skge.c @@ -42,7 +42,7 @@ #include "skge.h" #define DRV_NAME "skge" -#define DRV_VERSION "0.8" +#define DRV_VERSION "0.9" #define PFX DRV_NAME " " #define DEFAULT_TX_RING_SIZE 128 @@ -876,6 +876,9 @@ static int skge_rx_fill(struct skge_port *skge) static void skge_link_up(struct skge_port *skge) { + skge_write8(skge->hw, SK_REG(skge->port, LNK_LED_REG), + LED_BLK_OFF|LED_SYNC_OFF|LED_ON); + netif_carrier_on(skge->netdev); if (skge->tx_avail > MAX_SKB_FRAGS + 1) netif_wake_queue(skge->netdev); @@ -894,6 +897,7 @@ static void skge_link_up(struct skge_port *skge) static void skge_link_down(struct skge_port *skge) { + skge_write8(skge->hw, SK_REG(skge->port, LNK_LED_REG), LED_OFF); netif_carrier_off(skge->netdev); netif_stop_queue(skge->netdev); From 686762c8049457a6255ad5caf9275c100fbaceb6 Mon Sep 17 00:00:00 2001 From: Dave Kleikamp Date: Wed, 17 Aug 2005 13:53:13 -0500 Subject: [PATCH 099/584] JFS: Initialize dentry->d_op for negative dentries too Signed-off-by: Dave Kleikamp --- fs/jfs/namei.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/fs/jfs/namei.c b/fs/jfs/namei.c index 1cae14e741eb..49ccde3937f9 100644 --- a/fs/jfs/namei.c +++ b/fs/jfs/namei.c @@ -1390,6 +1390,8 @@ static struct dentry *jfs_lookup(struct inode *dip, struct dentry *dentry, struc jfs_info("jfs_lookup: name = %s", name); + if (JFS_SBI(dip->i_sb)->mntflag & JFS_OS2) + dentry->d_op = &jfs_ci_dentry_operations; if ((name[0] == '.') && (len == 1)) inum = dip->i_ino; @@ -1417,9 +1419,6 @@ static struct dentry *jfs_lookup(struct inode *dip, struct dentry *dentry, struc return ERR_PTR(-EACCES); } - if (JFS_SBI(dip->i_sb)->mntflag & JFS_OS2) - dentry->d_op = &jfs_ci_dentry_operations; - dentry = 
d_splice_alias(ip, dentry); if (dentry && (JFS_SBI(dip->i_sb)->mntflag & JFS_OS2)) From a4e137ab1447fc5009f21e257971aa60a9ec98fb Mon Sep 17 00:00:00 2001 From: Russell King Date: Thu, 18 Aug 2005 10:06:59 +0100 Subject: [PATCH 100/584] [MFD] Add multimedia communication port core support Add support for the core of the multimedia communication port framework. This is a port used to communicate with devices with two DMA paths and a control path. Signed-off-by: Russell King --- arch/arm/Kconfig | 2 + drivers/Kconfig | 2 + drivers/Makefile | 2 +- drivers/mfd/Kconfig | 10 ++ drivers/mfd/Makefile | 5 + drivers/mfd/mcp-core.c | 255 +++++++++++++++++++++++++++++++++++++++++ drivers/mfd/mcp.h | 66 +++++++++++ 7 files changed, 341 insertions(+), 1 deletion(-) create mode 100644 drivers/mfd/Kconfig create mode 100644 drivers/mfd/Makefile create mode 100644 drivers/mfd/mcp-core.c create mode 100644 drivers/mfd/mcp.h diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index 7bc4a583f4e1..0a7700ae8dee 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -752,6 +752,8 @@ source "drivers/hwmon/Kconfig" source "drivers/misc/Kconfig" +source "drivers/mfd/Kconfig" + source "drivers/media/Kconfig" source "drivers/video/Kconfig" diff --git a/drivers/Kconfig b/drivers/Kconfig index cecab0acc3fe..46d655fab115 100644 --- a/drivers/Kconfig +++ b/drivers/Kconfig @@ -48,6 +48,8 @@ source "drivers/hwmon/Kconfig" source "drivers/misc/Kconfig" +source "drivers/mfd/Kconfig" + source "drivers/media/Kconfig" source "drivers/video/Kconfig" diff --git a/drivers/Makefile b/drivers/Makefile index 126a851d5653..9663132ed825 100644 --- a/drivers/Makefile +++ b/drivers/Makefile @@ -26,7 +26,7 @@ obj-$(CONFIG_FB_INTEL) += video/intelfb/ obj-$(CONFIG_SERIO) += input/serio/ obj-y += serial/ obj-$(CONFIG_PARPORT) += parport/ -obj-y += base/ block/ misc/ net/ media/ +obj-y += base/ block/ misc/ mfd/ net/ media/ obj-$(CONFIG_NUBUS) += nubus/ obj-$(CONFIG_ATM) += atm/ obj-$(CONFIG_PPC_PMAC) += macintosh/ diff 
--git a/drivers/mfd/Kconfig b/drivers/mfd/Kconfig new file mode 100644 index 000000000000..e7d1f31aafff --- /dev/null +++ b/drivers/mfd/Kconfig @@ -0,0 +1,10 @@ +# +# Multifunction miscellaneous devices +# + +menu "Multimedia Capabilities Port drivers" + +config MCP + tristate + +endmenu diff --git a/drivers/mfd/Makefile b/drivers/mfd/Makefile new file mode 100644 index 000000000000..ff31f281e28c --- /dev/null +++ b/drivers/mfd/Makefile @@ -0,0 +1,5 @@ +# +# Makefile for multifunction miscellaneous devices +# + +obj-$(CONFIG_MCP) += mcp-core.o diff --git a/drivers/mfd/mcp-core.c b/drivers/mfd/mcp-core.c new file mode 100644 index 000000000000..c75d713c01e4 --- /dev/null +++ b/drivers/mfd/mcp-core.c @@ -0,0 +1,255 @@ +/* + * linux/drivers/mfd/mcp-core.c + * + * Copyright (C) 2001 Russell King + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License. + * + * Generic MCP (Multimedia Communications Port) layer. All MCP locking + * is solely held within this file. 
+ */ +#include +#include +#include +#include +#include + +#include +#include + +#include "mcp.h" + +#define to_mcp(d) container_of(d, struct mcp, attached_device) +#define to_mcp_driver(d) container_of(d, struct mcp_driver, drv) + +static int mcp_bus_match(struct device *dev, struct device_driver *drv) +{ + return 1; +} + +static int mcp_bus_probe(struct device *dev) +{ + struct mcp *mcp = to_mcp(dev); + struct mcp_driver *drv = to_mcp_driver(dev->driver); + + return drv->probe(mcp); +} + +static int mcp_bus_remove(struct device *dev) +{ + struct mcp *mcp = to_mcp(dev); + struct mcp_driver *drv = to_mcp_driver(dev->driver); + + drv->remove(mcp); + return 0; +} + +static int mcp_bus_suspend(struct device *dev, pm_message_t state) +{ + struct mcp *mcp = to_mcp(dev); + int ret = 0; + + if (dev->driver) { + struct mcp_driver *drv = to_mcp_driver(dev->driver); + + ret = drv->suspend(mcp, state); + } + return ret; +} + +static int mcp_bus_resume(struct device *dev) +{ + struct mcp *mcp = to_mcp(dev); + int ret = 0; + + if (dev->driver) { + struct mcp_driver *drv = to_mcp_driver(dev->driver); + + ret = drv->resume(mcp); + } + return ret; +} + +static struct bus_type mcp_bus_type = { + .name = "mcp", + .match = mcp_bus_match, + .suspend = mcp_bus_suspend, + .resume = mcp_bus_resume, +}; + +/** + * mcp_set_telecom_divisor - set the telecom divisor + * @mcp: MCP interface structure + * @div: SIB clock divisor + * + * Set the telecom divisor on the MCP interface. The resulting + * sample rate is SIBCLOCK/div. + */ +void mcp_set_telecom_divisor(struct mcp *mcp, unsigned int div) +{ + spin_lock_irq(&mcp->lock); + mcp->ops->set_telecom_divisor(mcp, div); + spin_unlock_irq(&mcp->lock); +} +EXPORT_SYMBOL(mcp_set_telecom_divisor); + +/** + * mcp_set_audio_divisor - set the audio divisor + * @mcp: MCP interface structure + * @div: SIB clock divisor + * + * Set the audio divisor on the MCP interface. 
+ */ +void mcp_set_audio_divisor(struct mcp *mcp, unsigned int div) +{ + spin_lock_irq(&mcp->lock); + mcp->ops->set_audio_divisor(mcp, div); + spin_unlock_irq(&mcp->lock); +} +EXPORT_SYMBOL(mcp_set_audio_divisor); + +/** + * mcp_reg_write - write a device register + * @mcp: MCP interface structure + * @reg: 4-bit register index + * @val: 16-bit data value + * + * Write a device register. The MCP interface must be enabled + * to prevent this function hanging. + */ +void mcp_reg_write(struct mcp *mcp, unsigned int reg, unsigned int val) +{ + unsigned long flags; + + spin_lock_irqsave(&mcp->lock, flags); + mcp->ops->reg_write(mcp, reg, val); + spin_unlock_irqrestore(&mcp->lock, flags); +} +EXPORT_SYMBOL(mcp_reg_write); + +/** + * mcp_reg_read - read a device register + * @mcp: MCP interface structure + * @reg: 4-bit register index + * + * Read a device register and return its value. The MCP interface + * must be enabled to prevent this function hanging. + */ +unsigned int mcp_reg_read(struct mcp *mcp, unsigned int reg) +{ + unsigned long flags; + unsigned int val; + + spin_lock_irqsave(&mcp->lock, flags); + val = mcp->ops->reg_read(mcp, reg); + spin_unlock_irqrestore(&mcp->lock, flags); + + return val; +} +EXPORT_SYMBOL(mcp_reg_read); + +/** + * mcp_enable - enable the MCP interface + * @mcp: MCP interface to enable + * + * Enable the MCP interface. Each call to mcp_enable will need + * a corresponding call to mcp_disable to disable the interface. + */ +void mcp_enable(struct mcp *mcp) +{ + spin_lock_irq(&mcp->lock); + if (mcp->use_count++ == 0) + mcp->ops->enable(mcp); + spin_unlock_irq(&mcp->lock); +} +EXPORT_SYMBOL(mcp_enable); + +/** + * mcp_disable - disable the MCP interface + * @mcp: MCP interface to disable + * + * Disable the MCP interface. The MCP interface will only be + * disabled once the number of calls to mcp_enable matches the + * number of calls to mcp_disable. 
+ */ +void mcp_disable(struct mcp *mcp) +{ + unsigned long flags; + + spin_lock_irqsave(&mcp->lock, flags); + if (--mcp->use_count == 0) + mcp->ops->disable(mcp); + spin_unlock_irqrestore(&mcp->lock, flags); +} +EXPORT_SYMBOL(mcp_disable); + +static void mcp_release(struct device *dev) +{ + struct mcp *mcp = container_of(dev, struct mcp, attached_device); + + kfree(mcp); +} + +struct mcp *mcp_host_alloc(struct device *parent, size_t size) +{ + struct mcp *mcp; + + mcp = kmalloc(sizeof(struct mcp) + size, GFP_KERNEL); + if (mcp) { + memset(mcp, 0, sizeof(struct mcp) + size); + spin_lock_init(&mcp->lock); + mcp->attached_device.parent = parent; + mcp->attached_device.bus = &mcp_bus_type; + mcp->attached_device.dma_mask = parent->dma_mask; + mcp->attached_device.release = mcp_release; + } + return mcp; +} +EXPORT_SYMBOL(mcp_host_alloc); + +int mcp_host_register(struct mcp *mcp) +{ + strcpy(mcp->attached_device.bus_id, "mcp0"); + return device_register(&mcp->attached_device); +} +EXPORT_SYMBOL(mcp_host_register); + +void mcp_host_unregister(struct mcp *mcp) +{ + device_unregister(&mcp->attached_device); +} +EXPORT_SYMBOL(mcp_host_unregister); + +int mcp_driver_register(struct mcp_driver *mcpdrv) +{ + mcpdrv->drv.bus = &mcp_bus_type; + mcpdrv->drv.probe = mcp_bus_probe; + mcpdrv->drv.remove = mcp_bus_remove; + return driver_register(&mcpdrv->drv); +} +EXPORT_SYMBOL(mcp_driver_register); + +void mcp_driver_unregister(struct mcp_driver *mcpdrv) +{ + driver_unregister(&mcpdrv->drv); +} +EXPORT_SYMBOL(mcp_driver_unregister); + +static int __init mcp_init(void) +{ + return bus_register(&mcp_bus_type); +} + +static void __exit mcp_exit(void) +{ + bus_unregister(&mcp_bus_type); +} + +module_init(mcp_init); +module_exit(mcp_exit); + +MODULE_AUTHOR("Russell King "); +MODULE_DESCRIPTION("Core multimedia communications port driver"); +MODULE_LICENSE("GPL"); diff --git a/drivers/mfd/mcp.h b/drivers/mfd/mcp.h new file mode 100644 index 000000000000..c093a93b8808 --- /dev/null +++ 
b/drivers/mfd/mcp.h @@ -0,0 +1,66 @@ +/* + * linux/drivers/mfd/mcp.h + * + * Copyright (C) 2001 Russell King, All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License. + */ +#ifndef MCP_H +#define MCP_H + +struct mcp_ops; + +struct mcp { + struct module *owner; + struct mcp_ops *ops; + spinlock_t lock; + int use_count; + unsigned int sclk_rate; + unsigned int rw_timeout; + dma_device_t dma_audio_rd; + dma_device_t dma_audio_wr; + dma_device_t dma_telco_rd; + dma_device_t dma_telco_wr; + struct device attached_device; +}; + +struct mcp_ops { + void (*set_telecom_divisor)(struct mcp *, unsigned int); + void (*set_audio_divisor)(struct mcp *, unsigned int); + void (*reg_write)(struct mcp *, unsigned int, unsigned int); + unsigned int (*reg_read)(struct mcp *, unsigned int); + void (*enable)(struct mcp *); + void (*disable)(struct mcp *); +}; + +void mcp_set_telecom_divisor(struct mcp *, unsigned int); +void mcp_set_audio_divisor(struct mcp *, unsigned int); +void mcp_reg_write(struct mcp *, unsigned int, unsigned int); +unsigned int mcp_reg_read(struct mcp *, unsigned int); +void mcp_enable(struct mcp *); +void mcp_disable(struct mcp *); +#define mcp_get_sclk_rate(mcp) ((mcp)->sclk_rate) + +struct mcp *mcp_host_alloc(struct device *, size_t); +int mcp_host_register(struct mcp *); +void mcp_host_unregister(struct mcp *); + +struct mcp_driver { + struct device_driver drv; + int (*probe)(struct mcp *); + void (*remove)(struct mcp *); + int (*suspend)(struct mcp *, pm_message_t); + int (*resume)(struct mcp *); +}; + +int mcp_driver_register(struct mcp_driver *); +void mcp_driver_unregister(struct mcp_driver *); + +#define mcp_get_drvdata(mcp) dev_get_drvdata(&(mcp)->attached_device) +#define mcp_set_drvdata(mcp,d) dev_set_drvdata(&(mcp)->attached_device, d) + +#define mcp_priv(mcp) ((void 
*)((mcp)+1)) + +#endif From 5e742ad66b4a8ba6f9d729660f822676d9e405d4 Mon Sep 17 00:00:00 2001 From: Russell King Date: Thu, 18 Aug 2005 10:08:15 +0100 Subject: [PATCH 101/584] [MFD] Add SA11x0 MCP support This adds support for the MCP interface found on SA11x0 devices. Signed-off-by: Russell King --- drivers/mfd/Kconfig | 6 + drivers/mfd/Makefile | 1 + drivers/mfd/mcp-sa11x0.c | 275 +++++++++++++++++++++++++++++++++++++++ 3 files changed, 282 insertions(+) create mode 100644 drivers/mfd/mcp-sa11x0.c diff --git a/drivers/mfd/Kconfig b/drivers/mfd/Kconfig index e7d1f31aafff..1588a59e3767 100644 --- a/drivers/mfd/Kconfig +++ b/drivers/mfd/Kconfig @@ -7,4 +7,10 @@ menu "Multimedia Capabilities Port drivers" config MCP tristate +# Interface drivers +config MCP_SA11X0 + tristate "Support SA11x0 MCP interface" + depends on ARCH_SA1100 + select MCP + endmenu diff --git a/drivers/mfd/Makefile b/drivers/mfd/Makefile index ff31f281e28c..98bdd6a42188 100644 --- a/drivers/mfd/Makefile +++ b/drivers/mfd/Makefile @@ -3,3 +3,4 @@ # obj-$(CONFIG_MCP) += mcp-core.o +obj-$(CONFIG_MCP_SA11X0) += mcp-sa11x0.o diff --git a/drivers/mfd/mcp-sa11x0.c b/drivers/mfd/mcp-sa11x0.c new file mode 100644 index 000000000000..25699fa37fef --- /dev/null +++ b/drivers/mfd/mcp-sa11x0.c @@ -0,0 +1,275 @@ +/* + * linux/drivers/mfd/mcp-sa11x0.c + * + * Copyright (C) 2001-2005 Russell King + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License. + * + * SA11x0 MCP (Multimedia Communications Port) driver. + * + * MCP read/write timeouts from Jordi Colomer, rehacked by rmk. 
+ */ +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#include + +#include "mcp.h" + +struct mcp_sa11x0 { + u32 mccr0; + u32 mccr1; +}; + +#define priv(mcp) ((struct mcp_sa11x0 *)mcp_priv(mcp)) + +static void +mcp_sa11x0_set_telecom_divisor(struct mcp *mcp, unsigned int divisor) +{ + unsigned int mccr0; + + divisor /= 32; + + mccr0 = Ser4MCCR0 & ~0x00007f00; + mccr0 |= divisor << 8; + Ser4MCCR0 = mccr0; +} + +static void +mcp_sa11x0_set_audio_divisor(struct mcp *mcp, unsigned int divisor) +{ + unsigned int mccr0; + + divisor /= 32; + + mccr0 = Ser4MCCR0 & ~0x0000007f; + mccr0 |= divisor; + Ser4MCCR0 = mccr0; +} + +/* + * Write data to the device. The bit should be set after 3 subframe + * times (each frame is 64 clocks). We wait a maximum of 6 subframes. + * We really should try doing something more productive while we + * wait. + */ +static void +mcp_sa11x0_write(struct mcp *mcp, unsigned int reg, unsigned int val) +{ + int ret = -ETIME; + int i; + + Ser4MCDR2 = reg << 17 | MCDR2_Wr | (val & 0xffff); + + for (i = 0; i < 2; i++) { + udelay(mcp->rw_timeout); + if (Ser4MCSR & MCSR_CWC) { + ret = 0; + break; + } + } + + if (ret < 0) + printk(KERN_WARNING "mcp: write timed out\n"); +} + +/* + * Read data from the device. The bit should be set after 3 subframe + * times (each frame is 64 clocks). We wait a maximum of 6 subframes. + * We really should try doing something more productive while we + * wait. 
+ */ +static unsigned int +mcp_sa11x0_read(struct mcp *mcp, unsigned int reg) +{ + int ret = -ETIME; + int i; + + Ser4MCDR2 = reg << 17 | MCDR2_Rd; + + for (i = 0; i < 2; i++) { + udelay(mcp->rw_timeout); + if (Ser4MCSR & MCSR_CRC) { + ret = Ser4MCDR2 & 0xffff; + break; + } + } + + if (ret < 0) + printk(KERN_WARNING "mcp: read timed out\n"); + + return ret; +} + +static void mcp_sa11x0_enable(struct mcp *mcp) +{ + Ser4MCSR = -1; + Ser4MCCR0 |= MCCR0_MCE; +} + +static void mcp_sa11x0_disable(struct mcp *mcp) +{ + Ser4MCCR0 &= ~MCCR0_MCE; +} + +/* + * Our methods. + */ +static struct mcp_ops mcp_sa11x0 = { + .set_telecom_divisor = mcp_sa11x0_set_telecom_divisor, + .set_audio_divisor = mcp_sa11x0_set_audio_divisor, + .reg_write = mcp_sa11x0_write, + .reg_read = mcp_sa11x0_read, + .enable = mcp_sa11x0_enable, + .disable = mcp_sa11x0_disable, +}; + +static int mcp_sa11x0_probe(struct device *dev) +{ + struct platform_device *pdev = to_platform_device(dev); + struct mcp *mcp; + int ret; + + if (!machine_is_adsbitsy() && !machine_is_assabet() && + !machine_is_cerf() && !machine_is_flexanet() && + !machine_is_freebird() && !machine_is_graphicsclient() && + !machine_is_graphicsmaster() && !machine_is_lart() && + !machine_is_omnimeter() && !machine_is_pfs168() && + !machine_is_shannon() && !machine_is_simpad() && + !machine_is_yopy()) + return -ENODEV; + + if (!request_mem_region(0x80060000, 0x60, "sa11x0-mcp")) + return -EBUSY; + + mcp = mcp_host_alloc(&pdev->dev, sizeof(struct mcp_sa11x0)); + if (!mcp) { + ret = -ENOMEM; + goto release; + } + + mcp->owner = THIS_MODULE; + mcp->ops = &mcp_sa11x0; + mcp->sclk_rate = 11981000, + mcp->dma_audio_rd = DMA_Ser4MCP0Rd; + mcp->dma_audio_wr = DMA_Ser4MCP0Wr; + mcp->dma_telco_rd = DMA_Ser4MCP1Rd; + mcp->dma_telco_wr = DMA_Ser4MCP1Wr; + + dev_set_drvdata(dev, mcp); + + if (machine_is_assabet()) { + ASSABET_BCR_set(ASSABET_BCR_CODEC_RST); + } + + /* + * Setup the PPC unit correctly. 
+ */ + PPDR &= ~PPC_RXD4; + PPDR |= PPC_TXD4 | PPC_SCLK | PPC_SFRM; + PSDR |= PPC_RXD4; + PSDR &= ~(PPC_TXD4 | PPC_SCLK | PPC_SFRM); + PPSR &= ~(PPC_TXD4 | PPC_SCLK | PPC_SFRM); + + Ser4MCSR = -1; + Ser4MCCR1 = 0; + Ser4MCCR0 = 0x00007f7f | MCCR0_ADM; + + /* + * Calculate the read/write timeout (us) from the bit clock + * rate. This is the period for 3 64-bit frames. Always + * round this time up. + */ + mcp->rw_timeout = (64 * 3 * 1000000 + mcp->sclk_rate - 1) / + mcp->sclk_rate; + + ret = mcp_host_register(mcp); + if (ret == 0) + goto out; + + release: + release_mem_region(0x80060000, 0x60); + dev_set_drvdata(dev, NULL); + + out: + return ret; +} + +static int mcp_sa11x0_remove(struct device *dev) +{ + struct mcp *mcp = dev_get_drvdata(dev); + + dev_set_drvdata(dev, NULL); + mcp_host_unregister(mcp); + release_mem_region(0x80060000, 0x60); + + return 0; +} + +static int mcp_sa11x0_suspend(struct device *dev, pm_message_t state, u32 level) +{ + struct mcp *mcp = dev_get_drvdata(dev); + + if (level == SUSPEND_DISABLE) { + priv(mcp)->mccr0 = Ser4MCCR0; + priv(mcp)->mccr1 = Ser4MCCR1; + Ser4MCCR0 &= ~MCCR0_MCE; + } + return 0; +} + +static int mcp_sa11x0_resume(struct device *dev, u32 level) +{ + struct mcp *mcp = dev_get_drvdata(dev); + + if (level == RESUME_RESTORE_STATE) { + Ser4MCCR1 = priv(mcp)->mccr1; + Ser4MCCR0 = priv(mcp)->mccr0; + } + return 0; +} + +/* + * The driver for the SA11x0 MCP port. 
+ */ +static struct device_driver mcp_sa11x0_driver = { + .name = "sa11x0-mcp", + .bus = &platform_bus_type, + .probe = mcp_sa11x0_probe, + .remove = mcp_sa11x0_remove, + .suspend = mcp_sa11x0_suspend, + .resume = mcp_sa11x0_resume, +}; + +/* + * This needs re-working + */ +static int __init mcp_sa11x0_init(void) +{ + return driver_register(&mcp_sa11x0_driver); +} + +static void __exit mcp_sa11x0_exit(void) +{ + driver_unregister(&mcp_sa11x0_driver); +} + +module_init(mcp_sa11x0_init); +module_exit(mcp_sa11x0_exit); + +MODULE_AUTHOR("Russell King "); +MODULE_DESCRIPTION("SA11x0 multimedia communications port driver"); +MODULE_LICENSE("GPL"); From 323cdfc191b7c1597dc748175062c368568d6af4 Mon Sep 17 00:00:00 2001 From: Russell King Date: Thu, 18 Aug 2005 10:10:46 +0100 Subject: [PATCH 102/584] [MFD] Add SA11x0 MCP platform device support Add platform device data for the SA11x0 MCP device. This allows platforms to customise the configuration of the SA11x0 MCP device according to their needs. 
Signed-off-by: Russell King --- arch/arm/mach-sa1100/assabet.c | 7 +++++++ arch/arm/mach-sa1100/cerf.c | 7 +++++++ arch/arm/mach-sa1100/generic.c | 5 +++++ arch/arm/mach-sa1100/generic.h | 3 +++ arch/arm/mach-sa1100/lart.c | 12 ++++++++++++ arch/arm/mach-sa1100/shannon.c | 7 +++++++ arch/arm/mach-sa1100/simpad.c | 7 +++++++ drivers/mfd/mcp-sa11x0.c | 20 ++++++++++---------- include/asm-arm/arch-sa1100/mcp.h | 21 +++++++++++++++++++++ 9 files changed, 79 insertions(+), 10 deletions(-) create mode 100644 include/asm-arm/arch-sa1100/mcp.h diff --git a/arch/arm/mach-sa1100/assabet.c b/arch/arm/mach-sa1100/assabet.c index 4d4d303ee3a8..24687f511bf5 100644 --- a/arch/arm/mach-sa1100/assabet.c +++ b/arch/arm/mach-sa1100/assabet.c @@ -35,6 +35,7 @@ #include #include #include +#include #include "generic.h" @@ -198,6 +199,11 @@ static struct irda_platform_data assabet_irda_data = { .set_speed = assabet_irda_set_speed, }; +static struct mcp_plat_data assabet_mcp_data = { + .mccr0 = MCCR0_ADM, + .sclk_rate = 11981000, +}; + static void __init assabet_init(void) { /* @@ -246,6 +252,7 @@ static void __init assabet_init(void) sa11x0_set_flash_data(&assabet_flash_data, assabet_flash_resources, ARRAY_SIZE(assabet_flash_resources)); sa11x0_set_irda_data(&assabet_irda_data); + sa11x0_set_mcp_data(&assabet_mcp_data); } /* diff --git a/arch/arm/mach-sa1100/cerf.c b/arch/arm/mach-sa1100/cerf.c index 0aa918e24c31..9484be7dc671 100644 --- a/arch/arm/mach-sa1100/cerf.c +++ b/arch/arm/mach-sa1100/cerf.c @@ -29,6 +29,7 @@ #include #include +#include #include "generic.h" static struct resource cerfuart2_resources[] = { @@ -116,10 +117,16 @@ static void __init cerf_map_io(void) GPDR |= CERF_GPIO_CF_RESET; } +static struct mcp_plat_data cerf_mcp_data = { + .mccr0 = MCCR0_ADM, + .sclk_rate = 11981000, +}; + static void __init cerf_init(void) { platform_add_devices(cerf_devices, ARRAY_SIZE(cerf_devices)); sa11x0_set_flash_data(&cerf_flash_data, &cerf_flash_resource, 1); + 
sa11x0_set_mcp_data(&cerf_mcp_data); } MACHINE_START(CERF, "Intrinsyc CerfBoard/CerfCube") diff --git a/arch/arm/mach-sa1100/generic.c b/arch/arm/mach-sa1100/generic.c index 95ae217be1bc..3f1e358455e5 100644 --- a/arch/arm/mach-sa1100/generic.c +++ b/arch/arm/mach-sa1100/generic.c @@ -221,6 +221,11 @@ static struct platform_device sa11x0mcp_device = { .resource = sa11x0mcp_resources, }; +void sa11x0_set_mcp_data(struct mcp_plat_data *data) +{ + sa11x0mcp_device.dev.platform_data = data; +} + static struct resource sa11x0ssp_resources[] = { [0] = { .start = 0x80070000, diff --git a/arch/arm/mach-sa1100/generic.h b/arch/arm/mach-sa1100/generic.h index bfe41da9923e..279e3afa3c39 100644 --- a/arch/arm/mach-sa1100/generic.h +++ b/arch/arm/mach-sa1100/generic.h @@ -34,5 +34,8 @@ struct resource; extern void sa11x0_set_flash_data(struct flash_platform_data *flash, struct resource *res, int nr); +struct sa11x0_ssp_plat_ops; +extern void sa11x0_set_ssp_data(struct sa11x0_ssp_plat_ops *ops); + struct irda_platform_data; void sa11x0_set_irda_data(struct irda_platform_data *irda); diff --git a/arch/arm/mach-sa1100/lart.c b/arch/arm/mach-sa1100/lart.c index 870b488aeda4..ed6744d480af 100644 --- a/arch/arm/mach-sa1100/lart.c +++ b/arch/arm/mach-sa1100/lart.c @@ -13,12 +13,23 @@ #include #include #include +#include #include "generic.h" #warning "include/asm/arch-sa1100/ide.h needs fixing for lart" +static struct mcp_plat_data lart_mcp_data = { + .mccr0 = MCCR0_ADM, + .sclk_rate = 11981000, +}; + +static void __init lart_init(void) +{ + sa11x0_set_mcp_data(&lart_mcp_data); +} + static struct map_desc lart_io_desc[] __initdata = { /* virtual physical length type */ { 0xe8000000, 0x00000000, 0x00400000, MT_DEVICE }, /* main flash memory */ @@ -47,5 +58,6 @@ MACHINE_START(LART, "LART") .boot_params = 0xc0000100, .map_io = lart_map_io, .init_irq = sa1100_init_irq, + .init_machine = lart_init, .timer = &sa1100_timer, MACHINE_END diff --git a/arch/arm/mach-sa1100/shannon.c 
b/arch/arm/mach-sa1100/shannon.c index 43a00359fcdd..7482288278d9 100644 --- a/arch/arm/mach-sa1100/shannon.c +++ b/arch/arm/mach-sa1100/shannon.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include "generic.h" @@ -52,9 +53,15 @@ static struct resource shannon_flash_resource = { .flags = IORESOURCE_MEM, }; +static struct mcp_plat_data shannon_mcp_data = { + .mccr0 = MCCR0_ADM, + .sclk_rate = 11981000, +}; + static void __init shannon_init(void) { sa11x0_set_flash_data(&shannon_flash_data, &shannon_flash_resource, 1); + sa11x0_set_mcp_data(&shannon_mcp_data); } static void __init shannon_map_io(void) diff --git a/arch/arm/mach-sa1100/simpad.c b/arch/arm/mach-sa1100/simpad.c index 77978586b126..07f6d5fd7bb0 100644 --- a/arch/arm/mach-sa1100/simpad.c +++ b/arch/arm/mach-sa1100/simpad.c @@ -23,6 +23,7 @@ #include #include #include +#include #include #include @@ -123,6 +124,11 @@ static struct resource simpad_flash_resources [] = { } }; +static struct mcp_plat_data simpad_mcp_data = { + .mccr0 = MCCR0_ADM, + .sclk_rate = 11981000, +}; + static void __init simpad_map_io(void) @@ -157,6 +163,7 @@ static void __init simpad_map_io(void) sa11x0_set_flash_data(&simpad_flash_data, simpad_flash_resources, ARRAY_SIZE(simpad_flash_resources)); + sa11x0_set_mcp_data(&simpad_mcp_data); } static void simpad_power_off(void) diff --git a/drivers/mfd/mcp-sa11x0.c b/drivers/mfd/mcp-sa11x0.c index 25699fa37fef..e9806fbbe696 100644 --- a/drivers/mfd/mcp-sa11x0.c +++ b/drivers/mfd/mcp-sa11x0.c @@ -24,6 +24,7 @@ #include #include #include +#include #include @@ -140,16 +141,11 @@ static struct mcp_ops mcp_sa11x0 = { static int mcp_sa11x0_probe(struct device *dev) { struct platform_device *pdev = to_platform_device(dev); + struct mcp_plat_data *data = pdev->dev.platform_data; struct mcp *mcp; int ret; - if (!machine_is_adsbitsy() && !machine_is_assabet() && - !machine_is_cerf() && !machine_is_flexanet() && - !machine_is_freebird() && !machine_is_graphicsclient() && - 
!machine_is_graphicsmaster() && !machine_is_lart() && - !machine_is_omnimeter() && !machine_is_pfs168() && - !machine_is_shannon() && !machine_is_simpad() && - !machine_is_yopy()) + if (!data) return -ENODEV; if (!request_mem_region(0x80060000, 0x60, "sa11x0-mcp")) @@ -163,7 +159,7 @@ static int mcp_sa11x0_probe(struct device *dev) mcp->owner = THIS_MODULE; mcp->ops = &mcp_sa11x0; - mcp->sclk_rate = 11981000, + mcp->sclk_rate = data->sclk_rate; mcp->dma_audio_rd = DMA_Ser4MCP0Rd; mcp->dma_audio_wr = DMA_Ser4MCP0Wr; mcp->dma_telco_rd = DMA_Ser4MCP1Rd; @@ -184,9 +180,13 @@ static int mcp_sa11x0_probe(struct device *dev) PSDR &= ~(PPC_TXD4 | PPC_SCLK | PPC_SFRM); PPSR &= ~(PPC_TXD4 | PPC_SCLK | PPC_SFRM); + /* + * Initialise device. Note that we initially + * set the sampling rate to minimum. + */ Ser4MCSR = -1; - Ser4MCCR1 = 0; - Ser4MCCR0 = 0x00007f7f | MCCR0_ADM; + Ser4MCCR1 = data->mccr1; + Ser4MCCR0 = data->mccr0 | 0x7f7f; /* * Calculate the read/write timeout (us) from the bit clock diff --git a/include/asm-arm/arch-sa1100/mcp.h b/include/asm-arm/arch-sa1100/mcp.h new file mode 100644 index 000000000000..f58a22755c61 --- /dev/null +++ b/include/asm-arm/arch-sa1100/mcp.h @@ -0,0 +1,21 @@ +/* + * linux/include/asm-arm/arch-sa1100/mcp.h + * + * Copyright (C) 2005 Russell King. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#ifndef __ASM_ARM_ARCH_MCP_H +#define __ASM_ARM_ARCH_MCP_H + +#include + +struct mcp_plat_data { + u32 mccr0; + u32 mccr1; + unsigned int sclk_rate; +}; + +#endif From f27ecacc54cc0e5397c9b35f6c25065f07c4448d Mon Sep 17 00:00:00 2001 From: Russell King Date: Thu, 18 Aug 2005 21:31:00 +0100 Subject: [PATCH 103/584] [ARM] Add support for ARM GIC Add support for the ARM Generic Interrupt Controller. 
Signed-off-by: Russell King --- arch/arm/common/Kconfig | 3 + arch/arm/common/Makefile | 1 + arch/arm/common/gic.c | 166 +++++++++++++++++++++++++++++++++ include/asm-arm/hardware/gic.h | 41 ++++++++ 4 files changed, 211 insertions(+) create mode 100644 arch/arm/common/gic.c create mode 100644 include/asm-arm/hardware/gic.h diff --git a/arch/arm/common/Kconfig b/arch/arm/common/Kconfig index 692af6b5e8ff..666ba393575b 100644 --- a/arch/arm/common/Kconfig +++ b/arch/arm/common/Kconfig @@ -1,6 +1,9 @@ config ICST525 bool +config ARM_GIC + bool + config ICST307 bool diff --git a/arch/arm/common/Makefile b/arch/arm/common/Makefile index 11f20a43ee3a..a87886564b19 100644 --- a/arch/arm/common/Makefile +++ b/arch/arm/common/Makefile @@ -4,6 +4,7 @@ obj-y += rtctime.o obj-$(CONFIG_ARM_AMBA) += amba.o +obj-$(CONFIG_ARM_GIC) += gic.o obj-$(CONFIG_ICST525) += icst525.o obj-$(CONFIG_ICST307) += icst307.o obj-$(CONFIG_SA1111) += sa1111.o diff --git a/arch/arm/common/gic.c b/arch/arm/common/gic.c new file mode 100644 index 000000000000..51dbf5489b6b --- /dev/null +++ b/arch/arm/common/gic.c @@ -0,0 +1,166 @@ +/* + * linux/arch/arm/common/gic.c + * + * Copyright (C) 2002 ARM Limited, All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * Interrupt architecture for the GIC: + * + * o There is one Interrupt Distributor, which receives interrupts + * from system devices and sends them to the Interrupt Controllers. + * + * o There is one CPU Interface per CPU, which sends interrupts sent + * by the Distributor, and interrupts generated locally, to the + * associated CPU. + * + * Note that IRQs 0-31 are special - they are local to each CPU. + * As such, the enable set/clear, pending set/clear and active bit + * registers are banked per-cpu for these sources. 
+ */ +#include +#include +#include +#include + +#include +#include +#include +#include + +static void __iomem *gic_dist_base; +static void __iomem *gic_cpu_base; + +/* + * Routines to acknowledge, disable and enable interrupts + * + * Linux assumes that when we're done with an interrupt we need to + * unmask it, in the same way we need to unmask an interrupt when + * we first enable it. + * + * The GIC has a separate notion of "end of interrupt" to re-enable + * an interrupt after handling, in order to support hardware + * prioritisation. + * + * We can make the GIC behave in the way that Linux expects by making + * our "acknowledge" routine disable the interrupt, then mark it as + * complete. + */ +static void gic_ack_irq(unsigned int irq) +{ + u32 mask = 1 << (irq % 32); + writel(mask, gic_dist_base + GIC_DIST_ENABLE_CLEAR + (irq / 32) * 4); + writel(irq, gic_cpu_base + GIC_CPU_EOI); +} + +static void gic_mask_irq(unsigned int irq) +{ + u32 mask = 1 << (irq % 32); + writel(mask, gic_dist_base + GIC_DIST_ENABLE_CLEAR + (irq / 32) * 4); +} + +static void gic_unmask_irq(unsigned int irq) +{ + u32 mask = 1 << (irq % 32); + writel(mask, gic_dist_base + GIC_DIST_ENABLE_SET + (irq / 32) * 4); +} + +static void gic_set_cpu(struct irqdesc *desc, unsigned int irq, unsigned int cpu) +{ + void __iomem *reg = gic_dist_base + GIC_DIST_TARGET + (irq & ~3); + unsigned int shift = (irq % 4) * 8; + u32 val; + + val = readl(reg) & ~(0xff << shift); + val |= 1 << (cpu + shift); + writel(val, reg); +} + +static struct irqchip gic_chip = { + .ack = gic_ack_irq, + .mask = gic_mask_irq, + .unmask = gic_unmask_irq, +#ifdef CONFIG_SMP + .set_cpu = gic_set_cpu, +#endif +}; + +void __init gic_dist_init(void __iomem *base) +{ + unsigned int max_irq, i; + u32 cpumask = 1 << smp_processor_id(); + + cpumask |= cpumask << 8; + cpumask |= cpumask << 16; + + gic_dist_base = base; + + writel(0, base + GIC_DIST_CTRL); + + /* + * Find out how many interrupts are supported.
+ */ + max_irq = readl(base + GIC_DIST_CTR) & 0x1f; + max_irq = (max_irq + 1) * 32; + + /* + * The GIC only supports up to 1020 interrupt sources. + * Limit this to either the architected maximum, or the + * platform maximum. + */ + if (max_irq > max(1020, NR_IRQS)) + max_irq = max(1020, NR_IRQS); + + /* + * Set all global interrupts to be level triggered, active low. + */ + for (i = 32; i < max_irq; i += 16) + writel(0, base + GIC_DIST_CONFIG + i * 4 / 16); + + /* + * Set all global interrupts to this CPU only. + */ + for (i = 32; i < max_irq; i += 4) + writel(cpumask, base + GIC_DIST_TARGET + i * 4 / 4); + + /* + * Set priority on all interrupts. + */ + for (i = 0; i < max_irq; i += 4) + writel(0xa0a0a0a0, base + GIC_DIST_PRI + i * 4 / 4); + + /* + * Disable all interrupts. + */ + for (i = 0; i < max_irq; i += 32) + writel(0xffffffff, base + GIC_DIST_ENABLE_CLEAR + i * 4 / 32); + + /* + * Setup the Linux IRQ subsystem. + */ + for (i = 29; i < max_irq; i++) { + set_irq_chip(i, &gic_chip); + set_irq_handler(i, do_level_IRQ); + set_irq_flags(i, IRQF_VALID | IRQF_PROBE); + } + + writel(1, base + GIC_DIST_CTRL); +} + +void __cpuinit gic_cpu_init(void __iomem *base) +{ + gic_cpu_base = base; + writel(0xf0, base + GIC_CPU_PRIMASK); + writel(1, base + GIC_CPU_CTRL); +} + +#ifdef CONFIG_SMP +void gic_raise_softirq(cpumask_t cpumask, unsigned int irq) +{ + unsigned long map = *cpus_addr(cpumask); + + writel(map << 16 | irq, gic_dist_base + GIC_DIST_SOFTINT); +} +#endif diff --git a/include/asm-arm/hardware/gic.h b/include/asm-arm/hardware/gic.h new file mode 100644 index 000000000000..3fa5eb70f64e --- /dev/null +++ b/include/asm-arm/hardware/gic.h @@ -0,0 +1,41 @@ +/* + * linux/include/asm-arm/hardware/gic.h + * + * Copyright (C) 2002 ARM Limited, All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ */ +#ifndef __ASM_ARM_HARDWARE_GIC_H +#define __ASM_ARM_HARDWARE_GIC_H + +#include + +#define GIC_CPU_CTRL 0x00 +#define GIC_CPU_PRIMASK 0x04 +#define GIC_CPU_BINPOINT 0x08 +#define GIC_CPU_INTACK 0x0c +#define GIC_CPU_EOI 0x10 +#define GIC_CPU_RUNNINGPRI 0x14 +#define GIC_CPU_HIGHPRI 0x18 + +#define GIC_DIST_CTRL 0x000 +#define GIC_DIST_CTR 0x004 +#define GIC_DIST_ENABLE_SET 0x100 +#define GIC_DIST_ENABLE_CLEAR 0x180 +#define GIC_DIST_PENDING_SET 0x200 +#define GIC_DIST_PENDING_CLEAR 0x280 +#define GIC_DIST_ACTIVE_BIT 0x300 +#define GIC_DIST_PRI 0x400 +#define GIC_DIST_TARGET 0x800 +#define GIC_DIST_CONFIG 0xc00 +#define GIC_DIST_SOFTINT 0xf00 + +#ifndef __ASSEMBLY__ +void gic_dist_init(void __iomem *base); +void gic_cpu_init(void __iomem *base); +void gic_raise_softirq(cpumask_t cpumask, unsigned int irq); +#endif + +#endif From 1b1b3c9b6d346d441a99e2de0b34f3ba93963ad8 Mon Sep 17 00:00:00 2001 From: Manfred Spraul Date: Sat, 6 Aug 2005 23:47:55 +0200 Subject: [PATCH 104/584] [PATCH] forcedeth: Initialize link settings in every nv_open() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Rüdiger found a bug in nv_open that explains some of the reports with duplex mismatches: nv_open calls nv_update_link_speed for initializing the hardware link speed registers. If current link setting matches the values in np->linkspeed and np->duplex, then the function does nothing. Usually, doing nothing is the right thing, but not in nv_open: During nv_open, the registers must be initialized because the nic was reset. The attached patch fixes that by setting np->linkspeed to an invalid value before calling nv_update_link_speed from nv_open. 
Signed-Off-By: Manfred Spraul Signed-off-by: Jeff Garzik --- drivers/net/forcedeth.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/net/forcedeth.c b/drivers/net/forcedeth.c index f165ae973985..7d93948aec83 100644 --- a/drivers/net/forcedeth.c +++ b/drivers/net/forcedeth.c @@ -93,6 +93,8 @@ * 0.40: 19 Jul 2005: Add support for mac address change. * 0.41: 30 Jul 2005: Write back original MAC in nv_close instead * of nv_remove + * 0.42: 06 Aug 2005: Fix lack of link speed initialization + * in the second (and later) nv_open call * * Known bugs: * We suspect that on some hardware no TX done interrupts are generated. @@ -2178,6 +2180,9 @@ static int nv_open(struct net_device *dev) writel(NVREG_MIISTAT_MASK, base + NvRegMIIStatus); dprintk(KERN_INFO "startup: got 0x%08x.\n", miistat); } + /* set linkspeed to invalid value, thus force nv_update_linkspeed + * to init hw */ + np->linkspeed = 0; ret = nv_update_linkspeed(dev); nv_start_rx(dev); nv_start_tx(dev); From a51d74409d856e472bad753aecf1f2715718c242 Mon Sep 17 00:00:00 2001 From: Nishanth Aravamudan Date: Mon, 27 Jun 2005 09:20:04 -0700 Subject: [PATCH 105/584] [PATCH] net/cycx_drv: replace delay_cycx() with msleep_interruptible() Use msleep_interruptible() instead of delay_cycx() to guarantee the task delays as expected. Remove the prototype and definition of delay_cycx(). Signed-off-by: Nishanth Aravamudan Signed-off-by: Jeff Garzik --- drivers/net/wan/cycx_drv.c | 24 ++++++++---------------- 1 file changed, 8 insertions(+), 16 deletions(-) diff --git a/drivers/net/wan/cycx_drv.c b/drivers/net/wan/cycx_drv.c index 6e74af62ca08..9e56fc346ba4 100644 --- a/drivers/net/wan/cycx_drv.c +++ b/drivers/net/wan/cycx_drv.c @@ -56,7 +56,7 @@ #include /* for jiffies, HZ, etc. 
*/ #include /* API definitions */ #include /* CYCX firmware module definitions */ -#include /* udelay */ +#include /* udelay, msleep_interruptible */ #include /* read[wl], write[wl], ioremap, iounmap */ #define MOD_VERSION 0 @@ -74,7 +74,6 @@ static int reset_cyc2x(void __iomem *addr); static int detect_cyc2x(void __iomem *addr); /* Miscellaneous functions */ -static void delay_cycx(int sec); static int get_option_index(long *optlist, long optval); static u16 checksum(u8 *buf, u32 len); @@ -259,7 +258,7 @@ static int memory_exists(void __iomem *addr) if (readw(addr + 0x10) == TEST_PATTERN) return 1; - delay_cycx(1); + msleep_interruptible(1 * 1000); } return 0; @@ -316,7 +315,7 @@ static void cycx_reset_boot(void __iomem *addr, u8 *code, u32 len) /* 80186 was in hold, go */ writeb(0, addr + START_CPU); - delay_cycx(1); + msleep_interruptible(1 * 1000); } /* Load data.bin file through boot (reset) interface. */ @@ -462,13 +461,13 @@ static int load_cyc2x(struct cycx_hw *hw, struct cycx_firmware *cfm, u32 len) cycx_reset_boot(hw->dpmbase, reset_image, img_hdr->reset_size); /* reset is waiting for boot */ writew(GEN_POWER_ON, pt_cycld); - delay_cycx(1); + msleep_interruptible(1 * 1000); for (j = 0 ; j < 3 ; j++) if (!readw(pt_cycld)) goto reset_loaded; else - delay_cycx(1); + msleep_interruptible(1 * 1000); } printk(KERN_ERR "%s: reset not started.\n", modname); @@ -495,7 +494,7 @@ reset_loaded: /* Arthur Ganzert's tip: wait a while after the firmware loading... 
seg abr 26 17:17:12 EST 1999 - acme */ - delay_cycx(7); + msleep_interruptible(7 * 1000); printk(KERN_INFO "%s: firmware loaded!\n", modname); /* enable interrupts */ @@ -547,20 +546,13 @@ static int get_option_index(long *optlist, long optval) static int reset_cyc2x(void __iomem *addr) { writeb(0, addr + RST_ENABLE); - delay_cycx(2); + msleep_interruptible(2 * 1000); writeb(0, addr + RST_DISABLE); - delay_cycx(2); + msleep_interruptible(2 * 1000); return memory_exists(addr); } -/* Delay */ -static void delay_cycx(int sec) -{ - set_current_state(TASK_INTERRUPTIBLE); - schedule_timeout(sec * HZ); -} - /* Calculate 16-bit CRC using CCITT polynomial. */ static u16 checksum(u8 *buf, u32 len) { From 913168de6284cd27810e9ee7ae029d408a2a0555 Mon Sep 17 00:00:00 2001 From: Francois Romieu Date: Sat, 30 Jul 2005 01:12:11 +0200 Subject: [PATCH 106/584] [PATCH] r8169: PCI ID for the Linksys EG1032 The Linksys EG1032 uses Realtek's 8169 chipset. Credit goes to Bob Wilson for the report. Signed-off-by: Francois Romieu Signed-off-by: Jeff Garzik --- drivers/net/r8169.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/r8169.c b/drivers/net/r8169.c index d5afe05cd826..2f9b3227243b 100644 --- a/drivers/net/r8169.c +++ b/drivers/net/r8169.c @@ -186,6 +186,7 @@ const static struct { static struct pci_device_id rtl8169_pci_tbl[] = { { PCI_DEVICE(PCI_VENDOR_ID_REALTEK, 0x8169), }, { PCI_DEVICE(PCI_VENDOR_ID_DLINK, 0x4300), }, + { PCI_DEVICE(PCI_VENDOR_ID_LINKSYS, 0x1032), }, { PCI_DEVICE(0x16ec, 0x0116), }, {0,}, }; From e9985d53e3da3b51d0334d0622c449fda78ae089 Mon Sep 17 00:00:00 2001 From: Adrian Bunk Date: Tue, 9 Aug 2005 02:41:00 +0200 Subject: [PATCH 107/584] [PATCH] SIS190 must select MII SIS190 must select MII since it's using it. While I was editing the Kconfig entry, I also converted the spaces to tabs. 
Signed-off-by: Adrian Bunk Signed-off-by: Jeff Garzik --- drivers/net/Kconfig | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig index 765fbb29d386..26e4aa93afd6 100644 --- a/drivers/net/Kconfig +++ b/drivers/net/Kconfig @@ -1922,14 +1922,15 @@ config R8169_VLAN If in doubt, say Y. config SIS190 - tristate "SiS190 gigabit ethernet support" - depends on PCI - select CRC32 - ---help--- - Say Y here if you have a SiS 190 PCI Gigabit Ethernet adapter. + tristate "SiS190 gigabit ethernet support" + depends on PCI + select CRC32 + select MII + ---help--- + Say Y here if you have a SiS 190 PCI Gigabit Ethernet adapter. - To compile this driver as a module, choose M here: the module - will be called sis190. This is recommended. + To compile this driver as a module, choose M here: the module + will be called sis190. This is recommended. config SKGE tristate "New SysKonnect GigaEthernet support (EXPERIMENTAL)" From d9a8a0a3574525bf422fd2f05eec739c0d25814f Mon Sep 17 00:00:00 2001 From: Komuro Date: Sat, 6 Aug 2005 12:01:43 +0900 Subject: [PATCH 108/584] [PATCH] network: fix fmvj18x_cs multicast code The multicast code of the fmvj18x_cs driver is broken. I fixed it to work properly. 
Signed-off-by: komurojun-mbn@nifty.com Signed-off-by: Jeff Garzik --- drivers/net/pcmcia/fmvj18x_cs.c | 25 +++++++++++++++++++------ 1 file changed, 19 insertions(+), 6 deletions(-) diff --git a/drivers/net/pcmcia/fmvj18x_cs.c b/drivers/net/pcmcia/fmvj18x_cs.c index 9d8197bb293a..384a736a0d2f 100644 --- a/drivers/net/pcmcia/fmvj18x_cs.c +++ b/drivers/net/pcmcia/fmvj18x_cs.c @@ -134,7 +134,7 @@ typedef struct local_info_t { u_char mc_filter[8]; } local_info_t; -#define MC_FILTERBREAK 64 +#define MC_FILTERBREAK 8 /*====================================================================*/ /* @@ -1012,7 +1012,7 @@ static void fjn_reset(struct net_device *dev) outb(BANK_1U, ioaddr + CONFIG_1); /* set the multicast table to accept none. */ - for (i = 0; i < 6; i++) + for (i = 0; i < 8; i++) outb(0x00, ioaddr + MAR_ADR + i); /* Switch to bank 2 (runtime mode) */ @@ -1269,6 +1269,16 @@ static void set_rx_mode(struct net_device *dev) u_long flags; int i; + int saved_config_0 = inb(ioaddr + CONFIG_0); + + local_irq_save(flags); + + /* Disable Tx and Rx */ + if (sram_config == 0) + outb(CONFIG0_RST, ioaddr + CONFIG_0); + else + outb(CONFIG0_RST_1, ioaddr + CONFIG_0); + if (dev->flags & IFF_PROMISC) { /* Unconditionally log net taps. */ printk("%s: Promiscuous mode enabled.\n", dev->name); @@ -1290,20 +1300,23 @@ static void set_rx_mode(struct net_device *dev) for (i = 0, mclist = dev->mc_list; mclist && i < dev->mc_count; i++, mclist = mclist->next) { unsigned int bit = - ether_crc_le(ETH_ALEN, mclist->dmi_addr) & 0x3f; - mc_filter[bit >> 3] |= (1 << bit); + ether_crc_le(ETH_ALEN, mclist->dmi_addr) >> 26; + mc_filter[bit >> 3] |= (1 << (bit & 7)); } + outb(2, ioaddr + RX_MODE); /* Use normal mode. */ } - local_irq_save(flags); if (memcmp(mc_filter, lp->mc_filter, sizeof(mc_filter))) { int saved_bank = inb(ioaddr + CONFIG_1); /* Switch to bank 1 and set the multicast table. 
*/ outb(0xe4, ioaddr + CONFIG_1); for (i = 0; i < 8; i++) - outb(mc_filter[i], ioaddr + 8 + i); + outb(mc_filter[i], ioaddr + MAR_ADR + i); memcpy(lp->mc_filter, mc_filter, sizeof(mc_filter)); outb(saved_bank, ioaddr + CONFIG_1); } + + outb(saved_config_0, ioaddr + CONFIG_0); + local_irq_restore(flags); } From b4ee21f442ba7a8c43e00b32088d437259890cd0 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Wed, 27 Jul 2005 01:14:44 -0700 Subject: [PATCH 109/584] [PATCH] e1000 printk warning fix 2 drivers/net/e1000/e1000_main.c: In function `e1000_clean_tx_irq': drivers/net/e1000/e1000_main.c:2774: warning: size_t format, dma_addr_t arg (arg 8) Cc: Jeff Garzik Signed-off-by: Andrew Morton Signed-off-by: Jeff Garzik --- drivers/net/e1000/e1000_main.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/e1000/e1000_main.c b/drivers/net/e1000/e1000_main.c index b82fd15d0891..9b596e0bbf95 100644 --- a/drivers/net/e1000/e1000_main.c +++ b/drivers/net/e1000/e1000_main.c @@ -2767,7 +2767,7 @@ e1000_clean_tx_irq(struct e1000_adapter *adapter) " next_to_use <%x>\n" " next_to_clean <%x>\n" "buffer_info[next_to_clean]\n" - " dma <%zx>\n" + " dma <%llx>\n" " time_stamp <%lx>\n" " next_to_watch <%x>\n" " jiffies <%lx>\n" @@ -2776,7 +2776,7 @@ e1000_clean_tx_irq(struct e1000_adapter *adapter) E1000_READ_REG(&adapter->hw, TDT), tx_ring->next_to_use, i, - tx_ring->buffer_info[i].dma, + (unsigned long long)tx_ring->buffer_info[i].dma, tx_ring->buffer_info[i].time_stamp, eop, jiffies, From 9f7f0098eaadd9200ab52ad0dad523f797d3bf39 Mon Sep 17 00:00:00 2001 From: Jeff Garzik Date: Fri, 19 Aug 2005 03:52:49 -0400 Subject: [PATCH 110/584] [netdrvr eepro100] check for skb==NULL before calling rx_align(skb) --- drivers/net/eepro100.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/eepro100.c b/drivers/net/eepro100.c index 1795425f512e..8c62ced2c9b2 100644 --- a/drivers/net/eepro100.c +++ b/drivers/net/eepro100.c @@ -1263,8 +1263,8 
@@ speedo_init_rx_ring(struct net_device *dev) for (i = 0; i < RX_RING_SIZE; i++) { struct sk_buff *skb; skb = dev_alloc_skb(PKT_BUF_SZ + sizeof(struct RxFD)); - /* XXX: do we really want to call this before the NULL check? --hch */ - rx_align(skb); /* Align IP on 16 byte boundary */ + if (skb) + rx_align(skb); /* Align IP on 16 byte boundary */ sp->rx_skbuff[i] = skb; if (skb == NULL) break; /* OK. Just initially short of Rx bufs. */ @@ -1654,8 +1654,8 @@ static inline struct RxFD *speedo_rx_alloc(struct net_device *dev, int entry) struct sk_buff *skb; /* Get a fresh skbuff to replace the consumed one. */ skb = dev_alloc_skb(PKT_BUF_SZ + sizeof(struct RxFD)); - /* XXX: do we really want to call this before the NULL check? --hch */ - rx_align(skb); /* Align IP on 16 byte boundary */ + if (skb) + rx_align(skb); /* Align IP on 16 byte boundary */ sp->rx_skbuff[entry] = skb; if (skb == NULL) { sp->rx_ringp[entry] = NULL; From d366b6436386875b1310ce8f70e3f9dea4647bac Mon Sep 17 00:00:00 2001 From: Russell King Date: Fri, 19 Aug 2005 09:40:08 +0100 Subject: [PATCH 111/584] [MMC] Add mmc_hostname() macro mmc_hostname() returns a pointer to the hostname for the mmc_host. 
Signed-off-by: Russell King --- drivers/mmc/mmc.c | 6 +++--- drivers/mmc/mmc_sysfs.c | 2 +- drivers/mmc/mmci.c | 4 ++-- drivers/mmc/wbsd.c | 2 +- include/linux/mmc/host.h | 1 + 5 files changed, 8 insertions(+), 7 deletions(-) diff --git a/drivers/mmc/mmc.c b/drivers/mmc/mmc.c index eeb9f6668e69..e02e5df80be9 100644 --- a/drivers/mmc/mmc.c +++ b/drivers/mmc/mmc.c @@ -361,7 +361,7 @@ static void mmc_decode_cid(struct mmc_card *card) default: printk("%s: card has unknown MMCA version %d\n", - card->host->host_name, card->csd.mmca_vsn); + mmc_hostname(card->host), card->csd.mmca_vsn); mmc_card_set_bad(card); break; } @@ -383,7 +383,7 @@ static void mmc_decode_csd(struct mmc_card *card) csd_struct = UNSTUFF_BITS(resp, 126, 2); if (csd_struct != 1 && csd_struct != 2) { printk("%s: unrecognised CSD structure version %d\n", - card->host->host_name, csd_struct); + mmc_hostname(card->host), csd_struct); mmc_card_set_bad(card); return; } @@ -551,7 +551,7 @@ static void mmc_discover_cards(struct mmc_host *host) } if (err != MMC_ERR_NONE) { printk(KERN_ERR "%s: error requesting CID: %d\n", - host->host_name, err); + mmc_hostname(host), err); break; } diff --git a/drivers/mmc/mmc_sysfs.c b/drivers/mmc/mmc_sysfs.c index 5556cd3b5559..3a6b325a9149 100644 --- a/drivers/mmc/mmc_sysfs.c +++ b/drivers/mmc/mmc_sysfs.c @@ -206,7 +206,7 @@ void mmc_init_card(struct mmc_card *card, struct mmc_host *host) int mmc_register_card(struct mmc_card *card) { snprintf(card->dev.bus_id, sizeof(card->dev.bus_id), - "%s:%04x", card->host->host_name, card->rca); + "%s:%04x", mmc_hostname(card->host), card->rca); return device_add(&card->dev); } diff --git a/drivers/mmc/mmci.c b/drivers/mmc/mmci.c index 7a42966d755b..716c4ef4faf6 100644 --- a/drivers/mmc/mmci.c +++ b/drivers/mmc/mmci.c @@ -34,7 +34,7 @@ #ifdef CONFIG_MMC_DEBUG #define DBG(host,fmt,args...) 
\ - pr_debug("%s: %s: " fmt, host->mmc->host_name, __func__ , args) + pr_debug("%s: %s: " fmt, mmc_hostname(host->mmc), __func__ , args) #else #define DBG(host,fmt,args...) do { } while (0) #endif @@ -541,7 +541,7 @@ static int mmci_probe(struct amba_device *dev, void *id) mmc_add_host(mmc); printk(KERN_INFO "%s: MMCI rev %x cfg %02x at 0x%08lx irq %d,%d\n", - mmc->host_name, amba_rev(dev), amba_config(dev), + mmc_hostname(mmc), amba_rev(dev), amba_config(dev), dev->res.start, dev->irq[0], dev->irq[1]); init_timer(&host->timer); diff --git a/drivers/mmc/wbsd.c b/drivers/mmc/wbsd.c index 974f2f36bdbe..402c2d661fb2 100644 --- a/drivers/mmc/wbsd.c +++ b/drivers/mmc/wbsd.c @@ -1796,7 +1796,7 @@ static int __devinit wbsd_init(struct device* dev, int base, int irq, int dma, mmc_add_host(mmc); - printk(KERN_INFO "%s: W83L51xD", mmc->host_name); + printk(KERN_INFO "%s: W83L51xD", mmc_hostname(mmc)); if (host->chip_id != 0) printk(" id %x", (int)host->chip_id); printk(" at 0x%x irq %d", (int)host->base, (int)host->irq); diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h index f90f674eb3b0..307862308596 100644 --- a/include/linux/mmc/host.h +++ b/include/linux/mmc/host.h @@ -97,6 +97,7 @@ extern void mmc_free_host(struct mmc_host *); #define mmc_priv(x) ((void *)((x) + 1)) #define mmc_dev(x) ((x)->dev) +#define mmc_hostname(x) ((x)->host_name) extern int mmc_suspend_host(struct mmc_host *, pm_message_t); extern int mmc_resume_host(struct mmc_host *); From 00b137cfda5276b3d2c87d44236fe4c5ee68b405 Mon Sep 17 00:00:00 2001 From: Russell King Date: Fri, 19 Aug 2005 09:41:24 +0100 Subject: [PATCH 112/584] [MMC] Add MMC class devices Create a mmc_host class to allow enumeration of MMC host controllers even though they have no card(s) inserted. Patch based on work by Pierre Ossman. 
Signed-off-by: Russell King --- drivers/mmc/mmc.c | 23 ++++++------- drivers/mmc/mmc.h | 5 +++ drivers/mmc/mmc_sysfs.c | 72 +++++++++++++++++++++++++++++++++++++++- include/linux/mmc/host.h | 1 + 4 files changed, 87 insertions(+), 14 deletions(-) diff --git a/drivers/mmc/mmc.c b/drivers/mmc/mmc.c index e02e5df80be9..3c5904834fe8 100644 --- a/drivers/mmc/mmc.c +++ b/drivers/mmc/mmc.c @@ -796,17 +796,13 @@ struct mmc_host *mmc_alloc_host(int extra, struct device *dev) { struct mmc_host *host; - host = kmalloc(sizeof(struct mmc_host) + extra, GFP_KERNEL); + host = mmc_alloc_host_sysfs(extra, dev); if (host) { - memset(host, 0, sizeof(struct mmc_host) + extra); - spin_lock_init(&host->lock); init_waitqueue_head(&host->wq); INIT_LIST_HEAD(&host->cards); INIT_WORK(&host->detect, mmc_rescan, host); - host->dev = dev; - /* * By default, hosts do not support SGIO or large requests. * They have to set these according to their abilities. @@ -828,15 +824,15 @@ EXPORT_SYMBOL(mmc_alloc_host); */ int mmc_add_host(struct mmc_host *host) { - static unsigned int host_num; + int ret; - snprintf(host->host_name, sizeof(host->host_name), - "mmc%d", host_num++); + ret = mmc_add_host_sysfs(host); + if (ret == 0) { + mmc_power_off(host); + mmc_detect_change(host); + } - mmc_power_off(host); - mmc_detect_change(host); - - return 0; + return ret; } EXPORT_SYMBOL(mmc_add_host); @@ -859,6 +855,7 @@ void mmc_remove_host(struct mmc_host *host) } mmc_power_off(host); + mmc_remove_host_sysfs(host); } EXPORT_SYMBOL(mmc_remove_host); @@ -872,7 +869,7 @@ EXPORT_SYMBOL(mmc_remove_host); void mmc_free_host(struct mmc_host *host) { flush_scheduled_work(); - kfree(host); + mmc_free_host_sysfs(host); } EXPORT_SYMBOL(mmc_free_host); diff --git a/drivers/mmc/mmc.h b/drivers/mmc/mmc.h index b498dffe0b11..97bae00292fa 100644 --- a/drivers/mmc/mmc.h +++ b/drivers/mmc/mmc.h @@ -13,4 +13,9 @@ void mmc_init_card(struct mmc_card *card, struct mmc_host *host); int mmc_register_card(struct mmc_card *card); void 
mmc_remove_card(struct mmc_card *card); + +struct mmc_host *mmc_alloc_host_sysfs(int extra, struct device *dev); +int mmc_add_host_sysfs(struct mmc_host *host); +void mmc_remove_host_sysfs(struct mmc_host *host); +void mmc_free_host_sysfs(struct mmc_host *host); #endif diff --git a/drivers/mmc/mmc_sysfs.c b/drivers/mmc/mmc_sysfs.c index 3a6b325a9149..96c192057df3 100644 --- a/drivers/mmc/mmc_sysfs.c +++ b/drivers/mmc/mmc_sysfs.c @@ -20,6 +20,7 @@ #define dev_to_mmc_card(d) container_of(d, struct mmc_card, dev) #define to_mmc_driver(d) container_of(d, struct mmc_driver, drv) +#define cls_dev_to_mmc_host(d) container_of(d, struct mmc_host, class_dev) #define MMC_ATTR(name, fmt, args...) \ static ssize_t mmc_##name##_show (struct device *dev, struct device_attribute *attr, char *buf) \ @@ -224,13 +225,82 @@ void mmc_remove_card(struct mmc_card *card) } +static void mmc_host_classdev_release(struct class_device *dev) +{ + struct mmc_host *host = cls_dev_to_mmc_host(dev); + kfree(host); +} + +static struct class mmc_host_class = { + .name = "mmc_host", + .release = mmc_host_classdev_release, +}; + +/* + * Internal function. Allocate a new MMC host. + */ +struct mmc_host *mmc_alloc_host_sysfs(int extra, struct device *dev) +{ + struct mmc_host *host; + + host = kmalloc(sizeof(struct mmc_host) + extra, GFP_KERNEL); + if (host) { + memset(host, 0, sizeof(struct mmc_host) + extra); + + host->dev = dev; + host->class_dev.dev = host->dev; + host->class_dev.class = &mmc_host_class; + class_device_initialize(&host->class_dev); + } + + return host; +} + +/* + * Internal function. Register a new MMC host with the MMC class. + */ +int mmc_add_host_sysfs(struct mmc_host *host) +{ + static unsigned int host_num; + + snprintf(host->host_name, sizeof(host->host_name), + "mmc%d", host_num++); + + strlcpy(host->class_dev.class_id, host->host_name, BUS_ID_SIZE); + return class_device_add(&host->class_dev); +} + +/* + * Internal function. Unregister a MMC host with the MMC class. 
+ */ +void mmc_remove_host_sysfs(struct mmc_host *host) +{ + class_device_del(&host->class_dev); +} + +/* + * Internal function. Free a MMC host. + */ +void mmc_free_host_sysfs(struct mmc_host *host) +{ + class_device_put(&host->class_dev); +} + + static int __init mmc_init(void) { - return bus_register(&mmc_bus_type); + int ret = bus_register(&mmc_bus_type); + if (ret == 0) { + ret = class_register(&mmc_host_class); + if (ret) + bus_unregister(&mmc_bus_type); + } + return ret; } static void __exit mmc_exit(void) { + class_unregister(&mmc_host_class); bus_unregister(&mmc_bus_type); } diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h index 307862308596..a74a810a1302 100644 --- a/include/linux/mmc/host.h +++ b/include/linux/mmc/host.h @@ -63,6 +63,7 @@ struct device; struct mmc_host { struct device *dev; + struct class_device class_dev; struct mmc_host_ops *ops; unsigned int f_min; unsigned int f_max; From 1ad434d7cf5f490c71cfbbb2fb91076c01c8704e Mon Sep 17 00:00:00 2001 From: Russell King Date: Fri, 19 Aug 2005 09:42:21 +0100 Subject: [PATCH 113/584] [MMC] Use class device name for mmc host name There's no point in having the host name duplicated between the mmc_host structure and the encapsulated class device structure. 
Signed-off-by: Russell King --- drivers/mmc/mmc_sysfs.c | 3 +-- include/linux/mmc/host.h | 3 +-- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/mmc/mmc_sysfs.c b/drivers/mmc/mmc_sysfs.c index 96c192057df3..34fa4a3a02d5 100644 --- a/drivers/mmc/mmc_sysfs.c +++ b/drivers/mmc/mmc_sysfs.c @@ -263,10 +263,9 @@ int mmc_add_host_sysfs(struct mmc_host *host) { static unsigned int host_num; - snprintf(host->host_name, sizeof(host->host_name), + snprintf(host->class_dev.class_id, BUS_ID_SIZE, "mmc%d", host_num++); - strlcpy(host->class_dev.class_id, host->host_name, BUS_ID_SIZE); return class_device_add(&host->class_dev); } diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h index a74a810a1302..113cc27865f0 100644 --- a/include/linux/mmc/host.h +++ b/include/linux/mmc/host.h @@ -68,7 +68,6 @@ struct mmc_host { unsigned int f_min; unsigned int f_max; u32 ocr_avail; - char host_name[8]; /* host specific block data */ unsigned int max_seg_size; /* see blk_queue_max_segment_size */ @@ -98,7 +97,7 @@ extern void mmc_free_host(struct mmc_host *); #define mmc_priv(x) ((void *)((x) + 1)) #define mmc_dev(x) ((x)->dev) -#define mmc_hostname(x) ((x)->host_name) +#define mmc_hostname(x) ((x)->class_dev.class_id) extern int mmc_suspend_host(struct mmc_host *, pm_message_t); extern int mmc_resume_host(struct mmc_host *); From dce773771834221817e2d359a7e07a618ba08807 Mon Sep 17 00:00:00 2001 From: Russell King Date: Fri, 19 Aug 2005 09:42:52 +0100 Subject: [PATCH 114/584] [MMC] Use an IDR for host name indices Signed-off-by: Russell King --- drivers/mmc/mmc_sysfs.c | 21 +++++++++++++++++++-- include/linux/mmc/host.h | 1 + 2 files changed, 20 insertions(+), 2 deletions(-) diff --git a/drivers/mmc/mmc_sysfs.c b/drivers/mmc/mmc_sysfs.c index 34fa4a3a02d5..ad8949810fc5 100644 --- a/drivers/mmc/mmc_sysfs.c +++ b/drivers/mmc/mmc_sysfs.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include @@ -236,6 +237,9 @@ static struct class 
mmc_host_class = { .release = mmc_host_classdev_release, }; +static DEFINE_IDR(mmc_host_idr); +static DEFINE_SPINLOCK(mmc_host_lock); + /* * Internal function. Allocate a new MMC host. */ @@ -261,10 +265,19 @@ struct mmc_host *mmc_alloc_host_sysfs(int extra, struct device *dev) */ int mmc_add_host_sysfs(struct mmc_host *host) { - static unsigned int host_num; + int err; + + if (!idr_pre_get(&mmc_host_idr, GFP_KERNEL)) + return -ENOMEM; + + spin_lock(&mmc_host_lock); + err = idr_get_new(&mmc_host_idr, host, &host->index); + spin_unlock(&mmc_host_lock); + if (err) + return err; snprintf(host->class_dev.class_id, BUS_ID_SIZE, - "mmc%d", host_num++); + "mmc%d", host->index); return class_device_add(&host->class_dev); } @@ -275,6 +288,10 @@ int mmc_add_host_sysfs(struct mmc_host *host) void mmc_remove_host_sysfs(struct mmc_host *host) { class_device_del(&host->class_dev); + + spin_lock(&mmc_host_lock); + idr_remove(&mmc_host_idr, host->index); + spin_unlock(&mmc_host_lock); } /* diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h index 113cc27865f0..9a0893f3249e 100644 --- a/include/linux/mmc/host.h +++ b/include/linux/mmc/host.h @@ -64,6 +64,7 @@ struct device; struct mmc_host { struct device *dev; struct class_device class_dev; + int index; struct mmc_host_ops *ops; unsigned int f_min; unsigned int f_max; From 84c3ea01d163a24323d827e1d280dc3346905972 Mon Sep 17 00:00:00 2001 From: Jochen Friedrich Date: Fri, 19 Aug 2005 21:05:56 -0400 Subject: [PATCH 115/584] [netdrvr] Convert madgemc to new MCA API. Now that all tms380 devices have a valid struct device with dma_mask, remove dmalimit from tmsdev_init(). Kconfig: depend tms380tr and madgemc on MCA. abyss.c, proteon.c, skisa.c, tmspci.c, tms380tr.h: remove dmalimit parameter from tmsdev_init(). tms380tr.c: use device->dma_mask instead of dmalimit. madgemc.c: move to new MCA API using struct device. 
Signed-off-by: Jochen Friedrich Signed-off-by: Jeff Garzik --- drivers/net/tokenring/Kconfig | 4 +- drivers/net/tokenring/abyss.c | 2 +- drivers/net/tokenring/madgemc.c | 505 +++++++++++++++---------------- drivers/net/tokenring/proteon.c | 2 +- drivers/net/tokenring/skisa.c | 2 +- drivers/net/tokenring/tms380tr.c | 11 +- drivers/net/tokenring/tms380tr.h | 3 +- drivers/net/tokenring/tmspci.c | 2 +- 8 files changed, 265 insertions(+), 266 deletions(-) diff --git a/drivers/net/tokenring/Kconfig b/drivers/net/tokenring/Kconfig index 23d0fa4bbceb..7d5c23e8cdd7 100644 --- a/drivers/net/tokenring/Kconfig +++ b/drivers/net/tokenring/Kconfig @@ -84,7 +84,7 @@ config 3C359 config TMS380TR tristate "Generic TMS380 Token Ring ISA/PCI adapter support" - depends on TR && (PCI || ISA) + depends on TR && (PCI || ISA || MCA) select FW_LOADER ---help--- This driver provides generic support for token ring adapters @@ -158,7 +158,7 @@ config ABYSS config MADGEMC tristate "Madge Smart 16/4 Ringnode MicroChannel" - depends on TR && TMS380TR && MCA_LEGACY + depends on TR && TMS380TR && MCA help This tms380 module supports the Madge Smart 16/4 MC16 and MC32 MicroChannel adapters. 
diff --git a/drivers/net/tokenring/abyss.c b/drivers/net/tokenring/abyss.c index f1e4ef1188e4..9345e68c451e 100644 --- a/drivers/net/tokenring/abyss.c +++ b/drivers/net/tokenring/abyss.c @@ -139,7 +139,7 @@ static int __devinit abyss_attach(struct pci_dev *pdev, const struct pci_device_ */ dev->base_addr += 0x10; - ret = tmsdev_init(dev, PCI_MAX_ADDRESS, &pdev->dev); + ret = tmsdev_init(dev, &pdev->dev); if (ret) { printk("%s: unable to get memory for dev->priv.\n", dev->name); diff --git a/drivers/net/tokenring/madgemc.c b/drivers/net/tokenring/madgemc.c index 659cbdbef7f3..3a25d191ea4a 100644 --- a/drivers/net/tokenring/madgemc.c +++ b/drivers/net/tokenring/madgemc.c @@ -20,7 +20,7 @@ static const char version[] = "madgemc.c: v0.91 23/01/2000 by Adam Fritzler\n"; #include -#include +#include #include #include #include @@ -38,9 +38,7 @@ static const char version[] = "madgemc.c: v0.91 23/01/2000 by Adam Fritzler\n"; #define MADGEMC_IO_EXTENT 32 #define MADGEMC_SIF_OFFSET 0x08 -struct madgemc_card { - struct net_device *dev; - +struct card_info { /* * These are read from the BIA ROM. 
*/ @@ -57,16 +55,12 @@ struct madgemc_card { unsigned int arblevel:4; unsigned int ringspeed:2; /* 0 = 4mb, 1 = 16, 2 = Auto/none */ unsigned int cabletype:1; /* 0 = RJ45, 1 = DB9 */ - - struct madgemc_card *next; }; -static struct madgemc_card *madgemc_card_list; - static int madgemc_open(struct net_device *dev); static int madgemc_close(struct net_device *dev); static int madgemc_chipset_init(struct net_device *dev); -static void madgemc_read_rom(struct madgemc_card *card); +static void madgemc_read_rom(struct net_device *dev, struct card_info *card); static unsigned short madgemc_setnselout_pins(struct net_device *dev); static void madgemc_setcabletype(struct net_device *dev, int type); @@ -151,261 +145,237 @@ static void madgemc_sifwritew(struct net_device *dev, unsigned short val, unsign -static int __init madgemc_probe(void) +static int __devinit madgemc_probe(struct device *device) { static int versionprinted; struct net_device *dev; struct net_local *tp; - struct madgemc_card *card; - int i,slot = 0; - __u8 posreg[4]; + struct card_info *card; + struct mca_device *mdev = to_mca_device(device); + int ret = 0, i = 0; - if (!MCA_bus) - return -1; - - while (slot != MCA_NOTFOUND) { - /* - * Currently we only support the MC16/32 (MCA ID 002d) - */ - slot = mca_find_unused_adapter(0x002d, slot); - if (slot == MCA_NOTFOUND) - break; + if (versionprinted++ == 0) + printk("%s", version); - /* - * If we get here, we have an adapter. 
- */ - if (versionprinted++ == 0) - printk("%s", version); + if(mca_device_claimed(mdev)) + return -EBUSY; + mca_device_set_claim(mdev, 1); - dev = alloc_trdev(sizeof(struct net_local)); - if (dev == NULL) { - printk("madgemc: unable to allocate dev space\n"); - if (madgemc_card_list) - return 0; - return -1; - } + dev = alloc_trdev(sizeof(struct net_local)); + if (!dev) { + printk("madgemc: unable to allocate dev space\n"); + mca_device_set_claim(mdev, 0); + ret = -ENOMEM; + goto getout; + } - SET_MODULE_OWNER(dev); - dev->dma = 0; + SET_MODULE_OWNER(dev); + dev->dma = 0; - /* - * Fetch MCA config registers - */ - for(i=0;i<4;i++) - posreg[i] = mca_read_stored_pos(slot, i+2); - - card = kmalloc(sizeof(struct madgemc_card), GFP_KERNEL); - if (card==NULL) { - printk("madgemc: unable to allocate card struct\n"); - free_netdev(dev); - if (madgemc_card_list) - return 0; - return -1; - } - card->dev = dev; + card = kmalloc(sizeof(struct card_info), GFP_KERNEL); + if (card==NULL) { + printk("madgemc: unable to allocate card struct\n"); + ret = -ENOMEM; + goto getout1; + } - /* - * Parse configuration information. This all comes - * directly from the publicly available @002d.ADF. - * Get it from Madge or your local ADF library. - */ + /* + * Parse configuration information. This all comes + * directly from the publicly available @002d.ADF. + * Get it from Madge or your local ADF library. 
+ */ - /* - * Base address - */ - dev->base_addr = 0x0a20 + - ((posreg[2] & MC16_POS2_ADDR2)?0x0400:0) + - ((posreg[0] & MC16_POS0_ADDR1)?0x1000:0) + - ((posreg[3] & MC16_POS3_ADDR3)?0x2000:0); + /* + * Base address + */ + dev->base_addr = 0x0a20 + + ((mdev->pos[2] & MC16_POS2_ADDR2)?0x0400:0) + + ((mdev->pos[0] & MC16_POS0_ADDR1)?0x1000:0) + + ((mdev->pos[3] & MC16_POS3_ADDR3)?0x2000:0); - /* - * Interrupt line - */ - switch(posreg[0] >> 6) { /* upper two bits */ + /* + * Interrupt line + */ + switch(mdev->pos[0] >> 6) { /* upper two bits */ case 0x1: dev->irq = 3; break; case 0x2: dev->irq = 9; break; /* IRQ 2 = IRQ 9 */ case 0x3: dev->irq = 10; break; default: dev->irq = 0; break; - } + } - if (dev->irq == 0) { - printk("%s: invalid IRQ\n", dev->name); - goto getout1; - } + if (dev->irq == 0) { + printk("%s: invalid IRQ\n", dev->name); + ret = -EBUSY; + goto getout2; + } - if (!request_region(dev->base_addr, MADGEMC_IO_EXTENT, - "madgemc")) { - printk(KERN_INFO "madgemc: unable to setup Smart MC in slot %d because of I/O base conflict at 0x%04lx\n", slot, dev->base_addr); - dev->base_addr += MADGEMC_SIF_OFFSET; - goto getout1; - } + if (!request_region(dev->base_addr, MADGEMC_IO_EXTENT, + "madgemc")) { + printk(KERN_INFO "madgemc: unable to setup Smart MC in slot %d because of I/O base conflict at 0x%04lx\n", mdev->slot, dev->base_addr); dev->base_addr += MADGEMC_SIF_OFFSET; + ret = -EBUSY; + goto getout2; + } + dev->base_addr += MADGEMC_SIF_OFFSET; + + /* + * Arbitration Level + */ + card->arblevel = ((mdev->pos[0] >> 1) & 0x7) + 8; + + /* + * Burst mode and Fairness + */ + card->burstmode = ((mdev->pos[2] >> 6) & 0x3); + card->fairness = ((mdev->pos[2] >> 4) & 0x1); + + /* + * Ring Speed + */ + if ((mdev->pos[1] >> 2)&0x1) + card->ringspeed = 2; /* not selected */ + else if ((mdev->pos[2] >> 5) & 0x1) + card->ringspeed = 1; /* 16Mb */ + else + card->ringspeed = 0; /* 4Mb */ + + /* + * Cable type + */ + if ((mdev->pos[1] >> 6)&0x1) + card->cabletype = 1; /* 
STP/DB9 */ + else + card->cabletype = 0; /* UTP/RJ-45 */ + + + /* + * ROM Info. This requires us to actually twiddle + * bits on the card, so we must ensure above that + * the base address is free of conflict (request_region above). + */ + madgemc_read_rom(dev, card); - /* - * Arbitration Level - */ - card->arblevel = ((posreg[0] >> 1) & 0x7) + 8; - - /* - * Burst mode and Fairness - */ - card->burstmode = ((posreg[2] >> 6) & 0x3); - card->fairness = ((posreg[2] >> 4) & 0x1); - - /* - * Ring Speed - */ - if ((posreg[1] >> 2)&0x1) - card->ringspeed = 2; /* not selected */ - else if ((posreg[2] >> 5) & 0x1) - card->ringspeed = 1; /* 16Mb */ - else - card->ringspeed = 0; /* 4Mb */ - - /* - * Cable type - */ - if ((posreg[1] >> 6)&0x1) - card->cabletype = 1; /* STP/DB9 */ - else - card->cabletype = 0; /* UTP/RJ-45 */ - - - /* - * ROM Info. This requires us to actually twiddle - * bits on the card, so we must ensure above that - * the base address is free of conflict (request_region above). - */ - madgemc_read_rom(card); + if (card->manid != 0x4d) { /* something went wrong */ + printk(KERN_INFO "%s: Madge MC ROM read failed (unknown manufacturer ID %02x)\n", dev->name, card->manid); + goto getout3; + } - if (card->manid != 0x4d) { /* something went wrong */ - printk(KERN_INFO "%s: Madge MC ROM read failed (unknown manufacturer ID %02x)\n", dev->name, card->manid); - goto getout; - } - - if ((card->cardtype != 0x08) && (card->cardtype != 0x0d)) { - printk(KERN_INFO "%s: Madge MC ROM read failed (unknown card ID %02x)\n", dev->name, card->cardtype); - goto getout; - } + if ((card->cardtype != 0x08) && (card->cardtype != 0x0d)) { + printk(KERN_INFO "%s: Madge MC ROM read failed (unknown card ID %02x)\n", dev->name, card->cardtype); + ret = -EIO; + goto getout3; + } - /* All cards except Rev 0 and 1 MC16's have 256kb of RAM */ - if ((card->cardtype == 0x08) && (card->cardrev <= 0x01)) - card->ramsize = 128; - else - card->ramsize = 256; + /* All cards except Rev 0 and 1 
MC16's have 256kb of RAM */ + if ((card->cardtype == 0x08) && (card->cardrev <= 0x01)) + card->ramsize = 128; + else + card->ramsize = 256; - printk("%s: %s Rev %d at 0x%04lx IRQ %d\n", - dev->name, - (card->cardtype == 0x08)?MADGEMC16_CARDNAME: - MADGEMC32_CARDNAME, card->cardrev, - dev->base_addr, dev->irq); + printk("%s: %s Rev %d at 0x%04lx IRQ %d\n", + dev->name, + (card->cardtype == 0x08)?MADGEMC16_CARDNAME: + MADGEMC32_CARDNAME, card->cardrev, + dev->base_addr, dev->irq); - if (card->cardtype == 0x0d) - printk("%s: Warning: MC32 support is experimental and highly untested\n", dev->name); + if (card->cardtype == 0x0d) + printk("%s: Warning: MC32 support is experimental and highly untested\n", dev->name); + + if (card->ringspeed==2) { /* Unknown */ + printk("%s: Warning: Ring speed not set in POS -- Please run the reference disk and set it!\n", dev->name); + card->ringspeed = 1; /* default to 16mb */ + } - if (card->ringspeed==2) { /* Unknown */ - printk("%s: Warning: Ring speed not set in POS -- Please run the reference disk and set it!\n", dev->name); - card->ringspeed = 1; /* default to 16mb */ - } - - printk("%s: RAM Size: %dKB\n", dev->name, card->ramsize); + printk("%s: RAM Size: %dKB\n", dev->name, card->ramsize); - printk("%s: Ring Speed: %dMb/sec on %s\n", dev->name, - (card->ringspeed)?16:4, - card->cabletype?"STP/DB9":"UTP/RJ-45"); - printk("%s: Arbitration Level: %d\n", dev->name, - card->arblevel); + printk("%s: Ring Speed: %dMb/sec on %s\n", dev->name, + (card->ringspeed)?16:4, + card->cabletype?"STP/DB9":"UTP/RJ-45"); + printk("%s: Arbitration Level: %d\n", dev->name, + card->arblevel); - printk("%s: Burst Mode: ", dev->name); - switch(card->burstmode) { + printk("%s: Burst Mode: ", dev->name); + switch(card->burstmode) { case 0: printk("Cycle steal"); break; case 1: printk("Limited burst"); break; case 2: printk("Delayed release"); break; case 3: printk("Immediate release"); break; - } - printk(" (%s)\n", (card->fairness)?"Unfair":"Fair"); + } 
+ printk(" (%s)\n", (card->fairness)?"Unfair":"Fair"); - /* - * Enable SIF before we assign the interrupt handler, - * just in case we get spurious interrupts that need - * handling. - */ - outb(0, dev->base_addr + MC_CONTROL_REG0); /* sanity */ - madgemc_setsifsel(dev, 1); - if (request_irq(dev->irq, madgemc_interrupt, SA_SHIRQ, - "madgemc", dev)) - goto getout; - - madgemc_chipset_init(dev); /* enables interrupts! */ - madgemc_setcabletype(dev, card->cabletype); - - /* Setup MCA structures */ - mca_set_adapter_name(slot, (card->cardtype == 0x08)?MADGEMC16_CARDNAME:MADGEMC32_CARDNAME); - mca_set_adapter_procfn(slot, madgemc_mcaproc, dev); - mca_mark_as_used(slot); - - printk("%s: Ring Station Address: ", dev->name); - printk("%2.2x", dev->dev_addr[0]); - for (i = 1; i < 6; i++) - printk(":%2.2x", dev->dev_addr[i]); - printk("\n"); - - /* XXX is ISA_MAX_ADDRESS correct here? */ - if (tmsdev_init(dev, ISA_MAX_ADDRESS, NULL)) { - printk("%s: unable to get memory for dev->priv.\n", - dev->name); - release_region(dev->base_addr-MADGEMC_SIF_OFFSET, - MADGEMC_IO_EXTENT); - - kfree(card); - tmsdev_term(dev); - free_netdev(dev); - if (madgemc_card_list) - return 0; - return -1; - } - tp = netdev_priv(dev); - - /* - * The MC16 is physically a 32bit card. However, Madge - * insists on calling it 16bit, so I'll assume here that - * they know what they're talking about. Cut off DMA - * at 16mb. 
- */ - tp->setnselout = madgemc_setnselout_pins; - tp->sifwriteb = madgemc_sifwriteb; - tp->sifreadb = madgemc_sifreadb; - tp->sifwritew = madgemc_sifwritew; - tp->sifreadw = madgemc_sifreadw; - tp->DataRate = (card->ringspeed)?SPEED_16:SPEED_4; - - memcpy(tp->ProductID, "Madge MCA 16/4 ", PROD_ID_SIZE + 1); - - dev->open = madgemc_open; - dev->stop = madgemc_close; - - if (register_netdev(dev) == 0) { - /* Enlist in the card list */ - card->next = madgemc_card_list; - madgemc_card_list = card; - slot++; - continue; /* successful, try to find another */ - } - - free_irq(dev->irq, dev); - getout: - release_region(dev->base_addr-MADGEMC_SIF_OFFSET, - MADGEMC_IO_EXTENT); - getout1: - kfree(card); - free_netdev(dev); - slot++; + /* + * Enable SIF before we assign the interrupt handler, + * just in case we get spurious interrupts that need + * handling. + */ + outb(0, dev->base_addr + MC_CONTROL_REG0); /* sanity */ + madgemc_setsifsel(dev, 1); + if (request_irq(dev->irq, madgemc_interrupt, SA_SHIRQ, + "madgemc", dev)) { + ret = -EBUSY; + goto getout3; } - if (madgemc_card_list) + madgemc_chipset_init(dev); /* enables interrupts! */ + madgemc_setcabletype(dev, card->cabletype); + + /* Setup MCA structures */ + mca_device_set_name(mdev, (card->cardtype == 0x08)?MADGEMC16_CARDNAME:MADGEMC32_CARDNAME); + mca_set_adapter_procfn(mdev->slot, madgemc_mcaproc, dev); + + printk("%s: Ring Station Address: ", dev->name); + printk("%2.2x", dev->dev_addr[0]); + for (i = 1; i < 6; i++) + printk(":%2.2x", dev->dev_addr[i]); + printk("\n"); + + if (tmsdev_init(dev, device)) { + printk("%s: unable to get memory for dev->priv.\n", + dev->name); + ret = -ENOMEM; + goto getout4; + } + tp = netdev_priv(dev); + + /* + * The MC16 is physically a 32bit card. However, Madge + * insists on calling it 16bit, so I'll assume here that + * they know what they're talking about. Cut off DMA + * at 16mb. 
+ */ + tp->setnselout = madgemc_setnselout_pins; + tp->sifwriteb = madgemc_sifwriteb; + tp->sifreadb = madgemc_sifreadb; + tp->sifwritew = madgemc_sifwritew; + tp->sifreadw = madgemc_sifreadw; + tp->DataRate = (card->ringspeed)?SPEED_16:SPEED_4; + + memcpy(tp->ProductID, "Madge MCA 16/4 ", PROD_ID_SIZE + 1); + + dev->open = madgemc_open; + dev->stop = madgemc_close; + + tp->tmspriv = card; + dev_set_drvdata(device, dev); + + if (register_netdev(dev) == 0) return 0; - return -1; + + dev_set_drvdata(device, NULL); + ret = -ENOMEM; +getout4: + free_irq(dev->irq, dev); +getout3: + release_region(dev->base_addr-MADGEMC_SIF_OFFSET, + MADGEMC_IO_EXTENT); +getout2: + kfree(card); +getout1: + free_netdev(dev); +getout: + mca_device_set_claim(mdev, 0); + return ret; } /* @@ -664,12 +634,12 @@ static void madgemc_chipset_close(struct net_device *dev) * is complete. * */ -static void madgemc_read_rom(struct madgemc_card *card) +static void madgemc_read_rom(struct net_device *dev, struct card_info *card) { unsigned long ioaddr; unsigned char reg0, reg1, tmpreg0, i; - ioaddr = card->dev->base_addr; + ioaddr = dev->base_addr; reg0 = inb(ioaddr + MC_CONTROL_REG0); reg1 = inb(ioaddr + MC_CONTROL_REG1); @@ -686,9 +656,9 @@ static void madgemc_read_rom(struct madgemc_card *card) outb(tmpreg0 | MC_CONTROL_REG0_PAGE, ioaddr + MC_CONTROL_REG0); /* Read BIA */ - card->dev->addr_len = 6; + dev->addr_len = 6; for (i = 0; i < 6; i++) - card->dev->dev_addr[i] = inb(ioaddr + MC_ROM_BIA_START + i); + dev->dev_addr[i] = inb(ioaddr + MC_ROM_BIA_START + i); /* Restore original register values */ outb(reg0, ioaddr + MC_CONTROL_REG0); @@ -721,14 +691,10 @@ static int madgemc_close(struct net_device *dev) static int madgemc_mcaproc(char *buf, int slot, void *d) { struct net_device *dev = (struct net_device *)d; - struct madgemc_card *curcard = madgemc_card_list; + struct net_local *tp = dev->priv; + struct card_info *curcard = tp->tmspriv; int len = 0; - while (curcard) { /* search for card struct 
*/ - if (curcard->dev == dev) - break; - curcard = curcard->next; - } len += sprintf(buf+len, "-------\n"); if (curcard) { struct net_local *tp = netdev_priv(dev); @@ -763,25 +729,56 @@ static int madgemc_mcaproc(char *buf, int slot, void *d) return len; } -static void __exit madgemc_exit(void) +static int __devexit madgemc_remove(struct device *device) { - struct net_device *dev; - struct madgemc_card *this_card; - - while (madgemc_card_list) { - dev = madgemc_card_list->dev; - unregister_netdev(dev); - release_region(dev->base_addr-MADGEMC_SIF_OFFSET, MADGEMC_IO_EXTENT); - free_irq(dev->irq, dev); - tmsdev_term(dev); - free_netdev(dev); - this_card = madgemc_card_list; - madgemc_card_list = this_card->next; - kfree(this_card); - } + struct net_device *dev = dev_get_drvdata(device); + struct net_local *tp; + struct card_info *card; + + if (!dev) + BUG(); + + tp = dev->priv; + card = tp->tmspriv; + kfree(card); + tp->tmspriv = NULL; + + unregister_netdev(dev); + release_region(dev->base_addr-MADGEMC_SIF_OFFSET, MADGEMC_IO_EXTENT); + free_irq(dev->irq, dev); + tmsdev_term(dev); + free_netdev(dev); + dev_set_drvdata(device, NULL); + + return 0; } -module_init(madgemc_probe); +static short madgemc_adapter_ids[] __initdata = { + 0x002d, + 0x0000 +}; + +static struct mca_driver madgemc_driver = { + .id_table = madgemc_adapter_ids, + .driver = { + .name = "madgemc", + .bus = &mca_bus_type, + .probe = madgemc_probe, + .remove = __devexit_p(madgemc_remove), + }, +}; + +static int __init madgemc_init (void) +{ + return mca_register_driver (&madgemc_driver); +} + +static void __exit madgemc_exit (void) +{ + mca_unregister_driver (&madgemc_driver); +} + +module_init(madgemc_init); module_exit(madgemc_exit); MODULE_LICENSE("GPL"); diff --git a/drivers/net/tokenring/proteon.c b/drivers/net/tokenring/proteon.c index 0a9597738d6c..eb1423ede75c 100644 --- a/drivers/net/tokenring/proteon.c +++ b/drivers/net/tokenring/proteon.c @@ -145,7 +145,7 @@ static int __init setup_card(struct 
net_device *dev, struct device *pdev) err = -EIO; pdev->dma_mask = &dma_mask; - if (tmsdev_init(dev, ISA_MAX_ADDRESS, pdev)) + if (tmsdev_init(dev, pdev)) goto out4; dev->base_addr &= ~3; diff --git a/drivers/net/tokenring/skisa.c b/drivers/net/tokenring/skisa.c index 03f061941d77..3c7c66204f74 100644 --- a/drivers/net/tokenring/skisa.c +++ b/drivers/net/tokenring/skisa.c @@ -162,7 +162,7 @@ static int __init setup_card(struct net_device *dev, struct device *pdev) err = -EIO; pdev->dma_mask = &dma_mask; - if (tmsdev_init(dev, ISA_MAX_ADDRESS, pdev)) + if (tmsdev_init(dev, pdev)) goto out4; dev->base_addr &= ~3; diff --git a/drivers/net/tokenring/tms380tr.c b/drivers/net/tokenring/tms380tr.c index 9a543fe2d0e6..2e39bf1f7462 100644 --- a/drivers/net/tokenring/tms380tr.c +++ b/drivers/net/tokenring/tms380tr.c @@ -2333,19 +2333,22 @@ void tmsdev_term(struct net_device *dev) DMA_BIDIRECTIONAL); } -int tmsdev_init(struct net_device *dev, unsigned long dmalimit, - struct device *pdev) +int tmsdev_init(struct net_device *dev, struct device *pdev) { struct net_local *tms_local; memset(dev->priv, 0, sizeof(struct net_local)); tms_local = netdev_priv(dev); init_waitqueue_head(&tms_local->wait_for_tok_int); - tms_local->dmalimit = dmalimit; + if (pdev->dma_mask) + tms_local->dmalimit = *pdev->dma_mask; + else + return -ENOMEM; tms_local->pdev = pdev; tms_local->dmabuffer = dma_map_single(pdev, (void *)tms_local, sizeof(struct net_local), DMA_BIDIRECTIONAL); - if (tms_local->dmabuffer + sizeof(struct net_local) > dmalimit) + if (tms_local->dmabuffer + sizeof(struct net_local) > + tms_local->dmalimit) { printk(KERN_INFO "%s: Memory not accessible for DMA\n", dev->name); diff --git a/drivers/net/tokenring/tms380tr.h b/drivers/net/tokenring/tms380tr.h index 077f568d89d1..30452c67bb68 100644 --- a/drivers/net/tokenring/tms380tr.h +++ b/drivers/net/tokenring/tms380tr.h @@ -17,8 +17,7 @@ int tms380tr_open(struct net_device *dev); int tms380tr_close(struct net_device *dev); 
irqreturn_t tms380tr_interrupt(int irq, void *dev_id, struct pt_regs *regs); -int tmsdev_init(struct net_device *dev, unsigned long dmalimit, - struct device *pdev); +int tmsdev_init(struct net_device *dev, struct device *pdev); void tmsdev_term(struct net_device *dev); void tms380tr_wait(unsigned long time); diff --git a/drivers/net/tokenring/tmspci.c b/drivers/net/tokenring/tmspci.c index 0014aef5c744..ab47c0547a3b 100644 --- a/drivers/net/tokenring/tmspci.c +++ b/drivers/net/tokenring/tmspci.c @@ -143,7 +143,7 @@ static int __devinit tms_pci_attach(struct pci_dev *pdev, const struct pci_devic printk(":%2.2x", dev->dev_addr[i]); printk("\n"); - ret = tmsdev_init(dev, PCI_MAX_ADDRESS, &pdev->dev); + ret = tmsdev_init(dev, &pdev->dev); if (ret) { printk("%s: unable to get memory for dev->priv.\n", dev->name); goto err_out_irq; From 3d52365c4f62edb9ff9710e1c57952e957b2879f Mon Sep 17 00:00:00 2001 From: Jochen Friedrich Date: Fri, 19 Aug 2005 15:51:46 +0200 Subject: [PATCH 116/584] [PATCH] tms380tr: remove prototypes in Space.c Cleanup: remove two prototypes. 
Signed-off-by: Jochen Friedrich Signed-off-by: Jeff Garzik --- drivers/net/Space.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/net/Space.c b/drivers/net/Space.c index 11c44becc08f..b28e5fde0b9e 100644 --- a/drivers/net/Space.c +++ b/drivers/net/Space.c @@ -318,8 +318,6 @@ static void __init ethif_probe2(int unit) #ifdef CONFIG_TR /* Token-ring device probe */ extern int ibmtr_probe_card(struct net_device *); -extern struct net_device *sk_isa_probe(int unit); -extern struct net_device *proteon_probe(int unit); extern struct net_device *smctr_probe(int unit); static struct devprobe2 tr_probes2[] __initdata = { From e960fc5c7d9144b1ce80dda9891ca7dfc656c078 Mon Sep 17 00:00:00 2001 From: "ravinandan.arakali@neterion.com" Date: Fri, 12 Aug 2005 10:15:59 -0700 Subject: [PATCH 117/584] [PATCH] S2io: Hardware fixes for Xframe II adapter Hi, Patch Description: This patch incorporates the following hardware fixes required for the Xframe II adapter. 1. New values to program the dtx_control register. 2. Disable memory controller interrupts (MC_INTR), since these are now monitored through a poll routine. 3. Don't reset an Xframe II card on an ECC double-bit error (it can recover). 4. Save/restore PCI config space before/after a reset, irrespective of whether the card is an Xframe I or II. 5. Bumped up the driver version number to 2.0.3.1. Please review the patch and apply it if it looks OK. Signed-off-by: Ravinandan Arakali Signed-off-by: Jeff Garzik --- drivers/net/s2io.c | 37 ++++++++++++++++++------------------- 1 file changed, 18 insertions(+), 19 deletions(-) diff --git a/drivers/net/s2io.c b/drivers/net/s2io.c index e083351e3f42..7ca78228b104 100644 --- a/drivers/net/s2io.c +++ b/drivers/net/s2io.c @@ -67,7 +67,7 @@ /* S2io Driver name & version.
*/ static char s2io_driver_name[] = "Neterion"; -static char s2io_driver_version[] = "Version 2.0.2.1"; +static char s2io_driver_version[] = "Version 2.0.3.1"; static inline int RXD_IS_UP2DT(RxD_t *rxdp) { @@ -210,14 +210,18 @@ static void s2io_vlan_rx_kill_vid(struct net_device *dev, unsigned long vid) static u64 herc_act_dtx_cfg[] = { /* Set address */ - 0x80000515BA750000ULL, 0x80000515BA7500E0ULL, + 0x8000051536750000ULL, 0x80000515367500E0ULL, /* Write data */ - 0x80000515BA750004ULL, 0x80000515BA7500E4ULL, + 0x8000051536750004ULL, 0x80000515367500E4ULL, /* Set address */ 0x80010515003F0000ULL, 0x80010515003F00E0ULL, /* Write data */ 0x80010515003F0004ULL, 0x80010515003F00E4ULL, /* Set address */ + 0x801205150D440000ULL, 0x801205150D4400E0ULL, + /* Write data */ + 0x801205150D440004ULL, 0x801205150D4400E4ULL, + /* Set address */ 0x80020515F2100000ULL, 0x80020515F21000E0ULL, /* Write data */ 0x80020515F2100004ULL, 0x80020515F21000E4ULL, @@ -1903,7 +1907,7 @@ static int start_nic(struct s2io_nic *nic) } /* Enable select interrupts */ - interruptible = TX_TRAFFIC_INTR | RX_TRAFFIC_INTR | MC_INTR; + interruptible = TX_TRAFFIC_INTR | RX_TRAFFIC_INTR; interruptible |= TX_PIC_INTR | RX_PIC_INTR; interruptible |= TX_MAC_INTR | RX_MAC_INTR; @@ -2030,7 +2034,7 @@ static void stop_nic(struct s2io_nic *nic) config = &nic->config; /* Disable all interrupts */ - interruptible = TX_TRAFFIC_INTR | RX_TRAFFIC_INTR | MC_INTR; + interruptible = TX_TRAFFIC_INTR | RX_TRAFFIC_INTR; interruptible |= TX_PIC_INTR | RX_PIC_INTR; interruptible |= TX_MAC_INTR | RX_MAC_INTR; en_dis_able_nic_intrs(nic, interruptible, DISABLE_INTRS); @@ -2688,8 +2692,10 @@ static void alarm_intr_handler(struct s2io_nic *nic) DBG_PRINT(ERR_DBG, "%s: Device indicates ", dev->name); DBG_PRINT(ERR_DBG, "double ECC error!!\n"); - netif_stop_queue(dev); - schedule_work(&nic->rst_timer_task); + if (nic->device_type != XFRAME_II_DEVICE) { + netif_stop_queue(dev); + schedule_work(&nic->rst_timer_task); + } } else { 
nic->mac_control.stats_info->sw_stat. single_ecc_errs++; @@ -2772,8 +2778,7 @@ void s2io_reset(nic_t * sp) u16 subid, pci_cmd; /* Back up the PCI-X CMD reg, dont want to lose MMRBC, OST settings */ - if (sp->device_type == XFRAME_I_DEVICE) - pci_read_config_word(sp->pdev, PCIX_COMMAND_REGISTER, &(pci_cmd)); + pci_read_config_word(sp->pdev, PCIX_COMMAND_REGISTER, &(pci_cmd)); val64 = SW_RESET_ALL; writeq(val64, &bar0->sw_reset); @@ -2792,14 +2797,10 @@ void s2io_reset(nic_t * sp) */ msleep(250); - if (!(sp->device_type & XFRAME_II_DEVICE)) { - /* Restore the PCI state saved during initializarion. */ - pci_restore_state(sp->pdev); - pci_write_config_word(sp->pdev, PCIX_COMMAND_REGISTER, + /* Restore the PCI state saved during initialization. */ + pci_restore_state(sp->pdev); + pci_write_config_word(sp->pdev, PCIX_COMMAND_REGISTER, pci_cmd); - } else { - pci_set_master(sp->pdev); - } s2io_init_pci(sp); msleep(250); @@ -5426,9 +5427,7 @@ s2io_init_nic(struct pci_dev *pdev, const struct pci_device_id *pre) INIT_WORK(&sp->set_link_task, (void (*)(void *)) s2io_set_link, sp); - if (!(sp->device_type & XFRAME_II_DEVICE)) { - pci_save_state(sp->pdev); - } + pci_save_state(sp->pdev); /* Setting swapper control on the NIC, for proper reset operation */ if (s2io_set_swapper(sp)) { From 945a787675cc5ba362f5d4ce135d2a2c20be5985 Mon Sep 17 00:00:00 2001 From: Peer Chen Date: Sat, 20 Aug 2005 01:10:06 -0400 Subject: [PATCH 118/584] [netdrvr uli526x] fix problems found in review - s/DEVICE/net_device/ - improve formatting - remove dead code - check return value, in several areas --- drivers/net/tulip/uli526x.c | 207 ++++++++++++++++-------------------- 1 file changed, 93 insertions(+), 114 deletions(-) diff --git a/drivers/net/tulip/uli526x.c b/drivers/net/tulip/uli526x.c index 27f99e087f79..5ae22b7bc5ca 100644 --- a/drivers/net/tulip/uli526x.c +++ b/drivers/net/tulip/uli526x.c @@ -56,8 +56,7 @@ #define RX_ALLOC_SIZE 0x620 #define ULI526X_RESET 1 #define CR0_DEFAULT 0 -#define 
CR6_DEFAULT 0x00080000 /* HD */ -#define CR6_DEFAULT_A 0x22240000 +#define CR6_DEFAULT 0x22200000 #define CR7_DEFAULT 0x180c1 #define CR15_DEFAULT 0x06 /* TxJabber RxWatchdog */ #define TDES0_ERR_MASK 0x4302 /* TXJT, LC, EC, FUE */ @@ -103,10 +102,13 @@ #define SROM_V41_CODE 0x14 -#define SROM_CLK_WRITE(data, ioaddr) outl(data|CR9_SROM_READ|CR9_SRCS,ioaddr);udelay(5);outl(data|CR9_SROM_READ|CR9_SRCS|CR9_SRCLK,ioaddr);udelay(5);outl(data|CR9_SROM_READ|CR9_SRCS,ioaddr);udelay(5); - -/* Sten Check */ -#define DEVICE net_device +#define SROM_CLK_WRITE(data, ioaddr) \ + outl(data|CR9_SROM_READ|CR9_SRCS,ioaddr); \ + udelay(5); \ + outl(data|CR9_SROM_READ|CR9_SRCS|CR9_SRCLK,ioaddr); \ + udelay(5); \ + outl(data|CR9_SROM_READ|CR9_SRCS,ioaddr); \ + udelay(5); /* Structure/enum declaration ------------------------------- */ struct tx_desc { @@ -123,7 +125,7 @@ struct rx_desc { struct uli526x_board_info { u32 chip_id; /* Chip vendor/Device ID */ - struct DEVICE *next_dev; /* next device */ + struct net_device *next_dev; /* next device */ struct pci_dev *pdev; /* PCI device */ spinlock_t lock; @@ -212,22 +214,21 @@ static u32 uli526x_cr6_user_set; /* For module input parameter */ static int debug; static u32 cr6set; -static u32 m526x_id; static unsigned char mode = 8; /* function declaration ------------------------------------- */ -static int uli526x_open(struct DEVICE *); -static int uli526x_start_xmit(struct sk_buff *, struct DEVICE *); -static int uli526x_stop(struct DEVICE *); -static struct net_device_stats * uli526x_get_stats(struct DEVICE *); -static void uli526x_set_filter_mode(struct DEVICE *); +static int uli526x_open(struct net_device *); +static int uli526x_start_xmit(struct sk_buff *, struct net_device *); +static int uli526x_stop(struct net_device *); +static struct net_device_stats * uli526x_get_stats(struct net_device *); +static void uli526x_set_filter_mode(struct net_device *); static struct ethtool_ops netdev_ethtool_ops; -static u16 read_srom_word(long 
,int); -static irqreturn_t uli526x_interrupt(int , void *, struct pt_regs *); +static u16 read_srom_word(long, int); +static irqreturn_t uli526x_interrupt(int, void *, struct pt_regs *); static void uli526x_descriptor_init(struct uli526x_board_info *, unsigned long); static void allocate_rx_buffer(struct uli526x_board_info *); static void update_cr6(u32, unsigned long); -static void send_filter_frame(struct DEVICE * ,int); +static void send_filter_frame(struct net_device *, int); static u16 phy_read(unsigned long, u8, u8, u32); static u16 phy_readby_cr10(unsigned long, u8, u8); static void phy_write(unsigned long, u8, u8, u16, u32); @@ -237,18 +238,18 @@ static u16 phy_read_1bit(unsigned long, u32); static u8 uli526x_sense_speed(struct uli526x_board_info *); static void uli526x_process_mode(struct uli526x_board_info *); static void uli526x_timer(unsigned long); -static void uli526x_rx_packet(struct DEVICE *, struct uli526x_board_info *); -static void uli526x_free_tx_pkt(struct DEVICE *, struct uli526x_board_info *); +static void uli526x_rx_packet(struct net_device *, struct uli526x_board_info *); +static void uli526x_free_tx_pkt(struct net_device *, struct uli526x_board_info *); static void uli526x_reuse_skb(struct uli526x_board_info *, struct sk_buff *); -static void uli526x_dynamic_reset(struct DEVICE *); +static void uli526x_dynamic_reset(struct net_device *); static void uli526x_free_rxbuffer(struct uli526x_board_info *); -static void uli526x_init(struct DEVICE *); +static void uli526x_init(struct net_device *); static void uli526x_set_phyxcer(struct uli526x_board_info *); -/* ULI526X network baord routine ---------------------------- */ +/* ULI526X network board routine ---------------------------- */ /* - * Search ULI526X board ,allocate space and register it + * Search ULI526X board, allocate space and register it */ static int __devinit uli526x_init_one (struct pci_dev *pdev, @@ -257,8 +258,7 @@ static int __devinit uli526x_init_one (struct pci_dev *pdev, 
struct uli526x_board_info *db; /* board information structure */ struct net_device *dev; int i, err; - u32 configval; - + ULI526X_DBUG(0, "uli526x_init_one()", 0); if (!printed_version++) @@ -271,7 +271,7 @@ static int __devinit uli526x_init_one (struct pci_dev *pdev, SET_MODULE_OWNER(dev); SET_NETDEV_DEV(dev, &pdev->dev); - if (pci_set_dma_mask(pdev, 0xffffffff)) { + if (pci_set_dma_mask(pdev, DMA_32BIT_MASK)) { printk(KERN_WARNING DRV_NAME ": 32-bit PCI DMA not available.\n"); err = -ENODEV; goto err_out_free; @@ -300,23 +300,23 @@ static int __devinit uli526x_init_one (struct pci_dev *pdev, goto err_out_disable; } - //add by clearzhang 2004/7/8 - pci_read_config_dword(pdev,0x0,&configval); - m526x_id = configval; - if(configval == 0x526310b9) - { - //printk("is m5263\n"); - pci_read_config_dword(pdev,0x0c,&configval); - configval = ((configval & 0xffff00ff) | 0x8000); - pci_write_config_dword(pdev,0x0c,configval); - } /* Init system & device */ db = netdev_priv(dev); /* Allocate Tx/Rx descriptor memory */ db->desc_pool_ptr = pci_alloc_consistent(pdev, sizeof(struct tx_desc) * DESC_ALL_CNT + 0x20, &db->desc_pool_dma_ptr); + if(db->desc_pool_ptr == NULL) + { + err = -ENOMEM; + goto err_out_nomem; + } db->buf_pool_ptr = pci_alloc_consistent(pdev, TX_BUF_ALLOC * TX_DESC_CNT + 4, &db->buf_pool_dma_ptr); - + if(db->buf_pool_ptr == NULL) + { + err = -ENOMEM; + goto err_out_nomem; + } + db->first_tx_desc = (struct tx_desc *) db->desc_pool_ptr; db->first_tx_desc_dma = db->desc_pool_dma_ptr; db->buf_pool_start = db->buf_pool_ptr; @@ -347,7 +347,7 @@ static int __devinit uli526x_init_one (struct pci_dev *pdev, ((u16 *) db->srom)[i] = cpu_to_le16(read_srom_word(db->ioaddr, i)); /* Set Node address */ - if(((u16 *) db->srom)[0] == 0xffff) /* SROM absent, so read MAC address from ID Table */ + if(((u16 *) db->srom)[0] == 0xffff || ((u16 *) db->srom)[0] == 0) /* SROM absent, so read MAC address from ID Table */ { outl(0x10000, db->ioaddr + DCR0); //Diagnosis mode outl(0x1c0, 
db->ioaddr + DCR13); //Reset dianostic pointer port @@ -385,6 +385,14 @@ static int __devinit uli526x_init_one (struct pci_dev *pdev, err_out_res: pci_release_regions(pdev); +err_out_nomem: + if(db->desc_pool_ptr) + pci_free_consistent(pdev, sizeof(struct tx_desc) * DESC_ALL_CNT + 0x20, + db->desc_pool_ptr, db->desc_pool_dma_ptr); + + if(db->buf_pool_ptr != NULL) + pci_free_consistent(pdev, TX_BUF_ALLOC * TX_DESC_CNT + 4, + db->buf_pool_ptr, db->buf_pool_dma_ptr); err_out_disable: pci_disable_device(pdev); err_out_free: @@ -402,32 +410,30 @@ static void __devexit uli526x_remove_one (struct pci_dev *pdev) ULI526X_DBUG(0, "uli526x_remove_one()", 0); - if (dev) { - pci_free_consistent(db->pdev, sizeof(struct tx_desc) * - DESC_ALL_CNT + 0x20, db->desc_pool_ptr, - db->desc_pool_dma_ptr); - pci_free_consistent(db->pdev, TX_BUF_ALLOC * TX_DESC_CNT + 4, - db->buf_pool_ptr, db->buf_pool_dma_ptr); - unregister_netdev(dev); - pci_release_regions(pdev); - free_netdev(dev); /* free board information */ - pci_set_drvdata(pdev, NULL); - } - + pci_free_consistent(db->pdev, sizeof(struct tx_desc) * + DESC_ALL_CNT + 0x20, db->desc_pool_ptr, + db->desc_pool_dma_ptr); + pci_free_consistent(db->pdev, TX_BUF_ALLOC * TX_DESC_CNT + 4, + db->buf_pool_ptr, db->buf_pool_dma_ptr); + unregister_netdev(dev); + pci_release_regions(pdev); + free_netdev(dev); /* free board information */ + pci_set_drvdata(pdev, NULL); + pci_disable_device(pdev); ULI526X_DBUG(0, "uli526x_remove_one() exit", 0); } /* * Open the interface. - * The interface is opened whenever "ifconfig" actives it. + * The interface is opened whenever "ifconfig" activates it. 
*/ -static int uli526x_open(struct DEVICE *dev) +static int uli526x_open(struct net_device *dev) { int ret; struct uli526x_board_info *db = netdev_priv(dev); - + ULI526X_DBUG(0, "uli526x_open", 0); ret = request_irq(dev->irq, &uli526x_interrupt, SA_SHIRQ, dev->name, dev); @@ -436,11 +442,6 @@ static int uli526x_open(struct DEVICE *dev) /* system variable init */ db->cr6_data = CR6_DEFAULT | uli526x_cr6_user_set; - if(m526x_id == 0x526310b9) - { - //printk("is 5263\n"); - db->cr6_data = CR6_DEFAULT_A | uli526x_cr6_user_set; - } db->tx_packet_cnt = 0; db->rx_avail_cnt = 0; db->link_failed = 1; @@ -454,7 +455,7 @@ static int uli526x_open(struct DEVICE *dev) db->cr6_data |= ULI526X_TXTH_256; db->cr0_data = CR0_DEFAULT; - /* Initilize ULI526X board */ + /* Initialize ULI526X board */ uli526x_init(dev); /* Active System Interface */ @@ -471,14 +472,14 @@ static int uli526x_open(struct DEVICE *dev) } -/* Initilize ULI526X board +/* Initialize ULI526X board * Reset ULI526X board - * Initilize TX/Rx descriptor chain structure + * Initialize TX/Rx descriptor chain structure * Send the set-up frame * Enable Tx/Rx machine */ -static void uli526x_init(struct DEVICE *dev) +static void uli526x_init(struct net_device *dev) { struct uli526x_board_info *db = netdev_priv(dev); unsigned long ioaddr = db->ioaddr; @@ -510,11 +511,6 @@ static void uli526x_init(struct DEVICE *dev) /* Parser SROM and media mode */ db->media_mode = uli526x_media_mode; - //add by clearzhang 2004/7/8 - /* RESET Phyxcer Chip by GPR port bit 7 */ - //outl(0x180, ioaddr + DCR12); /* Let bit 7 output port */ - //outl(0x0, ioaddr + DCR12); /* Clear RESET signal */ - /* Phyxcer capability setting */ phy_reg_reset = phy_read(db->ioaddr, db->phy_addr, 0, db->chip_id); phy_reg_reset = (phy_reg_reset | 0x8000); @@ -528,7 +524,7 @@ static void uli526x_init(struct DEVICE *dev) if ( !(db->media_mode & ULI526X_AUTO) ) db->op_mode = db->media_mode; /* Force Mode */ - /* Initiliaze Transmit/Receive decriptor and CR3/4 */ + 
/* Initialize Transmit/Receive decriptor and CR3/4 */ uli526x_descriptor_init(db, ioaddr); /* Init CR6 to program M526X operation */ @@ -555,7 +551,7 @@ static void uli526x_init(struct DEVICE *dev) * Send a packet to media from the upper layer. */ -static int uli526x_start_xmit(struct sk_buff *skb, struct DEVICE *dev) +static int uli526x_start_xmit(struct sk_buff *skb, struct net_device *dev) { struct uli526x_board_info *db = netdev_priv(dev); struct tx_desc *txptr; @@ -621,7 +617,7 @@ static int uli526x_start_xmit(struct sk_buff *skb, struct DEVICE *dev) * The interface is stopped when it is brought. */ -static int uli526x_stop(struct DEVICE *dev) +static int uli526x_stop(struct net_device *dev) { struct uli526x_board_info *db = netdev_priv(dev); unsigned long ioaddr = dev->base_addr; @@ -665,19 +661,16 @@ static int uli526x_stop(struct DEVICE *dev) static irqreturn_t uli526x_interrupt(int irq, void *dev_id, struct pt_regs *regs) { - struct DEVICE *dev = dev_id; + struct net_device *dev = dev_id; struct uli526x_board_info *db = netdev_priv(dev); unsigned long ioaddr = dev->base_addr; unsigned long flags; - //ULI526X_DBUG(0, "uli526x_interrupt()", 0); - if (!dev) { ULI526X_DBUG(1, "uli526x_interrupt() without DEVICE arg", 0); return IRQ_NONE; } - //outl(0, ioaddr + DCR7); spin_lock_irqsave(&db->lock, flags); outl(0, ioaddr + DCR7); @@ -690,9 +683,6 @@ static irqreturn_t uli526x_interrupt(int irq, void *dev_id, struct pt_regs *regs return IRQ_HANDLED; } - /* Disable all interrupt in CR7 to solve the interrupt edge problem */ - //outl(0, ioaddr + DCR7); - /* Check system status */ if (db->cr5_data & 0x2000) { /* system bus error happen */ @@ -727,10 +717,9 @@ static irqreturn_t uli526x_interrupt(int irq, void *dev_id, struct pt_regs *regs * Free TX resource after TX complete */ -static void uli526x_free_tx_pkt(struct DEVICE *dev, struct uli526x_board_info * db) +static void uli526x_free_tx_pkt(struct net_device *dev, struct uli526x_board_info * db) { struct tx_desc 
*txptr; -// unsigned long ioaddr = dev->base_addr; u32 tdes0; txptr = db->tx_remove_ptr; @@ -787,7 +776,7 @@ static void uli526x_free_tx_pkt(struct DEVICE *dev, struct uli526x_board_info * * Receive the come packet and pass to upper layer */ -static void uli526x_rx_packet(struct DEVICE *dev, struct uli526x_board_info * db) +static void uli526x_rx_packet(struct net_device *dev, struct uli526x_board_info * db) { struct rx_desc *rxptr; struct sk_buff *skb; @@ -871,7 +860,7 @@ static void uli526x_rx_packet(struct DEVICE *dev, struct uli526x_board_info * db * Get statistics from driver. */ -static struct net_device_stats * uli526x_get_stats(struct DEVICE *dev) +static struct net_device_stats * uli526x_get_stats(struct net_device *dev) { struct uli526x_board_info *db = netdev_priv(dev); @@ -884,7 +873,7 @@ static struct net_device_stats * uli526x_get_stats(struct DEVICE *dev) * Set ULI526X multicast address */ -static void uli526x_set_filter_mode(struct DEVICE * dev) +static void uli526x_set_filter_mode(struct net_device * dev) { struct uli526x_board_info *db = dev->priv; unsigned long flags; @@ -916,34 +905,26 @@ static void uli526x_set_filter_mode(struct DEVICE * dev) static void ULi_ethtool_gset(struct uli526x_board_info *db, struct ethtool_cmd *ecmd) { - //struct e1000_hw *hw = &adapter->hw; - - { - - ecmd->supported = (SUPPORTED_10baseT_Half | - SUPPORTED_10baseT_Full | - SUPPORTED_100baseT_Half | - SUPPORTED_100baseT_Full | - SUPPORTED_Autoneg | - SUPPORTED_MII); + ecmd->supported = (SUPPORTED_10baseT_Half | + SUPPORTED_10baseT_Full | + SUPPORTED_100baseT_Half | + SUPPORTED_100baseT_Full | + SUPPORTED_Autoneg | + SUPPORTED_MII); - ecmd->advertising = (ADVERTISED_10baseT_Half | - ADVERTISED_10baseT_Full | - ADVERTISED_100baseT_Half | - ADVERTISED_100baseT_Full | - ADVERTISED_Autoneg | - ADVERTISED_MII); + ecmd->advertising = (ADVERTISED_10baseT_Half | + ADVERTISED_10baseT_Full | + ADVERTISED_100baseT_Half | + ADVERTISED_100baseT_Full | + ADVERTISED_Autoneg | + 
ADVERTISED_MII); - ecmd->port = PORT_MII; - ecmd->phy_address = db->phy_addr; + ecmd->port = PORT_MII; + ecmd->phy_address = db->phy_addr; - ecmd->transceiver = XCVR_EXTERNAL; + ecmd->transceiver = XCVR_EXTERNAL; - - } - - ecmd->speed = 10; ecmd->duplex = DUPLEX_HALF; @@ -965,8 +946,6 @@ ULi_ethtool_gset(struct uli526x_board_info *db, struct ethtool_cmd *ecmd) { ecmd->autoneg = AUTONEG_ENABLE; } - - } static void netdev_get_drvinfo(struct net_device *dev, @@ -1022,7 +1001,7 @@ static void uli526x_timer(unsigned long data) { u32 tmp_cr8; unsigned char tmp_cr12=0; - struct DEVICE *dev = (struct DEVICE *) data; + struct net_device *dev = (struct net_device *) data; struct uli526x_board_info *db = netdev_priv(dev); unsigned long flags; u8 TmpSpeed=10; @@ -1135,10 +1114,10 @@ static void uli526x_timer(unsigned long data) * Stop ULI526X board * Free Tx/Rx allocated memory * Reset ULI526X board - * Re-initilize ULI526X board + * Re-initialize ULI526X board */ -static void uli526x_dynamic_reset(struct DEVICE *dev) +static void uli526x_dynamic_reset(struct net_device *dev) { struct uli526x_board_info *db = netdev_priv(dev); @@ -1163,7 +1142,7 @@ static void uli526x_dynamic_reset(struct DEVICE *dev) db->init=1; db->wait_reset = 0; - /* Re-initilize ULI526X board */ + /* Re-initialize ULI526X board */ uli526x_init(dev); /* Restart upper layer interface */ @@ -1273,7 +1252,7 @@ static void uli526x_descriptor_init(struct uli526x_board_info *db, unsigned long /* * Update CR6 value - * Firstly stop ULI526X , then written value and start + * Firstly stop ULI526X, then written value and start */ static void update_cr6(u32 cr6_data, unsigned long ioaddr) @@ -1286,10 +1265,10 @@ static void update_cr6(u32 cr6_data, unsigned long ioaddr) /* * Send a setup frame for M5261/M5263 - * This setup frame initilize ULI526X address filter mode + * This setup frame initialize ULI526X address filter mode */ -static void send_filter_frame(struct DEVICE *dev, int mc_cnt) +static void 
send_filter_frame(struct net_device *dev, int mc_cnt) { struct uli526x_board_info *db = netdev_priv(dev); struct dev_mc_list *mcptr; @@ -1718,7 +1697,7 @@ MODULE_PARM_DESC(mode, "ULi M5261/M5263: Bit 0: 10/100Mbps, bit 2: duplex, bit 8 /* Description: * when user used insmod to add module, system invoked init_module() - * to initilize and register. + * to register the services. */ static int __init uli526x_init_module(void) From ffbbf7a3ccdcac7526296a55968e5dac0626fd9e Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Sat, 20 Aug 2005 17:40:04 +1000 Subject: [PATCH 119/584] drm: add new texture upload code from r300 project Paul Mackerras did some new upload code for r300, I forgot to add it to the kernel with r300 merge. Signed-off-by: Dave Airlie --- drivers/char/drm/radeon_state.c | 64 +++++++++++++++------------------ 1 file changed, 29 insertions(+), 35 deletions(-) diff --git a/drivers/char/drm/radeon_state.c b/drivers/char/drm/radeon_state.c index d57accdd8df5..64a3e3a406ef 100644 --- a/drivers/char/drm/radeon_state.c +++ b/drivers/char/drm/radeon_state.c @@ -1493,7 +1493,7 @@ static void radeon_cp_dispatch_indices( drm_device_t *dev, } -#define RADEON_MAX_TEXTURE_SIZE (RADEON_BUFFER_SIZE - 8 * sizeof(u32)) +#define RADEON_MAX_TEXTURE_SIZE RADEON_BUFFER_SIZE static int radeon_cp_dispatch_texture( DRMFILE filp, drm_device_t *dev, @@ -1506,10 +1506,11 @@ static int radeon_cp_dispatch_texture( DRMFILE filp, u32 format; u32 *buffer; const u8 __user *data; - int size, dwords, tex_width, blit_width; + int size, dwords, tex_width, blit_width, spitch; u32 height; int i; u32 texpitch, microtile; + u32 offset; RING_LOCALS; DRM_GET_PRIV_WITH_RETURN( filp_priv, filp ); @@ -1530,17 +1531,6 @@ static int radeon_cp_dispatch_texture( DRMFILE filp, RADEON_WAIT_UNTIL_IDLE(); ADVANCE_RING(); -#ifdef __BIG_ENDIAN - /* The Mesa texture functions provide the data in little endian as the - * chip wants it, but we need to compensate for the fact that the CP - * ring gets 
byte-swapped - */ - BEGIN_RING( 2 ); - OUT_RING_REG( RADEON_RBBM_GUICNTL, RADEON_HOST_DATA_SWAP_32BIT ); - ADVANCE_RING(); -#endif - - /* The compiler won't optimize away a division by a variable, * even if the only legal values are powers of two. Thus, we'll * use a shift instead. @@ -1572,6 +1562,10 @@ static int radeon_cp_dispatch_texture( DRMFILE filp, DRM_ERROR( "invalid texture format %d\n", tex->format ); return DRM_ERR(EINVAL); } + spitch = blit_width >> 6; + if (spitch == 0 && image->height > 1) + return DRM_ERR(EINVAL); + texpitch = tex->pitch; if ((texpitch << 22) & RADEON_DST_TILE_MICRO) { microtile = 1; @@ -1624,25 +1618,6 @@ static int radeon_cp_dispatch_texture( DRMFILE filp, */ buffer = (u32*)((char*)dev->agp_buffer_map->handle + buf->offset); dwords = size / 4; - buffer[0] = CP_PACKET3( RADEON_CNTL_HOSTDATA_BLT, dwords + 6 ); - buffer[1] = (RADEON_GMC_DST_PITCH_OFFSET_CNTL | - RADEON_GMC_BRUSH_NONE | - (format << 8) | - RADEON_GMC_SRC_DATATYPE_COLOR | - RADEON_ROP3_S | - RADEON_DP_SRC_SOURCE_HOST_DATA | - RADEON_GMC_CLR_CMP_CNTL_DIS | - RADEON_GMC_WR_MSK_DIS); - - buffer[2] = (texpitch << 22) | (tex->offset >> 10); - buffer[3] = 0xffffffff; - buffer[4] = 0xffffffff; - buffer[5] = (image->y << 16) | image->x; - buffer[6] = (height << 16) | image->width; - buffer[7] = dwords; - buffer += 8; - - if (microtile) { /* texture micro tiling in use, minimum texture width is thus 16 bytes. 
@@ -1750,9 +1725,28 @@ static int radeon_cp_dispatch_texture( DRMFILE filp, } buf->filp = filp; - buf->used = (dwords + 8) * sizeof(u32); - radeon_cp_dispatch_indirect( dev, buf, 0, buf->used ); - radeon_cp_discard_buffer( dev, buf ); + buf->used = size; + offset = dev_priv->gart_buffers_offset + buf->offset; + BEGIN_RING(9); + OUT_RING(CP_PACKET3(RADEON_CNTL_BITBLT_MULTI, 5)); + OUT_RING(RADEON_GMC_SRC_PITCH_OFFSET_CNTL | + RADEON_GMC_DST_PITCH_OFFSET_CNTL | + RADEON_GMC_BRUSH_NONE | + (format << 8) | + RADEON_GMC_SRC_DATATYPE_COLOR | + RADEON_ROP3_S | + RADEON_DP_SRC_SOURCE_MEMORY | + RADEON_GMC_CLR_CMP_CNTL_DIS | + RADEON_GMC_WR_MSK_DIS ); + OUT_RING((spitch << 22) | (offset >> 10)); + OUT_RING((texpitch << 22) | (tex->offset >> 10)); + OUT_RING(0); + OUT_RING((image->x << 16) | image->y); + OUT_RING((image->width << 16) | height); + RADEON_WAIT_UNTIL_2D_IDLE(); + ADVANCE_RING(); + + radeon_cp_discard_buffer(dev, buf); /* Update the input parameters for next time */ image->y += height; From 88f399cd0a5a540db2815eee3002f8f00ef6461e Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Sat, 20 Aug 2005 17:43:33 +1000 Subject: [PATCH 120/584] drm: fixes for powerpc Remove a bogus check on whether an area is memory (we need a better interface) also change pgprot flags for powerpc don't check on x86-64 either From: Paul Mackerras Signed-off-by: Dave Airlie --- drivers/char/drm/drm_bufs.c | 4 ++-- drivers/char/drm/drm_vm.c | 18 +++++++++--------- 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/drivers/char/drm/drm_bufs.c b/drivers/char/drm/drm_bufs.c index b70802035d9e..e0743ebbe4bd 100644 --- a/drivers/char/drm/drm_bufs.c +++ b/drivers/char/drm/drm_bufs.c @@ -152,7 +152,7 @@ int drm_addmap(drm_device_t * dev, unsigned int offset, switch ( map->type ) { case _DRM_REGISTERS: case _DRM_FRAME_BUFFER: -#if !defined(__sparc__) && !defined(__alpha__) && !defined(__ia64__) +#if !defined(__sparc__) && !defined(__alpha__) && !defined(__ia64__) && 
!defined(__powerpc64__) && !defined(__x86_64__) if ( map->offset + map->size < map->offset || map->offset < virt_to_phys(high_memory) ) { drm_free( map, sizeof(*map), DRM_MEM_MAPS ); @@ -309,7 +309,7 @@ int drm_addmap_ioctl(struct inode *inode, struct file *filp, if (put_user(handle, &argp->handle)) return -EFAULT; return 0; - } +} /** diff --git a/drivers/char/drm/drm_vm.c b/drivers/char/drm/drm_vm.c index 292fa7167b26..ced4215e2275 100644 --- a/drivers/char/drm/drm_vm.c +++ b/drivers/char/drm/drm_vm.c @@ -578,17 +578,17 @@ int drm_mmap(struct file *filp, struct vm_area_struct *vma) /* fall through to _DRM_FRAME_BUFFER... */ case _DRM_FRAME_BUFFER: case _DRM_REGISTERS: - if (map->offset >= __pa(high_memory)) { #if defined(__i386__) || defined(__x86_64__) - if (boot_cpu_data.x86 > 3 && map->type != _DRM_AGP) { - pgprot_val(vma->vm_page_prot) |= _PAGE_PCD; - pgprot_val(vma->vm_page_prot) &= ~_PAGE_PWT; - } -#elif defined(__powerpc__) - pgprot_val(vma->vm_page_prot) |= _PAGE_NO_CACHE | _PAGE_GUARDED; -#endif - vma->vm_flags |= VM_IO; /* not in core dump */ + if (boot_cpu_data.x86 > 3 && map->type != _DRM_AGP) { + pgprot_val(vma->vm_page_prot) |= _PAGE_PCD; + pgprot_val(vma->vm_page_prot) &= ~_PAGE_PWT; } +#elif defined(__powerpc__) + pgprot_val(vma->vm_page_prot) |= _PAGE_NO_CACHE; + if (map->type == _DRM_REGISTERS) + pgprot_val(vma->vm_page_prot) |= _PAGE_GUARDED; +#endif + vma->vm_flags |= VM_IO; /* not in core dump */ #if defined(__ia64__) if (efi_range_is_wc(vma->vm_start, vma->vm_end - vma->vm_start)) From e0c93142ceed0a2ea4f5b8e9d0f62c884241e9c0 Mon Sep 17 00:00:00 2001 From: Adrian Bunk Date: Sat, 20 Aug 2005 17:20:28 +0100 Subject: [PATCH 121/584] [ARM] fs/adfs/adfs.h: "extern inline" doesn't make sense "extern inline" doesn't make sense. 
Signed-off-by: Adrian Bunk Signed-off-by: Russell King --- fs/adfs/adfs.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/adfs/adfs.h b/fs/adfs/adfs.h index 63f5df9afb71..fd528433de43 100644 --- a/fs/adfs/adfs.h +++ b/fs/adfs/adfs.h @@ -97,7 +97,7 @@ extern int adfs_dir_update(struct super_block *sb, struct object_info *obj); extern struct inode_operations adfs_file_inode_operations; extern struct file_operations adfs_file_operations; -extern inline __u32 signed_asl(__u32 val, signed int shift) +static inline __u32 signed_asl(__u32 val, signed int shift) { if (shift >= 0) val <<= shift; @@ -112,7 +112,7 @@ extern inline __u32 signed_asl(__u32 val, signed int shift) * * The root directory ID should always be looked up in the map [3.4] */ -extern inline int +static inline int __adfs_block_map(struct super_block *sb, unsigned int object_id, unsigned int block) { From 7a9aff3cff807261e476a1719273a4ac5d254ecb Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Tue, 23 Aug 2005 12:15:43 +1000 Subject: [PATCH 122/584] drm: fix a bad VERSION check. I found why my G5 was crashing when using the linux-2.6 version of the DRM + git-drm.patch from 2.6.13-rc6-mm1, but not with the CVS DRM. The reason was that dev->agp->cant_use_aperture wasn't getting set, and the reason for that was that <linux/version.h> no longer gets included and the #if LINUX_VERSION_CODE < 0x020408 in drm_agpsupport.c was going the wrong way. With this patch (and a few others) a 32-bit server works correctly, as does DRI.
From: Paul Mackerras Signed-off-by: Dave Airlie --- drivers/char/drm/drm_agpsupport.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/drivers/char/drm/drm_agpsupport.c b/drivers/char/drm/drm_agpsupport.c index ffb4acaefe84..8c215adcb4b2 100644 --- a/drivers/char/drm/drm_agpsupport.c +++ b/drivers/char/drm/drm_agpsupport.c @@ -426,13 +426,8 @@ drm_agp_head_t *drm_agp_init(drm_device_t *dev) return NULL; } head->memory = NULL; -#if LINUX_VERSION_CODE <= 0x020408 - head->cant_use_aperture = 0; - head->page_mask = ~(0xfff); -#else head->cant_use_aperture = head->agp_info.cant_use_aperture; head->page_mask = head->agp_info.page_mask; -#endif return head; } From 2600636065406dc14948ac2d2913c66c51be80d5 Mon Sep 17 00:00:00 2001 From: Dale Farnsworth Date: Mon, 22 Aug 2005 15:53:29 -0700 Subject: [PATCH 123/584] [PATCH] mv643xx: add workaround for HW checksum generation bug [PATCH] [NET] mv643xx: add workaround for HW checksum generation bug The hardware checksum generator on the mv64xxx occasionally generates an incorrect checksum. This patch works around the issue and enables hardware checksum generation. 
Signed-off-by: Dale Farnsworth Signed-off-by: Jeff Garzik --- drivers/net/mv643xx_eth.c | 29 ++++++++++++++++++----------- drivers/net/mv643xx_eth.h | 4 +++- 2 files changed, 21 insertions(+), 12 deletions(-) diff --git a/drivers/net/mv643xx_eth.c b/drivers/net/mv643xx_eth.c index 0405e1f0d3df..fb6b232069d6 100644 --- a/drivers/net/mv643xx_eth.c +++ b/drivers/net/mv643xx_eth.c @@ -1157,16 +1157,20 @@ static int mv643xx_eth_start_xmit(struct sk_buff *skb, struct net_device *dev) if (!skb_shinfo(skb)->nr_frags) { linear: if (skb->ip_summed != CHECKSUM_HW) { + /* Errata BTS #50, IHL must be 5 if no HW checksum */ pkt_info.cmd_sts = ETH_TX_ENABLE_INTERRUPT | - ETH_TX_FIRST_DESC | ETH_TX_LAST_DESC; + ETH_TX_FIRST_DESC | + ETH_TX_LAST_DESC | + 5 << ETH_TX_IHL_SHIFT; pkt_info.l4i_chk = 0; } else { - u32 ipheader = skb->nh.iph->ihl << 11; pkt_info.cmd_sts = ETH_TX_ENABLE_INTERRUPT | - ETH_TX_FIRST_DESC | ETH_TX_LAST_DESC | - ETH_GEN_TCP_UDP_CHECKSUM | - ETH_GEN_IP_V_4_CHECKSUM | ipheader; + ETH_TX_FIRST_DESC | + ETH_TX_LAST_DESC | + ETH_GEN_TCP_UDP_CHECKSUM | + ETH_GEN_IP_V_4_CHECKSUM | + skb->nh.iph->ihl << ETH_TX_IHL_SHIFT; /* CPU already calculated pseudo header checksum. 
*/ if (skb->nh.iph->protocol == IPPROTO_UDP) { pkt_info.cmd_sts |= ETH_UDP_FRAME; @@ -1193,7 +1197,6 @@ linear: stats->tx_bytes += pkt_info.byte_cnt; } else { unsigned int frag; - u32 ipheader; /* Since hardware can't handle unaligned fragments smaller * than 9 bytes, if we find any, we linearize the skb @@ -1222,12 +1225,16 @@ linear: DMA_TO_DEVICE); pkt_info.l4i_chk = 0; pkt_info.return_info = 0; - pkt_info.cmd_sts = ETH_TX_FIRST_DESC; - if (skb->ip_summed == CHECKSUM_HW) { - ipheader = skb->nh.iph->ihl << 11; - pkt_info.cmd_sts |= ETH_GEN_TCP_UDP_CHECKSUM | - ETH_GEN_IP_V_4_CHECKSUM | ipheader; + if (skb->ip_summed != CHECKSUM_HW) + /* Errata BTS #50, IHL must be 5 if no HW checksum */ + pkt_info.cmd_sts = ETH_TX_FIRST_DESC | + 5 << ETH_TX_IHL_SHIFT; + else { + pkt_info.cmd_sts = ETH_TX_FIRST_DESC | + ETH_GEN_TCP_UDP_CHECKSUM | + ETH_GEN_IP_V_4_CHECKSUM | + skb->nh.iph->ihl << ETH_TX_IHL_SHIFT; /* CPU already calculated pseudo header checksum. */ if (skb->nh.iph->protocol == IPPROTO_UDP) { pkt_info.cmd_sts |= ETH_UDP_FRAME; diff --git a/drivers/net/mv643xx_eth.h b/drivers/net/mv643xx_eth.h index 57c4f8fbfdb6..7678b59c2952 100644 --- a/drivers/net/mv643xx_eth.h +++ b/drivers/net/mv643xx_eth.h @@ -49,7 +49,7 @@ /* Checksum offload for Tx works for most packets, but * fails if previous packet sent did not use hw csum */ -#undef MV643XX_CHECKSUM_OFFLOAD_TX +#define MV643XX_CHECKSUM_OFFLOAD_TX #define MV643XX_NAPI #define MV643XX_TX_FAST_REFILL #undef MV643XX_RX_QUEUE_FILL_ON_TASK /* Does not work, yet */ @@ -217,6 +217,8 @@ #define ETH_TX_ENABLE_INTERRUPT (BIT23) #define ETH_AUTO_MODE (BIT30) +#define ETH_TX_IHL_SHIFT 11 + /* typedefs */ typedef enum _eth_func_ret_status { From 3f3791d30036a367f05b9d0be28ce7ff2e017a5a Mon Sep 17 00:00:00 2001 From: Albert Lee Date: Tue, 16 Aug 2005 14:25:38 +0800 Subject: [PATCH 124/584] [PATCH] libata: Clear ATA_QCFLAG_ACTIVE flag before calling the completion callback Description: After calling the completion callback, the libata 
error handler might be running and getting atapi sense data. Clearing the ATA_QCFLAG_ACTIVE flag at this point might interfere with the libata error handler. Changes: - Clear the ATA_QCFLAG_ACTIVE flag before calling the completion callback (and also before the error handler) - Add some comment Signed-off-by: Albert Lee Signed-off-by: Jeff Garzik --- drivers/scsi/libata-core.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/scsi/libata-core.c b/drivers/scsi/libata-core.c index 3544f5d020c6..9a6aacf467b8 100644 --- a/drivers/scsi/libata-core.c +++ b/drivers/scsi/libata-core.c @@ -3213,9 +3213,14 @@ void ata_qc_complete(struct ata_queued_cmd *qc, u8 drv_stat) if (likely(qc->flags & ATA_QCFLAG_DMAMAP)) ata_sg_clean(qc); + /* atapi: mark qc as inactive to prevent the interrupt handler + * from completing the command twice later, before the error handler + * is called. (when rc != 0 and atapi request sense is needed) + */ + qc->flags &= ~ATA_QCFLAG_ACTIVE; + /* call completion callback */ rc = qc->complete_fn(qc, drv_stat); - qc->flags &= ~ATA_QCFLAG_ACTIVE; /* if callback indicates not to complete command (non-zero), * return immediately From 08b791c02b86e25f456cba64f5f1a1f90326db1d Mon Sep 17 00:00:00 2001 From: Otto Meier Date: Mon, 22 Aug 2005 14:58:57 +0100 Subject: [PATCH 125/584] [PATCH] sata_promise: Add PDC40718 id Otto Meier recently submitted a patch to support the PDC40718 chip (marketed as SATA300 TX4, a 4-port SATA controller). 
Signed-off-by: Otto Meier Signed-off-by: Daniel Drake Signed-off-by: Jeff Garzik --- drivers/scsi/sata_promise.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/scsi/sata_promise.c b/drivers/scsi/sata_promise.c index 5c1d4411457a..0392a7530318 100644 --- a/drivers/scsi/sata_promise.c +++ b/drivers/scsi/sata_promise.c @@ -181,6 +181,8 @@ static struct pci_device_id pdc_ata_pci_tbl[] = { board_20319 }, { PCI_VENDOR_ID_PROMISE, 0x3319, PCI_ANY_ID, PCI_ANY_ID, 0, 0, board_20319 }, + { PCI_VENDOR_ID_PROMISE, 0x3d17, PCI_ANY_ID, PCI_ANY_ID, 0, 0, + board_20319 }, { PCI_VENDOR_ID_PROMISE, 0x3d18, PCI_ANY_ID, PCI_ANY_ID, 0, 0, board_20319 }, From 9309049544935f804b745aa4dea043fb39b2bf2a Mon Sep 17 00:00:00 2001 From: Daniel Drake Date: Mon, 22 Aug 2005 14:59:23 +0100 Subject: [PATCH 126/584] [PATCH] sata_promise: Add PDC40519 id MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The Promise TX4200 is a 4-port SATA controller based on the PDC40519 chip. It meets the description of the 20319, so just a simple ID needs to be added to support this hardware. Thanks to Martin Povolný for testing. 
Signed-off-by: Daniel Drake Signed-off-by: Jeff Garzik --- drivers/scsi/sata_promise.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/scsi/sata_promise.c b/drivers/scsi/sata_promise.c index 0392a7530318..cc613b3c6ce6 100644 --- a/drivers/scsi/sata_promise.c +++ b/drivers/scsi/sata_promise.c @@ -181,6 +181,8 @@ static struct pci_device_id pdc_ata_pci_tbl[] = { board_20319 }, { PCI_VENDOR_ID_PROMISE, 0x3319, PCI_ANY_ID, PCI_ANY_ID, 0, 0, board_20319 }, + { PCI_VENDOR_ID_PROMISE, 0x3519, PCI_ANY_ID, PCI_ANY_ID, 0, 0, + board_20319 }, { PCI_VENDOR_ID_PROMISE, 0x3d17, PCI_ANY_ID, PCI_ANY_ID, 0, 0, board_20319 }, { PCI_VENDOR_ID_PROMISE, 0x3d18, PCI_ANY_ID, PCI_ANY_ID, 0, 0, From c0b34ad2956036cdba87792d6c46d8f491539df1 Mon Sep 17 00:00:00 2001 From: Martin Wilck Date: Thu, 4 Aug 2005 09:04:56 +0200 Subject: [PATCH 127/584] [PATCH] Fix HD activity LED with ahci Patch: fix wrong HD activity control by ahci driver The ahci driver 1.0 sets the SActive bit on every transaction, causing the LED to light up. The SActive bit is used only for native command queuing (NCQ) which the current driver version doesn't implement. Resetting the SActive bit is the device's responsibility (by sending a "Set Device Bits FIS" to the host adapter) but this is not required in response to non-NCQ commands, and (most) devices don't. Thus the LED stays always on. This patch fixes the LED behavior. Spec references: http://www.intel.com/technology/serialata/pdf/rev1_1.pdf, sec. 
3.3.13, 5.5.1 http://www.serialata.org/docs/serialata10a.pdf http://www.intel.com/design/storage/papers/25266401.pdf Signed-off-by: Martin.Wilck@fujitsu-siemens.com Signed-off-by: Jeff Garzik --- drivers/scsi/ahci.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/scsi/ahci.c b/drivers/scsi/ahci.c index 0c79cafb1348..0b228ff67143 100644 --- a/drivers/scsi/ahci.c +++ b/drivers/scsi/ahci.c @@ -698,9 +698,6 @@ static int ahci_qc_issue(struct ata_queued_cmd *qc) struct ata_port *ap = qc->ap; void *port_mmio = (void *) ap->ioaddr.cmd_addr; - writel(1, port_mmio + PORT_SCR_ACT); - readl(port_mmio + PORT_SCR_ACT); /* flush */ - writel(1, port_mmio + PORT_CMD_ISSUE); readl(port_mmio + PORT_CMD_ISSUE); /* flush */ From c1389503710ef4b4e5d21bea284afde19e9619cf Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 22 Aug 2005 14:59:24 +0900 Subject: [PATCH 128/584] [PATCH] fix atapi_packet_task vs. intr race (take 2) Interrupts from devices sharing the same IRQ could cause ata_host_intr to finish commands being processed by atapi_packet_task if the commands are using ATA_PROT_ATAPI_NODATA or ATA_PROT_ATAPI_DMA protocol. This is because libata interrupt handler is unaware that interrupts are not expected during that period. This patch adds ATA_FLAG_NOINTR flag to tell the interrupt handler that we're not expecting interrupts. Note that once proper HSM is implemented for interrupt-driven PIO, this should be merged into it and this flag will be removed. ahci.c is a different kind of beast, so it's left alone. * The following drivers use ata_qc_issue_prot and ata_interrupt, so changes in libata core will do. ata_piix sata_sil sata_svw sata_via sata_sis sata_uli * The following drivers use ata_qc_issue_prot and custom intr handler. They need this change to work correctly. sata_nv sata_vsc * The following drivers use custom issue function and intr handler. 
Currently all custom issue functions don't support ATAPI, so this change is irrelevant, updated for consistency and to avoid later mistakes. sata_promise sata_qstor sata_sx4 Signed-off-by: Tejun Heo Signed-off-by: Jeff Garzik --- drivers/scsi/libata-core.c | 33 ++++++++++++++++++++++----------- drivers/scsi/sata_nv.c | 3 ++- drivers/scsi/sata_promise.c | 3 ++- drivers/scsi/sata_qstor.c | 6 ++++-- drivers/scsi/sata_sx4.c | 3 ++- drivers/scsi/sata_vsc.c | 3 ++- include/linux/libata.h | 2 ++ 7 files changed, 36 insertions(+), 17 deletions(-) diff --git a/drivers/scsi/libata-core.c b/drivers/scsi/libata-core.c index 9a6aacf467b8..c92439fe5dae 100644 --- a/drivers/scsi/libata-core.c +++ b/drivers/scsi/libata-core.c @@ -3350,11 +3350,13 @@ int ata_qc_issue_prot(struct ata_queued_cmd *qc) break; case ATA_PROT_ATAPI_NODATA: + ap->flags |= ATA_FLAG_NOINTR; ata_tf_to_host_nolock(ap, &qc->tf); queue_work(ata_wq, &ap->packet_task); break; case ATA_PROT_ATAPI_DMA: + ap->flags |= ATA_FLAG_NOINTR; ap->ops->tf_load(ap, &qc->tf); /* load tf registers */ ap->ops->bmdma_setup(qc); /* set up bmdma */ queue_work(ata_wq, &ap->packet_task); @@ -3708,7 +3710,8 @@ irqreturn_t ata_interrupt (int irq, void *dev_instance, struct pt_regs *regs) struct ata_port *ap; ap = host_set->ports[i]; - if (ap && (!(ap->flags & ATA_FLAG_PORT_DISABLED))) { + if (ap && + !(ap->flags & (ATA_FLAG_PORT_DISABLED | ATA_FLAG_NOINTR))) { struct ata_queued_cmd *qc; qc = ata_qc_from_tag(ap, ap->active_tag); @@ -3760,19 +3763,27 @@ static void atapi_packet_task(void *_data) /* send SCSI cdb */ DPRINTK("send cdb\n"); assert(ap->cdb_len >= 12); - ata_data_xfer(ap, qc->cdb, ap->cdb_len, 1); - /* if we are DMA'ing, irq handler takes over from here */ - if (qc->tf.protocol == ATA_PROT_ATAPI_DMA) - ap->ops->bmdma_start(qc); /* initiate bmdma */ + if (qc->tf.protocol == ATA_PROT_ATAPI_DMA || + qc->tf.protocol == ATA_PROT_ATAPI_NODATA) { + unsigned long flags; - /* non-data commands are also handled via irq */ - else if 
(qc->tf.protocol == ATA_PROT_ATAPI_NODATA) { - /* do nothing */ - } + /* Once we're done issuing command and kicking bmdma, + * irq handler takes over. To not lose irq, we need + * to clear NOINTR flag before sending cdb, but + * interrupt handler shouldn't be invoked before we're + * finished. Hence, the following locking. + */ + spin_lock_irqsave(&ap->host_set->lock, flags); + ap->flags &= ~ATA_FLAG_NOINTR; + ata_data_xfer(ap, qc->cdb, ap->cdb_len, 1); + if (qc->tf.protocol == ATA_PROT_ATAPI_DMA) + ap->ops->bmdma_start(qc); /* initiate bmdma */ + spin_unlock_irqrestore(&ap->host_set->lock, flags); + } else { + ata_data_xfer(ap, qc->cdb, ap->cdb_len, 1); - /* PIO commands are handled by polling */ - else { + /* PIO commands are handled by polling */ ap->pio_task_state = PIO_ST; queue_work(ata_wq, &ap->pio_task); } diff --git a/drivers/scsi/sata_nv.c b/drivers/scsi/sata_nv.c index 9b9142790bd6..41a3421b02b4 100644 --- a/drivers/scsi/sata_nv.c +++ b/drivers/scsi/sata_nv.c @@ -291,7 +291,8 @@ static irqreturn_t nv_interrupt (int irq, void *dev_instance, struct ata_port *ap; ap = host_set->ports[i]; - if (ap && (!(ap->flags & ATA_FLAG_PORT_DISABLED))) { + if (ap && + !(ap->flags & (ATA_FLAG_PORT_DISABLED | ATA_FLAG_NOINTR))) { struct ata_queued_cmd *qc; qc = ata_qc_from_tag(ap, ap->active_tag); diff --git a/drivers/scsi/sata_promise.c b/drivers/scsi/sata_promise.c index cc613b3c6ce6..6defd7962359 100644 --- a/drivers/scsi/sata_promise.c +++ b/drivers/scsi/sata_promise.c @@ -445,7 +445,8 @@ static irqreturn_t pdc_interrupt (int irq, void *dev_instance, struct pt_regs *r VPRINTK("port %u\n", i); ap = host_set->ports[i]; tmp = mask & (1 << (i + 1)); - if (tmp && ap && (!(ap->flags & ATA_FLAG_PORT_DISABLED))) { + if (tmp && ap && + !(ap->flags & (ATA_FLAG_PORT_DISABLED | ATA_FLAG_NOINTR))) { struct ata_queued_cmd *qc; qc = ata_qc_from_tag(ap, ap->active_tag); diff --git a/drivers/scsi/sata_qstor.c b/drivers/scsi/sata_qstor.c index dca9ed7ac760..08a84042ce09 100644 --- 
a/drivers/scsi/sata_qstor.c +++ b/drivers/scsi/sata_qstor.c @@ -386,7 +386,8 @@ static inline unsigned int qs_intr_pkt(struct ata_host_set *host_set) DPRINTK("SFF=%08x%08x: sCHAN=%u sHST=%d sDST=%02x\n", sff1, sff0, port_no, sHST, sDST); handled = 1; - if (ap && (!(ap->flags & ATA_FLAG_PORT_DISABLED))) { + if (ap && !(ap->flags & + (ATA_FLAG_PORT_DISABLED|ATA_FLAG_NOINTR))) { struct ata_queued_cmd *qc; struct qs_port_priv *pp = ap->private_data; if (!pp || pp->state != qs_state_pkt) @@ -417,7 +418,8 @@ static inline unsigned int qs_intr_mmio(struct ata_host_set *host_set) for (port_no = 0; port_no < host_set->n_ports; ++port_no) { struct ata_port *ap; ap = host_set->ports[port_no]; - if (ap && (!(ap->flags & ATA_FLAG_PORT_DISABLED))) { + if (ap && + !(ap->flags & (ATA_FLAG_PORT_DISABLED | ATA_FLAG_NOINTR))) { struct ata_queued_cmd *qc; struct qs_port_priv *pp = ap->private_data; if (!pp || pp->state != qs_state_mmio) diff --git a/drivers/scsi/sata_sx4.c b/drivers/scsi/sata_sx4.c index 76644ea62d67..e2db499f22dd 100644 --- a/drivers/scsi/sata_sx4.c +++ b/drivers/scsi/sata_sx4.c @@ -825,7 +825,8 @@ static irqreturn_t pdc20621_interrupt (int irq, void *dev_instance, struct pt_re ap = host_set->ports[port_no]; tmp = mask & (1 << i); VPRINTK("seq %u, port_no %u, ap %p, tmp %x\n", i, port_no, ap, tmp); - if (tmp && ap && (!(ap->flags & ATA_FLAG_PORT_DISABLED))) { + if (tmp && ap && + !(ap->flags & (ATA_FLAG_PORT_DISABLED | ATA_FLAG_NOINTR))) { struct ata_queued_cmd *qc; qc = ata_qc_from_tag(ap, ap->active_tag); diff --git a/drivers/scsi/sata_vsc.c b/drivers/scsi/sata_vsc.c index cb3a6d89cf00..6f2562171be0 100644 --- a/drivers/scsi/sata_vsc.c +++ b/drivers/scsi/sata_vsc.c @@ -173,7 +173,8 @@ static irqreturn_t vsc_sata_interrupt (int irq, void *dev_instance, struct ata_port *ap; ap = host_set->ports[i]; - if (ap && (!(ap->flags & ATA_FLAG_PORT_DISABLED))) { + if (ap && !(ap->flags & + (ATA_FLAG_PORT_DISABLED|ATA_FLAG_NOINTR))) { struct ata_queued_cmd *qc; qc = 
ata_qc_from_tag(ap, ap->active_tag); diff --git a/include/linux/libata.h b/include/linux/libata.h index 85b0aaee0ef8..724b7d1c18ea 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -113,6 +113,8 @@ enum { ATA_FLAG_MMIO = (1 << 6), /* use MMIO, not PIO */ ATA_FLAG_SATA_RESET = (1 << 7), /* use COMRESET */ ATA_FLAG_PIO_DMA = (1 << 8), /* PIO cmds via DMA */ + ATA_FLAG_NOINTR = (1 << 9), /* FIXME: Remove this once + * proper HSM is in place. */ ATA_QCFLAG_ACTIVE = (1 << 1), /* cmd not yet ack'd to scsi lyer */ ATA_QCFLAG_SG = (1 << 3), /* have s/g table? */ From 40e8c82c74b9be793601e098fd1313bc2632c5dc Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 22 Aug 2005 17:12:45 +0900 Subject: [PATCH 129/584] [PATCH] libata: implement ata_poll_qc_complete and use it in polling functions [PATCH libata-dev-2.6:upstream] implement ata_poll_qc_complete and use it in polling functions Previously, libata polling functions turned irq back on and completed qc commands without holding host lock. This creates a race condition between the polling task and interrupts from other ports on the same host set or spurious interrupt from itself. This patch implements ata_poll_qc_complete which enables irq and completes qc atomically and convert all polling functions. Note: atapi_packet_task() didn't use to turn irq back on or clear ATA_FLAG_NOINTR on error exits. This patch makes it use ata_poll_qc_complete which does both. Note: With this change, ALL invocations of ata_qc_complete() are now done under host_set lock. 
Signed-off-by: Tejun Heo Signed-off-by: Jeff Garzik --- drivers/scsi/libata-core.c | 34 ++++++++++++++++++++++++---------- 1 file changed, 24 insertions(+), 10 deletions(-) diff --git a/drivers/scsi/libata-core.c b/drivers/scsi/libata-core.c index c92439fe5dae..2f68563ac3a3 100644 --- a/drivers/scsi/libata-core.c +++ b/drivers/scsi/libata-core.c @@ -2401,6 +2401,26 @@ static int ata_sg_setup(struct ata_queued_cmd *qc) return 0; } +/** + * ata_poll_qc_complete - turn irq back on and finish qc + * @qc: Command to complete + * @drv_stat: ATA status register content + * + * LOCKING: + * None. (grabs host lock) + */ + +void ata_poll_qc_complete(struct ata_queued_cmd *qc, u8 drv_stat) +{ + struct ata_port *ap = qc->ap; + + spin_lock_irq(&ap->host_set->lock); + ap->flags &= ~ATA_FLAG_NOINTR; + ata_irq_on(ap); + ata_qc_complete(qc, drv_stat); + spin_unlock_irq(&ap->host_set->lock); +} + /** * ata_pio_poll - * @ap: @@ -2492,9 +2512,7 @@ static void ata_pio_complete (struct ata_port *ap) ap->pio_task_state = PIO_ST_IDLE; - ata_irq_on(ap); - - ata_qc_complete(qc, drv_stat); + ata_poll_qc_complete(qc, drv_stat); } @@ -2844,9 +2862,7 @@ static void ata_pio_block(struct ata_port *ap) if ((status & ATA_DRQ) == 0) { ap->pio_task_state = PIO_ST_IDLE; - ata_irq_on(ap); - - ata_qc_complete(qc, status); + ata_poll_qc_complete(qc, status); return; } @@ -2876,9 +2892,7 @@ static void ata_pio_error(struct ata_port *ap) ap->pio_task_state = PIO_ST_IDLE; - ata_irq_on(ap); - - ata_qc_complete(qc, drv_stat | ATA_ERR); + ata_poll_qc_complete(qc, drv_stat | ATA_ERR); } static void ata_pio_task(void *_data) @@ -3791,7 +3805,7 @@ static void atapi_packet_task(void *_data) return; err_out: - ata_qc_complete(qc, ATA_ERR); + ata_poll_qc_complete(qc, ATA_ERR); } From e4deec6304cbd5fd08bf573eccc68787945071c2 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 23 Aug 2005 07:27:25 +0900 Subject: [PATCH 130/584] [PATCH] sil: apply M15W quirk selectively (take 2) As SII reports that only original 3112's 
are affected by M15W quirk, this patch adds SIL_FLAG_MOD15WRITE to selectively apply M15W quirk depending on chipsets. As of yet, we don't know exactly which PCI IDs are for original 3112, so M15W quirk is applied to all except for 3512 and 3114. Once more info is available, we can change some of these sil_3112_m15w's to sil_3112. Signed-off-by: Tejun Heo Signed-off-by: Jeff Garzik --- drivers/scsi/sata_sil.c | 27 ++++++++++++++++++++------- 1 file changed, 20 insertions(+), 7 deletions(-) diff --git a/drivers/scsi/sata_sil.c b/drivers/scsi/sata_sil.c index a1b81d43b11f..e6902645f397 100644 --- a/drivers/scsi/sata_sil.c +++ b/drivers/scsi/sata_sil.c @@ -41,8 +41,11 @@ #define DRV_VERSION "0.9" enum { + SIL_FLAG_MOD15WRITE = (1 << 30), + sil_3112 = 0, - sil_3114 = 1, + sil_3112_m15w = 1, + sil_3114 = 2, SIL_FIFO_R0 = 0x40, SIL_FIFO_W0 = 0x41, @@ -76,13 +79,13 @@ static void sil_scr_write (struct ata_port *ap, unsigned int sc_reg, u32 val); static void sil_post_set_mode (struct ata_port *ap); static struct pci_device_id sil_pci_tbl[] = { - { 0x1095, 0x3112, PCI_ANY_ID, PCI_ANY_ID, 0, 0, sil_3112 }, - { 0x1095, 0x0240, PCI_ANY_ID, PCI_ANY_ID, 0, 0, sil_3112 }, + { 0x1095, 0x3112, PCI_ANY_ID, PCI_ANY_ID, 0, 0, sil_3112_m15w }, + { 0x1095, 0x0240, PCI_ANY_ID, PCI_ANY_ID, 0, 0, sil_3112_m15w }, { 0x1095, 0x3512, PCI_ANY_ID, PCI_ANY_ID, 0, 0, sil_3112 }, { 0x1095, 0x3114, PCI_ANY_ID, PCI_ANY_ID, 0, 0, sil_3114 }, - { 0x1002, 0x436e, PCI_ANY_ID, PCI_ANY_ID, 0, 0, sil_3112 }, - { 0x1002, 0x4379, PCI_ANY_ID, PCI_ANY_ID, 0, 0, sil_3112 }, - { 0x1002, 0x437a, PCI_ANY_ID, PCI_ANY_ID, 0, 0, sil_3112 }, + { 0x1002, 0x436e, PCI_ANY_ID, PCI_ANY_ID, 0, 0, sil_3112_m15w }, + { 0x1002, 0x4379, PCI_ANY_ID, PCI_ANY_ID, 0, 0, sil_3112_m15w }, + { 0x1002, 0x437a, PCI_ANY_ID, PCI_ANY_ID, 0, 0, sil_3112_m15w }, { } /* terminate list */ }; @@ -174,6 +177,16 @@ static struct ata_port_info sil_port_info[] = { .mwdma_mask = 0x07, /* mwdma0-2 */ .udma_mask = 0x3f, /* udma0-5 */ .port_ops =
&sil_ops, + }, /* sil_3112_15w - keep it sync'd w/ sil_3112 */ + { + .sht = &sil_sht, + .host_flags = ATA_FLAG_SATA | ATA_FLAG_NO_LEGACY | + ATA_FLAG_SRST | ATA_FLAG_MMIO | + SIL_FLAG_MOD15WRITE, + .pio_mask = 0x1f, /* pio0-4 */ + .mwdma_mask = 0x07, /* mwdma0-2 */ + .udma_mask = 0x3f, /* udma0-5 */ + .port_ops = &sil_ops, }, /* sil_3114 */ { .sht = &sil_sht, @@ -331,7 +344,7 @@ static void sil_dev_config(struct ata_port *ap, struct ata_device *dev) } /* limit requests to 15 sectors */ - if (quirks & SIL_QUIRK_MOD15WRITE) { + if ((ap->flags & SIL_FLAG_MOD15WRITE) && (quirks & SIL_QUIRK_MOD15WRITE)) { printk(KERN_INFO "ata%u(%u): applying Seagate errata fix\n", ap->id, dev->devno); ap->host->max_sectors = 15; From efcce839360fb3a7b6dedeacaec80f68b0f2d052 Mon Sep 17 00:00:00 2001 From: Finn Thain Date: Sat, 20 Aug 2005 15:53:22 +1000 Subject: [PATCH 131/584] [PATCH] macsonic/jazzsonic network drivers update The purpose of this patch: - Adopt the DMA API (jazzsonic, macsonic & core driver). - Adopt the driver model (macsonic). This part was cribbed from jazzsonic. As a consequence, macsonic once again works as a module. Driver model is also used by the DMA calls. - Support 16 bit cards (macsonic & core driver, also affects jazzsonic) This code was adapted from the mac68k linux 2.2 kernel, where it has languished for a long time. - Support more 32-bit mac cards (macsonic) Also from mac68k repo. - Zero-copy buffer handling (core driver) Provides a nice performance improvement. The new algorithm incidentally helped to replace the old Jazz DMA code. The patch was tested on a variety of macs (several 32-bit quadra built-in NICs, a 16-bit LC PDS NIC and a 16-bit comm-slot NIC), and also on MIPS Jazz. 
Signed-off-by: Finn Thain Acked-by: Thomas Bogendoerfer Signed-off-by: Jeff Garzik --- drivers/net/Space.c | 4 - drivers/net/jazzsonic.c | 188 +++++------ drivers/net/macsonic.c | 550 ++++++++++++++++----------------- drivers/net/sonic.c | 668 ++++++++++++++++++++++++---------------- drivers/net/sonic.h | 450 +++++++++++++-------------- 5 files changed, 970 insertions(+), 890 deletions(-) diff --git a/drivers/net/Space.c b/drivers/net/Space.c index b28e5fde0b9e..60304f7e7e5b 100644 --- a/drivers/net/Space.c +++ b/drivers/net/Space.c @@ -87,7 +87,6 @@ extern struct net_device *mvme147lance_probe(int unit); extern struct net_device *tc515_probe(int unit); extern struct net_device *lance_probe(int unit); extern struct net_device *mace_probe(int unit); -extern struct net_device *macsonic_probe(int unit); extern struct net_device *mac8390_probe(int unit); extern struct net_device *mac89x0_probe(int unit); extern struct net_device *mc32_probe(int unit); @@ -284,9 +283,6 @@ static struct devprobe2 m68k_probes[] __initdata = { #ifdef CONFIG_MACMACE /* Mac 68k Quadra AV builtin Ethernet */ {mace_probe, 0}, #endif -#ifdef CONFIG_MACSONIC /* Mac SONIC-based Ethernet of all sorts */ - {macsonic_probe, 0}, -#endif #ifdef CONFIG_MAC8390 /* NuBus NS8390-based cards */ {mac8390_probe, 0}, #endif diff --git a/drivers/net/jazzsonic.c b/drivers/net/jazzsonic.c index 7fec613e1675..8423cb6875f0 100644 --- a/drivers/net/jazzsonic.c +++ b/drivers/net/jazzsonic.c @@ -1,5 +1,10 @@ /* - * sonic.c + * jazzsonic.c + * + * (C) 2005 Finn Thain + * + * Converted to DMA API, and (from the mac68k project) introduced + * dhd's support for 16-bit cards. 
* * (C) 1996,1998 by Thomas Bogendoerfer (tsbogend@alpha.franken.de) * @@ -28,8 +33,8 @@ #include #include #include -#include #include +#include #include #include @@ -44,22 +49,20 @@ static struct platform_device *jazz_sonic_device; #define SONIC_MEM_SIZE 0x100 -#define SREGS_PAD(n) u16 n; - #include "sonic.h" /* * Macros to access SONIC registers */ -#define SONIC_READ(reg) (*((volatile unsigned int *)base_addr+reg)) +#define SONIC_READ(reg) (*((volatile unsigned int *)dev->base_addr+reg)) #define SONIC_WRITE(reg,val) \ do { \ - *((volatile unsigned int *)base_addr+(reg)) = (val); \ + *((volatile unsigned int *)dev->base_addr+(reg)) = (val); \ } while (0) -/* use 0 for production, 1 for verification, >2 for debug */ +/* use 0 for production, 1 for verification, >1 for debug */ #ifdef SONIC_DEBUG static unsigned int sonic_debug = SONIC_DEBUG; #else @@ -85,18 +88,18 @@ static unsigned short known_revisions[] = 0xffff /* end of list */ }; -static int __init sonic_probe1(struct net_device *dev, unsigned long base_addr, - unsigned int irq) +static int __init sonic_probe1(struct net_device *dev) { static unsigned version_printed; unsigned int silicon_revision; unsigned int val; - struct sonic_local *lp; + struct sonic_local *lp = netdev_priv(dev); int err = -ENODEV; int i; - if (!request_mem_region(base_addr, SONIC_MEM_SIZE, jazz_sonic_string)) + if (!request_mem_region(dev->base_addr, SONIC_MEM_SIZE, jazz_sonic_string)) return -EBUSY; + /* * get the Silicon Revision ID. If this is one of the known * one assume that we found a SONIC ethernet controller at @@ -120,11 +123,7 @@ static int __init sonic_probe1(struct net_device *dev, unsigned long base_addr, if (sonic_debug && version_printed++ == 0) printk(version); - printk("%s: Sonic ethernet found at 0x%08lx, ", dev->name, base_addr); - - /* Fill in the 'dev' fields. 
*/ - dev->base_addr = base_addr; - dev->irq = irq; + printk(KERN_INFO "%s: Sonic ethernet found at 0x%08lx, ", lp->device->bus_id, dev->base_addr); /* * Put the sonic into software reset, then @@ -138,84 +137,44 @@ static int __init sonic_probe1(struct net_device *dev, unsigned long base_addr, dev->dev_addr[i*2+1] = val >> 8; } - printk("HW Address "); - for (i = 0; i < 6; i++) { - printk("%2.2x", dev->dev_addr[i]); - if (i<5) - printk(":"); - } - - printk(" IRQ %d\n", irq); - err = -ENOMEM; /* Initialize the device structure. */ - if (dev->priv == NULL) { - /* - * the memory be located in the same 64kb segment - */ - lp = NULL; - i = 0; - do { - lp = kmalloc(sizeof(*lp), GFP_KERNEL); - if ((unsigned long) lp >> 16 - != ((unsigned long)lp + sizeof(*lp) ) >> 16) { - /* FIXME, free the memory later */ - kfree(lp); - lp = NULL; - } - } while (lp == NULL && i++ < 20); - if (lp == NULL) { - printk("%s: couldn't allocate memory for descriptors\n", - dev->name); - goto out; - } + lp->dma_bitmode = SONIC_BITMODE32; - memset(lp, 0, sizeof(struct sonic_local)); - - /* get the virtual dma address */ - lp->cda_laddr = vdma_alloc(CPHYSADDR(lp),sizeof(*lp)); - if (lp->cda_laddr == ~0UL) { - printk("%s: couldn't get DMA page entry for " - "descriptors\n", dev->name); - goto out1; - } - - lp->tda_laddr = lp->cda_laddr + sizeof (lp->cda); - lp->rra_laddr = lp->tda_laddr + sizeof (lp->tda); - lp->rda_laddr = lp->rra_laddr + sizeof (lp->rra); - - /* allocate receive buffer area */ - /* FIXME, maybe we should use skbs */ - lp->rba = kmalloc(SONIC_NUM_RRS * SONIC_RBSIZE, GFP_KERNEL); - if (!lp->rba) { - printk("%s: couldn't allocate receive buffers\n", - dev->name); - goto out2; - } - - /* get virtual dma address */ - lp->rba_laddr = vdma_alloc(CPHYSADDR(lp->rba), - SONIC_NUM_RRS * SONIC_RBSIZE); - if (lp->rba_laddr == ~0UL) { - printk("%s: couldn't get DMA page entry for receive " - "buffers\n",dev->name); - goto out3; - } - - /* now convert pointer to KSEG1 pointer */ - lp->rba = 
(char *)KSEG1ADDR(lp->rba); - flush_cache_all(); - dev->priv = (struct sonic_local *)KSEG1ADDR(lp); + /* Allocate the entire chunk of memory for the descriptors. + Note that this cannot cross a 64K boundary. */ + if ((lp->descriptors = dma_alloc_coherent(lp->device, + SIZEOF_SONIC_DESC * SONIC_BUS_SCALE(lp->dma_bitmode), + &lp->descriptors_laddr, GFP_KERNEL)) == NULL) { + printk(KERN_ERR "%s: couldn't alloc DMA memory for descriptors.\n", lp->device->bus_id); + goto out; } - lp = (struct sonic_local *)dev->priv; + /* Now set up the pointers to point to the appropriate places */ + lp->cda = lp->descriptors; + lp->tda = lp->cda + (SIZEOF_SONIC_CDA + * SONIC_BUS_SCALE(lp->dma_bitmode)); + lp->rda = lp->tda + (SIZEOF_SONIC_TD * SONIC_NUM_TDS + * SONIC_BUS_SCALE(lp->dma_bitmode)); + lp->rra = lp->rda + (SIZEOF_SONIC_RD * SONIC_NUM_RDS + * SONIC_BUS_SCALE(lp->dma_bitmode)); + + lp->cda_laddr = lp->descriptors_laddr; + lp->tda_laddr = lp->cda_laddr + (SIZEOF_SONIC_CDA + * SONIC_BUS_SCALE(lp->dma_bitmode)); + lp->rda_laddr = lp->tda_laddr + (SIZEOF_SONIC_TD * SONIC_NUM_TDS + * SONIC_BUS_SCALE(lp->dma_bitmode)); + lp->rra_laddr = lp->rda_laddr + (SIZEOF_SONIC_RD * SONIC_NUM_RDS + * SONIC_BUS_SCALE(lp->dma_bitmode)); + dev->open = sonic_open; dev->stop = sonic_close; dev->hard_start_xmit = sonic_send_packet; - dev->get_stats = sonic_get_stats; + dev->get_stats = sonic_get_stats; dev->set_multicast_list = &sonic_multicast_list; + dev->tx_timeout = sonic_tx_timeout; dev->watchdog_timeo = TX_TIMEOUT; /* @@ -226,14 +185,8 @@ static int __init sonic_probe1(struct net_device *dev, unsigned long base_addr, SONIC_WRITE(SONIC_MPT,0xffff); return 0; -out3: - kfree(lp->rba); -out2: - vdma_free(lp->cda_laddr); -out1: - kfree(lp); out: - release_region(base_addr, SONIC_MEM_SIZE); + release_region(dev->base_addr, SONIC_MEM_SIZE); return err; } @@ -245,7 +198,6 @@ static int __init jazz_sonic_probe(struct device *device) { struct net_device *dev; struct sonic_local *lp; - unsigned long 
base_addr; int err = 0; int i; @@ -255,21 +207,26 @@ static int __init jazz_sonic_probe(struct device *device) if (mips_machgroup != MACH_GROUP_JAZZ) return -ENODEV; - dev = alloc_etherdev(0); + dev = alloc_etherdev(sizeof(struct sonic_local)); if (!dev) return -ENOMEM; - netdev_boot_setup_check(dev); - base_addr = dev->base_addr; + lp = netdev_priv(dev); + lp->device = device; + SET_NETDEV_DEV(dev, device); + SET_MODULE_OWNER(dev); - if (base_addr >= KSEG0) { /* Check a single specified location. */ - err = sonic_probe1(dev, base_addr, dev->irq); - } else if (base_addr != 0) { /* Don't probe at all. */ + netdev_boot_setup_check(dev); + + if (dev->base_addr >= KSEG0) { /* Check a single specified location. */ + err = sonic_probe1(dev); + } else if (dev->base_addr != 0) { /* Don't probe at all. */ err = -ENXIO; } else { for (i = 0; sonic_portlist[i].port; i++) { - int io = sonic_portlist[i].port; - if (sonic_probe1(dev, io, sonic_portlist[i].irq) == 0) + dev->base_addr = sonic_portlist[i].port; + dev->irq = sonic_portlist[i].irq; + if (sonic_probe1(dev) == 0) break; } if (!sonic_portlist[i].port) @@ -281,14 +238,17 @@ static int __init jazz_sonic_probe(struct device *device) if (err) goto out1; + printk("%s: MAC ", dev->name); + for (i = 0; i < 6; i++) { + printk("%2.2x", dev->dev_addr[i]); + if (i < 5) + printk(":"); + } + printk(" IRQ %d\n", dev->irq); + return 0; out1: - lp = dev->priv; - vdma_free(lp->rba_laddr); - kfree(lp->rba); - vdma_free(lp->cda_laddr); - kfree(lp); release_region(dev->base_addr, SONIC_MEM_SIZE); out: free_netdev(dev); @@ -296,21 +256,22 @@ out: return err; } -/* - * SONIC uses a normal IRQ - */ -#define sonic_request_irq request_irq -#define sonic_free_irq free_irq +MODULE_DESCRIPTION("Jazz SONIC ethernet driver"); +module_param(sonic_debug, int, 0); +MODULE_PARM_DESC(sonic_debug, "jazzsonic debug level (1-4)"); -#define sonic_chiptomem(x) KSEG1ADDR(vdma_log2phys(x)) +#define SONIC_IRQ_FLAG SA_INTERRUPT #include "sonic.c" static int 
__devexit jazz_sonic_device_remove (struct device *device) { struct net_device *dev = device->driver_data; + struct sonic_local* lp = netdev_priv(dev); unregister_netdev (dev); + dma_free_coherent(lp->device, SIZEOF_SONIC_DESC * SONIC_BUS_SCALE(lp->dma_bitmode), + lp->descriptors, lp->descriptors_laddr); release_region (dev->base_addr, SONIC_MEM_SIZE); free_netdev (dev); @@ -323,7 +284,7 @@ static struct device_driver jazz_sonic_driver = { .probe = jazz_sonic_probe, .remove = __devexit_p(jazz_sonic_device_remove), }; - + static void jazz_sonic_platform_release (struct device *device) { struct platform_device *pldev; @@ -336,10 +297,11 @@ static void jazz_sonic_platform_release (struct device *device) static int __init jazz_sonic_init_module(void) { struct platform_device *pldev; + int err; - if (driver_register(&jazz_sonic_driver)) { + if ((err = driver_register(&jazz_sonic_driver))) { printk(KERN_ERR "Driver registration failed\n"); - return -ENOMEM; + return err; } jazz_sonic_device = NULL; diff --git a/drivers/net/macsonic.c b/drivers/net/macsonic.c index be28c65de729..405e18365ede 100644 --- a/drivers/net/macsonic.c +++ b/drivers/net/macsonic.c @@ -1,6 +1,12 @@ /* * macsonic.c * + * (C) 2005 Finn Thain + * + * Converted to DMA API, converted to unified driver model, made it work as + * a module again, and from the mac68k project, introduced more 32-bit cards + * and dhd's support for 16-bit cards. 
+ * * (C) 1998 Alan Cox * * Debugging Andreas Ehliar, Michael Schmitz @@ -26,8 +32,8 @@ */ #include +#include #include -#include #include #include #include @@ -41,8 +47,8 @@ #include #include #include -#include -#include +#include +#include #include #include @@ -54,25 +60,28 @@ #include #include -#define SREGS_PAD(n) u16 n; +static char mac_sonic_string[] = "macsonic"; +static struct platform_device *mac_sonic_device; #include "sonic.h" -#define SONIC_READ(reg) \ - nubus_readl(base_addr+(reg)) -#define SONIC_WRITE(reg,val) \ - nubus_writel((val), base_addr+(reg)) -#define sonic_read(dev, reg) \ - nubus_readl((dev)->base_addr+(reg)) -#define sonic_write(dev, reg, val) \ - nubus_writel((val), (dev)->base_addr+(reg)) +/* These should basically be bus-size and endian independent (since + the SONIC is at least smart enough that it uses the same endianness + as the host, unlike certain less enlightened Macintosh NICs) */ +#define SONIC_READ(reg) (nubus_readw(dev->base_addr + (reg * 4) \ + + lp->reg_offset)) +#define SONIC_WRITE(reg,val) (nubus_writew(val, dev->base_addr + (reg * 4) \ + + lp->reg_offset)) +/* use 0 for production, 1 for verification, >1 for debug */ +#ifdef SONIC_DEBUG +static unsigned int sonic_debug = SONIC_DEBUG; +#else +static unsigned int sonic_debug = 1; +#endif -static int sonic_debug; static int sonic_version_printed; -static int reg_offset; - extern int mac_onboard_sonic_probe(struct net_device* dev); extern int mac_nubus_sonic_probe(struct net_device* dev); @@ -108,40 +117,6 @@ enum macsonic_type { #define SONIC_READ_PROM(addr) nubus_readb(prom_addr+addr) -struct net_device * __init macsonic_probe(int unit) -{ - struct net_device *dev = alloc_etherdev(0); - int err; - - if (!dev) - return ERR_PTR(-ENOMEM); - - if (unit >= 0) - sprintf(dev->name, "eth%d", unit); - - SET_MODULE_OWNER(dev); - - /* This will catch fatal stuff like -ENOMEM as well as success */ - err = mac_onboard_sonic_probe(dev); - if (err == 0) - goto found; - if (err != -ENODEV) 
- goto out; - err = mac_nubus_sonic_probe(dev); - if (err) - goto out; -found: - err = register_netdev(dev); - if (err) - goto out1; - return dev; -out1: - kfree(dev->priv); -out: - free_netdev(dev); - return ERR_PTR(err); -} - /* * For reversing the PROM address */ @@ -160,103 +135,55 @@ static inline void bit_reverse_addr(unsigned char addr[6]) int __init macsonic_init(struct net_device* dev) { - struct sonic_local* lp = NULL; - int i; + struct sonic_local* lp = netdev_priv(dev); /* Allocate the entire chunk of memory for the descriptors. Note that this cannot cross a 64K boundary. */ - for (i = 0; i < 20; i++) { - unsigned long desc_base, desc_top; - if((lp = kmalloc(sizeof(struct sonic_local), GFP_KERNEL | GFP_DMA)) == NULL) { - printk(KERN_ERR "%s: couldn't allocate descriptor buffers\n", dev->name); - return -ENOMEM; - } - - desc_base = (unsigned long) lp; - desc_top = desc_base + sizeof(struct sonic_local); - if ((desc_top & 0xffff) >= (desc_base & 0xffff)) - break; - /* Hmm. try again (FIXME: does this actually work?) */ - kfree(lp); - printk(KERN_DEBUG - "%s: didn't get continguous chunk [%08lx - %08lx], trying again\n", - dev->name, desc_base, desc_top); - } - - if (lp == NULL) { - printk(KERN_ERR "%s: tried 20 times to allocate descriptor buffers, giving up.\n", - dev->name); + if ((lp->descriptors = dma_alloc_coherent(lp->device, + SIZEOF_SONIC_DESC * SONIC_BUS_SCALE(lp->dma_bitmode), + &lp->descriptors_laddr, GFP_KERNEL)) == NULL) { + printk(KERN_ERR "%s: couldn't alloc DMA memory for descriptors.\n", lp->device->bus_id); return -ENOMEM; - } - - dev->priv = lp; - -#if 0 - /* this code is only here as a curiousity... mainly, where the - fuck did SONIC_BUS_SCALE come from, and what was it supposed - to do? the normal allocation works great for 32 bit stuffs.. 
*/ + } /* Now set up the pointers to point to the appropriate places */ - lp->cda = lp->sonic_desc; - lp->tda = lp->cda + (SIZEOF_SONIC_CDA * SONIC_BUS_SCALE(lp->dma_bitmode)); + lp->cda = lp->descriptors; + lp->tda = lp->cda + (SIZEOF_SONIC_CDA + * SONIC_BUS_SCALE(lp->dma_bitmode)); lp->rda = lp->tda + (SIZEOF_SONIC_TD * SONIC_NUM_TDS - * SONIC_BUS_SCALE(lp->dma_bitmode)); + * SONIC_BUS_SCALE(lp->dma_bitmode)); lp->rra = lp->rda + (SIZEOF_SONIC_RD * SONIC_NUM_RDS - * SONIC_BUS_SCALE(lp->dma_bitmode)); + * SONIC_BUS_SCALE(lp->dma_bitmode)); -#endif - - memset(lp, 0, sizeof(struct sonic_local)); - - lp->cda_laddr = (unsigned int)&(lp->cda); - lp->tda_laddr = (unsigned int)lp->tda; - lp->rra_laddr = (unsigned int)lp->rra; - lp->rda_laddr = (unsigned int)lp->rda; - - /* FIXME, maybe we should use skbs */ - if ((lp->rba = (char *) - kmalloc(SONIC_NUM_RRS * SONIC_RBSIZE, GFP_KERNEL | GFP_DMA)) == NULL) { - printk(KERN_ERR "%s: couldn't allocate receive buffers\n", dev->name); - dev->priv = NULL; - kfree(lp); - return -ENOMEM; - } - - lp->rba_laddr = (unsigned int)lp->rba; - - { - int rs, ds; - - /* almost always 12*4096, but let's not take chances */ - rs = ((SONIC_NUM_RRS * SONIC_RBSIZE + 4095) / 4096) * 4096; - /* almost always under a page, but let's not take chances */ - ds = ((sizeof(struct sonic_local) + 4095) / 4096) * 4096; - kernel_set_cachemode(lp->rba, rs, IOMAP_NOCACHE_SER); - kernel_set_cachemode(lp, ds, IOMAP_NOCACHE_SER); - } - -#if 0 - flush_cache_all(); -#endif + lp->cda_laddr = lp->descriptors_laddr; + lp->tda_laddr = lp->cda_laddr + (SIZEOF_SONIC_CDA + * SONIC_BUS_SCALE(lp->dma_bitmode)); + lp->rda_laddr = lp->tda_laddr + (SIZEOF_SONIC_TD * SONIC_NUM_TDS + * SONIC_BUS_SCALE(lp->dma_bitmode)); + lp->rra_laddr = lp->rda_laddr + (SIZEOF_SONIC_RD * SONIC_NUM_RDS + * SONIC_BUS_SCALE(lp->dma_bitmode)); dev->open = sonic_open; dev->stop = sonic_close; dev->hard_start_xmit = sonic_send_packet; dev->get_stats = sonic_get_stats; dev->set_multicast_list = 
&sonic_multicast_list; + dev->tx_timeout = sonic_tx_timeout; + dev->watchdog_timeo = TX_TIMEOUT; /* * clear tally counter */ - sonic_write(dev, SONIC_CRCT, 0xffff); - sonic_write(dev, SONIC_FAET, 0xffff); - sonic_write(dev, SONIC_MPT, 0xffff); + SONIC_WRITE(SONIC_CRCT, 0xffff); + SONIC_WRITE(SONIC_FAET, 0xffff); + SONIC_WRITE(SONIC_MPT, 0xffff); return 0; } int __init mac_onboard_sonic_ethernet_addr(struct net_device* dev) { + struct sonic_local *lp = netdev_priv(dev); const int prom_addr = ONBOARD_SONIC_PROM_BASE; int i; @@ -270,6 +197,7 @@ int __init mac_onboard_sonic_ethernet_addr(struct net_device* dev) why this is so. */ if (memcmp(dev->dev_addr, "\x08\x00\x07", 3) && memcmp(dev->dev_addr, "\x00\xA0\x40", 3) && + memcmp(dev->dev_addr, "\x00\x80\x19", 3) && memcmp(dev->dev_addr, "\x00\x05\x02", 3)) bit_reverse_addr(dev->dev_addr); else @@ -281,22 +209,23 @@ int __init mac_onboard_sonic_ethernet_addr(struct net_device* dev) the card... */ if (memcmp(dev->dev_addr, "\x08\x00\x07", 3) && memcmp(dev->dev_addr, "\x00\xA0\x40", 3) && + memcmp(dev->dev_addr, "\x00\x80\x19", 3) && memcmp(dev->dev_addr, "\x00\x05\x02", 3)) { unsigned short val; printk(KERN_INFO "macsonic: PROM seems to be wrong, trying CAM entry 15\n"); - sonic_write(dev, SONIC_CMD, SONIC_CR_RST); - sonic_write(dev, SONIC_CEP, 15); + SONIC_WRITE(SONIC_CMD, SONIC_CR_RST); + SONIC_WRITE(SONIC_CEP, 15); - val = sonic_read(dev, SONIC_CAP2); + val = SONIC_READ(SONIC_CAP2); dev->dev_addr[5] = val >> 8; dev->dev_addr[4] = val & 0xff; - val = sonic_read(dev, SONIC_CAP1); + val = SONIC_READ(SONIC_CAP1); dev->dev_addr[3] = val >> 8; dev->dev_addr[2] = val & 0xff; - val = sonic_read(dev, SONIC_CAP0); + val = SONIC_READ(SONIC_CAP0); dev->dev_addr[1] = val >> 8; dev->dev_addr[0] = val & 0xff; @@ -311,6 +240,7 @@ int __init mac_onboard_sonic_ethernet_addr(struct net_device* dev) if (memcmp(dev->dev_addr, "\x08\x00\x07", 3) && memcmp(dev->dev_addr, "\x00\xA0\x40", 3) && + memcmp(dev->dev_addr, "\x00\x80\x19", 3) && 
memcmp(dev->dev_addr, "\x00\x05\x02", 3)) { /* @@ -325,8 +255,9 @@ int __init mac_onboard_sonic_probe(struct net_device* dev) { /* Bwahahaha */ static int once_is_more_than_enough; - int i; - int dma_bitmode; + struct sonic_local* lp = netdev_priv(dev); + int sr; + int commslot = 0; if (once_is_more_than_enough) return -ENODEV; @@ -335,20 +266,18 @@ int __init mac_onboard_sonic_probe(struct net_device* dev) if (!MACH_IS_MAC) return -ENODEV; - printk(KERN_INFO "Checking for internal Macintosh ethernet (SONIC).. "); - if (macintosh_config->ether_type != MAC_ETHER_SONIC) - { - printk("none.\n"); return -ENODEV; - } - + + printk(KERN_INFO "Checking for internal Macintosh ethernet (SONIC).. "); + /* Bogus probing, on the models which may or may not have Ethernet (BTW, the Ethernet *is* always at the same address, and nothing else lives there, at least if Apple's documentation is to be believed) */ if (macintosh_config->ident == MAC_MODEL_Q630 || macintosh_config->ident == MAC_MODEL_P588 || + macintosh_config->ident == MAC_MODEL_P575 || macintosh_config->ident == MAC_MODEL_C610) { unsigned long flags; int card_present; @@ -361,13 +290,13 @@ int __init mac_onboard_sonic_probe(struct net_device* dev) printk("none.\n"); return -ENODEV; } + commslot = 1; } printk("yes\n"); - /* Danger! My arms are flailing wildly! You *must* set this - before using sonic_read() */ - + /* Danger! My arms are flailing wildly! You *must* set lp->reg_offset + * and dev->base_addr before using SONIC_READ() or SONIC_WRITE() */ dev->base_addr = ONBOARD_SONIC_REGISTERS; if (via_alt_mapping) dev->irq = IRQ_AUTO_3; @@ -379,84 +308,66 @@ int __init mac_onboard_sonic_probe(struct net_device* dev) sonic_version_printed = 1; } printk(KERN_INFO "%s: onboard / comm-slot SONIC at 0x%08lx\n", - dev->name, dev->base_addr); - - /* Now do a song and dance routine in an attempt to determine - the bus width */ + lp->device->bus_id, dev->base_addr); /* The PowerBook's SONIC is 16 bit always. 
*/ if (macintosh_config->ident == MAC_MODEL_PB520) { - reg_offset = 0; - dma_bitmode = 0; - } else if (macintosh_config->ident == MAC_MODEL_C610) { - reg_offset = 0; - dma_bitmode = 1; - } else { + lp->reg_offset = 0; + lp->dma_bitmode = SONIC_BITMODE16; + sr = SONIC_READ(SONIC_SR); + } else if (commslot) { /* Some of the comm-slot cards are 16 bit. But some - of them are not. The 32-bit cards use offset 2 and - pad with zeroes or sometimes ones (I think...) - Therefore, if we try offset 0 and get a silicon - revision of 0, we assume 16 bit. */ - int sr; + of them are not. The 32-bit cards use offset 2 and + have known revisions, we try reading the revision + register at offset 2, if we don't get a known revision + we assume 16 bit at offset 0. */ + lp->reg_offset = 2; + lp->dma_bitmode = SONIC_BITMODE16; - /* Technically this is not necessary since we zeroed - it above */ - reg_offset = 0; - dma_bitmode = 0; - sr = sonic_read(dev, SONIC_SR); - if (sr == 0 || sr == 0xffff) { - reg_offset = 2; - /* 83932 is 0x0004, 83934 is 0x0100 or 0x0101 */ - sr = sonic_read(dev, SONIC_SR); - dma_bitmode = 1; - + sr = SONIC_READ(SONIC_SR); + if (sr == 0x0004 || sr == 0x0006 || sr == 0x0100 || sr == 0x0101) + /* 83932 is 0x0004 or 0x0006, 83934 is 0x0100 or 0x0101 */ + lp->dma_bitmode = SONIC_BITMODE32; + else { + lp->dma_bitmode = SONIC_BITMODE16; + lp->reg_offset = 0; + sr = SONIC_READ(SONIC_SR); } - printk(KERN_INFO - "%s: revision 0x%04x, using %d bit DMA and register offset %d\n", - dev->name, sr, dma_bitmode?32:16, reg_offset); + } else { + /* All onboard cards are at offset 2 with 32 bit DMA. 
*/ + lp->reg_offset = 2; + lp->dma_bitmode = SONIC_BITMODE32; + sr = SONIC_READ(SONIC_SR); } - + printk(KERN_INFO + "%s: revision 0x%04x, using %d bit DMA and register offset %d\n", + lp->device->bus_id, sr, lp->dma_bitmode?32:16, lp->reg_offset); - /* this carries my sincere apologies -- by the time I got to updating - the driver, support for "reg_offsets" appeares nowhere in the sonic - code, going back for over a year. Fortunately, my Mac does't seem - to use whatever this was. +#if 0 /* This is sometimes useful to find out how MacOS configured the card. */ + printk(KERN_INFO "%s: DCR: 0x%04x, DCR2: 0x%04x\n", lp->device->bus_id, + SONIC_READ(SONIC_DCR) & 0xffff, SONIC_READ(SONIC_DCR2) & 0xffff); +#endif - If you know how this is supposed to be implemented, either fix it, - or contact me (sammy@oh.verio.com) to explain what it is. --Sam */ - - if(reg_offset) { - printk("%s: register offset unsupported. please fix this if you know what it is.\n", dev->name); - return -ENODEV; - } - /* Software reset, then initialize control registers. */ - sonic_write(dev, SONIC_CMD, SONIC_CR_RST); - sonic_write(dev, SONIC_DCR, SONIC_DCR_BMS | - SONIC_DCR_RFT1 | SONIC_DCR_TFT0 | SONIC_DCR_EXBUS | - (dma_bitmode ? SONIC_DCR_DW : 0)); + SONIC_WRITE(SONIC_CMD, SONIC_CR_RST); + + SONIC_WRITE(SONIC_DCR, SONIC_DCR_EXBUS | SONIC_DCR_BMS | + SONIC_DCR_RFT1 | SONIC_DCR_TFT0 | + (lp->dma_bitmode ? SONIC_DCR_DW : 0)); /* This *must* be written back to in order to restore the - extended programmable output bits */ - sonic_write(dev, SONIC_DCR2, 0); + * extended programmable output bits, as it may not have been + * initialised since the hardware reset. */ + SONIC_WRITE(SONIC_DCR2, 0); /* Clear *and* disable interrupts to be on the safe side */ - sonic_write(dev, SONIC_ISR,0x7fff); - sonic_write(dev, SONIC_IMR,0); + SONIC_WRITE(SONIC_IMR, 0); + SONIC_WRITE(SONIC_ISR, 0x7fff); /* Now look for the MAC address. 
*/ if (mac_onboard_sonic_ethernet_addr(dev) != 0) return -ENODEV; - printk(KERN_INFO "MAC "); - for (i = 0; i < 6; i++) { - printk("%2.2x", dev->dev_addr[i]); - if (i < 5) - printk(":"); - } - - printk(" IRQ %d\n", dev->irq); - /* Shared init code */ return macsonic_init(dev); } @@ -468,8 +379,10 @@ int __init mac_nubus_sonic_ethernet_addr(struct net_device* dev, int i; for(i = 0; i < 6; i++) dev->dev_addr[i] = SONIC_READ_PROM(i); - /* For now we are going to assume that they're all bit-reversed */ - bit_reverse_addr(dev->dev_addr); + + /* Some of the addresses are bit-reversed */ + if (id != MACSONIC_DAYNA) + bit_reverse_addr(dev->dev_addr); return 0; } @@ -487,6 +400,15 @@ int __init macsonic_ident(struct nubus_dev* ndev) else return MACSONIC_APPLE; } + + if (ndev->dr_hw == NUBUS_DRHW_SMC9194 && + ndev->dr_sw == NUBUS_DRSW_DAYNA) + return MACSONIC_DAYNA; + + if (ndev->dr_hw == NUBUS_DRHW_SONIC_LC && + ndev->dr_sw == 0) { /* huh? */ + return MACSONIC_APPLE16; + } return -1; } @@ -494,12 +416,12 @@ int __init mac_nubus_sonic_probe(struct net_device* dev) { static int slots; struct nubus_dev* ndev = NULL; + struct sonic_local* lp = netdev_priv(dev); unsigned long base_addr, prom_addr; u16 sonic_dcr; - int id; - int i; - int dma_bitmode; - + int id = -1; + int reg_offset, dma_bitmode; + /* Find the first SONIC that hasn't been initialized already */ while ((ndev = nubus_find_type(NUBUS_CAT_NETWORK, NUBUS_TYPE_ETHERNET, ndev)) != NULL) @@ -521,51 +443,52 @@ int __init mac_nubus_sonic_probe(struct net_device* dev) case MACSONIC_DUODOCK: base_addr = ndev->board->slot_addr + DUODOCK_SONIC_REGISTERS; prom_addr = ndev->board->slot_addr + DUODOCK_SONIC_PROM_BASE; - sonic_dcr = SONIC_DCR_EXBUS | SONIC_DCR_RFT0 | SONIC_DCR_RFT1 - | SONIC_DCR_TFT0; + sonic_dcr = SONIC_DCR_EXBUS | SONIC_DCR_RFT0 | SONIC_DCR_RFT1 | + SONIC_DCR_TFT0; reg_offset = 2; - dma_bitmode = 1; + dma_bitmode = SONIC_BITMODE32; break; case MACSONIC_APPLE: base_addr = ndev->board->slot_addr + 
APPLE_SONIC_REGISTERS; prom_addr = ndev->board->slot_addr + APPLE_SONIC_PROM_BASE; sonic_dcr = SONIC_DCR_BMS | SONIC_DCR_RFT1 | SONIC_DCR_TFT0; reg_offset = 0; - dma_bitmode = 1; + dma_bitmode = SONIC_BITMODE32; break; case MACSONIC_APPLE16: base_addr = ndev->board->slot_addr + APPLE_SONIC_REGISTERS; prom_addr = ndev->board->slot_addr + APPLE_SONIC_PROM_BASE; - sonic_dcr = SONIC_DCR_EXBUS - | SONIC_DCR_RFT1 | SONIC_DCR_TFT0 - | SONIC_DCR_PO1 | SONIC_DCR_BMS; + sonic_dcr = SONIC_DCR_EXBUS | SONIC_DCR_RFT1 | SONIC_DCR_TFT0 | + SONIC_DCR_PO1 | SONIC_DCR_BMS; reg_offset = 0; - dma_bitmode = 0; + dma_bitmode = SONIC_BITMODE16; break; case MACSONIC_DAYNALINK: base_addr = ndev->board->slot_addr + APPLE_SONIC_REGISTERS; prom_addr = ndev->board->slot_addr + DAYNALINK_PROM_BASE; - sonic_dcr = SONIC_DCR_RFT1 | SONIC_DCR_TFT0 - | SONIC_DCR_PO1 | SONIC_DCR_BMS; + sonic_dcr = SONIC_DCR_RFT1 | SONIC_DCR_TFT0 | + SONIC_DCR_PO1 | SONIC_DCR_BMS; reg_offset = 0; - dma_bitmode = 0; + dma_bitmode = SONIC_BITMODE16; break; case MACSONIC_DAYNA: base_addr = ndev->board->slot_addr + DAYNA_SONIC_REGISTERS; prom_addr = ndev->board->slot_addr + DAYNA_SONIC_MAC_ADDR; - sonic_dcr = SONIC_DCR_BMS - | SONIC_DCR_RFT1 | SONIC_DCR_TFT0 | SONIC_DCR_PO1; + sonic_dcr = SONIC_DCR_BMS | + SONIC_DCR_RFT1 | SONIC_DCR_TFT0 | SONIC_DCR_PO1; reg_offset = 0; - dma_bitmode = 0; + dma_bitmode = SONIC_BITMODE16; break; default: printk(KERN_ERR "macsonic: WTF, id is %d\n", id); return -ENODEV; } - /* Danger! My arms are flailing wildly! You *must* set this - before using sonic_read() */ + /* Danger! My arms are flailing wildly! 
You *must* set lp->reg_offset + * and dev->base_addr before using SONIC_READ() or SONIC_WRITE() */ dev->base_addr = base_addr; + lp->reg_offset = reg_offset; + lp->dma_bitmode = dma_bitmode; dev->irq = SLOT2IRQ(ndev->board->slot); if (!sonic_version_printed) { @@ -573,29 +496,66 @@ int __init mac_nubus_sonic_probe(struct net_device* dev) sonic_version_printed = 1; } printk(KERN_INFO "%s: %s in slot %X\n", - dev->name, ndev->board->name, ndev->board->slot); + lp->device->bus_id, ndev->board->name, ndev->board->slot); printk(KERN_INFO "%s: revision 0x%04x, using %d bit DMA and register offset %d\n", - dev->name, sonic_read(dev, SONIC_SR), dma_bitmode?32:16, reg_offset); + lp->device->bus_id, SONIC_READ(SONIC_SR), dma_bitmode?32:16, reg_offset); - if(reg_offset) { - printk("%s: register offset unsupported. please fix this if you know what it is.\n", dev->name); - return -ENODEV; - } +#if 0 /* This is sometimes useful to find out how MacOS configured the card. */ + printk(KERN_INFO "%s: DCR: 0x%04x, DCR2: 0x%04x\n", lp->device->bus_id, + SONIC_READ(SONIC_DCR) & 0xffff, SONIC_READ(SONIC_DCR2) & 0xffff); +#endif /* Software reset, then initialize control registers. */ - sonic_write(dev, SONIC_CMD, SONIC_CR_RST); - sonic_write(dev, SONIC_DCR, sonic_dcr - | (dma_bitmode ? SONIC_DCR_DW : 0)); + SONIC_WRITE(SONIC_CMD, SONIC_CR_RST); + SONIC_WRITE(SONIC_DCR, sonic_dcr | (dma_bitmode ? SONIC_DCR_DW : 0)); + /* This *must* be written back to in order to restore the + * extended programmable output bits, since it may not have been + * initialised since the hardware reset. */ + SONIC_WRITE(SONIC_DCR2, 0); /* Clear *and* disable interrupts to be on the safe side */ - sonic_write(dev, SONIC_ISR,0x7fff); - sonic_write(dev, SONIC_IMR,0); + SONIC_WRITE(SONIC_IMR, 0); + SONIC_WRITE(SONIC_ISR, 0x7fff); /* Now look for the MAC address. 
*/ if (mac_nubus_sonic_ethernet_addr(dev, prom_addr, id) != 0) return -ENODEV; - printk(KERN_INFO "MAC "); + /* Shared init code */ + return macsonic_init(dev); +} + +static int __init mac_sonic_probe(struct device *device) +{ + struct net_device *dev; + struct sonic_local *lp; + int err; + int i; + + dev = alloc_etherdev(sizeof(struct sonic_local)); + if (!dev) + return -ENOMEM; + + lp = netdev_priv(dev); + lp->device = device; + SET_NETDEV_DEV(dev, device); + SET_MODULE_OWNER(dev); + + /* This will catch fatal stuff like -ENOMEM as well as success */ + err = mac_onboard_sonic_probe(dev); + if (err == 0) + goto found; + if (err != -ENODEV) + goto out; + err = mac_nubus_sonic_probe(dev); + if (err) + goto out; +found: + err = register_netdev(dev); + if (err) + goto out; + + printk("%s: MAC ", dev->name); for (i = 0; i < 6; i++) { printk("%2.2x", dev->dev_addr[i]); if (i < 5) @@ -603,55 +563,95 @@ int __init mac_nubus_sonic_probe(struct net_device* dev) } printk(" IRQ %d\n", dev->irq); - /* Shared init code */ - return macsonic_init(dev); + return 0; + +out: + free_netdev(dev); + + return err; } -#ifdef MODULE -static struct net_device *dev_macsonic; - -MODULE_PARM(sonic_debug, "i"); +MODULE_DESCRIPTION("Macintosh SONIC ethernet driver"); +module_param(sonic_debug, int, 0); MODULE_PARM_DESC(sonic_debug, "macsonic debug level (1-4)"); -int -init_module(void) -{ - dev_macsonic = macsonic_probe(-1); - if (IS_ERR(dev_macsonic)) { - printk(KERN_WARNING "macsonic.c: No card found\n"); - return PTR_ERR(dev_macsonic); - } - return 0; -} - -void -cleanup_module(void) -{ - unregister_netdev(dev_macsonic); - kfree(dev_macsonic->priv); - free_netdev(dev_macsonic); -} -#endif /* MODULE */ - - -#define vdma_alloc(foo, bar) ((u32)foo) -#define vdma_free(baz) -#define sonic_chiptomem(bat) (bat) -#define PHYSADDR(quux) (quux) -#define CPHYSADDR(quux) (quux) - -#define sonic_request_irq request_irq -#define sonic_free_irq free_irq +#define SONIC_IRQ_FLAG IRQ_FLG_FAST #include 
"sonic.c" -/* - * Local variables: - * compile-command: "m68k-linux-gcc -D__KERNEL__ -I../../include -Wall -Wstrict-prototypes -O2 -fomit-frame-pointer -pipe -fno-strength-reduce -ffixed-a2 -DMODULE -DMODVERSIONS -include ../../include/linux/modversions.h -c -o macsonic.o macsonic.c" - * version-control: t - * kept-new-versions: 5 - * c-indent-level: 8 - * tab-width: 8 - * End: - * - */ +static int __devexit mac_sonic_device_remove (struct device *device) +{ + struct net_device *dev = device->driver_data; + struct sonic_local* lp = netdev_priv(dev); + + unregister_netdev (dev); + dma_free_coherent(lp->device, SIZEOF_SONIC_DESC * SONIC_BUS_SCALE(lp->dma_bitmode), + lp->descriptors, lp->descriptors_laddr); + free_netdev (dev); + + return 0; +} + +static struct device_driver mac_sonic_driver = { + .name = mac_sonic_string, + .bus = &platform_bus_type, + .probe = mac_sonic_probe, + .remove = __devexit_p(mac_sonic_device_remove), +}; + +static void mac_sonic_platform_release(struct device *device) +{ + struct platform_device *pldev; + + /* free device */ + pldev = to_platform_device (device); + kfree (pldev); +} + +static int __init mac_sonic_init_module(void) +{ + struct platform_device *pldev; + int err; + + if ((err = driver_register(&mac_sonic_driver))) { + printk(KERN_ERR "Driver registration failed\n"); + return err; + } + + mac_sonic_device = NULL; + + if (!(pldev = kmalloc (sizeof (*pldev), GFP_KERNEL))) { + goto out_unregister; + } + + memset(pldev, 0, sizeof (*pldev)); + pldev->name = mac_sonic_string; + pldev->id = 0; + pldev->dev.release = mac_sonic_platform_release; + mac_sonic_device = pldev; + + if (platform_device_register (pldev)) { + kfree(pldev); + mac_sonic_device = NULL; + } + + return 0; + +out_unregister: + platform_device_unregister(pldev); + + return -ENOMEM; +} + +static void __exit mac_sonic_cleanup_module(void) +{ + driver_unregister(&mac_sonic_driver); + + if (mac_sonic_device) { + platform_device_unregister(mac_sonic_device); + 
mac_sonic_device = NULL; + } +} + +module_init(mac_sonic_init_module); +module_exit(mac_sonic_cleanup_module); diff --git a/drivers/net/sonic.c b/drivers/net/sonic.c index cdc9cc873e06..90b818a8de6e 100644 --- a/drivers/net/sonic.c +++ b/drivers/net/sonic.c @@ -1,6 +1,11 @@ /* * sonic.c * + * (C) 2005 Finn Thain + * + * Converted to DMA API, added zero-copy buffer handling, and + * (from the mac68k project) introduced dhd's support for 16-bit cards. + * * (C) 1996,1998 by Thomas Bogendoerfer (tsbogend@alpha.franken.de) * * This driver is based on work from Andreas Busse, but most of @@ -9,12 +14,23 @@ * (C) 1995 by Andreas Busse (andy@waldorf-gmbh.de) * * Core code included by system sonic drivers + * + * And... partially rewritten again by David Huggins-Daines in order + * to cope with screwed up Macintosh NICs that may or may not use + * 16-bit DMA. + * + * (C) 1999 David Huggins-Daines + * */ /* * Sources: Olivetti M700-10 Risc Personal Computer hardware handbook, * National Semiconductors data sheet for the DP83932B Sonic Ethernet * controller, and the files "8390.c" and "skeleton.c" in this directory. + * + * Additional sources: Nat Semi data sheet for the DP83932C and Nat Semi + * Application Note AN-746, the files "lance.c" and "ibmlana.c". See also + * the NetBSD file "sys/arch/mac68k/dev/if_sn.c". */ @@ -28,6 +44,9 @@ */ static int sonic_open(struct net_device *dev) { + struct sonic_local *lp = netdev_priv(dev); + int i; + if (sonic_debug > 2) printk("sonic_open: initializing sonic driver.\n"); @@ -40,14 +59,59 @@ static int sonic_open(struct net_device *dev) * This means that during execution of the handler interrupt are disabled * covering another bug otherwise corrupting data. This doesn't mean * this glue works ok under all situations. + * + * Note (dhd): this also appears to prevent lockups on the Macintrash + * when more than one Ethernet card is installed (knock on wood) + * + * Note (fthain): whether the above is still true is anyones guess. 
Certainly + * the buffer handling algorithms will not tolerate re-entrance without some + * mutual exclusion added. Anyway, the memcpy has now been eliminated from the + * rx code to make this a faster "fast interrupt". */ -// if (sonic_request_irq(dev->irq, &sonic_interrupt, 0, "sonic", dev)) { - if (sonic_request_irq(dev->irq, &sonic_interrupt, SA_INTERRUPT, - "sonic", dev)) { - printk("\n%s: unable to get IRQ %d .\n", dev->name, dev->irq); + if (request_irq(dev->irq, &sonic_interrupt, SONIC_IRQ_FLAG, "sonic", dev)) { + printk(KERN_ERR "\n%s: unable to get IRQ %d .\n", dev->name, dev->irq); return -EAGAIN; } + for (i = 0; i < SONIC_NUM_RRS; i++) { + struct sk_buff *skb = dev_alloc_skb(SONIC_RBSIZE + 2); + if (skb == NULL) { + while(i > 0) { /* free any that were allocated successfully */ + i--; + dev_kfree_skb(lp->rx_skb[i]); + lp->rx_skb[i] = NULL; + } + printk(KERN_ERR "%s: couldn't allocate receive buffers\n", + dev->name); + return -ENOMEM; + } + skb->dev = dev; + /* align IP header unless DMA requires otherwise */ + if (SONIC_BUS_SCALE(lp->dma_bitmode) == 2) + skb_reserve(skb, 2); + lp->rx_skb[i] = skb; + } + + for (i = 0; i < SONIC_NUM_RRS; i++) { + dma_addr_t laddr = dma_map_single(lp->device, skb_put(lp->rx_skb[i], SONIC_RBSIZE), + SONIC_RBSIZE, DMA_FROM_DEVICE); + if (!laddr) { + while(i > 0) { /* free any that were mapped successfully */ + i--; + dma_unmap_single(lp->device, lp->rx_laddr[i], SONIC_RBSIZE, DMA_FROM_DEVICE); + lp->rx_laddr[i] = (dma_addr_t)0; + } + for (i = 0; i < SONIC_NUM_RRS; i++) { + dev_kfree_skb(lp->rx_skb[i]); + lp->rx_skb[i] = NULL; + } + printk(KERN_ERR "%s: couldn't map rx DMA buffers\n", + dev->name); + return -ENOMEM; + } + lp->rx_laddr[i] = laddr; + } + /* * Initialize the SONIC */ @@ -67,7 +131,8 @@ static int sonic_open(struct net_device *dev) */ static int sonic_close(struct net_device *dev) { - unsigned int base_addr = dev->base_addr; + struct sonic_local *lp = netdev_priv(dev); + int i; if (sonic_debug > 2) 
printk("sonic_close\n"); @@ -77,20 +142,56 @@ static int sonic_close(struct net_device *dev) /* * stop the SONIC, disable interrupts */ - SONIC_WRITE(SONIC_ISR, 0x7fff); SONIC_WRITE(SONIC_IMR, 0); + SONIC_WRITE(SONIC_ISR, 0x7fff); SONIC_WRITE(SONIC_CMD, SONIC_CR_RST); - sonic_free_irq(dev->irq, dev); /* release the IRQ */ + /* unmap and free skbs that haven't been transmitted */ + for (i = 0; i < SONIC_NUM_TDS; i++) { + if(lp->tx_laddr[i]) { + dma_unmap_single(lp->device, lp->tx_laddr[i], lp->tx_len[i], DMA_TO_DEVICE); + lp->tx_laddr[i] = (dma_addr_t)0; + } + if(lp->tx_skb[i]) { + dev_kfree_skb(lp->tx_skb[i]); + lp->tx_skb[i] = NULL; + } + } + + /* unmap and free the receive buffers */ + for (i = 0; i < SONIC_NUM_RRS; i++) { + if(lp->rx_laddr[i]) { + dma_unmap_single(lp->device, lp->rx_laddr[i], SONIC_RBSIZE, DMA_FROM_DEVICE); + lp->rx_laddr[i] = (dma_addr_t)0; + } + if(lp->rx_skb[i]) { + dev_kfree_skb(lp->rx_skb[i]); + lp->rx_skb[i] = NULL; + } + } + + free_irq(dev->irq, dev); /* release the IRQ */ return 0; } static void sonic_tx_timeout(struct net_device *dev) { - struct sonic_local *lp = (struct sonic_local *) dev->priv; - printk("%s: transmit timed out.\n", dev->name); - + struct sonic_local *lp = netdev_priv(dev); + int i; + /* Stop the interrupts for this */ + SONIC_WRITE(SONIC_IMR, 0); + /* We could resend the original skbs. Easier to re-initialise. */ + for (i = 0; i < SONIC_NUM_TDS; i++) { + if(lp->tx_laddr[i]) { + dma_unmap_single(lp->device, lp->tx_laddr[i], lp->tx_len[i], DMA_TO_DEVICE); + lp->tx_laddr[i] = (dma_addr_t)0; + } + if(lp->tx_skb[i]) { + dev_kfree_skb(lp->tx_skb[i]); + lp->tx_skb[i] = NULL; + } + } /* Try to restart the adaptor. */ sonic_init(dev); lp->stats.tx_errors++; @@ -100,60 +201,92 @@ static void sonic_tx_timeout(struct net_device *dev) /* * transmit packet + * + * Appends new TD during transmission thus avoiding any TX interrupts + * until we run out of TDs. 
+ * This routine interacts closely with the ISR in that it may, + * set tx_skb[i] + * reset the status flags of the new TD + * set and reset EOL flags + * stop the tx queue + * The ISR interacts with this routine in various ways. It may, + * reset tx_skb[i] + * test the EOL and status flags of the TDs + * wake the tx queue + * Concurrently with all of this, the SONIC is potentially writing to + * the status flags of the TDs. + * Until some mutual exclusion is added, this code will not work with SMP. However, + * MIPS Jazz machines and m68k Macs were all uni-processor machines. */ + static int sonic_send_packet(struct sk_buff *skb, struct net_device *dev) { - struct sonic_local *lp = (struct sonic_local *) dev->priv; - unsigned int base_addr = dev->base_addr; - unsigned int laddr; - int entry, length; - - netif_stop_queue(dev); + struct sonic_local *lp = netdev_priv(dev); + dma_addr_t laddr; + int length; + int entry = lp->next_tx; if (sonic_debug > 2) printk("sonic_send_packet: skb=%p, dev=%p\n", skb, dev); + length = skb->len; + if (length < ETH_ZLEN) { + skb = skb_padto(skb, ETH_ZLEN); + if (skb == NULL) + return 0; + length = ETH_ZLEN; + } + /* * Map the packet data into the logical DMA address space */ - if ((laddr = vdma_alloc(CPHYSADDR(skb->data), skb->len)) == ~0UL) { - printk("%s: no VDMA entry for transmit available.\n", - dev->name); + + laddr = dma_map_single(lp->device, skb->data, length, DMA_TO_DEVICE); + if (!laddr) { + printk(KERN_ERR "%s: failed to map tx DMA buffer.\n", dev->name); dev_kfree_skb(skb); - netif_start_queue(dev); return 1; } - entry = lp->cur_tx & SONIC_TDS_MASK; + + sonic_tda_put(dev, entry, SONIC_TD_STATUS, 0); /* clear status */ + sonic_tda_put(dev, entry, SONIC_TD_FRAG_COUNT, 1); /* single fragment */ + sonic_tda_put(dev, entry, SONIC_TD_PKTSIZE, length); /* length of packet */ + sonic_tda_put(dev, entry, SONIC_TD_FRAG_PTR_L, laddr & 0xffff); + sonic_tda_put(dev, entry, SONIC_TD_FRAG_PTR_H, laddr >> 16); + sonic_tda_put(dev, 
entry, SONIC_TD_FRAG_SIZE, length); + sonic_tda_put(dev, entry, SONIC_TD_LINK, + sonic_tda_get(dev, entry, SONIC_TD_LINK) | SONIC_EOL); + + /* + * Must set tx_skb[entry] only after clearing status, and + * before clearing EOL and before stopping queue + */ + wmb(); + lp->tx_len[entry] = length; lp->tx_laddr[entry] = laddr; lp->tx_skb[entry] = skb; - length = (skb->len < ETH_ZLEN) ? ETH_ZLEN : skb->len; - flush_cache_all(); + wmb(); + sonic_tda_put(dev, lp->eol_tx, SONIC_TD_LINK, + sonic_tda_get(dev, lp->eol_tx, SONIC_TD_LINK) & ~SONIC_EOL); + lp->eol_tx = entry; - /* - * Setup the transmit descriptor and issue the transmit command. - */ - lp->tda[entry].tx_status = 0; /* clear status */ - lp->tda[entry].tx_frag_count = 1; /* single fragment */ - lp->tda[entry].tx_pktsize = length; /* length of packet */ - lp->tda[entry].tx_frag_ptr_l = laddr & 0xffff; - lp->tda[entry].tx_frag_ptr_h = laddr >> 16; - lp->tda[entry].tx_frag_size = length; - lp->cur_tx++; - lp->stats.tx_bytes += length; + lp->next_tx = (entry + 1) & SONIC_TDS_MASK; + if (lp->tx_skb[lp->next_tx] != NULL) { + /* The ring is full, the ISR has yet to process the next TD. 
*/ + if (sonic_debug > 3) + printk("%s: stopping queue\n", dev->name); + netif_stop_queue(dev); + /* after this packet, wait for ISR to free up some TDAs */ + } else netif_start_queue(dev); if (sonic_debug > 2) - printk("sonic_send_packet: issueing Tx command\n"); + printk("sonic_send_packet: issuing Tx command\n"); SONIC_WRITE(SONIC_CMD, SONIC_CR_TXP); dev->trans_start = jiffies; - if (lp->cur_tx < lp->dirty_tx + SONIC_NUM_TDS) - netif_start_queue(dev); - else - lp->tx_full = 1; - return 0; } @@ -164,175 +297,199 @@ static int sonic_send_packet(struct sk_buff *skb, struct net_device *dev) static irqreturn_t sonic_interrupt(int irq, void *dev_id, struct pt_regs *regs) { struct net_device *dev = (struct net_device *) dev_id; - unsigned int base_addr = dev->base_addr; - struct sonic_local *lp; + struct sonic_local *lp = netdev_priv(dev); int status; if (dev == NULL) { - printk("sonic_interrupt: irq %d for unknown device.\n", irq); + printk(KERN_ERR "sonic_interrupt: irq %d for unknown device.\n", irq); return IRQ_NONE; } - lp = (struct sonic_local *) dev->priv; + if (!(status = SONIC_READ(SONIC_ISR) & SONIC_IMR_DEFAULT)) + return IRQ_NONE; - status = SONIC_READ(SONIC_ISR); - SONIC_WRITE(SONIC_ISR, 0x7fff); /* clear all bits */ + do { + if (status & SONIC_INT_PKTRX) { + if (sonic_debug > 2) + printk("%s: packet rx\n", dev->name); + sonic_rx(dev); /* got packet(s) */ + SONIC_WRITE(SONIC_ISR, SONIC_INT_PKTRX); /* clear the interrupt */ + } - if (sonic_debug > 2) - printk("sonic_interrupt: ISR=%x\n", status); + if (status & SONIC_INT_TXDN) { + int entry = lp->cur_tx; + int td_status; + int freed_some = 0; - if (status & SONIC_INT_PKTRX) { - sonic_rx(dev); /* got packet(s) */ - } + /* At this point, cur_tx is the index of a TD that is one of: + * unallocated/freed (status set & tx_skb[entry] clear) + * allocated and sent (status set & tx_skb[entry] set ) + * allocated and not yet sent (status clear & tx_skb[entry] set ) + * still being allocated by sonic_send_packet 
(status clear & tx_skb[entry] clear) + */ - if (status & SONIC_INT_TXDN) { - int dirty_tx = lp->dirty_tx; + if (sonic_debug > 2) + printk("%s: tx done\n", dev->name); - while (dirty_tx < lp->cur_tx) { - int entry = dirty_tx & SONIC_TDS_MASK; - int status = lp->tda[entry].tx_status; + while (lp->tx_skb[entry] != NULL) { + if ((td_status = sonic_tda_get(dev, entry, SONIC_TD_STATUS)) == 0) + break; - if (sonic_debug > 3) - printk - ("sonic_interrupt: status %d, cur_tx %d, dirty_tx %d\n", - status, lp->cur_tx, lp->dirty_tx); + if (td_status & 0x0001) { + lp->stats.tx_packets++; + lp->stats.tx_bytes += sonic_tda_get(dev, entry, SONIC_TD_PKTSIZE); + } else { + lp->stats.tx_errors++; + if (td_status & 0x0642) + lp->stats.tx_aborted_errors++; + if (td_status & 0x0180) + lp->stats.tx_carrier_errors++; + if (td_status & 0x0020) + lp->stats.tx_window_errors++; + if (td_status & 0x0004) + lp->stats.tx_fifo_errors++; + } - if (status == 0) { - /* It still hasn't been Txed, kick the sonic again */ - SONIC_WRITE(SONIC_CMD, SONIC_CR_TXP); - break; - } - - /* put back EOL and free descriptor */ - lp->tda[entry].tx_frag_count = 0; - lp->tda[entry].tx_status = 0; - - if (status & 0x0001) - lp->stats.tx_packets++; - else { - lp->stats.tx_errors++; - if (status & 0x0642) - lp->stats.tx_aborted_errors++; - if (status & 0x0180) - lp->stats.tx_carrier_errors++; - if (status & 0x0020) - lp->stats.tx_window_errors++; - if (status & 0x0004) - lp->stats.tx_fifo_errors++; - } - - /* We must free the original skb */ - if (lp->tx_skb[entry]) { + /* We must free the original skb */ dev_kfree_skb_irq(lp->tx_skb[entry]); - lp->tx_skb[entry] = 0; + lp->tx_skb[entry] = NULL; + /* and unmap DMA buffer */ + dma_unmap_single(lp->device, lp->tx_laddr[entry], lp->tx_len[entry], DMA_TO_DEVICE); + lp->tx_laddr[entry] = (dma_addr_t)0; + freed_some = 1; + + if (sonic_tda_get(dev, entry, SONIC_TD_LINK) & SONIC_EOL) { + entry = (entry + 1) & SONIC_TDS_MASK; + break; + } + entry = (entry + 1) & SONIC_TDS_MASK; } 
- /* and the VDMA address */ - vdma_free(lp->tx_laddr[entry]); - dirty_tx++; + + if (freed_some || lp->tx_skb[entry] == NULL) + netif_wake_queue(dev); /* The ring is no longer full */ + lp->cur_tx = entry; + SONIC_WRITE(SONIC_ISR, SONIC_INT_TXDN); /* clear the interrupt */ } - if (lp->tx_full - && dirty_tx + SONIC_NUM_TDS > lp->cur_tx + 2) { - /* The ring is no longer full, clear tbusy. */ - lp->tx_full = 0; - netif_wake_queue(dev); + /* + * check error conditions + */ + if (status & SONIC_INT_RFO) { + if (sonic_debug > 1) + printk("%s: rx fifo overrun\n", dev->name); + lp->stats.rx_fifo_errors++; + SONIC_WRITE(SONIC_ISR, SONIC_INT_RFO); /* clear the interrupt */ + } + if (status & SONIC_INT_RDE) { + if (sonic_debug > 1) + printk("%s: rx descriptors exhausted\n", dev->name); + lp->stats.rx_dropped++; + SONIC_WRITE(SONIC_ISR, SONIC_INT_RDE); /* clear the interrupt */ + } + if (status & SONIC_INT_RBAE) { + if (sonic_debug > 1) + printk("%s: rx buffer area exceeded\n", dev->name); + lp->stats.rx_dropped++; + SONIC_WRITE(SONIC_ISR, SONIC_INT_RBAE); /* clear the interrupt */ } - lp->dirty_tx = dirty_tx; - } + /* counter overruns; all counters are 16bit wide */ + if (status & SONIC_INT_FAE) { + lp->stats.rx_frame_errors += 65536; + SONIC_WRITE(SONIC_ISR, SONIC_INT_FAE); /* clear the interrupt */ + } + if (status & SONIC_INT_CRC) { + lp->stats.rx_crc_errors += 65536; + SONIC_WRITE(SONIC_ISR, SONIC_INT_CRC); /* clear the interrupt */ + } + if (status & SONIC_INT_MP) { + lp->stats.rx_missed_errors += 65536; + SONIC_WRITE(SONIC_ISR, SONIC_INT_MP); /* clear the interrupt */ + } - /* - * check error conditions - */ - if (status & SONIC_INT_RFO) { - printk("%s: receive fifo underrun\n", dev->name); - lp->stats.rx_fifo_errors++; - } - if (status & SONIC_INT_RDE) { - printk("%s: receive descriptors exhausted\n", dev->name); - lp->stats.rx_dropped++; - } - if (status & SONIC_INT_RBE) { - printk("%s: receive buffer exhausted\n", dev->name); - lp->stats.rx_dropped++; - } - if 
(status & SONIC_INT_RBAE) { - printk("%s: receive buffer area exhausted\n", dev->name); - lp->stats.rx_dropped++; - } + /* transmit error */ + if (status & SONIC_INT_TXER) { + if ((SONIC_READ(SONIC_TCR) & SONIC_TCR_FU) && (sonic_debug > 2)) + printk(KERN_ERR "%s: tx fifo underrun\n", dev->name); + SONIC_WRITE(SONIC_ISR, SONIC_INT_TXER); /* clear the interrupt */ + } - /* counter overruns; all counters are 16bit wide */ - if (status & SONIC_INT_FAE) - lp->stats.rx_frame_errors += 65536; - if (status & SONIC_INT_CRC) - lp->stats.rx_crc_errors += 65536; - if (status & SONIC_INT_MP) - lp->stats.rx_missed_errors += 65536; + /* bus retry */ + if (status & SONIC_INT_BR) { + printk(KERN_ERR "%s: Bus retry occurred! Device interrupt disabled.\n", + dev->name); + /* ... to help debug DMA problems causing endless interrupts. */ + /* Bounce the eth interface to turn on the interrupt again. */ + SONIC_WRITE(SONIC_IMR, 0); + SONIC_WRITE(SONIC_ISR, SONIC_INT_BR); /* clear the interrupt */ + } - /* transmit error */ - if (status & SONIC_INT_TXER) - lp->stats.tx_errors++; - - /* - * clear interrupt bits and return - */ - SONIC_WRITE(SONIC_ISR, status); + /* load CAM done */ + if (status & SONIC_INT_LCD) + SONIC_WRITE(SONIC_ISR, SONIC_INT_LCD); /* clear the interrupt */ + } while((status = SONIC_READ(SONIC_ISR) & SONIC_IMR_DEFAULT)); return IRQ_HANDLED; } /* - * We have a good packet(s), get it/them out of the buffers. + * We have a good packet(s), pass it/them up the network stack. 
*/ static void sonic_rx(struct net_device *dev) { - unsigned int base_addr = dev->base_addr; - struct sonic_local *lp = (struct sonic_local *) dev->priv; - sonic_rd_t *rd = &lp->rda[lp->cur_rx & SONIC_RDS_MASK]; + struct sonic_local *lp = netdev_priv(dev); int status; + int entry = lp->cur_rx; - while (rd->in_use == 0) { - struct sk_buff *skb; + while (sonic_rda_get(dev, entry, SONIC_RD_IN_USE) == 0) { + struct sk_buff *used_skb; + struct sk_buff *new_skb; + dma_addr_t new_laddr; + u16 bufadr_l; + u16 bufadr_h; int pkt_len; - unsigned char *pkt_ptr; - status = rd->rx_status; - if (sonic_debug > 3) - printk("status %x, cur_rx %d, cur_rra %x\n", - status, lp->cur_rx, lp->cur_rra); + status = sonic_rda_get(dev, entry, SONIC_RD_STATUS); if (status & SONIC_RCR_PRX) { - pkt_len = rd->rx_pktlen; - pkt_ptr = - (char *) - sonic_chiptomem((rd->rx_pktptr_h << 16) + - rd->rx_pktptr_l); - - if (sonic_debug > 3) - printk - ("pktptr %p (rba %p) h:%x l:%x, bsize h:%x l:%x\n", - pkt_ptr, lp->rba, rd->rx_pktptr_h, - rd->rx_pktptr_l, - SONIC_READ(SONIC_RBWC1), - SONIC_READ(SONIC_RBWC0)); - /* Malloc up new buffer. 
*/ - skb = dev_alloc_skb(pkt_len + 2); - if (skb == NULL) { - printk - ("%s: Memory squeeze, dropping packet.\n", - dev->name); + new_skb = dev_alloc_skb(SONIC_RBSIZE + 2); + if (new_skb == NULL) { + printk(KERN_ERR "%s: Memory squeeze, dropping packet.\n", dev->name); lp->stats.rx_dropped++; break; } - skb->dev = dev; - skb_reserve(skb, 2); /* 16 byte align */ - skb_put(skb, pkt_len); /* Make room */ - eth_copy_and_sum(skb, pkt_ptr, pkt_len, 0); - skb->protocol = eth_type_trans(skb, dev); - netif_rx(skb); /* pass the packet to upper layers */ + new_skb->dev = dev; + /* provide 16 byte IP header alignment unless DMA requires otherwise */ + if(SONIC_BUS_SCALE(lp->dma_bitmode) == 2) + skb_reserve(new_skb, 2); + + new_laddr = dma_map_single(lp->device, skb_put(new_skb, SONIC_RBSIZE), + SONIC_RBSIZE, DMA_FROM_DEVICE); + if (!new_laddr) { + dev_kfree_skb(new_skb); + printk(KERN_ERR "%s: Failed to map rx buffer, dropping packet.\n", dev->name); + lp->stats.rx_dropped++; + break; + } + + /* now we have a new skb to replace it, pass the used one up the stack */ + dma_unmap_single(lp->device, lp->rx_laddr[entry], SONIC_RBSIZE, DMA_FROM_DEVICE); + used_skb = lp->rx_skb[entry]; + pkt_len = sonic_rda_get(dev, entry, SONIC_RD_PKTLEN); + skb_trim(used_skb, pkt_len); + used_skb->protocol = eth_type_trans(used_skb, dev); + netif_rx(used_skb); dev->last_rx = jiffies; lp->stats.rx_packets++; lp->stats.rx_bytes += pkt_len; + /* and insert the new skb */ + lp->rx_laddr[entry] = new_laddr; + lp->rx_skb[entry] = new_skb; + + bufadr_l = (unsigned long)new_laddr & 0xffff; + bufadr_h = (unsigned long)new_laddr >> 16; + sonic_rra_put(dev, entry, SONIC_RR_BUFADR_L, bufadr_l); + sonic_rra_put(dev, entry, SONIC_RR_BUFADR_H, bufadr_h); } else { /* This should only happen, if we enable accepting broken packets. 
*/ lp->stats.rx_errors++; @@ -341,29 +498,35 @@ static void sonic_rx(struct net_device *dev) if (status & SONIC_RCR_CRCR) lp->stats.rx_crc_errors++; } - - rd->in_use = 1; - rd = &lp->rda[(++lp->cur_rx) & SONIC_RDS_MASK]; - /* now give back the buffer to the receive buffer area */ if (status & SONIC_RCR_LPKT) { /* - * this was the last packet out of the current receice buffer + * this was the last packet out of the current receive buffer * give the buffer back to the SONIC */ - lp->cur_rra += sizeof(sonic_rr_t); - if (lp->cur_rra > - (lp->rra_laddr + - (SONIC_NUM_RRS - - 1) * sizeof(sonic_rr_t))) lp->cur_rra = - lp->rra_laddr; - SONIC_WRITE(SONIC_RWP, lp->cur_rra & 0xffff); + lp->cur_rwp += SIZEOF_SONIC_RR * SONIC_BUS_SCALE(lp->dma_bitmode); + if (lp->cur_rwp >= lp->rra_end) lp->cur_rwp = lp->rra_laddr & 0xffff; + SONIC_WRITE(SONIC_RWP, lp->cur_rwp); + if (SONIC_READ(SONIC_ISR) & SONIC_INT_RBE) { + if (sonic_debug > 2) + printk("%s: rx buffer exhausted\n", dev->name); + SONIC_WRITE(SONIC_ISR, SONIC_INT_RBE); /* clear the flag */ + } } else - printk - ("%s: rx desc without RCR_LPKT. Shouldn't happen !?\n", + printk(KERN_ERR "%s: rx desc without RCR_LPKT. Shouldn't happen !?\n", dev->name); + /* + * give back the descriptor + */ + sonic_rda_put(dev, entry, SONIC_RD_LINK, + sonic_rda_get(dev, entry, SONIC_RD_LINK) | SONIC_EOL); + sonic_rda_put(dev, entry, SONIC_RD_IN_USE, 1); + sonic_rda_put(dev, lp->eol_rx, SONIC_RD_LINK, + sonic_rda_get(dev, lp->eol_rx, SONIC_RD_LINK) & ~SONIC_EOL); + lp->eol_rx = entry; + lp->cur_rx = entry = (entry + 1) & SONIC_RDS_MASK; } /* - * If any worth-while packets have been received, dev_rint() + * If any worth-while packets have been received, netif_rx() * has done a mark_bh(NET_BH) for us and will work on them * when we get to the bottom-half routine. 
*/ @@ -376,8 +539,7 @@ static void sonic_rx(struct net_device *dev) */ static struct net_device_stats *sonic_get_stats(struct net_device *dev) { - struct sonic_local *lp = (struct sonic_local *) dev->priv; - unsigned int base_addr = dev->base_addr; + struct sonic_local *lp = netdev_priv(dev); /* read the tally counter from the SONIC and reset them */ lp->stats.rx_crc_errors += SONIC_READ(SONIC_CRCT); @@ -396,8 +558,7 @@ static struct net_device_stats *sonic_get_stats(struct net_device *dev) */ static void sonic_multicast_list(struct net_device *dev) { - struct sonic_local *lp = (struct sonic_local *) dev->priv; - unsigned int base_addr = dev->base_addr; + struct sonic_local *lp = netdev_priv(dev); unsigned int rcr; struct dev_mc_list *dmi = dev->mc_list; unsigned char *addr; @@ -413,20 +574,15 @@ static void sonic_multicast_list(struct net_device *dev) rcr |= SONIC_RCR_AMC; } else { if (sonic_debug > 2) - printk - ("sonic_multicast_list: mc_count %d\n", - dev->mc_count); - lp->cda.cam_enable = 1; /* always enable our own address */ + printk("sonic_multicast_list: mc_count %d\n", dev->mc_count); + sonic_set_cam_enable(dev, 1); /* always enable our own address */ for (i = 1; i <= dev->mc_count; i++) { addr = dmi->dmi_addr; dmi = dmi->next; - lp->cda.cam_desc[i].cam_cap0 = - addr[1] << 8 | addr[0]; - lp->cda.cam_desc[i].cam_cap1 = - addr[3] << 8 | addr[2]; - lp->cda.cam_desc[i].cam_cap2 = - addr[5] << 8 | addr[4]; - lp->cda.cam_enable |= (1 << i); + sonic_cda_put(dev, i, SONIC_CD_CAP0, addr[1] << 8 | addr[0]); + sonic_cda_put(dev, i, SONIC_CD_CAP1, addr[3] << 8 | addr[2]); + sonic_cda_put(dev, i, SONIC_CD_CAP2, addr[5] << 8 | addr[4]); + sonic_set_cam_enable(dev, sonic_get_cam_enable(dev) | (1 << i)); } SONIC_WRITE(SONIC_CDC, 16); /* issue Load CAM command */ @@ -447,19 +603,16 @@ static void sonic_multicast_list(struct net_device *dev) */ static int sonic_init(struct net_device *dev) { - unsigned int base_addr = dev->base_addr; unsigned int cmd; - struct sonic_local 
*lp = (struct sonic_local *) dev->priv; - unsigned int rra_start; - unsigned int rra_end; + struct sonic_local *lp = netdev_priv(dev); int i; /* * put the Sonic into software-reset mode and * disable all interrupts */ - SONIC_WRITE(SONIC_ISR, 0x7fff); SONIC_WRITE(SONIC_IMR, 0); + SONIC_WRITE(SONIC_ISR, 0x7fff); SONIC_WRITE(SONIC_CMD, SONIC_CR_RST); /* @@ -475,34 +628,32 @@ static int sonic_init(struct net_device *dev) if (sonic_debug > 2) printk("sonic_init: initialize receive resource area\n"); - rra_start = lp->rra_laddr & 0xffff; - rra_end = - (rra_start + (SONIC_NUM_RRS * sizeof(sonic_rr_t))) & 0xffff; - for (i = 0; i < SONIC_NUM_RRS; i++) { - lp->rra[i].rx_bufadr_l = - (lp->rba_laddr + i * SONIC_RBSIZE) & 0xffff; - lp->rra[i].rx_bufadr_h = - (lp->rba_laddr + i * SONIC_RBSIZE) >> 16; - lp->rra[i].rx_bufsize_l = SONIC_RBSIZE >> 1; - lp->rra[i].rx_bufsize_h = 0; + u16 bufadr_l = (unsigned long)lp->rx_laddr[i] & 0xffff; + u16 bufadr_h = (unsigned long)lp->rx_laddr[i] >> 16; + sonic_rra_put(dev, i, SONIC_RR_BUFADR_L, bufadr_l); + sonic_rra_put(dev, i, SONIC_RR_BUFADR_H, bufadr_h); + sonic_rra_put(dev, i, SONIC_RR_BUFSIZE_L, SONIC_RBSIZE >> 1); + sonic_rra_put(dev, i, SONIC_RR_BUFSIZE_H, 0); } /* initialize all RRA registers */ - SONIC_WRITE(SONIC_RSA, rra_start); - SONIC_WRITE(SONIC_REA, rra_end); - SONIC_WRITE(SONIC_RRP, rra_start); - SONIC_WRITE(SONIC_RWP, rra_end); + lp->rra_end = (lp->rra_laddr + SONIC_NUM_RRS * SIZEOF_SONIC_RR * + SONIC_BUS_SCALE(lp->dma_bitmode)) & 0xffff; + lp->cur_rwp = (lp->rra_laddr + (SONIC_NUM_RRS - 1) * SIZEOF_SONIC_RR * + SONIC_BUS_SCALE(lp->dma_bitmode)) & 0xffff; + + SONIC_WRITE(SONIC_RSA, lp->rra_laddr & 0xffff); + SONIC_WRITE(SONIC_REA, lp->rra_end); + SONIC_WRITE(SONIC_RRP, lp->rra_laddr & 0xffff); + SONIC_WRITE(SONIC_RWP, lp->cur_rwp); SONIC_WRITE(SONIC_URRA, lp->rra_laddr >> 16); - SONIC_WRITE(SONIC_EOBC, (SONIC_RBSIZE - 2) >> 1); - - lp->cur_rra = - lp->rra_laddr + (SONIC_NUM_RRS - 1) * sizeof(sonic_rr_t); + 
SONIC_WRITE(SONIC_EOBC, (SONIC_RBSIZE >> 1) - (lp->dma_bitmode ? 2 : 1)); /* load the resource pointers */ if (sonic_debug > 3) - printk("sonic_init: issueing RRRA command\n"); - + printk("sonic_init: issuing RRRA command\n"); + SONIC_WRITE(SONIC_CMD, SONIC_CR_RRRA); i = 0; while (i++ < 100) { @@ -511,27 +662,30 @@ static int sonic_init(struct net_device *dev) } if (sonic_debug > 2) - printk("sonic_init: status=%x\n", SONIC_READ(SONIC_CMD)); - + printk("sonic_init: status=%x i=%d\n", SONIC_READ(SONIC_CMD), i); + /* * Initialize the receive descriptors so that they * become a circular linked list, ie. let the last * descriptor point to the first again. */ if (sonic_debug > 2) - printk("sonic_init: initialize receive descriptors\n"); - for (i = 0; i < SONIC_NUM_RDS; i++) { - lp->rda[i].rx_status = 0; - lp->rda[i].rx_pktlen = 0; - lp->rda[i].rx_pktptr_l = 0; - lp->rda[i].rx_pktptr_h = 0; - lp->rda[i].rx_seqno = 0; - lp->rda[i].in_use = 1; - lp->rda[i].link = - lp->rda_laddr + (i + 1) * sizeof(sonic_rd_t); + printk("sonic_init: initialize receive descriptors\n"); + for (i=0; i<SONIC_NUM_RDS; i++) { + sonic_rda_put(dev, i, SONIC_RD_STATUS, 0); + sonic_rda_put(dev, i, SONIC_RD_PKTLEN, 0); + sonic_rda_put(dev, i, SONIC_RD_PKTPTR_L, 0); + sonic_rda_put(dev, i, SONIC_RD_PKTPTR_H, 0); + sonic_rda_put(dev, i, SONIC_RD_SEQNO, 0); + sonic_rda_put(dev, i, SONIC_RD_IN_USE, 1); + sonic_rda_put(dev, i, SONIC_RD_LINK, + lp->rda_laddr + + ((i+1) * SIZEOF_SONIC_RD * SONIC_BUS_SCALE(lp->dma_bitmode))); } /* fix last descriptor */ - lp->rda[SONIC_NUM_RDS - 1].link = lp->rda_laddr; + sonic_rda_put(dev, SONIC_NUM_RDS - 1, SONIC_RD_LINK, + (lp->rda_laddr & 0xffff) | SONIC_EOL); + lp->eol_rx = SONIC_NUM_RDS - 1; lp->cur_rx = 0; SONIC_WRITE(SONIC_URDA, lp->rda_laddr >> 16); SONIC_WRITE(SONIC_CRDA, lp->rda_laddr & 0xffff); @@ -542,34 +696,34 @@ static int sonic_init(struct net_device *dev) if (sonic_debug > 2) printk("sonic_init: initialize transmit descriptors\n"); for (i = 0; i < SONIC_NUM_TDS; i++) { - lp->tda[i].tx_status = 0; - lp->tda[i].tx_config = 0; - lp->tda[i].tx_pktsize = 0; - lp->tda[i].tx_frag_count = 0; - lp->tda[i].link = - (lp->tda_laddr + - (i + 1) * sizeof(sonic_td_t)) | SONIC_END_OF_LINKS; + sonic_tda_put(dev, i, SONIC_TD_STATUS, 0); + sonic_tda_put(dev, i, SONIC_TD_CONFIG, 0); + sonic_tda_put(dev, i,
SONIC_TD_PKTSIZE, 0); + sonic_tda_put(dev, i, SONIC_TD_FRAG_COUNT, 0); + sonic_tda_put(dev, i, SONIC_TD_LINK, + (lp->tda_laddr & 0xffff) + + (i + 1) * SIZEOF_SONIC_TD * SONIC_BUS_SCALE(lp->dma_bitmode)); + lp->tx_skb[i] = NULL; } - lp->tda[SONIC_NUM_TDS - 1].link = - (lp->tda_laddr & 0xffff) | SONIC_END_OF_LINKS; + /* fix last descriptor */ + sonic_tda_put(dev, SONIC_NUM_TDS - 1, SONIC_TD_LINK, + (lp->tda_laddr & 0xffff)); SONIC_WRITE(SONIC_UTDA, lp->tda_laddr >> 16); SONIC_WRITE(SONIC_CTDA, lp->tda_laddr & 0xffff); - lp->cur_tx = lp->dirty_tx = 0; - + lp->cur_tx = lp->next_tx = 0; + lp->eol_tx = SONIC_NUM_TDS - 1; + /* * put our own address to CAM desc[0] */ - lp->cda.cam_desc[0].cam_cap0 = - dev->dev_addr[1] << 8 | dev->dev_addr[0]; - lp->cda.cam_desc[0].cam_cap1 = - dev->dev_addr[3] << 8 | dev->dev_addr[2]; - lp->cda.cam_desc[0].cam_cap2 = - dev->dev_addr[5] << 8 | dev->dev_addr[4]; - lp->cda.cam_enable = 1; + sonic_cda_put(dev, 0, SONIC_CD_CAP0, dev->dev_addr[1] << 8 | dev->dev_addr[0]); + sonic_cda_put(dev, 0, SONIC_CD_CAP1, dev->dev_addr[3] << 8 | dev->dev_addr[2]); + sonic_cda_put(dev, 0, SONIC_CD_CAP2, dev->dev_addr[5] << 8 | dev->dev_addr[4]); + sonic_set_cam_enable(dev, 1); for (i = 0; i < 16; i++) - lp->cda.cam_desc[i].cam_entry_pointer = i; + sonic_cda_put(dev, i, SONIC_CD_ENTRY_POINTER, i); /* * initialize CAM registers @@ -588,8 +742,8 @@ static int sonic_init(struct net_device *dev) break; } if (sonic_debug > 2) { - printk("sonic_init: CMD=%x, ISR=%x\n", - SONIC_READ(SONIC_CMD), SONIC_READ(SONIC_ISR)); + printk("sonic_init: CMD=%x, ISR=%x\n, i=%d", + SONIC_READ(SONIC_CMD), SONIC_READ(SONIC_ISR), i); } /* @@ -604,7 +758,7 @@ static int sonic_init(struct net_device *dev) cmd = SONIC_READ(SONIC_CMD); if ((cmd & SONIC_CR_RXEN) == 0 || (cmd & SONIC_CR_STP) == 0) - printk("sonic_init: failed, status=%x\n", cmd); + printk(KERN_ERR "sonic_init: failed, status=%x\n", cmd); if (sonic_debug > 2) printk("sonic_init: new status=%x\n", diff --git 
a/drivers/net/sonic.h b/drivers/net/sonic.h index c4a6d58e4afb..cede969a8baa 100644 --- a/drivers/net/sonic.h +++ b/drivers/net/sonic.h @@ -1,5 +1,5 @@ /* - * Helpfile for sonic.c + * Header file for sonic.c * * (C) Waldorf Electronics, Germany * Written by Andreas Busse @@ -9,10 +9,16 @@ * and pad structure members must be exchanged. Also, the structures * need to be changed accordingly to the bus size. * - * 981229 MSch: did just that for the 68k Mac port (32 bit, big endian), - * see CONFIG_MACSONIC branch below. + * 981229 MSch: did just that for the 68k Mac port (32 bit, big endian) * + * 990611 David Huggins-Daines : This machine abstraction + * does not cope with 16-bit bus sizes very well. Therefore I have + * rewritten it with ugly macros and evil inlines. + * + * 050625 Finn Thain: introduced more 32-bit cards and dhd's support + * for 16-bit cards (from the mac68k project). */ + #ifndef SONIC_H #define SONIC_H @@ -83,6 +89,7 @@ /* * Error counters */ + #define SONIC_CRCT 0x2c #define SONIC_FAET 0x2d #define SONIC_MPT 0x2e @@ -182,14 +189,14 @@ #define SONIC_INT_BR 0x4000 #define SONIC_INT_HBL 0x2000 -#define SONIC_INT_LCD 0x1000 -#define SONIC_INT_PINT 0x0800 -#define SONIC_INT_PKTRX 0x0400 -#define SONIC_INT_TXDN 0x0200 -#define SONIC_INT_TXER 0x0100 -#define SONIC_INT_TC 0x0080 -#define SONIC_INT_RDE 0x0040 -#define SONIC_INT_RBE 0x0020 +#define SONIC_INT_LCD 0x1000 +#define SONIC_INT_PINT 0x0800 +#define SONIC_INT_PKTRX 0x0400 +#define SONIC_INT_TXDN 0x0200 +#define SONIC_INT_TXER 0x0100 +#define SONIC_INT_TC 0x0080 +#define SONIC_INT_RDE 0x0040 +#define SONIC_INT_RBE 0x0020 #define SONIC_INT_RBAE 0x0010 #define SONIC_INT_CRC 0x0008 #define SONIC_INT_FAE 0x0004 @@ -201,224 +208,61 @@ * The interrupts we allow. 
*/ -#define SONIC_IMR_DEFAULT (SONIC_INT_BR | \ - SONIC_INT_LCD | \ - SONIC_INT_PINT | \ +#define SONIC_IMR_DEFAULT ( SONIC_INT_BR | \ + SONIC_INT_LCD | \ + SONIC_INT_RFO | \ SONIC_INT_PKTRX | \ SONIC_INT_TXDN | \ SONIC_INT_TXER | \ SONIC_INT_RDE | \ - SONIC_INT_RBE | \ SONIC_INT_RBAE | \ SONIC_INT_CRC | \ SONIC_INT_FAE | \ SONIC_INT_MP) -#define SONIC_END_OF_LINKS 0x0001 - - -#ifdef CONFIG_MACSONIC -/* - * Big endian like structures on 680x0 Macs - */ - -typedef struct { - u32 rx_bufadr_l; /* receive buffer ptr */ - u32 rx_bufadr_h; - - u32 rx_bufsize_l; /* no. of words in the receive buffer */ - u32 rx_bufsize_h; -} sonic_rr_t; - -/* - * Sonic receive descriptor. Receive descriptors are - * kept in a linked list of these structures. - */ - -typedef struct { - SREGS_PAD(pad0); - u16 rx_status; /* status after reception of a packet */ - SREGS_PAD(pad1); - u16 rx_pktlen; /* length of the packet incl. CRC */ - - /* - * Pointers to the location in the receive buffer area (RBA) - * where the packet resides. A packet is always received into - * a contiguous piece of memory. - */ - SREGS_PAD(pad2); - u16 rx_pktptr_l; - SREGS_PAD(pad3); - u16 rx_pktptr_h; - - SREGS_PAD(pad4); - u16 rx_seqno; /* sequence no. */ - - SREGS_PAD(pad5); - u16 link; /* link to next RDD (end if EOL bit set) */ - - /* - * Owner of this descriptor, 0= driver, 1=sonic - */ - - SREGS_PAD(pad6); - u16 in_use; - - caddr_t rda_next; /* pointer to next RD */ -} sonic_rd_t; - - -/* - * Describes a Transmit Descriptor - */ -typedef struct { - SREGS_PAD(pad0); - u16 tx_status; /* status after transmission of a packet */ - SREGS_PAD(pad1); - u16 tx_config; /* transmit configuration for this packet */ - SREGS_PAD(pad2); - u16 tx_pktsize; /* size of the packet to be transmitted */ - SREGS_PAD(pad3); - u16 tx_frag_count; /* no. 
of fragments */ - - SREGS_PAD(pad4); - u16 tx_frag_ptr_l; - SREGS_PAD(pad5); - u16 tx_frag_ptr_h; - SREGS_PAD(pad6); - u16 tx_frag_size; - - SREGS_PAD(pad7); - u16 link; /* ptr to next descriptor */ -} sonic_td_t; - - -/* - * Describes an entry in the CAM Descriptor Area. - */ - -typedef struct { - SREGS_PAD(pad0); - u16 cam_entry_pointer; - SREGS_PAD(pad1); - u16 cam_cap0; - SREGS_PAD(pad2); - u16 cam_cap1; - SREGS_PAD(pad3); - u16 cam_cap2; -} sonic_cd_t; - +#define SONIC_EOL 0x0001 #define CAM_DESCRIPTORS 16 +/* Offsets in the various DMA buffers accessed by the SONIC */ -typedef struct { - sonic_cd_t cam_desc[CAM_DESCRIPTORS]; - SREGS_PAD(pad); - u16 cam_enable; -} sonic_cda_t; +#define SONIC_BITMODE16 0 +#define SONIC_BITMODE32 1 +#define SONIC_BUS_SCALE(bitmode) ((bitmode) ? 4 : 2) +/* Note! These are all measured in bus-size units, so use SONIC_BUS_SCALE */ +#define SIZEOF_SONIC_RR 4 +#define SONIC_RR_BUFADR_L 0 +#define SONIC_RR_BUFADR_H 1 +#define SONIC_RR_BUFSIZE_L 2 +#define SONIC_RR_BUFSIZE_H 3 -#else /* original declarations, little endian 32 bit */ +#define SIZEOF_SONIC_RD 7 +#define SONIC_RD_STATUS 0 +#define SONIC_RD_PKTLEN 1 +#define SONIC_RD_PKTPTR_L 2 +#define SONIC_RD_PKTPTR_H 3 +#define SONIC_RD_SEQNO 4 +#define SONIC_RD_LINK 5 +#define SONIC_RD_IN_USE 6 -/* - * structure definitions - */ +#define SIZEOF_SONIC_TD 8 +#define SONIC_TD_STATUS 0 +#define SONIC_TD_CONFIG 1 +#define SONIC_TD_PKTSIZE 2 +#define SONIC_TD_FRAG_COUNT 3 +#define SONIC_TD_FRAG_PTR_L 4 +#define SONIC_TD_FRAG_PTR_H 5 +#define SONIC_TD_FRAG_SIZE 6 +#define SONIC_TD_LINK 7 -typedef struct { - u32 rx_bufadr_l; /* receive buffer ptr */ - u32 rx_bufadr_h; +#define SIZEOF_SONIC_CD 4 +#define SONIC_CD_ENTRY_POINTER 0 +#define SONIC_CD_CAP0 1 +#define SONIC_CD_CAP1 2 +#define SONIC_CD_CAP2 3 - u32 rx_bufsize_l; /* no. of words in the receive buffer */ - u32 rx_bufsize_h; -} sonic_rr_t; - -/* - * Sonic receive descriptor. 
Receive descriptors are - * kept in a linked list of these structures. - */ - -typedef struct { - u16 rx_status; /* status after reception of a packet */ - SREGS_PAD(pad0); - u16 rx_pktlen; /* length of the packet incl. CRC */ - SREGS_PAD(pad1); - - /* - * Pointers to the location in the receive buffer area (RBA) - * where the packet resides. A packet is always received into - * a contiguous piece of memory. - */ - u16 rx_pktptr_l; - SREGS_PAD(pad2); - u16 rx_pktptr_h; - SREGS_PAD(pad3); - - u16 rx_seqno; /* sequence no. */ - SREGS_PAD(pad4); - - u16 link; /* link to next RDD (end if EOL bit set) */ - SREGS_PAD(pad5); - - /* - * Owner of this descriptor, 0= driver, 1=sonic - */ - - u16 in_use; - SREGS_PAD(pad6); - - caddr_t rda_next; /* pointer to next RD */ -} sonic_rd_t; - - -/* - * Describes a Transmit Descriptor - */ -typedef struct { - u16 tx_status; /* status after transmission of a packet */ - SREGS_PAD(pad0); - u16 tx_config; /* transmit configuration for this packet */ - SREGS_PAD(pad1); - u16 tx_pktsize; /* size of the packet to be transmitted */ - SREGS_PAD(pad2); - u16 tx_frag_count; /* no. of fragments */ - SREGS_PAD(pad3); - - u16 tx_frag_ptr_l; - SREGS_PAD(pad4); - u16 tx_frag_ptr_h; - SREGS_PAD(pad5); - u16 tx_frag_size; - SREGS_PAD(pad6); - - u16 link; /* ptr to next descriptor */ - SREGS_PAD(pad7); -} sonic_td_t; - - -/* - * Describes an entry in the CAM Descriptor Area. - */ - -typedef struct { - u16 cam_entry_pointer; - SREGS_PAD(pad0); - u16 cam_cap0; - SREGS_PAD(pad1); - u16 cam_cap1; - SREGS_PAD(pad2); - u16 cam_cap2; - SREGS_PAD(pad3); -} sonic_cd_t; - -#define CAM_DESCRIPTORS 16 - - -typedef struct { - sonic_cd_t cam_desc[CAM_DESCRIPTORS]; - u16 cam_enable; - SREGS_PAD(pad); -} sonic_cda_t; -#endif /* endianness */ +#define SIZEOF_SONIC_CDA ((CAM_DESCRIPTORS * SIZEOF_SONIC_CD) + 1) +#define SONIC_CDA_CAM_ENABLE (CAM_DESCRIPTORS * SIZEOF_SONIC_CD) /* * Some tunables for the buffer areas. 
Power of 2 is required @@ -426,44 +270,60 @@ typedef struct { * * MSch: use more buffer space for the slow m68k Macs! */ -#ifdef CONFIG_MACSONIC -#define SONIC_NUM_RRS 32 /* number of receive resources */ -#define SONIC_NUM_RDS SONIC_NUM_RRS /* number of receive descriptors */ -#define SONIC_NUM_TDS 32 /* number of transmit descriptors */ -#else -#define SONIC_NUM_RRS 16 /* number of receive resources */ -#define SONIC_NUM_RDS SONIC_NUM_RRS /* number of receive descriptors */ -#define SONIC_NUM_TDS 16 /* number of transmit descriptors */ -#endif -#define SONIC_RBSIZE 1520 /* size of one resource buffer */ +#define SONIC_NUM_RRS 16 /* number of receive resources */ +#define SONIC_NUM_RDS SONIC_NUM_RRS /* number of receive descriptors */ +#define SONIC_NUM_TDS 16 /* number of transmit descriptors */ -#define SONIC_RDS_MASK (SONIC_NUM_RDS-1) -#define SONIC_TDS_MASK (SONIC_NUM_TDS-1) +#define SONIC_RDS_MASK (SONIC_NUM_RDS-1) +#define SONIC_TDS_MASK (SONIC_NUM_TDS-1) +#define SONIC_RBSIZE 1520 /* size of one resource buffer */ + +/* Again, measured in bus size units! */ +#define SIZEOF_SONIC_DESC (SIZEOF_SONIC_CDA \ + + (SIZEOF_SONIC_TD * SONIC_NUM_TDS) \ + + (SIZEOF_SONIC_RD * SONIC_NUM_RDS) \ + + (SIZEOF_SONIC_RR * SONIC_NUM_RRS)) /* Information that need to be kept for each board. 
*/ struct sonic_local { - sonic_cda_t cda; /* virtual CPU address of CDA */ - sonic_td_t tda[SONIC_NUM_TDS]; /* transmit descriptor area */ - sonic_rr_t rra[SONIC_NUM_RRS]; /* receive resource area */ - sonic_rd_t rda[SONIC_NUM_RDS]; /* receive descriptor area */ - struct sk_buff *tx_skb[SONIC_NUM_TDS]; /* skbuffs for packets to transmit */ - unsigned int tx_laddr[SONIC_NUM_TDS]; /* logical DMA address fro skbuffs */ - unsigned char *rba; /* start of receive buffer areas */ - unsigned int cda_laddr; /* logical DMA address of CDA */ - unsigned int tda_laddr; /* logical DMA address of TDA */ - unsigned int rra_laddr; /* logical DMA address of RRA */ - unsigned int rda_laddr; /* logical DMA address of RDA */ - unsigned int rba_laddr; /* logical DMA address of RBA */ - unsigned int cur_rra; /* current indexes to resource areas */ + /* Bus size. 0 == 16 bits, 1 == 32 bits. */ + int dma_bitmode; + /* Register offset within the longword (independent of endianness, + and varies from one type of Macintosh SONIC to another + (Aarrgh)) */ + int reg_offset; + void *descriptors; + /* Crud. These areas have to be within the same 64K. Therefore + we allocate a desriptors page, and point these to places within it. 
*/ + void *cda; /* CAM descriptor area */ + void *tda; /* Transmit descriptor area */ + void *rra; /* Receive resource area */ + void *rda; /* Receive descriptor area */ + struct sk_buff* volatile rx_skb[SONIC_NUM_RRS]; /* packets to be received */ + struct sk_buff* volatile tx_skb[SONIC_NUM_TDS]; /* packets to be transmitted */ + unsigned int tx_len[SONIC_NUM_TDS]; /* lengths of tx DMA mappings */ + /* Logical DMA addresses on MIPS, bus addresses on m68k + * (so "laddr" is a bit misleading) */ + dma_addr_t descriptors_laddr; + u32 cda_laddr; /* logical DMA address of CDA */ + u32 tda_laddr; /* logical DMA address of TDA */ + u32 rra_laddr; /* logical DMA address of RRA */ + u32 rda_laddr; /* logical DMA address of RDA */ + dma_addr_t rx_laddr[SONIC_NUM_RRS]; /* logical DMA addresses of rx skbuffs */ + dma_addr_t tx_laddr[SONIC_NUM_TDS]; /* logical DMA addresses of tx skbuffs */ + unsigned int rra_end; + unsigned int cur_rwp; unsigned int cur_rx; - unsigned int cur_tx; - unsigned int dirty_tx; /* last unacked transmit packet */ - char tx_full; + unsigned int cur_tx; /* first unacked transmit packet */ + unsigned int eol_rx; + unsigned int eol_tx; /* last unacked transmit packet */ + unsigned int next_tx; /* next free TD */ + struct device *device; /* generic device */ struct net_device_stats stats; }; -#define TX_TIMEOUT 6 +#define TX_TIMEOUT (3 * HZ) /* Index to functions, as function prototypes. */ @@ -477,6 +337,114 @@ static void sonic_multicast_list(struct net_device *dev); static int sonic_init(struct net_device *dev); static void sonic_tx_timeout(struct net_device *dev); +/* Internal inlines for reading/writing DMA buffers. Note that bus + size and endianness matter here, whereas they don't for registers, + as far as we can tell. */ +/* OpenBSD calls this "SWO". I'd like to think that sonic_buf_put() + is a much better name. 
*/ +static inline void sonic_buf_put(void* base, int bitmode, + int offset, __u16 val) +{ + if (bitmode) +#ifdef __BIG_ENDIAN + ((__u16 *) base + (offset*2))[1] = val; +#else + ((__u16 *) base + (offset*2))[0] = val; +#endif + else + ((__u16 *) base)[offset] = val; +} + +static inline __u16 sonic_buf_get(void* base, int bitmode, + int offset) +{ + if (bitmode) +#ifdef __BIG_ENDIAN + return ((volatile __u16 *) base + (offset*2))[1]; +#else + return ((volatile __u16 *) base + (offset*2))[0]; +#endif + else + return ((volatile __u16 *) base)[offset]; +} + +/* Inlines that you should actually use for reading/writing DMA buffers */ +static inline void sonic_cda_put(struct net_device* dev, int entry, + int offset, __u16 val) +{ + struct sonic_local* lp = (struct sonic_local *) dev->priv; + sonic_buf_put(lp->cda, lp->dma_bitmode, + (entry * SIZEOF_SONIC_CD) + offset, val); +} + +static inline __u16 sonic_cda_get(struct net_device* dev, int entry, + int offset) +{ + struct sonic_local* lp = (struct sonic_local *) dev->priv; + return sonic_buf_get(lp->cda, lp->dma_bitmode, + (entry * SIZEOF_SONIC_CD) + offset); +} + +static inline void sonic_set_cam_enable(struct net_device* dev, __u16 val) +{ + struct sonic_local* lp = (struct sonic_local *) dev->priv; + sonic_buf_put(lp->cda, lp->dma_bitmode, SONIC_CDA_CAM_ENABLE, val); +} + +static inline __u16 sonic_get_cam_enable(struct net_device* dev) +{ + struct sonic_local* lp = (struct sonic_local *) dev->priv; + return sonic_buf_get(lp->cda, lp->dma_bitmode, SONIC_CDA_CAM_ENABLE); +} + +static inline void sonic_tda_put(struct net_device* dev, int entry, + int offset, __u16 val) +{ + struct sonic_local* lp = (struct sonic_local *) dev->priv; + sonic_buf_put(lp->tda, lp->dma_bitmode, + (entry * SIZEOF_SONIC_TD) + offset, val); +} + +static inline __u16 sonic_tda_get(struct net_device* dev, int entry, + int offset) +{ + struct sonic_local* lp = (struct sonic_local *) dev->priv; + return sonic_buf_get(lp->tda, lp->dma_bitmode, + 
(entry * SIZEOF_SONIC_TD) + offset); +} + +static inline void sonic_rda_put(struct net_device* dev, int entry, + int offset, __u16 val) +{ + struct sonic_local* lp = (struct sonic_local *) dev->priv; + sonic_buf_put(lp->rda, lp->dma_bitmode, + (entry * SIZEOF_SONIC_RD) + offset, val); +} + +static inline __u16 sonic_rda_get(struct net_device* dev, int entry, + int offset) +{ + struct sonic_local* lp = (struct sonic_local *) dev->priv; + return sonic_buf_get(lp->rda, lp->dma_bitmode, + (entry * SIZEOF_SONIC_RD) + offset); +} + +static inline void sonic_rra_put(struct net_device* dev, int entry, + int offset, __u16 val) +{ + struct sonic_local* lp = (struct sonic_local *) dev->priv; + sonic_buf_put(lp->rra, lp->dma_bitmode, + (entry * SIZEOF_SONIC_RR) + offset, val); +} + +static inline __u16 sonic_rra_get(struct net_device* dev, int entry, + int offset) +{ + struct sonic_local* lp = (struct sonic_local *) dev->priv; + return sonic_buf_get(lp->rra, lp->dma_bitmode, + (entry * SIZEOF_SONIC_RR) + offset); +} + static const char *version = "sonic.c:v0.92 20.9.98 tsbogend@alpha.franken.de\n"; From 8531c5ffbca65f6df868637c26e6df6f88bff738 Mon Sep 17 00:00:00 2001 From: Arthur Kepner Date: Tue, 23 Aug 2005 01:34:53 -0400 Subject: [PATCH 132/584] [PATCH] bonding: inherit zero-copy flags of slaves This change allows a bonding device to inherit the "zero-copy" features of its slave devices. It was inspired by a couple of previous postings on this topic: http://marc.theaimsgroup.com/?l=bonding-devel&m=111924607327794&w=2 http://marc.theaimsgroup.com/?l=bonding-devel&m=111925242706297&w=2 and it's largely a combination of the patches that appear in those emails. 
Signed-off-by: Arthur Kepner --- drivers/net/bonding/bond_main.c | 58 ++++++++++++++++++++++++++++++++- drivers/net/bonding/bonding.h | 3 ++ 2 files changed, 60 insertions(+), 1 deletion(-) diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 2c930da90a85..94c9f68dd16b 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -1604,6 +1604,44 @@ static int bond_sethwaddr(struct net_device *bond_dev, struct net_device *slave_ return 0; } +#define BOND_INTERSECT_FEATURES \ + (NETIF_F_SG|NETIF_F_IP_CSUM|NETIF_F_NO_CSUM|NETIF_F_HW_CSUM) + +/* + * Compute the features available to the bonding device by + * intersection of all of the slave devices' BOND_INTERSECT_FEATURES. + * Call this after attaching or detaching a slave to update the + * bond's features. + */ +static int bond_compute_features(struct bonding *bond) +{ + int i; + struct slave *slave; + struct net_device *bond_dev = bond->dev; + int features = bond->bond_features; + + bond_for_each_slave(bond, slave, i) { + struct net_device * slave_dev = slave->dev; + if (i == 0) { + features |= BOND_INTERSECT_FEATURES; + } + features &= + ~(~slave_dev->features & BOND_INTERSECT_FEATURES); + } + + /* turn off NETIF_F_SG if we need a csum and h/w can't do it */ + if ((features & NETIF_F_SG) && + !(features & (NETIF_F_IP_CSUM | + NETIF_F_NO_CSUM | + NETIF_F_HW_CSUM))) { + features &= ~NETIF_F_SG; + } + + bond_dev->features = features; + + return 0; +} + /* enslave device to bond device */ static int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev) { @@ -1811,6 +1849,8 @@ static int bond_enslave(struct net_device *bond_dev, struct net_device *slave_de new_slave->delay = 0; new_slave->link_failure_count = 0; + bond_compute_features(bond); + if (bond->params.miimon && !bond->params.use_carrier) { link_reporting = bond_check_dev_link(bond, slave_dev, 1); @@ -2015,7 +2055,7 @@ err_free: err_undo_flags: bond_dev->features = old_features; - + 
return res; } @@ -2100,6 +2140,8 @@ static int bond_release(struct net_device *bond_dev, struct net_device *slave_de /* release the slave from its bond */ bond_detach_slave(bond, slave); + bond_compute_features(bond); + if (bond->primary_slave == slave) { bond->primary_slave = NULL; } @@ -2243,6 +2285,8 @@ static int bond_release_all(struct net_device *bond_dev) bond_alb_deinit_slave(bond, slave); } + bond_compute_features(bond); + /* now that the slave is detached, unlock and perform * all the undo steps that should not be called from * within a lock. @@ -3588,6 +3632,7 @@ static int bond_master_netdev_event(unsigned long event, struct net_device *bond static int bond_slave_netdev_event(unsigned long event, struct net_device *slave_dev) { struct net_device *bond_dev = slave_dev->master; + struct bonding *bond = bond_dev->priv; switch (event) { case NETDEV_UNREGISTER: @@ -3626,6 +3671,9 @@ static int bond_slave_netdev_event(unsigned long event, struct net_device *slave * TODO: handle changing the primary's name */ break; + case NETDEV_FEAT_CHANGE: + bond_compute_features(bond); + break; default: break; } @@ -4526,6 +4574,11 @@ static inline void bond_set_mode_ops(struct bonding *bond, int mode) } } +static struct ethtool_ops bond_ethtool_ops = { + .get_tx_csum = ethtool_op_get_tx_csum, + .get_sg = ethtool_op_get_sg, +}; + /* * Does not allocate but creates a /proc entry. * Allowed to fail. 
@@ -4555,6 +4608,7 @@ static int __init bond_init(struct net_device *bond_dev, struct bond_params *par bond_dev->stop = bond_close; bond_dev->get_stats = bond_get_stats; bond_dev->do_ioctl = bond_do_ioctl; + bond_dev->ethtool_ops = &bond_ethtool_ops; bond_dev->set_multicast_list = bond_set_multicast_list; bond_dev->change_mtu = bond_change_mtu; bond_dev->set_mac_address = bond_set_mac_address; @@ -4591,6 +4645,8 @@ static int __init bond_init(struct net_device *bond_dev, struct bond_params *par NETIF_F_HW_VLAN_RX | NETIF_F_HW_VLAN_FILTER); + bond->bond_features = bond_dev->features; + #ifdef CONFIG_PROC_FS bond_create_proc_entry(bond); #endif diff --git a/drivers/net/bonding/bonding.h b/drivers/net/bonding/bonding.h index d27f377b3eeb..388196980862 100644 --- a/drivers/net/bonding/bonding.h +++ b/drivers/net/bonding/bonding.h @@ -211,6 +211,9 @@ struct bonding { struct bond_params params; struct list_head vlan_list; struct vlan_group *vlgrp; + /* the features the bonding device supports, independently + * of any slaves */ + int bond_features; }; /** From 06c7427021f1cc83703f14659d8405ca773ba1ef Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Tue, 23 Aug 2005 22:06:09 -0700 Subject: [PATCH 133/584] [FIB_TRIE]: Don't ignore negative results from fib_semantic_match When a semantic match occurs either success, not found or an error (for matching unreachable routes/blackholes) is returned. fib_trie ignores the errors and looks for a different matching route. Treat results other than "no match" as success and end lookup. Signed-off-by: Patrick McHardy Signed-off-by: David S. 
Miller --- net/ipv4/fib_trie.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index a701405fab0b..45efd5f4741b 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -1333,9 +1333,9 @@ err:; } static inline int check_leaf(struct trie *t, struct leaf *l, t_key key, int *plen, const struct flowi *flp, - struct fib_result *res, int *err) + struct fib_result *res) { - int i; + int err, i; t_key mask; struct leaf_info *li; struct hlist_head *hhead = &l->list; @@ -1348,18 +1348,18 @@ static inline int check_leaf(struct trie *t, struct leaf *l, t_key key, int *pl if (l->key != (key & mask)) continue; - if (((*err) = fib_semantic_match(&li->falh, flp, res, l->key, mask, i)) == 0) { + if ((err = fib_semantic_match(&li->falh, flp, res, l->key, mask, i)) <= 0) { *plen = i; #ifdef CONFIG_IP_FIB_TRIE_STATS t->stats.semantic_match_passed++; #endif - return 1; + return err; } #ifdef CONFIG_IP_FIB_TRIE_STATS t->stats.semantic_match_miss++; #endif } - return 0; + return 1; } static int @@ -1386,7 +1386,7 @@ fn_trie_lookup(struct fib_table *tb, const struct flowi *flp, struct fib_result /* Just a leaf? */ if (IS_LEAF(n)) { - if (check_leaf(t, (struct leaf *)n, key, &plen, flp, res, &ret)) + if ((ret = check_leaf(t, (struct leaf *)n, key, &plen, flp, res)) <= 0) goto found; goto failed; } @@ -1508,7 +1508,7 @@ fn_trie_lookup(struct fib_table *tb, const struct flowi *flp, struct fib_result continue; } if (IS_LEAF(n)) { - if (check_leaf(t, (struct leaf *)n, key, &plen, flp, res, &ret)) + if ((ret = check_leaf(t, (struct leaf *)n, key, &plen, flp, res)) <= 0) goto found; } backtrace: From 40bb0c3ef52d872de348e10000eb5432a43a147d Mon Sep 17 00:00:00 2001 From: Andreas Schwab Date: Wed, 24 Aug 2005 17:36:21 +0200 Subject: [PATCH 134/584] [PATCH] m68k: fix broken macros causing compile errors Add parens around macro parameters. 
Signed-off-by: Andreas Schwab Signed-off-by: Linus Torvalds --- include/asm-m68k/page.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/include/asm-m68k/page.h b/include/asm-m68k/page.h index 99a516709210..206313e2a817 100644 --- a/include/asm-m68k/page.h +++ b/include/asm-m68k/page.h @@ -138,13 +138,13 @@ extern unsigned long m68k_memoffset; #define __pa(vaddr) ((unsigned long)(vaddr)+m68k_memoffset) #define __va(paddr) ((void *)((unsigned long)(paddr)-m68k_memoffset)) #else -#define __pa(vaddr) virt_to_phys((void *)vaddr) -#define __va(paddr) phys_to_virt((unsigned long)paddr) +#define __pa(vaddr) virt_to_phys((void *)(vaddr)) +#define __va(paddr) phys_to_virt((unsigned long)(paddr)) #endif #else /* !CONFIG_SUN3 */ /* This #define is a horrible hack to suppress lots of warnings. --m */ -#define __pa(x) ___pa((unsigned long)x) +#define __pa(x) ___pa((unsigned long)(x)) static inline unsigned long ___pa(unsigned long x) { if(x == 0) From 3725822f7c7134249addcd4549aff086950c8090 Mon Sep 17 00:00:00 2001 From: Paul Jackson Date: Wed, 24 Aug 2005 04:15:10 -0700 Subject: [PATCH 135/584] [PATCH] cpu_exclusive sched domains build fix As reported by Paul Mackerras, the previous patch "cpu_exclusive sched domains fix" broke the ppc64 build with CONFIG_CPUSET, yielding error messages: kernel/cpuset.c: In function 'update_cpu_domains': kernel/cpuset.c:648: error: invalid lvalue in unary '&' kernel/cpuset.c:648: error: invalid lvalue in unary '&' On some architectures, node_to_cpumask() is a function, returning a cpumask_t. But the for_each_cpu_mask() requires an lvalue mask. The following patch fixes this build failure by making a copy of the cpumask_t on the stack.
Signed-off-by: Paul Jackson Signed-off-by: Linus Torvalds --- kernel/cpuset.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/kernel/cpuset.c b/kernel/cpuset.c index e0d296c5b302..d7f4d0c95737 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c @@ -645,7 +645,9 @@ static void update_cpu_domains(struct cpuset *cur) int i, j; for_each_cpu_mask(i, cur->cpus_allowed) { - for_each_cpu_mask(j, node_to_cpumask(cpu_to_node(i))) { + cpumask_t mask = node_to_cpumask(cpu_to_node(i)); + + for_each_cpu_mask(j, mask) { if (!cpu_isset(j, cur->cpus_allowed)) return; } From 5477d30e841e0f707fd2daddc8cb6949858476ee Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Wed, 24 Aug 2005 14:18:53 +1000 Subject: [PATCH 136/584] [PATCH] ppc64: Export machine_power_off for therm_pm72 module This patch puts back the export of machine_power_off() that was removed by some janitor as it's used for emergency shutdown by the G5 thermal control driver. Whether that driver should use kernel_power_off() instead is debatable and a post-2.6.13 decision. In the meantime, please commit this patch, which fixes the driver for now. Signed-off-by: Benjamin Herrenschmidt Signed-off-by: Linus Torvalds --- arch/ppc64/kernel/setup.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/ppc64/kernel/setup.c b/arch/ppc64/kernel/setup.c index 687e85595208..e9c24d2dbd91 100644 --- a/arch/ppc64/kernel/setup.c +++ b/arch/ppc64/kernel/setup.c @@ -706,6 +706,8 @@ void machine_power_off(void) local_irq_disable(); while (1) ; } +/* Used by the G5 thermal driver */ +EXPORT_SYMBOL_GPL(machine_power_off); void machine_halt(void) { From b7561524765a30334bf31c56b523aeb3c1a04c7d Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Wed, 24 Aug 2005 07:37:37 +0200 Subject: [PATCH 137/584] [PATCH] x86_64: update defconfig - reenable fusion I mistakenly disabled fusion support in an earlier update. Fusion is commonly used on many x86-64 systems, so this was a problem. This patch fixes that.
Signed-off-by: And Kleen Signed-off-by: Linus Torvalds --- arch/x86_64/defconfig | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/arch/x86_64/defconfig b/arch/x86_64/defconfig index 776f3c866b70..b97a61e1c71c 100644 --- a/arch/x86_64/defconfig +++ b/arch/x86_64/defconfig @@ -1,7 +1,7 @@ # # Automatically generated make config: don't edit -# Linux kernel version: 2.6.13-rc3 -# Fri Jul 22 16:47:31 2005 +# Linux kernel version: 2.6.13-rc6-git3 +# Fri Aug 12 16:40:34 2005 # CONFIG_X86_64=y CONFIG_64BIT=y @@ -284,10 +284,6 @@ CONFIG_IPV6=y # Network testing # # CONFIG_NET_PKTGEN is not set -CONFIG_NETPOLL=y -# CONFIG_NETPOLL_RX is not set -# CONFIG_NETPOLL_TRAP is not set -CONFIG_NET_POLL_CONTROLLER=y # CONFIG_HAMRADIO is not set # CONFIG_IRDA is not set # CONFIG_BT is not set @@ -463,6 +459,7 @@ CONFIG_AIC79XX_DEBUG_MASK=0 # CONFIG_MEGARAID_NEWGEN is not set # CONFIG_MEGARAID_LEGACY is not set CONFIG_SCSI_SATA=y +# CONFIG_SCSI_SATA_AHCI is not set # CONFIG_SCSI_SATA_SVW is not set CONFIG_SCSI_ATA_PIIX=y # CONFIG_SCSI_SATA_NV is not set @@ -492,6 +489,7 @@ CONFIG_SCSI_QLA2XXX=y # CONFIG_SCSI_QLA2300 is not set # CONFIG_SCSI_QLA2322 is not set # CONFIG_SCSI_QLA6312 is not set +# CONFIG_SCSI_QLA24XX is not set # CONFIG_SCSI_LPFC is not set # CONFIG_SCSI_DC395x is not set # CONFIG_SCSI_DC390T is not set @@ -512,9 +510,11 @@ CONFIG_BLK_DEV_DM=y # # Fusion MPT device support # -# CONFIG_FUSION is not set -# CONFIG_FUSION_SPI is not set +CONFIG_FUSION=y +CONFIG_FUSION_SPI=y # CONFIG_FUSION_FC is not set +CONFIG_FUSION_MAX_SGE=128 +# CONFIG_FUSION_CTL is not set # # IEEE 1394 (FireWire) support @@ -585,6 +585,7 @@ CONFIG_8139TOO=y # CONFIG_ACENIC is not set # CONFIG_DL2K is not set CONFIG_E1000=y +# CONFIG_E1000_NAPI is not set # CONFIG_NS83820 is not set # CONFIG_HAMACHI is not set # CONFIG_YELLOWFIN is not set @@ -624,6 +625,10 @@ CONFIG_S2IO=m # CONFIG_NET_FC is not set # CONFIG_SHAPER is not set CONFIG_NETCONSOLE=y +CONFIG_NETPOLL=y 
+# CONFIG_NETPOLL_RX is not set +# CONFIG_NETPOLL_TRAP is not set +CONFIG_NET_POLL_CONTROLLER=y # # ISDN subsystem From 41290c14640bc9312bf63202d14ebef075b6171a Mon Sep 17 00:00:00 2001 From: Keith Owens Date: Wed, 24 Aug 2005 16:06:25 +1000 Subject: [PATCH 138/584] [PATCH] Export pcibios_bus_to_resource pcibios_bus_to_resource is exported on all architectures except ia64 and sparc. Add exports for the two missing architectures. Needed when Yenta socket support is compiled as a module. Signed-off-by: Keith Owens Signed-off-by: Linus Torvalds --- arch/ia64/pci/pci.c | 1 + arch/sparc64/kernel/pci.c | 1 + 2 files changed, 2 insertions(+) diff --git a/arch/ia64/pci/pci.c b/arch/ia64/pci/pci.c index 54d9ed444e4a..f9472c50ab42 100644 --- a/arch/ia64/pci/pci.c +++ b/arch/ia64/pci/pci.c @@ -380,6 +380,7 @@ void pcibios_bus_to_resource(struct pci_dev *dev, res->start = region->start + offset; res->end = region->end + offset; } +EXPORT_SYMBOL(pcibios_bus_to_resource); static int __devinit is_valid_resource(struct pci_dev *dev, int idx) { diff --git a/arch/sparc64/kernel/pci.c b/arch/sparc64/kernel/pci.c index bba140d98b1b..f21c993f8856 100644 --- a/arch/sparc64/kernel/pci.c +++ b/arch/sparc64/kernel/pci.c @@ -540,6 +540,7 @@ void pcibios_bus_to_resource(struct pci_dev *pdev, struct resource *res, pbm->parent->resource_adjust(pdev, res, root); } +EXPORT_SYMBOL(pcibios_bus_to_resource); char * __init pcibios_setup(char *str) { From 9c2c38a122cc23d6a09b8004d60a33913683eedf Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Wed, 24 Aug 2005 14:57:54 +0200 Subject: [PATCH 139/584] [PATCH] cfq-iosched.c: minor fixes One critical fix and two minor fixes for 2.6.13-rc7: - Max depth must currently be 2 to allow barriers to function on SCSI - Prefer sync request over async in choosing the next request - Never allow async request to preempt or disturb the "anticipation" for a single cfq process context. This is as-designed, the code right now is buggy in that area. 
Signed-off-by: Jens Axboe Signed-off-by: Linus Torvalds --- drivers/block/cfq-iosched.c | 31 +++++++++++++++++++++---------- 1 file changed, 21 insertions(+), 10 deletions(-) diff --git a/drivers/block/cfq-iosched.c b/drivers/block/cfq-iosched.c index 2435a7c99b2b..cd056e7e64ec 100644 --- a/drivers/block/cfq-iosched.c +++ b/drivers/block/cfq-iosched.c @@ -47,7 +47,7 @@ static int cfq_slice_idle = HZ / 100; /* * disable queueing at the driver/hardware level */ -static int cfq_max_depth = 1; +static int cfq_max_depth = 2; /* * for the hash of cfqq inside the cfqd @@ -385,9 +385,15 @@ cfq_choose_req(struct cfq_data *cfqd, struct cfq_rq *crq1, struct cfq_rq *crq2) return crq2; if (crq2 == NULL) return crq1; - if (cfq_crq_requeued(crq1)) + + if (cfq_crq_requeued(crq1) && !cfq_crq_requeued(crq2)) return crq1; - if (cfq_crq_requeued(crq2)) + else if (cfq_crq_requeued(crq2) && !cfq_crq_requeued(crq1)) + return crq2; + + if (cfq_crq_is_sync(crq1) && !cfq_crq_is_sync(crq2)) + return crq1; + else if (cfq_crq_is_sync(crq2) && !cfq_crq_is_sync(crq1)) return crq2; s1 = crq1->request->sector; @@ -1769,18 +1775,23 @@ static void cfq_crq_enqueued(struct cfq_data *cfqd, struct cfq_queue *cfqq, struct cfq_rq *crq) { - const int sync = cfq_crq_is_sync(crq); + struct cfq_io_context *cic; cfqq->next_crq = cfq_choose_req(cfqd, cfqq->next_crq, crq); - if (sync) { - struct cfq_io_context *cic = crq->io_context; + /* + * we never wait for an async request and we don't allow preemption + * of an async request. 
so just return early + */ + if (!cfq_crq_is_sync(crq)) + return; - cfq_update_io_thinktime(cfqd, cic); - cfq_update_idle_window(cfqd, cfqq, cic); + cic = crq->io_context; - cic->last_queue = jiffies; - } + cfq_update_io_thinktime(cfqd, cic); + cfq_update_idle_window(cfqd, cfqq, cic); + + cic->last_queue = jiffies; if (cfqq == cfqd->active_queue) { /* From 136df52d532af9c19e5cd0e43a54ea4ee2d934fc Mon Sep 17 00:00:00 2001 From: Malli Chilakala Date: Thu, 25 Aug 2005 13:05:41 -0700 Subject: [PATCH 140/584] [PATCH] e100: Do not check Rx packet length against mtu Do not check Rx packet length against mtu - patch from Darren Tucker Signed-off-by: Mallikarjuna R Chilakala Signed-off-by: Ganesh Venkatesan Signed-off-by: John Ronciak Signed-off-by: Jeff Garzik --- drivers/net/e100.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/e100.c b/drivers/net/e100.c index d0fa2448761d..bff135457928 100644 --- a/drivers/net/e100.c +++ b/drivers/net/e100.c @@ -1539,7 +1539,7 @@ static inline int e100_rx_indicate(struct nic *nic, struct rx *rx, /* Don't indicate if hardware indicates errors */ nic->net_stats.rx_dropped++; dev_kfree_skb_any(skb); - } else if(actual_size > nic->netdev->mtu + VLAN_ETH_HLEN) { + } else if(actual_size > ETH_DATA_LEN + VLAN_ETH_HLEN) { /* Don't indicate oversized frames */ nic->rx_over_length_errors++; nic->net_stats.rx_dropped++; From a074fb860846937a4a46dbbf439cbbb2e2ba960c Mon Sep 17 00:00:00 2001 From: Malli Chilakala Date: Thu, 25 Aug 2005 13:05:57 -0700 Subject: [PATCH 141/584] [PATCH] e100: added msleep_interruptible delay added msleep_interruptible delay right before returning from diag_test Signed-off-by: Mallikarjuna R Chilakala Signed-off-by: Ganesh Venkatesan Signed-off-by: John Ronciak Signed-off-by: Jeff Garzik --- drivers/net/e100.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/e100.c b/drivers/net/e100.c index bff135457928..b723f4dca11e 100644 --- a/drivers/net/e100.c +++ b/drivers/net/e100.c @@ 
-2108,6 +2108,8 @@ static void e100_diag_test(struct net_device *netdev, } for(i = 0; i < E100_TEST_LEN; i++) test->flags |= data[i] ? ETH_TEST_FL_FAILED : 0; + + msleep_interruptible(4 * 1000); } static int e100_phys_id(struct net_device *netdev, u32 data) From 996ec3533ae15424e339089a8045ca1c998f8a87 Mon Sep 17 00:00:00 2001 From: Malli Chilakala Date: Thu, 25 Aug 2005 13:06:08 -0700 Subject: [PATCH 142/584] [PATCH] e100: fixed endian bug in xmit_prepare routine Fixed endian bug associated with cb_i bit in xmit_prepare Signed-off-by: Mallikarjuna R Chilakala Signed-off-by: Ganesh Venkatesan Signed-off-by: John Ronciak Signed-off-by: Jeff Garzik --- drivers/net/e100.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/e100.c b/drivers/net/e100.c index b723f4dca11e..e0fcac889a5a 100644 --- a/drivers/net/e100.c +++ b/drivers/net/e100.c @@ -1307,7 +1307,8 @@ static inline void e100_xmit_prepare(struct nic *nic, struct cb *cb, { cb->command = nic->tx_command; /* interrupt every 16 packets regardless of delay */ - if((nic->cbs_avail & ~15) == nic->cbs_avail) cb->command |= cb_i; + if((nic->cbs_avail & ~15) == nic->cbs_avail) + cb->command |= cpu_to_le16(cb_i); cb->u.tcb.tbd_array = cb->dma_addr + offsetof(struct cb, u.tcb.tbd); cb->u.tcb.tcb_byte_count = 0; cb->u.tcb.threshold = nic->tx_threshold; From e6280f26b43775d8fa0c54e50c92491cfccbf738 Mon Sep 17 00:00:00 2001 From: Malli Chilakala Date: Thu, 25 Aug 2005 13:06:23 -0700 Subject: [PATCH 143/584] [PATCH] e100: Increased delay loop for command blocks Increased delay loop for command blocks Signed-off-by: Mallikarjuna R Chilakala Signed-off-by: Ganesh Venkatesan Signed-off-by: John Ronciak Signed-off-by: Jeff Garzik --- drivers/net/e100.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/e100.c b/drivers/net/e100.c index e0fcac889a5a..09448ec48a79 100644 --- a/drivers/net/e100.c +++ b/drivers/net/e100.c @@ -785,6 +785,7 @@ static int e100_eeprom_save(struct 
nic *nic, u16 start, u16 count) } #define E100_WAIT_SCB_TIMEOUT 20000 /* we might have to wait 100ms!!! */ +#define E100_WAIT_SCB_FAST 20 /* delay like the old code */ static inline int e100_exec_cmd(struct nic *nic, u8 cmd, dma_addr_t dma_addr) { unsigned long flags; @@ -798,7 +799,7 @@ static inline int e100_exec_cmd(struct nic *nic, u8 cmd, dma_addr_t dma_addr) if(likely(!readb(&nic->csr->scb.cmd_lo))) break; cpu_relax(); - if(unlikely(i > (E100_WAIT_SCB_TIMEOUT >> 1))) + if(unlikely(i > E100_WAIT_SCB_FAST)) udelay(5); } if(unlikely(i == E100_WAIT_SCB_TIMEOUT)) { From 685fac63f5ca6c5ca06bab641e1a32bbf9287e89 Mon Sep 17 00:00:00 2001 From: Malli Chilakala Date: Thu, 25 Aug 2005 13:06:34 -0700 Subject: [PATCH 144/584] [PATCH] e100: CPU cycle saver microcode Add cpu cycle saver microcode to 8086:{1209/1229} other than ICH devices. Signed-off-by: Mallikarjuna R Chilakala Signed-off-by: Ganesh Venkatesan Signed-off-by: John Ronciak Signed-off-by: Jeff Garzik --- drivers/net/e100.c | 224 +++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 206 insertions(+), 18 deletions(-) diff --git a/drivers/net/e100.c b/drivers/net/e100.c index 09448ec48a79..7e303744dd58 100644 --- a/drivers/net/e100.c +++ b/drivers/net/e100.c @@ -903,8 +903,8 @@ static void mdio_write(struct net_device *netdev, int addr, int reg, int data) static void e100_get_defaults(struct nic *nic) { - struct param_range rfds = { .min = 16, .max = 256, .count = 64 }; - struct param_range cbs = { .min = 64, .max = 256, .count = 64 }; + struct param_range rfds = { .min = 16, .max = 256, .count = 256 }; + struct param_range cbs = { .min = 64, .max = 256, .count = 128 }; pci_read_config_byte(nic->pdev, PCI_REVISION_ID, &nic->rev_id); /* MAC type is encoded as rev ID; exception: ICH is treated as 82559 */ @@ -1007,25 +1007,213 @@ static void e100_configure(struct nic *nic, struct cb *cb, struct sk_buff *skb) c[16], c[17], c[18], c[19], c[20], c[21], c[22], c[23]); } 
+/********************************************************/ +/* Micro code for 8086:1229 Rev 8 */ +/********************************************************/ + +/* Parameter values for the D101M B-step */ +#define D101M_CPUSAVER_TIMER_DWORD 78 +#define D101M_CPUSAVER_BUNDLE_DWORD 65 +#define D101M_CPUSAVER_MIN_SIZE_DWORD 126 + +#define D101M_B_RCVBUNDLE_UCODE \ +{\ +0x00550215, 0xFFFF0437, 0xFFFFFFFF, 0x06A70789, 0xFFFFFFFF, 0x0558FFFF, \ +0x000C0001, 0x00101312, 0x000C0008, 0x00380216, \ +0x0010009C, 0x00204056, 0x002380CC, 0x00380056, \ +0x0010009C, 0x00244C0B, 0x00000800, 0x00124818, \ +0x00380438, 0x00000000, 0x00140000, 0x00380555, \ +0x00308000, 0x00100662, 0x00100561, 0x000E0408, \ +0x00134861, 0x000C0002, 0x00103093, 0x00308000, \ +0x00100624, 0x00100561, 0x000E0408, 0x00100861, \ +0x000C007E, 0x00222C21, 0x000C0002, 0x00103093, \ +0x00380C7A, 0x00080000, 0x00103090, 0x00380C7A, \ +0x00000000, 0x00000000, 0x00000000, 0x00000000, \ +0x0010009C, 0x00244C2D, 0x00010004, 0x00041000, \ +0x003A0437, 0x00044010, 0x0038078A, 0x00000000, \ +0x00100099, 0x00206C7A, 0x0010009C, 0x00244C48, \ +0x00130824, 0x000C0001, 0x00101213, 0x00260C75, \ +0x00041000, 0x00010004, 0x00130826, 0x000C0006, \ +0x002206A8, 0x0013C926, 0x00101313, 0x003806A8, \ +0x00000000, 0x00000000, 0x00000000, 0x00000000, \ +0x00000000, 0x00000000, 0x00000000, 0x00000000, \ +0x00080600, 0x00101B10, 0x00050004, 0x00100826, \ +0x00101210, 0x00380C34, 0x00000000, 0x00000000, \ +0x0021155B, 0x00100099, 0x00206559, 0x0010009C, \ +0x00244559, 0x00130836, 0x000C0000, 0x00220C62, \ +0x000C0001, 0x00101B13, 0x00229C0E, 0x00210C0E, \ +0x00226C0E, 0x00216C0E, 0x0022FC0E, 0x00215C0E, \ +0x00214C0E, 0x00380555, 0x00010004, 0x00041000, \ +0x00278C67, 0x00040800, 0x00018100, 0x003A0437, \ +0x00130826, 0x000C0001, 0x00220559, 0x00101313, \ +0x00380559, 0x00000000, 0x00000000, 0x00000000, \ +0x00000000, 0x00000000, 0x00000000, 0x00000000, \ +0x00000000, 0x00130831, 0x0010090B, 0x00124813, \ +0x000CFF80, 0x002606AB, 
0x00041000, 0x00010004, \ +0x003806A8, 0x00000000, 0x00000000, 0x00000000, \ +} + +/********************************************************/ +/* Micro code for 8086:1229 Rev 9 */ +/********************************************************/ + +/* Parameter values for the D101S */ +#define D101S_CPUSAVER_TIMER_DWORD 78 +#define D101S_CPUSAVER_BUNDLE_DWORD 67 +#define D101S_CPUSAVER_MIN_SIZE_DWORD 128 + +#define D101S_RCVBUNDLE_UCODE \ +{\ +0x00550242, 0xFFFF047E, 0xFFFFFFFF, 0x06FF0818, 0xFFFFFFFF, 0x05A6FFFF, \ +0x000C0001, 0x00101312, 0x000C0008, 0x00380243, \ +0x0010009C, 0x00204056, 0x002380D0, 0x00380056, \ +0x0010009C, 0x00244F8B, 0x00000800, 0x00124818, \ +0x0038047F, 0x00000000, 0x00140000, 0x003805A3, \ +0x00308000, 0x00100610, 0x00100561, 0x000E0408, \ +0x00134861, 0x000C0002, 0x00103093, 0x00308000, \ +0x00100624, 0x00100561, 0x000E0408, 0x00100861, \ +0x000C007E, 0x00222FA1, 0x000C0002, 0x00103093, \ +0x00380F90, 0x00080000, 0x00103090, 0x00380F90, \ +0x00000000, 0x00000000, 0x00000000, 0x00000000, \ +0x0010009C, 0x00244FAD, 0x00010004, 0x00041000, \ +0x003A047E, 0x00044010, 0x00380819, 0x00000000, \ +0x00100099, 0x00206FFD, 0x0010009A, 0x0020AFFD, \ +0x0010009C, 0x00244FC8, 0x00130824, 0x000C0001, \ +0x00101213, 0x00260FF7, 0x00041000, 0x00010004, \ +0x00130826, 0x000C0006, 0x00220700, 0x0013C926, \ +0x00101313, 0x00380700, 0x00000000, 0x00000000, \ +0x00000000, 0x00000000, 0x00000000, 0x00000000, \ +0x00080600, 0x00101B10, 0x00050004, 0x00100826, \ +0x00101210, 0x00380FB6, 0x00000000, 0x00000000, \ +0x002115A9, 0x00100099, 0x002065A7, 0x0010009A, \ +0x0020A5A7, 0x0010009C, 0x002445A7, 0x00130836, \ +0x000C0000, 0x00220FE4, 0x000C0001, 0x00101B13, \ +0x00229F8E, 0x00210F8E, 0x00226F8E, 0x00216F8E, \ +0x0022FF8E, 0x00215F8E, 0x00214F8E, 0x003805A3, \ +0x00010004, 0x00041000, 0x00278FE9, 0x00040800, \ +0x00018100, 0x003A047E, 0x00130826, 0x000C0001, \ +0x002205A7, 0x00101313, 0x003805A7, 0x00000000, \ +0x00000000, 0x00000000, 0x00000000, 0x00000000, \ 
+0x00000000, 0x00000000, 0x00000000, 0x00130831, \ +0x0010090B, 0x00124813, 0x000CFF80, 0x00260703, \ +0x00041000, 0x00010004, 0x00380700 \ +} + +/********************************************************/ +/* Micro code for the 8086:1229 Rev F/10 */ +/********************************************************/ + +/* Parameter values for the D102 E-step */ +#define D102_E_CPUSAVER_TIMER_DWORD 42 +#define D102_E_CPUSAVER_BUNDLE_DWORD 54 +#define D102_E_CPUSAVER_MIN_SIZE_DWORD 46 + +#define D102_E_RCVBUNDLE_UCODE \ +{\ +0x007D028F, 0x0E4204F9, 0x14ED0C85, 0x14FA14E9, 0x0EF70E36, 0x1FFF1FFF, \ +0x00E014B9, 0x00000000, 0x00000000, 0x00000000, \ +0x00E014BD, 0x00000000, 0x00000000, 0x00000000, \ +0x00E014D5, 0x00000000, 0x00000000, 0x00000000, \ +0x00000000, 0x00000000, 0x00000000, 0x00000000, \ +0x00E014C1, 0x00000000, 0x00000000, 0x00000000, \ +0x00000000, 0x00000000, 0x00000000, 0x00000000, \ +0x00000000, 0x00000000, 0x00000000, 0x00000000, \ +0x00000000, 0x00000000, 0x00000000, 0x00000000, \ +0x00E014C8, 0x00000000, 0x00000000, 0x00000000, \ +0x00200600, 0x00E014EE, 0x00000000, 0x00000000, \ +0x0030FF80, 0x00940E46, 0x00038200, 0x00102000, \ +0x00E00E43, 0x00000000, 0x00000000, 0x00000000, \ +0x00300006, 0x00E014FB, 0x00000000, 0x00000000, \ +0x00000000, 0x00000000, 0x00000000, 0x00000000, \ +0x00000000, 0x00000000, 0x00000000, 0x00000000, \ +0x00000000, 0x00000000, 0x00000000, 0x00000000, \ +0x00906E41, 0x00800E3C, 0x00E00E39, 0x00000000, \ +0x00906EFD, 0x00900EFD, 0x00E00EF8, 0x00000000, \ +0x00000000, 0x00000000, 0x00000000, 0x00000000, \ +0x00000000, 0x00000000, 0x00000000, 0x00000000, \ +0x00000000, 0x00000000, 0x00000000, 0x00000000, \ +0x00000000, 0x00000000, 0x00000000, 0x00000000, \ +0x00000000, 0x00000000, 0x00000000, 0x00000000, \ +0x00000000, 0x00000000, 0x00000000, 0x00000000, \ +0x00000000, 0x00000000, 0x00000000, 0x00000000, \ +0x00000000, 0x00000000, 0x00000000, 0x00000000, \ +0x00000000, 0x00000000, 0x00000000, 0x00000000, \ +0x00000000, 0x00000000, 
0x00000000, 0x00000000, \ +0x00000000, 0x00000000, 0x00000000, 0x00000000, \ +0x00000000, 0x00000000, 0x00000000, 0x00000000, \ +0x00000000, 0x00000000, 0x00000000, 0x00000000, \ +0x00000000, 0x00000000, 0x00000000, 0x00000000, \ +} + static void e100_load_ucode(struct nic *nic, struct cb *cb, struct sk_buff *skb) { - int i; - static const u32 ucode[UCODE_SIZE] = { - /* NFS packets are misinterpreted as TCO packets and - * incorrectly routed to the BMC over SMBus. This - * microcode patch checks the fragmented IP bit in the - * NFS/UDP header to distinguish between NFS and TCO. */ - 0x0EF70E36, 0x1FFF1FFF, 0x1FFF1FFF, 0x1FFF1FFF, 0x1FFF1FFF, - 0x1FFF1FFF, 0x00906E41, 0x00800E3C, 0x00E00E39, 0x00000000, - 0x00906EFD, 0x00900EFD, 0x00E00EF8, - }; +/* *INDENT-OFF* */ + static struct { + u32 ucode[UCODE_SIZE + 1]; + u8 mac; + u8 timer_dword; + u8 bundle_dword; + u8 min_size_dword; + } ucode_opts[] = { + { D101M_B_RCVBUNDLE_UCODE, + mac_82559_D101M, + D101M_CPUSAVER_TIMER_DWORD, + D101M_CPUSAVER_BUNDLE_DWORD, + D101M_CPUSAVER_MIN_SIZE_DWORD }, + { D101S_RCVBUNDLE_UCODE, + mac_82559_D101S, + D101S_CPUSAVER_TIMER_DWORD, + D101S_CPUSAVER_BUNDLE_DWORD, + D101S_CPUSAVER_MIN_SIZE_DWORD }, + { D102_E_RCVBUNDLE_UCODE, + mac_82551_F, + D102_E_CPUSAVER_TIMER_DWORD, + D102_E_CPUSAVER_BUNDLE_DWORD, + D102_E_CPUSAVER_MIN_SIZE_DWORD }, + { D102_E_RCVBUNDLE_UCODE, + mac_82551_10, + D102_E_CPUSAVER_TIMER_DWORD, + D102_E_CPUSAVER_BUNDLE_DWORD, + D102_E_CPUSAVER_MIN_SIZE_DWORD }, + { {0}, 0, 0, 0, 0} + }, *opts; +/* *INDENT-ON* */ - if(nic->mac == mac_82551_F || nic->mac == mac_82551_10) { - for(i = 0; i < UCODE_SIZE; i++) - cb->u.ucode[i] = cpu_to_le32(ucode[i]); - cb->command = cpu_to_le16(cb_ucode); - } else - cb->command = cpu_to_le16(cb_nop); +#define BUNDLESMALL 1 +#define BUNDLEMAX 50 +#define INTDELAY 15000 + + opts = ucode_opts; + + /* do not load u-code for ICH devices */ + if (nic->flags & ich) + return; + + /* Search for ucode match against h/w rev_id */ + while (opts->mac) { 
+ if (nic->mac == opts->mac) { + int i; + u32 *ucode = opts->ucode; + + /* Insert user-tunable settings */ + ucode[opts->timer_dword] &= 0xFFFF0000; + ucode[opts->timer_dword] |= + (u16) INTDELAY; + ucode[opts->bundle_dword] &= 0xFFFF0000; + ucode[opts->bundle_dword] |= (u16) BUNDLEMAX; + ucode[opts->min_size_dword] &= 0xFFFF0000; + ucode[opts->min_size_dword] |= + (BUNDLESMALL) ? 0xFFFF : 0xFF80; + + for(i = 0; i < UCODE_SIZE; i++) + cb->u.ucode[i] = cpu_to_le32(ucode[i]); + cb->command = cpu_to_le16(cb_ucode); + return; + } + opts++; + } + + cb->command = cpu_to_le16(cb_nop); } static void e100_setup_iaaddr(struct nic *nic, struct cb *cb, From 611494dccacb3e42f55359df74d604b67312598b Mon Sep 17 00:00:00 2001 From: Malli Chilakala Date: Thu, 25 Aug 2005 13:06:52 -0700 Subject: [PATCH 145/584] [PATCH] e100: Driver version, white space, comments & other Driver version, white space, comments & other Signed-off-by: Mallikarjuna R Chilakala Signed-off-by: Ganesh Venkatesan Signed-off-by: John Ronciak Signed-off-by: Jeff Garzik --- drivers/net/e100.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/net/e100.c b/drivers/net/e100.c index 7e303744dd58..25cc20e415da 100644 --- a/drivers/net/e100.c +++ b/drivers/net/e100.c @@ -1,7 +1,7 @@ /******************************************************************************* - Copyright(c) 1999 - 2004 Intel Corporation. All rights reserved. + Copyright(c) 1999 - 2005 Intel Corporation. All rights reserved. 
This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free @@ -156,7 +156,7 @@ #define DRV_NAME "e100" #define DRV_EXT "-NAPI" -#define DRV_VERSION "3.4.8-k2"DRV_EXT +#define DRV_VERSION "3.4.14-k2"DRV_EXT #define DRV_DESCRIPTION "Intel(R) PRO/100 Network Driver" #define DRV_COPYRIGHT "Copyright(c) 1999-2005 Intel Corporation" #define PFX DRV_NAME ": " @@ -1504,7 +1504,7 @@ static inline void e100_xmit_prepare(struct nic *nic, struct cb *cb, cb->u.tcb.tbd_count = 1; cb->u.tcb.tbd.buf_addr = cpu_to_le32(pci_map_single(nic->pdev, skb->data, skb->len, PCI_DMA_TODEVICE)); - // check for mapping failure? + /* check for mapping failure? */ cb->u.tcb.tbd.size = cpu_to_le16(skb->len); } @@ -1896,6 +1896,7 @@ static int e100_poll(struct net_device *netdev, int *budget) static void e100_netpoll(struct net_device *netdev) { struct nic *nic = netdev_priv(netdev); + e100_disable_irq(nic); e100_intr(nic->pdev->irq, netdev, NULL); e100_tx_clean(nic); From d4ef16088913002255eab9958fff4e98b3b507d6 Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Thu, 25 Aug 2005 15:31:41 -0700 Subject: [PATCH 146/584] [TG3]: Fix ethtool loopback test lockup The tg3_abort_hw() call in tg3_test_loopback() is causing lockups on some devices. tg3_abort_hw() disables the memory arbiter, causing tg3_reset_hw() to hang when it tries to write the pre-reset signature. tg3_abort_hw() should only be called after the pre-reset signature has been written. This is all done in tg3_reset_hw() so the tg3_abort_hw() call is unnecessary and can be removed. [ Also bump driver version and release date. -DaveM ] Signed-off-by: Michael Chan Signed-off-by: David S. 
Miller --- drivers/net/tg3.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/net/tg3.c b/drivers/net/tg3.c index 01419aff333e..6d4ab1e333b5 100644 --- a/drivers/net/tg3.c +++ b/drivers/net/tg3.c @@ -66,8 +66,8 @@ #define DRV_MODULE_NAME "tg3" #define PFX DRV_MODULE_NAME ": " -#define DRV_MODULE_VERSION "3.36" -#define DRV_MODULE_RELDATE "August 19, 2005" +#define DRV_MODULE_VERSION "3.37" +#define DRV_MODULE_RELDATE "August 25, 2005" #define TG3_DEF_MAC_MODE 0 #define TG3_DEF_RX_MODE 0 @@ -7865,8 +7865,6 @@ static int tg3_test_loopback(struct tg3 *tp) err = -EIO; - tg3_abort_hw(tp, 1); - tg3_reset_hw(tp); mac_mode = (tp->mac_mode & ~MAC_MODE_PORT_MODE_MASK) | From b8f6153ee421014f42b620238f4203a4106db309 Mon Sep 17 00:00:00 2001 From: Jeff Garzik Date: Thu, 25 Aug 2005 22:01:20 -0400 Subject: [PATCH 147/584] libata: fix EH locking Wrap ata_qc_complete() calls in EH context in spinlocks, to prevent races (mainly in ATAPI code paths). --- drivers/scsi/ahci.c | 7 ++++++- drivers/scsi/libata-core.c | 14 ++++++++++++-- drivers/scsi/sata_promise.c | 5 +++++ drivers/scsi/sata_sx4.c | 5 +++++ 4 files changed, 28 insertions(+), 3 deletions(-) diff --git a/drivers/scsi/ahci.c b/drivers/scsi/ahci.c index 348493982b56..841f4e2cfe08 100644 --- a/drivers/scsi/ahci.c +++ b/drivers/scsi/ahci.c @@ -586,12 +586,16 @@ static void ahci_intr_error(struct ata_port *ap, u32 irq_stat) static void ahci_eng_timeout(struct ata_port *ap) { - void *mmio = ap->host_set->mmio_base; + struct ata_host_set *host_set = ap->host_set; + void *mmio = host_set->mmio_base; void *port_mmio = ahci_port_base(mmio, ap->port_no); struct ata_queued_cmd *qc; + unsigned long flags; DPRINTK("ENTER\n"); + spin_lock_irqsave(&host_set->lock, flags); + ahci_intr_error(ap, readl(port_mmio + PORT_IRQ_STAT)); qc = ata_qc_from_tag(ap, ap->active_tag); @@ -609,6 +613,7 @@ static void ahci_eng_timeout(struct ata_port *ap) ata_qc_complete(qc, ATA_ERR); } + 
spin_unlock_irqrestore(&host_set->lock, flags); } static inline int ahci_host_intr(struct ata_port *ap, struct ata_queued_cmd *qc) diff --git a/drivers/scsi/libata-core.c b/drivers/scsi/libata-core.c index 157a3e914cb7..ec7bff73ae18 100644 --- a/drivers/scsi/libata-core.c +++ b/drivers/scsi/libata-core.c @@ -2388,12 +2388,13 @@ static int ata_sg_setup(struct ata_queued_cmd *qc) void ata_poll_qc_complete(struct ata_queued_cmd *qc, u8 drv_stat) { struct ata_port *ap = qc->ap; + unsigned long flags; - spin_lock_irq(&ap->host_set->lock); + spin_lock_irqsave(&ap->host_set->lock, flags); ap->flags &= ~ATA_FLAG_NOINTR; ata_irq_on(ap); ata_qc_complete(qc, drv_stat); - spin_unlock_irq(&ap->host_set->lock); + spin_unlock_irqrestore(&ap->host_set->lock, flags); } /** @@ -2973,8 +2974,10 @@ static void atapi_request_sense(struct ata_port *ap, struct ata_device *dev, static void ata_qc_timeout(struct ata_queued_cmd *qc) { struct ata_port *ap = qc->ap; + struct ata_host_set *host_set = ap->host_set; struct ata_device *dev = qc->dev; u8 host_stat = 0, drv_stat; + unsigned long flags; DPRINTK("ENTER\n"); @@ -2985,7 +2988,9 @@ static void ata_qc_timeout(struct ata_queued_cmd *qc) if (!(cmd->eh_eflags & SCSI_EH_CANCEL_CMD)) { /* finish completing original command */ + spin_lock_irqsave(&host_set->lock, flags); __ata_qc_complete(qc); + spin_unlock_irqrestore(&host_set->lock, flags); atapi_request_sense(ap, dev, cmd); @@ -2996,6 +3001,8 @@ static void ata_qc_timeout(struct ata_queued_cmd *qc) } } + spin_lock_irqsave(&host_set->lock, flags); + /* hack alert! We cannot use the supplied completion * function from inside the ->eh_strategy_handler() thread. 
* libata is the only user of ->eh_strategy_handler() in @@ -3029,6 +3036,9 @@ static void ata_qc_timeout(struct ata_queued_cmd *qc) ata_qc_complete(qc, drv_stat); break; } + + spin_unlock_irqrestore(&host_set->lock, flags); + out: DPRINTK("EXIT\n"); } diff --git a/drivers/scsi/sata_promise.c b/drivers/scsi/sata_promise.c index defcc1fb3f16..b8dc49fed769 100644 --- a/drivers/scsi/sata_promise.c +++ b/drivers/scsi/sata_promise.c @@ -325,11 +325,15 @@ static void pdc_qc_prep(struct ata_queued_cmd *qc) static void pdc_eng_timeout(struct ata_port *ap) { + struct ata_host_set *host_set = ap->host_set; u8 drv_stat; struct ata_queued_cmd *qc; + unsigned long flags; DPRINTK("ENTER\n"); + spin_lock_irqsave(&host_set->lock, flags); + qc = ata_qc_from_tag(ap, ap->active_tag); if (!qc) { printk(KERN_ERR "ata%u: BUG: timeout without command\n", @@ -363,6 +367,7 @@ static void pdc_eng_timeout(struct ata_port *ap) } out: + spin_unlock_irqrestore(&host_set->lock, flags); DPRINTK("EXIT\n"); } diff --git a/drivers/scsi/sata_sx4.c b/drivers/scsi/sata_sx4.c index e2db499f22dd..a20d4285090a 100644 --- a/drivers/scsi/sata_sx4.c +++ b/drivers/scsi/sata_sx4.c @@ -848,10 +848,14 @@ static irqreturn_t pdc20621_interrupt (int irq, void *dev_instance, struct pt_re static void pdc_eng_timeout(struct ata_port *ap) { u8 drv_stat; + struct ata_host_set *host_set = ap->host_set; struct ata_queued_cmd *qc; + unsigned long flags; DPRINTK("ENTER\n"); + spin_lock_irqsave(&host_set->lock, flags); + qc = ata_qc_from_tag(ap, ap->active_tag); if (!qc) { printk(KERN_ERR "ata%u: BUG: timeout without command\n", @@ -885,6 +889,7 @@ static void pdc_eng_timeout(struct ata_port *ap) } out: + spin_unlock_irqrestore(&host_set->lock, flags); DPRINTK("EXIT\n"); } From 3690b6c124fbc7259634f3b80d92a6d9fe51ec79 Mon Sep 17 00:00:00 2001 From: Francois Romieu Date: Fri, 26 Aug 2005 00:30:37 +0200 Subject: [PATCH 148/584] [PATCH] sis190: complete the mii probe before registering the netdevice The userspace must not be 
able to issue ethtool command and manage the mii before it is completely initialized. Avoid some pesky "eth%d" messages. Signed-off-by: Arnaud Patard Signed-off-by: Francois Romieu Signed-off-by: Jeff Garzik --- drivers/net/sis190.c | 25 +++++++++++++------------ 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/drivers/net/sis190.c b/drivers/net/sis190.c index 915ff009c295..bf3440aa6c24 100644 --- a/drivers/net/sis190.c +++ b/drivers/net/sis190.c @@ -1271,7 +1271,7 @@ static u16 sis190_default_phy(struct net_device *dev) mii_if->phy_id = phy_default->phy_id; net_probe(tp, KERN_INFO "%s: Using transceiver at address %d as default.\n", - dev->name, mii_if->phy_id); + pci_name(tp->pci_dev), mii_if->phy_id); } status = mdio_read(ioaddr, mii_if->phy_id, MII_BMCR); @@ -1312,8 +1312,8 @@ static void sis190_init_phy(struct net_device *dev, struct sis190_private *tp, phy->type = UNKNOWN; net_probe(tp, KERN_INFO "%s: %s transceiver at address %d.\n", - dev->name, (phy->type == UNKNOWN) ? "Unknown PHY" : p->name, - phy_id); + pci_name(tp->pci_dev), + (phy->type == UNKNOWN) ? 
"Unknown PHY" : p->name, phy_id); } /** @@ -1358,7 +1358,7 @@ static int __devinit sis190_mii_probe(struct net_device *dev) if (list_empty(&tp->first_phy)) { net_probe(tp, KERN_INFO "%s: No MII transceivers found!\n", - dev->name); + pci_name(tp->pci_dev)); rc = -EIO; goto out; } @@ -1780,15 +1780,16 @@ static int __devinit sis190_init_one(struct pci_dev *pdev, dev->base_addr = (unsigned long) 0xdead; spin_lock_init(&tp->lock); - rc = register_netdev(dev); - if (rc < 0) - goto err_release_board; - - pci_set_drvdata(pdev, dev); rc = sis190_mii_probe(dev); if (rc < 0) - goto err_unregister_dev; + goto err_release_board; + + rc = register_netdev(dev); + if (rc < 0) + goto err_remove_mii; + + pci_set_drvdata(pdev, dev); net_probe(tp, KERN_INFO "%s: %s at %p (IRQ: %d), " "%2.2x:%2.2x:%2.2x:%2.2x:%2.2x:%2.2x\n", @@ -1804,8 +1805,8 @@ static int __devinit sis190_init_one(struct pci_dev *pdev, out: return rc; -err_unregister_dev: - unregister_netdev(dev); +err_remove_mii: + sis190_mii_remove(dev); err_release_board: sis190_release_board(pdev); goto out; From 26aad69e3dd854abe9028ca873fb40b410a39dd7 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Fri, 26 Aug 2005 10:40:10 -0700 Subject: [PATCH 149/584] Only pre-allocate 256 bytes of cardbio IO range It may seem small, but most cards need much less, if any, and this not only makes the code adhere to the comment, it seems to fix a boot-time lockup on a ThinkPad 380XD laptop reported by Tero Roponen Signed-off-by: Linus Torvalds --- drivers/pci/setup-bus.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/pci/setup-bus.c b/drivers/pci/setup-bus.c index a2eebc6eaacc..6d864c502a1f 100644 --- a/drivers/pci/setup-bus.c +++ b/drivers/pci/setup-bus.c @@ -40,7 +40,7 @@ * FIXME: IO should be max 256 bytes. However, since we may * have a P2P bridge below a cardbus bridge, we need 4K. 
*/ -#define CARDBUS_IO_SIZE (4096) +#define CARDBUS_IO_SIZE (256) #define CARDBUS_MEM_SIZE (32*1024*1024) static void __devinit From 755528c860b05fcecda1c88a2bdaffcb50760a7f Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Fri, 26 Aug 2005 10:49:22 -0700 Subject: [PATCH 150/584] Ignore disabled ROM resources at setup Writing even a disabled value seems to mess up some matrox graphics cards. It may be a card-related issue, but we may also be writing reserved low bits in the result. This was a fall-out of switching x86 over to the generic PCI resource allocation code, and needs more debugging. In particular, the old x86 code defaulted to not doing any resource allocations at all for ROM resources. In the meantime, this has been reported to make X happier by Helge Hafting . Signed-off-by: Linus Torvalds --- drivers/pci/setup-res.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/pci/setup-res.c b/drivers/pci/setup-res.c index 84eedc965688..5598b4714f77 100644 --- a/drivers/pci/setup-res.c +++ b/drivers/pci/setup-res.c @@ -53,7 +53,9 @@ pci_update_resource(struct pci_dev *dev, struct resource *res, int resno) if (resno < 6) { reg = PCI_BASE_ADDRESS_0 + 4 * resno; } else if (resno == PCI_ROM_RESOURCE) { - new |= res->flags & IORESOURCE_ROM_ENABLE; + if (!(res->flags & IORESOURCE_ROM_ENABLE)) + return; + new |= PCI_ROM_ADDRESS_ENABLE; reg = dev->rom_base_reg; } else { /* Hmm, non-standard resource. */ From 7c657f2f25d50c602df9291bc6242b98fc090759 Mon Sep 17 00:00:00 2001 From: John McCutchan Date: Fri, 26 Aug 2005 14:02:04 -0400 Subject: [PATCH 151/584] [PATCH] Document idr_get_new_above() semantics, update inotify There is an off-by-one problem with idr_get_new_above. The comment and function name suggest that it will return an id > starting_id, but it actually returned an id >= starting_id, and kernel callers other than inotify treated it as such. The patch below fixes the comment, and fixes inotify's usage.
The function name still doesn't match the behaviour, but it never did. Signed-off-by: John McCutchan Signed-off-by: Linus Torvalds --- fs/inotify.c | 2 +- lib/idr.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/inotify.c b/fs/inotify.c index 868901b1e779..2e4e2a57708c 100644 --- a/fs/inotify.c +++ b/fs/inotify.c @@ -353,7 +353,7 @@ static int inotify_dev_get_wd(struct inotify_device *dev, do { if (unlikely(!idr_pre_get(&dev->idr, GFP_KERNEL))) return -ENOSPC; - ret = idr_get_new_above(&dev->idr, watch, dev->last_wd, &watch->wd); + ret = idr_get_new_above(&dev->idr, watch, dev->last_wd+1, &watch->wd); } while (ret == -EAGAIN); return ret; diff --git a/lib/idr.c b/lib/idr.c index c5be889de449..6415d053e2bf 100644 --- a/lib/idr.c +++ b/lib/idr.c @@ -207,7 +207,7 @@ build_up: } /** - * idr_get_new_above - allocate new idr entry above a start id + * idr_get_new_above - allocate new idr entry above or equal to a start id * @idp: idr handle * @ptr: pointer you want associated with the ide * @start_id: id to start search at From d7a60d50d7713b65a3fd88f11d5717b83a6b6a97 Mon Sep 17 00:00:00 2001 From: Paolo 'Blaisorblade' Giarrusso Date: Fri, 26 Aug 2005 16:57:44 +0200 Subject: [PATCH 152/584] [PATCH] Fixup symlink function pointers for hppfs [for 2.6.13] Update hppfs for the symlink functions prototype change. Yes, I know the code I leave there is still _bogus_, see next patch for this. 
Signed-off-by: Paolo 'Blaisorblade' Giarrusso Signed-off-by: Linus Torvalds --- fs/hppfs/hppfs_kern.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/fs/hppfs/hppfs_kern.c b/fs/hppfs/hppfs_kern.c index ff150fedb981..385d440fa234 100644 --- a/fs/hppfs/hppfs_kern.c +++ b/fs/hppfs/hppfs_kern.c @@ -679,25 +679,25 @@ static int hppfs_readlink(struct dentry *dentry, char *buffer, int buflen) return(n); } -static int hppfs_follow_link(struct dentry *dentry, struct nameidata *nd) +static void* hppfs_follow_link(struct dentry *dentry, struct nameidata *nd) { struct file *proc_file; struct dentry *proc_dentry; - int (*follow_link)(struct dentry *, struct nameidata *); - int err, n; + void * (*follow_link)(struct dentry *, struct nameidata *); + void *ret; proc_dentry = HPPFS_I(dentry->d_inode)->proc_dentry; proc_file = dentry_open(dget(proc_dentry), NULL, O_RDONLY); - err = PTR_ERR(proc_dentry); - if(IS_ERR(proc_dentry)) - return(err); + + if (IS_ERR(proc_dentry)) + return proc_dentry; follow_link = proc_dentry->d_inode->i_op->follow_link; - n = (*follow_link)(proc_dentry, nd); + ret = (*follow_link)(proc_dentry, nd); fput(proc_file); - return(n); + return ret; } static struct inode_operations hppfs_dir_iops = { From fd589e0b662c1ea8cfb1e0d20d60a2510979865b Mon Sep 17 00:00:00 2001 From: Paolo 'Blaisorblade' Giarrusso Date: Fri, 26 Aug 2005 16:57:53 +0200 Subject: [PATCH 153/584] [PATCH] hppfs: fix symlink error path While touching this code I noticed the error handling is bogus, so I fixed it up. I've removed the IS_ERR(proc_dentry) check, which will never trigger and is clearly a typo: we must check proc_file instead. 
Signed-off-by: Paolo 'Blaisorblade' Giarrusso Signed-off-by: Linus Torvalds --- fs/hppfs/hppfs_kern.c | 24 +++++++++--------------- 1 file changed, 9 insertions(+), 15 deletions(-) diff --git a/fs/hppfs/hppfs_kern.c b/fs/hppfs/hppfs_kern.c index 385d440fa234..52930915bad8 100644 --- a/fs/hppfs/hppfs_kern.c +++ b/fs/hppfs/hppfs_kern.c @@ -38,7 +38,7 @@ struct hppfs_inode_info { static inline struct hppfs_inode_info *HPPFS_I(struct inode *inode) { - return(list_entry(inode, struct hppfs_inode_info, vfs_inode)); + return container_of(inode, struct hppfs_inode_info, vfs_inode); } #define HPPFS_SUPER_MAGIC 0xb00000ee @@ -662,38 +662,32 @@ static int hppfs_readlink(struct dentry *dentry, char *buffer, int buflen) { struct file *proc_file; struct dentry *proc_dentry; - int (*readlink)(struct dentry *, char *, int); - int err, n; + int ret; proc_dentry = HPPFS_I(dentry->d_inode)->proc_dentry; proc_file = dentry_open(dget(proc_dentry), NULL, O_RDONLY); - err = PTR_ERR(proc_dentry); - if(IS_ERR(proc_dentry)) - return(err); + if (IS_ERR(proc_file)) + return PTR_ERR(proc_file); - readlink = proc_dentry->d_inode->i_op->readlink; - n = (*readlink)(proc_dentry, buffer, buflen); + ret = proc_dentry->d_inode->i_op->readlink(proc_dentry, buffer, buflen); fput(proc_file); - return(n); + return ret; } static void* hppfs_follow_link(struct dentry *dentry, struct nameidata *nd) { struct file *proc_file; struct dentry *proc_dentry; - void * (*follow_link)(struct dentry *, struct nameidata *); void *ret; proc_dentry = HPPFS_I(dentry->d_inode)->proc_dentry; proc_file = dentry_open(dget(proc_dentry), NULL, O_RDONLY); + if (IS_ERR(proc_file)) + return proc_file; - if (IS_ERR(proc_dentry)) - return proc_dentry; - - follow_link = proc_dentry->d_inode->i_op->follow_link; - ret = (*follow_link)(proc_dentry, nd); + ret = proc_dentry->d_inode->i_op->follow_link(proc_dentry, nd); fput(proc_file); From 31433ea369d250ad011577eee6110d3efcb9d861 Mon Sep 17 00:00:00 2001 From: Alan Cox Date: Fri, 26 Aug 
2005 15:56:47 +0100 Subject: [PATCH 154/584] [PATCH] libata: typo You spelt heuristic wrongly. Also reformatted to 80 columns, ignore the diff and fix the typo if you prefer that. Signed-off-by: Jeff Garzik --- drivers/scsi/libata-core.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/drivers/scsi/libata-core.c b/drivers/scsi/libata-core.c index ec7bff73ae18..19b45c2bfa26 100644 --- a/drivers/scsi/libata-core.c +++ b/drivers/scsi/libata-core.c @@ -2459,11 +2459,10 @@ static void ata_pio_complete (struct ata_port *ap) u8 drv_stat; /* - * This is purely hueristic. This is a fast path. - * Sometimes when we enter, BSY will be cleared in - * a chk-status or two. If not, the drive is probably seeking - * or something. Snooze for a couple msecs, then - * chk-status again. If still busy, fall back to + * This is purely heuristic. This is a fast path. Sometimes when + * we enter, BSY will be cleared in a chk-status or two. If not, + * the drive is probably seeking or something. Snooze for a couple + * msecs, then chk-status again. If still busy, fall back to * PIO_ST_POLL state. */ drv_stat = ata_busy_wait(ap, ATA_BUSY | ATA_DRQ, 10); From b73fc89f6d1f84326e5e897ad249d00a9f218fd7 Mon Sep 17 00:00:00 2001 From: Alan Cox Date: Fri, 26 Aug 2005 16:03:19 +0100 Subject: [PATCH 155/584] [PATCH] libata: regularize dma_start/stop arguments Needed for a few PATA drivers. Also fix up a wrong comment. 
Signed-off-by: Jeff Garzik --- drivers/scsi/libata-core.c | 11 ++++++----- drivers/scsi/sata_qstor.c | 4 ++-- include/linux/libata.h | 5 +++-- 3 files changed, 11 insertions(+), 9 deletions(-) diff --git a/drivers/scsi/libata-core.c b/drivers/scsi/libata-core.c index 19b45c2bfa26..f15a07f9f471 100644 --- a/drivers/scsi/libata-core.c +++ b/drivers/scsi/libata-core.c @@ -3017,7 +3017,7 @@ static void ata_qc_timeout(struct ata_queued_cmd *qc) host_stat = ap->ops->bmdma_status(ap); /* before we do anything else, clear DMA-Start bit */ - ap->ops->bmdma_stop(ap); + ap->ops->bmdma_stop(qc); /* fall through */ @@ -3399,7 +3399,7 @@ static void ata_bmdma_setup_mmio (struct ata_queued_cmd *qc) } /** - * ata_bmdma_start - Start a PCI IDE BMDMA transaction + * ata_bmdma_start_mmio - Start a PCI IDE BMDMA transaction * @qc: Info associated with this ATA transaction. * * LOCKING: @@ -3570,7 +3570,7 @@ u8 ata_bmdma_status(struct ata_port *ap) /** * ata_bmdma_stop - Stop PCI IDE BMDMA transfer - * @ap: Port associated with this ATA transaction. 
+ * @qc: Command we are ending DMA for * * Clears the ATA_DMA_START flag in the dma control register * @@ -3580,8 +3580,9 @@ u8 ata_bmdma_status(struct ata_port *ap) * spin_lock_irqsave(host_set lock) */ -void ata_bmdma_stop(struct ata_port *ap) +void ata_bmdma_stop(struct ata_queued_cmd *qc) { + struct ata_port *ap = qc->ap; if (ap->flags & ATA_FLAG_MMIO) { void __iomem *mmio = (void __iomem *) ap->ioaddr.bmdma_addr; @@ -3633,7 +3634,7 @@ inline unsigned int ata_host_intr (struct ata_port *ap, goto idle_irq; /* before we do anything else, clear DMA-Start bit */ - ap->ops->bmdma_stop(ap); + ap->ops->bmdma_stop(qc); /* fall through */ diff --git a/drivers/scsi/sata_qstor.c b/drivers/scsi/sata_qstor.c index 08a84042ce09..2926846cdd6a 100644 --- a/drivers/scsi/sata_qstor.c +++ b/drivers/scsi/sata_qstor.c @@ -117,7 +117,7 @@ static void qs_phy_reset(struct ata_port *ap); static void qs_qc_prep(struct ata_queued_cmd *qc); static int qs_qc_issue(struct ata_queued_cmd *qc); static int qs_check_atapi_dma(struct ata_queued_cmd *qc); -static void qs_bmdma_stop(struct ata_port *ap); +static void qs_bmdma_stop(struct ata_queued_cmd *qc); static u8 qs_bmdma_status(struct ata_port *ap); static void qs_irq_clear(struct ata_port *ap); static void qs_eng_timeout(struct ata_port *ap); @@ -198,7 +198,7 @@ static int qs_check_atapi_dma(struct ata_queued_cmd *qc) return 1; /* ATAPI DMA not supported */ } -static void qs_bmdma_stop(struct ata_port *ap) +static void qs_bmdma_stop(struct ata_queued_cmd *qc) { /* nothing */ } diff --git a/include/linux/libata.h b/include/linux/libata.h index 724b7d1c18ea..33f3ab4eb827 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -365,7 +365,7 @@ struct ata_port_operations { void (*host_stop) (struct ata_host_set *host_set); - void (*bmdma_stop) (struct ata_port *ap); + void (*bmdma_stop) (struct ata_queued_cmd *qc); u8 (*bmdma_status) (struct ata_port *ap); }; @@ -424,9 +424,10 @@ extern unsigned int ata_dev_classify(struct
ata_taskfile *tf); extern void ata_dev_id_string(u16 *id, unsigned char *s, unsigned int ofs, unsigned int len); extern void ata_dev_config(struct ata_port *ap, unsigned int i); +extern void ata_dev_set_protocol(struct ata_device *dev); extern void ata_bmdma_setup (struct ata_queued_cmd *qc); extern void ata_bmdma_start (struct ata_queued_cmd *qc); -extern void ata_bmdma_stop(struct ata_port *ap); +extern void ata_bmdma_stop(struct ata_queued_cmd *qc); extern u8 ata_bmdma_status(struct ata_port *ap); extern void ata_bmdma_irq_clear(struct ata_port *ap); extern void ata_qc_complete(struct ata_queued_cmd *qc, u8 drv_stat); From d634cc15e8f33332038dc9c078beae79f9382ada Mon Sep 17 00:00:00 2001 From: Steve French Date: Fri, 26 Aug 2005 14:42:59 -0500 Subject: [PATCH 156/584] [PATCH] Fix oops in fs/locks.c on close of file with pending locks The recent change to locks_remove_flock code in fs/locks.c changes how byte range locks are removed from closing files, which shows up a bug in cifs. The assumption in the cifs code was that the close call sent to the server would remove any pending locks on the server on this file, but that is no longer safe as the fs/locks.c code on the client wants unlock of 0 to PATH_MAX to remove all locks (at least from this client, it is not possible AFAIK to remove all locks from other clients made to the server copy of the file). Note that cifs locks are different from posix locks - and it is not possible to map posix locks perfectly on the wire yet, due to restrictions of the cifs network protocol, even to Samba without adding a new request type to the network protocol (which we plan to do for Samba 3.0.21 within a few months), but the local client will have the correct, posix view, of the lock in most cases. 
The correct fix for cifs for this would involve a bigger change than I would like to do this late in the 2.6.13-rc cycle - and would involve cifs keeping track of all unmerged (uncoalesced) byte range locks for each remote inode and scanning that list to remove locks that intersect or fall wholly within the range - locks that intersect may have to be reacquired with the smaller, remaining range. Signed-off-by: Steve French Signed-off-by: Dave Kleikamp Signed-off-by: Linus Torvalds --- fs/cifs/file.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/cifs/file.c b/fs/cifs/file.c index 30ab70ce5547..3497125189df 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -643,7 +643,7 @@ int cifs_lock(struct file *file, int cmd, struct file_lock *pfLock) netfid, length, pfLock->fl_start, numUnlock, numLock, lockType, wait_flag); - if (rc == 0 && (pfLock->fl_flags & FL_POSIX)) + if (pfLock->fl_flags & FL_POSIX) posix_lock_file_wait(file, pfLock); FreeXid(xid); return rc; From 1f57ff89fee47a317e9e8ca63bf0f139802cc116 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Fri, 26 Aug 2005 01:49:14 +0400 Subject: [PATCH 157/584] [PATCH] drivers/hwmon/*: kfree() correct pointers The adm9240 driver, in adm9240_detect(), allocates a structure. The error path attempts to kfree() ->client field of it (second one), resulting in an oops (or slab corruption) if the hardware is not present. ->client field in adm1026, adm1031, smsc47b397 and smsc47m1 is the first in ${HWMON}_data structure, but fix them too.
Signed-off-by: Jonathan Corbet Signed-off-by: Linus Torvalds --- drivers/hwmon/adm1026.c | 2 +- drivers/hwmon/adm1031.c | 2 +- drivers/hwmon/adm9240.c | 2 +- drivers/hwmon/smsc47b397.c | 2 +- drivers/hwmon/smsc47m1.c | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/hwmon/adm1026.c b/drivers/hwmon/adm1026.c index 4fa17c76eea2..ded6e8adc854 100644 --- a/drivers/hwmon/adm1026.c +++ b/drivers/hwmon/adm1026.c @@ -1691,7 +1691,7 @@ int adm1026_detect(struct i2c_adapter *adapter, int address, /* Error out and cleanup code */ exitfree: - kfree(new_client); + kfree(data); exit: return err; } diff --git a/drivers/hwmon/adm1031.c b/drivers/hwmon/adm1031.c index 9168e983ca1d..4211c8b4601a 100644 --- a/drivers/hwmon/adm1031.c +++ b/drivers/hwmon/adm1031.c @@ -834,7 +834,7 @@ static int adm1031_detect(struct i2c_adapter *adapter, int address, int kind) return 0; exit_free: - kfree(new_client); + kfree(data); exit: return err; } diff --git a/drivers/hwmon/adm9240.c b/drivers/hwmon/adm9240.c index 5c68e9c311aa..ce2a6eb93f6e 100644 --- a/drivers/hwmon/adm9240.c +++ b/drivers/hwmon/adm9240.c @@ -616,7 +616,7 @@ static int adm9240_detect(struct i2c_adapter *adapter, int address, int kind) return 0; exit_free: - kfree(new_client); + kfree(data); exit: return err; } diff --git a/drivers/hwmon/smsc47b397.c b/drivers/hwmon/smsc47b397.c index 251ac2659554..fdeeb3ab6f2f 100644 --- a/drivers/hwmon/smsc47b397.c +++ b/drivers/hwmon/smsc47b397.c @@ -298,7 +298,7 @@ static int smsc47b397_detect(struct i2c_adapter *adapter, int addr, int kind) return 0; error_free: - kfree(new_client); + kfree(data); error_release: release_region(addr, SMSC_EXTENT); return err; diff --git a/drivers/hwmon/smsc47m1.c b/drivers/hwmon/smsc47m1.c index 897117a7213f..7166ad0b2fda 100644 --- a/drivers/hwmon/smsc47m1.c +++ b/drivers/hwmon/smsc47m1.c @@ -495,7 +495,7 @@ static int smsc47m1_detect(struct i2c_adapter *adapter, int address, int kind) return 0; error_free: - kfree(new_client); + 
kfree(data); error_release: release_region(address, SMSC_EXTENT); return err; From b6a9ad73897acb7ea4cf56aae0fc39ba1c471fba Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 25 Aug 2005 22:59:48 +0100 Subject: [PATCH 158/584] [PATCH] bogus iounmap() in emac Dumb typo: iounmap(&local_pointer_variable). Signed-off-by: Al Viro Signed-off-by: Linus Torvalds --- drivers/net/ibm_emac/ibm_emac_core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ibm_emac/ibm_emac_core.c b/drivers/net/ibm_emac/ibm_emac_core.c index c7fb3675c09d..0de3bb906174 100644 --- a/drivers/net/ibm_emac/ibm_emac_core.c +++ b/drivers/net/ibm_emac/ibm_emac_core.c @@ -1253,7 +1253,7 @@ static int emac_init_tah(struct ocp_enet_private *fep) TAH_MR_CVR | TAH_MR_ST_768 | TAH_MR_TFS_10KB | TAH_MR_DTFP | TAH_MR_DIG); - iounmap(&tahp); + iounmap(tahp); return 0; } From a46206e74e1897bf34d6b58f0991a0d6f3797e27 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 25 Aug 2005 23:03:35 +0100 Subject: [PATCH 159/584] [PATCH] bogus function type in qdio In qdio_get_micros() volatile in return type is plain noise (even with old gccisms it would make no sense - noreturn function returning __u64 is a bit odd ;-) Signed-off-by: Al Viro Signed-off-by: Linus Torvalds --- drivers/s390/cio/qdio.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/s390/cio/qdio.c b/drivers/s390/cio/qdio.c index 533f90c05cdf..381f339e3200 100644 --- a/drivers/s390/cio/qdio.c +++ b/drivers/s390/cio/qdio.c @@ -112,7 +112,7 @@ qdio_min(int a,int b) /***************** SCRUBBER HELPER ROUTINES **********************/ -static inline volatile __u64 +static inline __u64 qdio_get_micros(void) { return (get_clock() >> 10); /* time>>12 is microseconds */ From 3515d0161d55d2fa1a340932625f94240a68c262 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 25 Aug 2005 23:13:14 +0100 Subject: [PATCH 160/584] [PATCH] late spinlock initialization in ieee1394/ohci spinlock used in irq handler should be initialized 
before registering irq, even if we know that our device has interrupts disabled; handler is registered shared and taking spinlock is done unconditionally. As it is, we can and do get oopsen on boot for some configuration, depending on irq routing - I've got a reproducer. Signed-off-by: Al Viro Signed-off-by: Ben Collins Signed-off-by: Linus Torvalds --- drivers/ieee1394/ohci1394.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/ieee1394/ohci1394.c b/drivers/ieee1394/ohci1394.c index b12a970cc9a3..27018c8efc24 100644 --- a/drivers/ieee1394/ohci1394.c +++ b/drivers/ieee1394/ohci1394.c @@ -478,7 +478,6 @@ static void ohci_initialize(struct ti_ohci *ohci) int num_ports, i; spin_lock_init(&ohci->phy_reg_lock); - spin_lock_init(&ohci->event_lock); /* Put some defaults to these undefined bus options */ buf = reg_read(ohci, OHCI1394_BusOptions); @@ -3402,7 +3401,14 @@ static int __devinit ohci1394_pci_probe(struct pci_dev *dev, /* We hopefully don't have to pre-allocate IT DMA like we did * for IR DMA above. Allocate it on-demand and mark inactive. */ ohci->it_legacy_context.ohci = NULL; + spin_lock_init(&ohci->event_lock); + /* + * interrupts are disabled, all right, but... due to SA_SHIRQ we + * might get called anyway. We'll see no event, of course, but + * we need to get to that "no event", so enough should be initialized + * by that point. + */ if (request_irq(dev->irq, ohci_irq_handler, SA_SHIRQ, OHCI1394_DRIVER_NAME, ohci)) FAIL(-ENOMEM, "Failed to allocate shared interrupt %d", dev->irq); From 566ecb9b258ec4da3c7e1c6ca6fca8a5bb0c8eba Mon Sep 17 00:00:00 2001 From: Marcelo Tosatti Date: Tue, 23 Aug 2005 17:20:44 -0300 Subject: [PATCH 161/584] [PATCH] ppc32 8xx: fix m8xx_ide_init() #ifdef Be more precise on deciding whether to call m8xx_ide_init() at m8xx_setup.c:platform_init(). Compilation fails if CONFIG_BLK_DEV_IDE is defined but CONFIG_BLK_DEV_MPC8xx_IDE isnt. 
Signed-off-by: Marcelo Tosatti Signed-off-by: Linus Torvalds --- arch/ppc/syslib/m8xx_setup.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/ppc/syslib/m8xx_setup.c b/arch/ppc/syslib/m8xx_setup.c index 55a381af4e37..a3702cfe8f7c 100644 --- a/arch/ppc/syslib/m8xx_setup.c +++ b/arch/ppc/syslib/m8xx_setup.c @@ -423,7 +423,7 @@ platform_init(unsigned long r3, unsigned long r4, unsigned long r5, ppc_md.find_end_of_memory = m8xx_find_end_of_memory; ppc_md.setup_io_mappings = m8xx_map_io; -#if defined(CONFIG_BLK_DEV_IDE) || defined(CONFIG_BLK_DEV_IDE_MODULE) +#if defined(CONFIG_BLK_DEV_MPC8xx_IDE) m8xx_ide_init(); #endif } From 3fd1bb9baa394856b112e5edbfd3893d92dd1149 Mon Sep 17 00:00:00 2001 From: Jean Delvare Date: Thu, 25 Aug 2005 18:43:37 +0200 Subject: [PATCH 162/584] [PATCH] hwmon: Off-by-one error in fscpos driver Coverity uncovered an off-by-one error in the fscpos driver, in function set_temp_reset(). Writing to the temp3_reset sysfs file will lead to an array overrun, in turn causing an I2C write to a random register of the FSC Poseidon chip. Additionally, writing to temp1_reset and temp2_reset will not work as expected. The fix is straightforward. 
Signed-off-by: Jean Delvare Signed-off-by: Linus Torvalds --- drivers/hwmon/fscpos.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/hwmon/fscpos.c b/drivers/hwmon/fscpos.c index 270015b626ad..301ae98bd0ad 100644 --- a/drivers/hwmon/fscpos.c +++ b/drivers/hwmon/fscpos.c @@ -167,7 +167,7 @@ static ssize_t set_temp_reset(struct i2c_client *client, struct fscpos_data "experience to the module author.\n"); /* Supported value: 2 (clears the status) */ - fscpos_write_value(client, FSCPOS_REG_TEMP_STATE[nr], 2); + fscpos_write_value(client, FSCPOS_REG_TEMP_STATE[nr - 1], 2); return count; } From ca2f3daf779f5e89d14e9783fcfd7920842df9e9 Mon Sep 17 00:00:00 2001 From: Paul Jackson Date: Thu, 25 Aug 2005 12:47:50 -0700 Subject: [PATCH 163/584] [PATCH] undo partial cpu_exclusive sched domain disabling The partial disabling of Dinakar's new facility to allow cpu_exclusive cpusets to define dynamic sched domains doesn't go far enough. At the suggestion of Nick Piggin and Dinakar, let us instead totally disable this facility for 2.6.13, in order to avoid problems first reported by John Hawkes (corrupt sched data structures and kernel oops). This patch removes the partial disabling code in 2.6.13-rc7, in anticipation of the next patch, which will totally disable it instead. Signed-off-by: Paul Jackson Signed-off-by: Linus Torvalds --- kernel/cpuset.c | 19 ------------------- 1 file changed, 19 deletions(-) diff --git a/kernel/cpuset.c b/kernel/cpuset.c index d7f4d0c95737..21a4e3b2cbda 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c @@ -635,25 +635,6 @@ static void update_cpu_domains(struct cpuset *cur) if (par == NULL || cpus_empty(cur->cpus_allowed)) return; - /* - * Hack to avoid 2.6.13 partial node dynamic sched domain bug. - * Require the 'cpu_exclusive' cpuset to include all (or none) - * of the CPUs on each node, or return w/o changing sched domains. - * Remove this hack when dynamic sched domains fixed. 
- */ - { - int i, j; - - for_each_cpu_mask(i, cur->cpus_allowed) { - cpumask_t mask = node_to_cpumask(cpu_to_node(i)); - - for_each_cpu_mask(j, mask) { - if (!cpu_isset(j, cur->cpus_allowed)) - return; - } - } - } - /* * Get all cpus from parent's cpus_allowed not part of exclusive * children From 212d6d2237f60bc28c1518f8abf9d3ed6c17574a Mon Sep 17 00:00:00 2001 From: Paul Jackson Date: Thu, 25 Aug 2005 12:47:56 -0700 Subject: [PATCH 164/584] [PATCH] completely disable cpu_exclusive sched domain At the suggestion of Nick Piggin and Dinakar, totally disable the facility to allow cpu_exclusive cpusets to define dynamic sched domains in Linux 2.6.13, in order to avoid problems first reported by John Hawkes (corrupt sched data structures and kernel oops). This has been built for ppc64, i386, ia64, x86_64, sparc, alpha. It has been built, booted and tested for cpuset functionality on an SN2 (ia64). Dinakar or Nick - could you verify that it for sure does avoid the problems Hawkes reported. Hawkes is out of town, and I don't have the recipe to reproduce what he found. Signed-off-by: Paul Jackson Acked-by: Nick Piggin Signed-off-by: Linus Torvalds --- kernel/cpuset.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/kernel/cpuset.c b/kernel/cpuset.c index 21a4e3b2cbda..8ab1b4e518b8 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c @@ -627,6 +627,14 @@ static int validate_change(const struct cpuset *cur, const struct cpuset *trial) * Call with cpuset_sem held. May nest a call to the * lock_cpu_hotplug()/unlock_cpu_hotplug() pair. */ + +/* + * Hack to avoid 2.6.13 partial node dynamic sched domain bug. + * Disable letting 'cpu_exclusive' cpusets define dynamic sched + * domains, until the sched domain can handle partial nodes. + * Remove this #if hackery when sched domains fixed. 
+ */ +#if 0 static void update_cpu_domains(struct cpuset *cur) { struct cpuset *c, *par = cur->parent; @@ -667,6 +675,11 @@ static void update_cpu_domains(struct cpuset *cur) partition_sched_domains(&pspan, &cspan); unlock_cpu_hotplug(); } +#else +static void update_cpu_domains(struct cpuset *cur) +{ +} +#endif static int update_cpumask(struct cpuset *cs, char *buf) { From 953d1137fc4aba16deace262e93974913596dcfe Mon Sep 17 00:00:00 2001 From: Jeff Garzik Date: Fri, 26 Aug 2005 19:46:24 -0400 Subject: [PATCH 165/584] [libata sata_sil] list documentation URL, since its public --- drivers/scsi/sata_sil.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/scsi/sata_sil.c b/drivers/scsi/sata_sil.c index e6902645f397..9d24d6c328b4 100644 --- a/drivers/scsi/sata_sil.c +++ b/drivers/scsi/sata_sil.c @@ -24,6 +24,11 @@ * If you do not delete the provisions above, a recipient may use your * version of this file under either the OSL or the GPL. * + * Documentation for SiI 3112: + * http://gkernel.sourceforge.net/specs/sii/3112A_SiI-DS-0095-B2.pdf.bz2 + * + * Other errata and documentation available under NDA. + * */ #include From 32818c2eb6b83ea5065c89e0c3cf774abc4dc02b Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Fri, 26 Aug 2005 18:34:07 -0700 Subject: [PATCH 166/584] [PATCH] ppc64: Fix issue with gcc 4.0 compiled kernels I recently had a BUG_ON() go off spuriously on a gcc 4.0 compiled kernel. It turns out gcc-4.0 was removing a sign extension while earlier gcc versions would not. Thinking this to be a compiler bug, I submitted a report: http://gcc.gnu.org/bugzilla/show_bug.cgi?id=23422 It turns out we need to cast the input in order to tell gcc to sign extend it. Thanks to Andrew Pinski for his help on this bug. 
Signed-off-by: Anton Blanchard Cc: Paul Mackerras Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/asm-ppc64/bug.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/include/asm-ppc64/bug.h b/include/asm-ppc64/bug.h index 169868fa307d..160178278861 100644 --- a/include/asm-ppc64/bug.h +++ b/include/asm-ppc64/bug.h @@ -43,8 +43,8 @@ struct bug_entry *find_bug(unsigned long bugaddr); ".section __bug_table,\"a\"\n\t" \ " .llong 1b,%1,%2,%3\n" \ ".previous" \ - : : "r" (x), "i" (__LINE__), "i" (__FILE__), \ - "i" (__FUNCTION__)); \ + : : "r" ((long long)(x)), "i" (__LINE__), \ + "i" (__FILE__), "i" (__FUNCTION__)); \ } while (0) #define WARN_ON(x) do { \ @@ -53,7 +53,8 @@ struct bug_entry *find_bug(unsigned long bugaddr); ".section __bug_table,\"a\"\n\t" \ " .llong 1b,%1,%2,%3\n" \ ".previous" \ - : : "r" (x), "i" (__LINE__ + BUG_WARNING_TRAP), \ + : : "r" ((long long)(x)), \ + "i" (__LINE__ + BUG_WARNING_TRAP), \ "i" (__FILE__), "i" (__FUNCTION__)); \ } while (0) From bebf4688e9dbbfdd421736685d607bced91a3c91 Mon Sep 17 00:00:00 2001 From: "Mark M. Hoffman" Date: Fri, 26 Aug 2005 18:34:08 -0700 Subject: [PATCH 167/584] [PATCH] I2C hwmon: kfree fixes This patch fixes several instances of hwmon drivers kfree'ing the "wrong" pointer; the existing code works somewhat by accident. (akpm: plucked from Greg's queue based on lkml discussion. Finishes off the patch from Jon Corbet) Signed-off-by: Mark M. 
Hoffman Signed-off-by: Jean Delvare Signed-off-by: Greg Kroah-Hartman Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/hwmon/adm1026.c | 2 +- drivers/hwmon/adm1031.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/hwmon/adm1026.c b/drivers/hwmon/adm1026.c index ded6e8adc854..c8a7f47911f9 100644 --- a/drivers/hwmon/adm1026.c +++ b/drivers/hwmon/adm1026.c @@ -325,7 +325,7 @@ int adm1026_attach_adapter(struct i2c_adapter *adapter) int adm1026_detach_client(struct i2c_client *client) { i2c_detach_client(client); - kfree(client); + kfree(i2c_get_clientdata(client)); return 0; } diff --git a/drivers/hwmon/adm1031.c b/drivers/hwmon/adm1031.c index 4211c8b4601a..936250957270 100644 --- a/drivers/hwmon/adm1031.c +++ b/drivers/hwmon/adm1031.c @@ -845,7 +845,7 @@ static int adm1031_detach_client(struct i2c_client *client) if ((ret = i2c_detach_client(client)) != 0) { return ret; } - kfree(client); + kfree(i2c_get_clientdata(client)); return 0; } From 485761bd6a72d33b3d4fa884927b2b0d983b701e Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Fri, 26 Aug 2005 18:34:10 -0700 Subject: [PATCH 168/584] [PATCH] x86_64: Tell VM about holes in nodes Some nodes can have large holes on x86-64. This fixes problems with the VM allowing too many dirty pages because it overestimates the number of available RAM in a node. In extreme cases you can end up with all RAM filled with dirty pages which can lead to deadlocks and other nasty behaviour. This patch just tells the VM about the known holes from e820. Reserved (like the kernel text or mem_map) is still not taken into account, but that should be only a few percent error now. Small detail is that the flat setup uses the NUMA free_area_init_node() now too because it offers more flexibility. 
(akpm: lotsa thanks to Martin for working this problem out) Cc: Martin Bligh Signed-off-by: Andi Kleen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86_64/kernel/e820.c | 34 ++++++++++++++++++++++++++++++++++ arch/x86_64/mm/init.c | 16 ++++++++++++---- arch/x86_64/mm/numa.c | 8 +++++++- include/asm-x86_64/e820.h | 2 ++ 4 files changed, 55 insertions(+), 5 deletions(-) diff --git a/arch/x86_64/kernel/e820.c b/arch/x86_64/kernel/e820.c index 6ded3a50dfe6..b548dea4e5b9 100644 --- a/arch/x86_64/kernel/e820.c +++ b/arch/x86_64/kernel/e820.c @@ -185,6 +185,40 @@ unsigned long __init e820_end_of_ram(void) } /* + * Compute how much memory is missing in a range. + * Unlike the other functions in this file the arguments are in page numbers. + */ +unsigned long __init +e820_hole_size(unsigned long start_pfn, unsigned long end_pfn) +{ + unsigned long ram = 0; + unsigned long start = start_pfn << PAGE_SHIFT; + unsigned long end = end_pfn << PAGE_SHIFT; + int i; + for (i = 0; i < e820.nr_map; i++) { + struct e820entry *ei = &e820.map[i]; + unsigned long last, addr; + + if (ei->type != E820_RAM || + ei->addr+ei->size <= start || + ei->addr >= end) + continue; + + addr = round_up(ei->addr, PAGE_SIZE); + if (addr < start) + addr = start; + + last = round_down(ei->addr + ei->size, PAGE_SIZE); + if (last >= end) + last = end; + + if (last > addr) + ram += last - addr; + } + return ((end - start) - ram) >> PAGE_SHIFT; +} + +/* * Mark e820 reserved areas as busy for the resource manager. 
*/ void __init e820_reserve_resources(void) diff --git a/arch/x86_64/mm/init.c b/arch/x86_64/mm/init.c index 72e4b364ed73..aa4a5189ecee 100644 --- a/arch/x86_64/mm/init.c +++ b/arch/x86_64/mm/init.c @@ -322,18 +322,26 @@ void zap_low_mappings(void) void __init paging_init(void) { { - unsigned long zones_size[MAX_NR_ZONES] = {0, 0, 0}; + unsigned long zones_size[MAX_NR_ZONES]; + unsigned long holes[MAX_NR_ZONES]; unsigned int max_dma; + memset(zones_size, 0, sizeof(zones_size)); + memset(holes, 0, sizeof(holes)); + max_dma = virt_to_phys((char *)MAX_DMA_ADDRESS) >> PAGE_SHIFT; - if (end_pfn < max_dma) + if (end_pfn < max_dma) { zones_size[ZONE_DMA] = end_pfn; - else { + holes[ZONE_DMA] = e820_hole_size(0, end_pfn); + } else { zones_size[ZONE_DMA] = max_dma; + holes[ZONE_DMA] = e820_hole_size(0, max_dma); zones_size[ZONE_NORMAL] = end_pfn - max_dma; + holes[ZONE_NORMAL] = e820_hole_size(max_dma, end_pfn); } - free_area_init(zones_size); + free_area_init_node(0, NODE_DATA(0), zones_size, + __pa(PAGE_OFFSET) >> PAGE_SHIFT, holes); } return; } diff --git a/arch/x86_64/mm/numa.c b/arch/x86_64/mm/numa.c index 70cb2904a90f..6a156f5692ae 100644 --- a/arch/x86_64/mm/numa.c +++ b/arch/x86_64/mm/numa.c @@ -126,9 +126,11 @@ void __init setup_node_zones(int nodeid) { unsigned long start_pfn, end_pfn; unsigned long zones[MAX_NR_ZONES]; + unsigned long holes[MAX_NR_ZONES]; unsigned long dma_end_pfn; memset(zones, 0, sizeof(unsigned long) * MAX_NR_ZONES); + memset(holes, 0, sizeof(unsigned long) * MAX_NR_ZONES); start_pfn = node_start_pfn(nodeid); end_pfn = node_end_pfn(nodeid); @@ -139,13 +141,17 @@ void __init setup_node_zones(int nodeid) dma_end_pfn = __pa(MAX_DMA_ADDRESS) >> PAGE_SHIFT; if (start_pfn < dma_end_pfn) { zones[ZONE_DMA] = dma_end_pfn - start_pfn; + holes[ZONE_DMA] = e820_hole_size(start_pfn, dma_end_pfn); zones[ZONE_NORMAL] = end_pfn - dma_end_pfn; + holes[ZONE_NORMAL] = e820_hole_size(dma_end_pfn, end_pfn); + } else { zones[ZONE_NORMAL] = end_pfn - start_pfn; + 
holes[ZONE_NORMAL] = e820_hole_size(start_pfn, end_pfn); } free_area_init_node(nodeid, NODE_DATA(nodeid), zones, - start_pfn, NULL); + start_pfn, holes); } void __init numa_init_array(void) diff --git a/include/asm-x86_64/e820.h b/include/asm-x86_64/e820.h index 8e94edf0b984..e682edc24a68 100644 --- a/include/asm-x86_64/e820.h +++ b/include/asm-x86_64/e820.h @@ -51,6 +51,8 @@ extern int e820_mapped(unsigned long start, unsigned long end, unsigned type); extern void e820_bootmem_free(pg_data_t *pgdat, unsigned long start,unsigned long end); extern void e820_setup_gap(void); +extern unsigned long e820_hole_size(unsigned long start_pfn, + unsigned long end_pfn); extern void __init parse_memopt(char *p, char **end); From 1c9cf6f9861f8d27303ee2531b3b7686269c71ce Mon Sep 17 00:00:00 2001 From: Deepak Saxena Date: Fri, 26 Aug 2005 18:34:11 -0700 Subject: [PATCH 169/584] [PATCH] arm: fix IXP4xx flash resource range We are currently reserving one byte more than actually needed by the flash device and overlapping into the next I/O expansion bus window. This a) causes us to allocate an extra page of VM due to ARM ioremap() alignment code and b) could cause problems if another driver tries to request the next expansion bus window. 
Signed-off-by: Deepak Saxena Cc: Russell King Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/arm/mach-ixp4xx/coyote-setup.c | 2 +- arch/arm/mach-ixp4xx/gtwx5715-setup.c | 2 +- arch/arm/mach-ixp4xx/ixdp425-setup.c | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/arm/mach-ixp4xx/coyote-setup.c b/arch/arm/mach-ixp4xx/coyote-setup.c index 7f58afb27e71..411ea9996190 100644 --- a/arch/arm/mach-ixp4xx/coyote-setup.c +++ b/arch/arm/mach-ixp4xx/coyote-setup.c @@ -36,7 +36,7 @@ static struct flash_platform_data coyote_flash_data = { static struct resource coyote_flash_resource = { .start = COYOTE_FLASH_BASE, - .end = COYOTE_FLASH_BASE + COYOTE_FLASH_SIZE, + .end = COYOTE_FLASH_BASE + COYOTE_FLASH_SIZE - 1, .flags = IORESOURCE_MEM, }; diff --git a/arch/arm/mach-ixp4xx/gtwx5715-setup.c b/arch/arm/mach-ixp4xx/gtwx5715-setup.c index 65e356bd10d6..333459d6aa46 100644 --- a/arch/arm/mach-ixp4xx/gtwx5715-setup.c +++ b/arch/arm/mach-ixp4xx/gtwx5715-setup.c @@ -114,7 +114,7 @@ static struct flash_platform_data gtwx5715_flash_data = { static struct resource gtwx5715_flash_resource = { .start = GTWX5715_FLASH_BASE, - .end = GTWX5715_FLASH_BASE + GTWX5715_FLASH_SIZE, + .end = GTWX5715_FLASH_BASE + GTWX5715_FLASH_SIZE - 1, .flags = IORESOURCE_MEM, }; diff --git a/arch/arm/mach-ixp4xx/ixdp425-setup.c b/arch/arm/mach-ixp4xx/ixdp425-setup.c index 4633470a6a37..fa0646c8693b 100644 --- a/arch/arm/mach-ixp4xx/ixdp425-setup.c +++ b/arch/arm/mach-ixp4xx/ixdp425-setup.c @@ -36,7 +36,7 @@ static struct flash_platform_data ixdp425_flash_data = { static struct resource ixdp425_flash_resource = { .start = IXDP425_FLASH_BASE, - .end = IXDP425_FLASH_BASE + IXDP425_FLASH_SIZE, + .end = IXDP425_FLASH_BASE + IXDP425_FLASH_SIZE - 1, .flags = IORESOURCE_MEM, }; From e1bcfcaa0b3bec2a67b22c565a0bf508ea90db1d Mon Sep 17 00:00:00 2001 From: Roland Dreier Date: Fri, 26 Aug 2005 18:34:14 -0700 Subject: [PATCH 170/584] [PATCH] IB: fix use-after-free in user verbs 
cleanup Fix a use-after-free bug in userspace verbs cleanup: we can't touch mr->device after we free mr by calling ib_dereg_mr(). Signed-off-by: Roland Dreier Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/infiniband/core/uverbs_main.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c index eb99e693dec2..5f6e9ea29cd7 100644 --- a/drivers/infiniband/core/uverbs_main.c +++ b/drivers/infiniband/core/uverbs_main.c @@ -130,13 +130,14 @@ static int ib_dealloc_ucontext(struct ib_ucontext *context) list_for_each_entry_safe(uobj, tmp, &context->mr_list, list) { struct ib_mr *mr = idr_find(&ib_uverbs_mr_idr, uobj->id); + struct ib_device *mrdev = mr->device; struct ib_umem_object *memobj; idr_remove(&ib_uverbs_mr_idr, uobj->id); ib_dereg_mr(mr); memobj = container_of(uobj, struct ib_umem_object, uobject); - ib_umem_release_on_close(mr->device, &memobj->umem); + ib_umem_release_on_close(mrdev, &memobj->umem); list_del(&uobj->list); kfree(memobj); From 72008652dae7d10fa668d7b2ada3bddff7403d86 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Fri, 26 Aug 2005 18:34:15 -0700 Subject: [PATCH 171/584] [PATCH] md: create a MODULE_ALIAS for md corresponding to its block major number. I just discovered this is needed for module auto-loading. 
Signed-off-by: Neil Brown Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/md/md.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/md/md.c b/drivers/md/md.c index 486ee50cfdda..deccd560c0de 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -4011,3 +4011,4 @@ EXPORT_SYMBOL(md_print_devices); EXPORT_SYMBOL(md_check_recovery); MODULE_LICENSE("GPL"); MODULE_ALIAS("md"); +MODULE_ALIAS_BLOCKDEV_MAJOR(MD_MAJOR); From 657390d25d4241705cb4fc5b3b4ba5b30575dc17 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Fri, 26 Aug 2005 18:34:16 -0700 Subject: [PATCH 172/584] [PATCH] md: clear the 'recovery' flags when starting an md array. It's possible for this to still have flags in it and a previous instance has been stopped, and that confused the new array using the same mddev. Signed-off-by: Neil Brown Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/md/md.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/md/md.c b/drivers/md/md.c index deccd560c0de..20ca80b7dc20 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -1689,6 +1689,7 @@ static int do_md_run(mddev_t * mddev) mddev->pers = pers[pnum]; spin_unlock(&pers_lock); + mddev->recovery = 0; mddev->resync_max_sectors = mddev->size << 1; /* may be over-ridden by personality */ /* before we start the array running, initialise the bitmap */ From 36676bcbf9f6bcbea9d06e67ee8d04eacde54952 Mon Sep 17 00:00:00 2001 From: James Bottomley Date: Fri, 26 Aug 2005 18:34:17 -0700 Subject: [PATCH 173/584] [PATCH] Fix oops in sysfs_hash_and_remove_file() The problem arises if an entity in sysfs is created and removed without ever having been made completely visible. In SCSI this is triggered by removing a device while it's initialising. The problem appears to be that because it was never made visible in sysfs, the sysfs dentry has a null d_inode which oopses when a reference is made to it. 
The solution is simply to check d_inode and assume the object was never made visible (and thus doesn't need deleting) if it's NULL. (akpm: possibly a stopgap for 2.6.13 scsi problems. May not be the long-term fix) Signed-off-by: James Bottomley Cc: Greg KH Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/sysfs/inode.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/fs/sysfs/inode.c b/fs/sysfs/inode.c index d727dc960634..970a33f03299 100644 --- a/fs/sysfs/inode.c +++ b/fs/sysfs/inode.c @@ -228,6 +228,10 @@ void sysfs_hash_and_remove(struct dentry * dir, const char * name) struct sysfs_dirent * sd; struct sysfs_dirent * parent_sd = dir->d_fsdata; + if (dir->d_inode == NULL) + /* no inode means this hasn't been made visible yet */ + return; + down(&dir->d_inode->i_sem); list_for_each_entry(sd, &parent_sd->s_children, s_sibling) { if (!sd->s_element) From 49f6a7fbe123dde25ca4193a7d60705784e18317 Mon Sep 17 00:00:00 2001 From: Tziporet Koren Date: Wed, 10 Aug 2005 23:00:50 -0700 Subject: [PATCH 174/584] [PATCH] IB: Update current firmware versions in mthca driver Update FW versions in mthca according to July 05 Mellanox release Signed-off-by: Tziporet Koren Signed-off-by: Roland Dreier --- drivers/infiniband/hw/mthca/mthca_main.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/infiniband/hw/mthca/mthca_main.c b/drivers/infiniband/hw/mthca/mthca_main.c index 2ef916859e17..687544157307 100644 --- a/drivers/infiniband/hw/mthca/mthca_main.c +++ b/drivers/infiniband/hw/mthca/mthca_main.c @@ -887,9 +887,9 @@ static struct { int is_memfree; int is_pcie; } mthca_hca_table[] = { - [TAVOR] = { .latest_fw = MTHCA_FW_VER(3, 3, 2), .is_memfree = 0, .is_pcie = 0 }, - [ARBEL_COMPAT] = { .latest_fw = MTHCA_FW_VER(4, 6, 2), .is_memfree = 0, .is_pcie = 1 }, - [ARBEL_NATIVE] = { .latest_fw = MTHCA_FW_VER(5, 0, 1), .is_memfree = 1, .is_pcie = 1 }, + [TAVOR] = { .latest_fw = MTHCA_FW_VER(3, 3, 3), .is_memfree = 0, .is_pcie = 0 }, + 
[ARBEL_COMPAT] = { .latest_fw = MTHCA_FW_VER(4, 7, 0), .is_memfree = 0, .is_pcie = 1 }, + [ARBEL_NATIVE] = { .latest_fw = MTHCA_FW_VER(5, 1, 0), .is_memfree = 1, .is_pcie = 1 }, [SINAI] = { .latest_fw = MTHCA_FW_VER(1, 0, 1), .is_memfree = 1, .is_pcie = 1 } }; From 2a1d9b7f09aaaacf235656cb32a40ba2c79590b3 Mon Sep 17 00:00:00 2001 From: Roland Dreier Date: Wed, 10 Aug 2005 23:03:10 -0700 Subject: [PATCH 175/584] [PATCH] IB: Add copyright notices Make some lawyers happy and add copyright notices for people who forgot to include them when they actually touched the code. Signed-off-by: Roland Dreier --- drivers/infiniband/core/agent.c | 11 ++++++----- drivers/infiniband/core/agent_priv.h | 10 +++++----- drivers/infiniband/core/cache.c | 3 +++ drivers/infiniband/core/device.c | 1 + drivers/infiniband/core/packer.c | 1 + drivers/infiniband/core/sa_query.c | 2 +- drivers/infiniband/core/smi.c | 11 ++++++----- drivers/infiniband/core/sysfs.c | 2 ++ drivers/infiniband/core/ud_header.c | 1 + drivers/infiniband/core/user_mad.c | 2 +- drivers/infiniband/core/uverbs.h | 2 ++ drivers/infiniband/core/uverbs_main.c | 2 ++ drivers/infiniband/core/uverbs_mem.c | 1 + drivers/infiniband/core/verbs.c | 1 + drivers/infiniband/hw/mthca/mthca_cmd.c | 1 + drivers/infiniband/hw/mthca/mthca_cmd.h | 1 + drivers/infiniband/hw/mthca/mthca_config_reg.h | 1 + drivers/infiniband/hw/mthca/mthca_cq.c | 2 ++ drivers/infiniband/hw/mthca/mthca_dev.h | 2 ++ drivers/infiniband/hw/mthca/mthca_doorbell.h | 1 + drivers/infiniband/hw/mthca/mthca_eq.c | 1 + drivers/infiniband/hw/mthca/mthca_mad.c | 2 ++ drivers/infiniband/hw/mthca/mthca_main.c | 1 + drivers/infiniband/hw/mthca/mthca_memfree.c | 1 + drivers/infiniband/hw/mthca/mthca_memfree.h | 1 + drivers/infiniband/hw/mthca/mthca_mr.c | 1 + drivers/infiniband/hw/mthca/mthca_pd.c | 1 + drivers/infiniband/hw/mthca/mthca_profile.c | 1 + drivers/infiniband/hw/mthca/mthca_profile.h | 1 + drivers/infiniband/hw/mthca/mthca_provider.c | 2 ++ 
drivers/infiniband/hw/mthca/mthca_provider.h | 1 + drivers/infiniband/hw/mthca/mthca_qp.c | 2 ++ drivers/infiniband/include/ib_cache.h | 2 ++ drivers/infiniband/include/ib_verbs.h | 1 + drivers/infiniband/ulp/ipoib/ipoib.h | 2 ++ drivers/infiniband/ulp/ipoib/ipoib_ib.c | 3 +++ drivers/infiniband/ulp/ipoib/ipoib_main.c | 2 ++ drivers/infiniband/ulp/ipoib/ipoib_multicast.c | 2 ++ drivers/infiniband/ulp/ipoib/ipoib_verbs.c | 1 + 39 files changed, 69 insertions(+), 17 deletions(-) diff --git a/drivers/infiniband/core/agent.c b/drivers/infiniband/core/agent.c index 729f0b0d983a..3d36feb8c5ba 100644 --- a/drivers/infiniband/core/agent.c +++ b/drivers/infiniband/core/agent.c @@ -1,9 +1,10 @@ /* - * Copyright (c) 2004 Mellanox Technologies Ltd. All rights reserved. - * Copyright (c) 2004 Infinicon Corporation. All rights reserved. - * Copyright (c) 2004 Intel Corporation. All rights reserved. - * Copyright (c) 2004 Topspin Corporation. All rights reserved. - * Copyright (c) 2004 Voltaire Corporation. All rights reserved. + * Copyright (c) 2004, 2005 Mellanox Technologies Ltd. All rights reserved. + * Copyright (c) 2004, 2005 Infinicon Corporation. All rights reserved. + * Copyright (c) 2004, 2005 Intel Corporation. All rights reserved. + * Copyright (c) 2004, 2005 Topspin Corporation. All rights reserved. + * Copyright (c) 2004, 2005 Voltaire Corporation. All rights reserved. + * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU diff --git a/drivers/infiniband/core/agent_priv.h b/drivers/infiniband/core/agent_priv.h index 17435af1e914..2ec6d7f1b7d0 100644 --- a/drivers/infiniband/core/agent_priv.h +++ b/drivers/infiniband/core/agent_priv.h @@ -1,9 +1,9 @@ /* - * Copyright (c) 2004 Mellanox Technologies Ltd. All rights reserved. - * Copyright (c) 2004 Infinicon Corporation. All rights reserved. 
- * Copyright (c) 2004 Intel Corporation. All rights reserved. - * Copyright (c) 2004 Topspin Corporation. All rights reserved. - * Copyright (c) 2004 Voltaire Corporation. All rights reserved. + * Copyright (c) 2004, 2005 Mellanox Technologies Ltd. All rights reserved. + * Copyright (c) 2004, 2005 Infinicon Corporation. All rights reserved. + * Copyright (c) 2004, 2005 Intel Corporation. All rights reserved. + * Copyright (c) 2004, 2005 Topspin Corporation. All rights reserved. + * Copyright (c) 2004, 2005 Voltaire Corporation. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU diff --git a/drivers/infiniband/core/cache.c b/drivers/infiniband/core/cache.c index 3042360c97e1..3a129db5ec27 100644 --- a/drivers/infiniband/core/cache.c +++ b/drivers/infiniband/core/cache.c @@ -1,5 +1,8 @@ /* * Copyright (c) 2004 Topspin Communications. All rights reserved. + * Copyright (c) 2005 Intel Corporation. All rights reserved. + * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved. + * Copyright (c) 2005 Voltaire, Inc. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c index 9197e92d708a..d3cf84e01587 100644 --- a/drivers/infiniband/core/device.c +++ b/drivers/infiniband/core/device.c @@ -1,5 +1,6 @@ /* * Copyright (c) 2004 Topspin Communications. All rights reserved. + * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved. * * This software is available to you under a choice of one of two * licenses. 
You may choose to be licensed under the terms of the GNU diff --git a/drivers/infiniband/core/packer.c b/drivers/infiniband/core/packer.c index eb5ff54c10d7..ed1684b09f92 100644 --- a/drivers/infiniband/core/packer.c +++ b/drivers/infiniband/core/packer.c @@ -1,5 +1,6 @@ /* * Copyright (c) 2004 Topspin Corporation. All rights reserved. + * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU diff --git a/drivers/infiniband/core/sa_query.c b/drivers/infiniband/core/sa_query.c index 795184931c83..b03bed2ed87a 100644 --- a/drivers/infiniband/core/sa_query.c +++ b/drivers/infiniband/core/sa_query.c @@ -1,6 +1,6 @@ /* * Copyright (c) 2004 Topspin Communications. All rights reserved. - * Copyright (c) 2005 Voltaire, Inc. All rights reserved. + * Copyright (c) 2005 Voltaire, Inc.  All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU diff --git a/drivers/infiniband/core/smi.c b/drivers/infiniband/core/smi.c index b4b284324a33..1c0d733c3fce 100644 --- a/drivers/infiniband/core/smi.c +++ b/drivers/infiniband/core/smi.c @@ -1,9 +1,10 @@ /* - * Copyright (c) 2004 Mellanox Technologies Ltd. All rights reserved. - * Copyright (c) 2004 Infinicon Corporation. All rights reserved. - * Copyright (c) 2004 Intel Corporation. All rights reserved. - * Copyright (c) 2004 Topspin Corporation. All rights reserved. - * Copyright (c) 2004 Voltaire Corporation. All rights reserved. + * Copyright (c) 2004, 2005 Mellanox Technologies Ltd. All rights reserved. + * Copyright (c) 2004, 2005 Infinicon Corporation. All rights reserved. + * Copyright (c) 2004, 2005 Intel Corporation. All rights reserved. + * Copyright (c) 2004, 2005 Topspin Corporation. All rights reserved. + * Copyright (c) 2004, 2005 Voltaire Corporation. All rights reserved. 
+ * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU diff --git a/drivers/infiniband/core/sysfs.c b/drivers/infiniband/core/sysfs.c index 90d51b179abe..b2e779996cbe 100644 --- a/drivers/infiniband/core/sysfs.c +++ b/drivers/infiniband/core/sysfs.c @@ -1,5 +1,7 @@ /* * Copyright (c) 2004, 2005 Topspin Communications. All rights reserved. + * Copyright (c) 2005 Mellanox Technologies Ltd. All rights reserved. + * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU diff --git a/drivers/infiniband/core/ud_header.c b/drivers/infiniband/core/ud_header.c index dc4eb1db5e96..b32d43ec0a33 100644 --- a/drivers/infiniband/core/ud_header.c +++ b/drivers/infiniband/core/ud_header.c @@ -1,5 +1,6 @@ /* * Copyright (c) 2004 Topspin Corporation. All rights reserved. + * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU diff --git a/drivers/infiniband/core/user_mad.c b/drivers/infiniband/core/user_mad.c index 2e38792df533..8a19dd4d38f8 100644 --- a/drivers/infiniband/core/user_mad.c +++ b/drivers/infiniband/core/user_mad.c @@ -1,6 +1,6 @@ /* * Copyright (c) 2004 Topspin Communications. All rights reserved. - * Copyright (c) 2005 Voltaire, Inc. All rights reserved. + * Copyright (c) 2005 Voltaire, Inc. All rights reserved. * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved. 
* * This software is available to you under a choice of one of two diff --git a/drivers/infiniband/core/uverbs.h b/drivers/infiniband/core/uverbs.h index 7696022f9a4e..3e158f5acfc6 100644 --- a/drivers/infiniband/core/uverbs.h +++ b/drivers/infiniband/core/uverbs.h @@ -1,6 +1,8 @@ /* * Copyright (c) 2005 Topspin Communications. All rights reserved. * Copyright (c) 2005 Cisco Systems. All rights reserved. + * Copyright (c) 2005 Mellanox Technologies. All rights reserved. + * Copyright (c) 2005 Voltaire, Inc. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c index 5f6e9ea29cd7..fd8e96359304 100644 --- a/drivers/infiniband/core/uverbs_main.c +++ b/drivers/infiniband/core/uverbs_main.c @@ -1,6 +1,8 @@ /* * Copyright (c) 2005 Topspin Communications. All rights reserved. * Copyright (c) 2005 Cisco Systems. All rights reserved. + * Copyright (c) 2005 Mellanox Technologies. All rights reserved. + * Copyright (c) 2005 Voltaire, Inc. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU diff --git a/drivers/infiniband/core/uverbs_mem.c b/drivers/infiniband/core/uverbs_mem.c index ed550f6595bd..36a32c315668 100644 --- a/drivers/infiniband/core/uverbs_mem.c +++ b/drivers/infiniband/core/uverbs_mem.c @@ -1,6 +1,7 @@ /* * Copyright (c) 2005 Topspin Communications. All rights reserved. * Copyright (c) 2005 Cisco Systems. All rights reserved. + * Copyright (c) 2005 Mellanox Technologies. All rights reserved. * * This software is available to you under a choice of one of two * licenses. 
You may choose to be licensed under the terms of the GNU diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c index 506fdf1f2a26..c301a2c41f34 100644 --- a/drivers/infiniband/core/verbs.c +++ b/drivers/infiniband/core/verbs.c @@ -4,6 +4,7 @@ * Copyright (c) 2004 Intel Corporation. All rights reserved. * Copyright (c) 2004 Topspin Corporation. All rights reserved. * Copyright (c) 2004 Voltaire Corporation. All rights reserved. + * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved. * Copyright (c) 2005 Cisco Systems. All rights reserved. * * This software is available to you under a choice of one of two diff --git a/drivers/infiniband/hw/mthca/mthca_cmd.c b/drivers/infiniband/hw/mthca/mthca_cmd.c index 1557a522d831..0ff5900e0930 100644 --- a/drivers/infiniband/hw/mthca/mthca_cmd.c +++ b/drivers/infiniband/hw/mthca/mthca_cmd.c @@ -1,5 +1,6 @@ /* * Copyright (c) 2004, 2005 Topspin Communications. All rights reserved. + * Copyright (c) 2005 Mellanox Technologies. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU diff --git a/drivers/infiniband/hw/mthca/mthca_cmd.h b/drivers/infiniband/hw/mthca/mthca_cmd.h index ed517f175dd6..75a629639445 100644 --- a/drivers/infiniband/hw/mthca/mthca_cmd.h +++ b/drivers/infiniband/hw/mthca/mthca_cmd.h @@ -1,5 +1,6 @@ /* * Copyright (c) 2004, 2005 Topspin Communications. All rights reserved. + * Copyright (c) 2005 Mellanox Technologies. All rights reserved. * * This software is available to you under a choice of one of two * licenses. 
You may choose to be licensed under the terms of the GNU diff --git a/drivers/infiniband/hw/mthca/mthca_config_reg.h b/drivers/infiniband/hw/mthca/mthca_config_reg.h index b4bfbbfe2c3d..afa56bfaab2e 100644 --- a/drivers/infiniband/hw/mthca/mthca_config_reg.h +++ b/drivers/infiniband/hw/mthca/mthca_config_reg.h @@ -1,5 +1,6 @@ /* * Copyright (c) 2004 Topspin Communications. All rights reserved. + * Copyright (c) 2005 Mellanox Technologies. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU diff --git a/drivers/infiniband/hw/mthca/mthca_cq.c b/drivers/infiniband/hw/mthca/mthca_cq.c index 5687c3014522..bd7807cec50c 100644 --- a/drivers/infiniband/hw/mthca/mthca_cq.c +++ b/drivers/infiniband/hw/mthca/mthca_cq.c @@ -2,6 +2,8 @@ * Copyright (c) 2004, 2005 Topspin Communications. All rights reserved. * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved. * Copyright (c) 2005 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2005 Mellanox Technologies. All rights reserved. + * Copyright (c) 2004 Voltaire, Inc. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU diff --git a/drivers/infiniband/hw/mthca/mthca_dev.h b/drivers/infiniband/hw/mthca/mthca_dev.h index 5ecdd2eeeb0f..33162a960c72 100644 --- a/drivers/infiniband/hw/mthca/mthca_dev.h +++ b/drivers/infiniband/hw/mthca/mthca_dev.h @@ -2,6 +2,8 @@ * Copyright (c) 2004, 2005 Topspin Communications. All rights reserved. * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved. * Copyright (c) 2005 Cisco Systems. All rights reserved. + * Copyright (c) 2005 Mellanox Technologies. All rights reserved. + * Copyright (c) 2004 Voltaire, Inc. All rights reserved. * * This software is available to you under a choice of one of two * licenses. 
You may choose to be licensed under the terms of the GNU diff --git a/drivers/infiniband/hw/mthca/mthca_doorbell.h b/drivers/infiniband/hw/mthca/mthca_doorbell.h index 535fad7710fb..3be4a4a606a2 100644 --- a/drivers/infiniband/hw/mthca/mthca_doorbell.h +++ b/drivers/infiniband/hw/mthca/mthca_doorbell.h @@ -1,6 +1,7 @@ /* * Copyright (c) 2004 Topspin Communications. All rights reserved. * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved. + * Copyright (c) 2005 Mellanox Technologies. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU diff --git a/drivers/infiniband/hw/mthca/mthca_eq.c b/drivers/infiniband/hw/mthca/mthca_eq.c index cbcf2b4722e4..54a809adab6d 100644 --- a/drivers/infiniband/hw/mthca/mthca_eq.c +++ b/drivers/infiniband/hw/mthca/mthca_eq.c @@ -1,5 +1,6 @@ /* * Copyright (c) 2004, 2005 Topspin Communications. All rights reserved. + * Copyright (c) 2005 Mellanox Technologies. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU diff --git a/drivers/infiniband/hw/mthca/mthca_mad.c b/drivers/infiniband/hw/mthca/mthca_mad.c index 7df223642015..3c7fae6cb12f 100644 --- a/drivers/infiniband/hw/mthca/mthca_mad.c +++ b/drivers/infiniband/hw/mthca/mthca_mad.c @@ -1,5 +1,7 @@ /* * Copyright (c) 2004 Topspin Communications. All rights reserved. + * Copyright (c) 2005 Mellanox Technologies. All rights reserved. + * Copyright (c) 2004 Voltaire, Inc. All rights reserved. * * This software is available to you under a choice of one of two * licenses. 
You may choose to be licensed under the terms of the GNU diff --git a/drivers/infiniband/hw/mthca/mthca_main.c b/drivers/infiniband/hw/mthca/mthca_main.c index 687544157307..2d539403bdac 100644 --- a/drivers/infiniband/hw/mthca/mthca_main.c +++ b/drivers/infiniband/hw/mthca/mthca_main.c @@ -1,6 +1,7 @@ /* * Copyright (c) 2004, 2005 Topspin Communications. All rights reserved. * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved. + * Copyright (c) 2005 Mellanox Technologies. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU diff --git a/drivers/infiniband/hw/mthca/mthca_memfree.c b/drivers/infiniband/hw/mthca/mthca_memfree.c index 2a8646150355..9efb0322c761 100644 --- a/drivers/infiniband/hw/mthca/mthca_memfree.c +++ b/drivers/infiniband/hw/mthca/mthca_memfree.c @@ -1,6 +1,7 @@ /* * Copyright (c) 2004, 2005 Topspin Communications. All rights reserved. * Copyright (c) 2005 Cisco Systems. All rights reserved. + * Copyright (c) 2005 Mellanox Technologies. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU diff --git a/drivers/infiniband/hw/mthca/mthca_memfree.h b/drivers/infiniband/hw/mthca/mthca_memfree.h index 4761d844cb5f..59c2f555b13b 100644 --- a/drivers/infiniband/hw/mthca/mthca_memfree.h +++ b/drivers/infiniband/hw/mthca/mthca_memfree.h @@ -1,6 +1,7 @@ /* * Copyright (c) 2004, 2005 Topspin Communications. All rights reserved. * Copyright (c) 2005 Cisco Systems. All rights reserved. + * Copyright (c) 2005 Mellanox Technologies. All rights reserved. * * This software is available to you under a choice of one of two * licenses. 
You may choose to be licensed under the terms of the GNU diff --git a/drivers/infiniband/hw/mthca/mthca_mr.c b/drivers/infiniband/hw/mthca/mthca_mr.c index cbe50feaf680..15d9f8f290a0 100644 --- a/drivers/infiniband/hw/mthca/mthca_mr.c +++ b/drivers/infiniband/hw/mthca/mthca_mr.c @@ -1,5 +1,6 @@ /* * Copyright (c) 2004 Topspin Communications. All rights reserved. + * Copyright (c) 2005 Mellanox Technologies. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU diff --git a/drivers/infiniband/hw/mthca/mthca_pd.c b/drivers/infiniband/hw/mthca/mthca_pd.c index c2c899844e98..3dbf06a6e6f4 100644 --- a/drivers/infiniband/hw/mthca/mthca_pd.c +++ b/drivers/infiniband/hw/mthca/mthca_pd.c @@ -1,6 +1,7 @@ /* * Copyright (c) 2004 Topspin Communications. All rights reserved. * Copyright (c) 2005 Cisco Systems. All rights reserved. + * Copyright (c) 2005 Mellanox Technologies. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU diff --git a/drivers/infiniband/hw/mthca/mthca_profile.c b/drivers/infiniband/hw/mthca/mthca_profile.c index 4fedc32d5871..9b280661f2a1 100644 --- a/drivers/infiniband/hw/mthca/mthca_profile.c +++ b/drivers/infiniband/hw/mthca/mthca_profile.c @@ -1,5 +1,6 @@ /* * Copyright (c) 2004, 2005 Topspin Communications. All rights reserved. + * Copyright (c) 2005 Mellanox Technologies. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU diff --git a/drivers/infiniband/hw/mthca/mthca_profile.h b/drivers/infiniband/hw/mthca/mthca_profile.h index 17aef3357661..0d4f070a3fa1 100644 --- a/drivers/infiniband/hw/mthca/mthca_profile.h +++ b/drivers/infiniband/hw/mthca/mthca_profile.h @@ -1,5 +1,6 @@ /* * Copyright (c) 2004, 2005 Topspin Communications. 
All rights reserved. + * Copyright (c) 2005 Mellanox Technologies. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU diff --git a/drivers/infiniband/hw/mthca/mthca_provider.c b/drivers/infiniband/hw/mthca/mthca_provider.c index 81919a7b4935..34e6b8685ba3 100644 --- a/drivers/infiniband/hw/mthca/mthca_provider.c +++ b/drivers/infiniband/hw/mthca/mthca_provider.c @@ -2,6 +2,8 @@ * Copyright (c) 2004, 2005 Topspin Communications. All rights reserved. * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved. * Copyright (c) 2005 Cisco Systems. All rights reserved. + * Copyright (c) 2005 Mellanox Technologies. All rights reserved. + * Copyright (c) 2004 Voltaire, Inc. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU diff --git a/drivers/infiniband/hw/mthca/mthca_provider.h b/drivers/infiniband/hw/mthca/mthca_provider.h index 1d032791cc8b..727aad8d4f33 100644 --- a/drivers/infiniband/hw/mthca/mthca_provider.h +++ b/drivers/infiniband/hw/mthca/mthca_provider.h @@ -1,6 +1,7 @@ /* * Copyright (c) 2004 Topspin Communications. All rights reserved. * Copyright (c) 2005 Cisco Systems. All rights reserved. + * Copyright (c) 2005 Mellanox Technologies. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU diff --git a/drivers/infiniband/hw/mthca/mthca_qp.c b/drivers/infiniband/hw/mthca/mthca_qp.c index f7126b14d5ae..2f429815d195 100644 --- a/drivers/infiniband/hw/mthca/mthca_qp.c +++ b/drivers/infiniband/hw/mthca/mthca_qp.c @@ -1,6 +1,8 @@ /* * Copyright (c) 2004 Topspin Communications. All rights reserved. * Copyright (c) 2005 Cisco Systems. All rights reserved. + * Copyright (c) 2005 Mellanox Technologies. All rights reserved. 
+ * Copyright (c) 2004 Voltaire, Inc. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU diff --git a/drivers/infiniband/include/ib_cache.h b/drivers/infiniband/include/ib_cache.h index 44ef6bb9b9df..fff031bc95df 100644 --- a/drivers/infiniband/include/ib_cache.h +++ b/drivers/infiniband/include/ib_cache.h @@ -1,5 +1,7 @@ /* * Copyright (c) 2004 Topspin Communications. All rights reserved. + * Copyright (c) 2005 Intel Corporation. All rights reserved. + * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU diff --git a/drivers/infiniband/include/ib_verbs.h b/drivers/infiniband/include/ib_verbs.h index 5d24edaa66e6..8d5ea9568337 100644 --- a/drivers/infiniband/include/ib_verbs.h +++ b/drivers/infiniband/include/ib_verbs.h @@ -4,6 +4,7 @@ * Copyright (c) 2004 Intel Corporation. All rights reserved. * Copyright (c) 2004 Topspin Corporation. All rights reserved. * Copyright (c) 2004 Voltaire Corporation. All rights reserved. + * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved. * Copyright (c) 2005 Cisco Systems. All rights reserved. * * This software is available to you under a choice of one of two diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h index 04c98f54e9c4..b91d3ef01b92 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib.h +++ b/drivers/infiniband/ulp/ipoib/ipoib.h @@ -1,5 +1,7 @@ /* * Copyright (c) 2004, 2005 Topspin Communications. All rights reserved. + * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved. + * Copyright (c) 2004 Voltaire, Inc. All rights reserved. * * This software is available to you under a choice of one of two * licenses. 
You may choose to be licensed under the terms of the GNU diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/drivers/infiniband/ulp/ipoib/ipoib_ib.c index eee82363167d..cb4f8062677c 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_ib.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c @@ -1,5 +1,8 @@ /* * Copyright (c) 2004, 2005 Topspin Communications. All rights reserved. + * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved. + * Copyright (c) 2005 Mellanox Technologies. All rights reserved. + * Copyright (c) 2004, 2005 Voltaire, Inc. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c index fa00816a3cf7..b07383e5c76a 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_main.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c @@ -1,5 +1,7 @@ /* * Copyright (c) 2004 Topspin Communications. All rights reserved. + * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved. + * Copyright (c) 2004 Voltaire, Inc. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU diff --git a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c index 70208c3d21e2..e03b070d5222 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c @@ -1,5 +1,7 @@ /* * Copyright (c) 2004, 2005 Topspin Communications. All rights reserved. + * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved. + * Copyright (c) 2004 Voltaire, Inc. All rights reserved. * * This software is available to you under a choice of one of two * licenses. 
You may choose to be licensed under the terms of the GNU diff --git a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c index 4933edf062c2..21b58aa76fee 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c @@ -1,5 +1,6 @@ /* * Copyright (c) 2004, 2005 Topspin Communications. All rights reserved. + * Copyright (c) 2005 Mellanox Technologies. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU From 92a6b34bf4d0d11c54b2a6bdd6240f98cb326200 Mon Sep 17 00:00:00 2001 From: Hal Rosenstock Date: Sat, 13 Aug 2005 20:50:27 -0700 Subject: [PATCH 176/584] [PATCH] IB: Eliminate redundant NULL checks IPoIB: Eliminate NULL checks prior to calling kfree Signed-off-by: Hal Rosenstock Signed-off-by: Roland Dreier --- drivers/infiniband/ulp/ipoib/ipoib_main.c | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c index b07383e5c76a..d4300e4a36d8 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_main.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c @@ -782,15 +782,11 @@ void ipoib_dev_cleanup(struct net_device *dev) ipoib_ib_dev_cleanup(dev); - if (priv->rx_ring) { - kfree(priv->rx_ring); - priv->rx_ring = NULL; - } + kfree(priv->rx_ring); + kfree(priv->tx_ring); - if (priv->tx_ring) { - kfree(priv->tx_ring); - priv->tx_ring = NULL; - } + priv->rx_ring = NULL; + priv->tx_ring = NULL; } static void ipoib_setup(struct net_device *dev) From 97f52eb438be7caebe026421545619d8a0c1398a Mon Sep 17 00:00:00 2001 From: Sean Hefty Date: Sat, 13 Aug 2005 21:05:57 -0700 Subject: [PATCH 177/584] [PATCH] IB: sparse endianness cleanup Fix sparse warnings. Use __be* where appropriate. 
Signed-off-by: Sean Hefty Signed-off-by: Roland Dreier --- drivers/infiniband/core/cm.c | 121 +++++------ drivers/infiniband/core/cm_msgs.h | 192 +++++++++--------- drivers/infiniband/core/mad.c | 10 +- drivers/infiniband/core/mad_priv.h | 6 +- drivers/infiniband/core/mad_rmpp.c | 2 +- drivers/infiniband/core/sysfs.c | 36 ++-- drivers/infiniband/core/ud_header.c | 8 +- drivers/infiniband/core/user_mad.c | 4 +- drivers/infiniband/hw/mthca/mthca_av.c | 24 +-- drivers/infiniband/hw/mthca/mthca_cmd.c | 40 ++-- drivers/infiniband/hw/mthca/mthca_cq.c | 89 ++++---- drivers/infiniband/hw/mthca/mthca_dev.h | 15 +- drivers/infiniband/hw/mthca/mthca_doorbell.h | 12 +- drivers/infiniband/hw/mthca/mthca_eq.c | 62 +++--- drivers/infiniband/hw/mthca/mthca_mad.c | 2 +- drivers/infiniband/hw/mthca/mthca_mcg.c | 36 ++-- drivers/infiniband/hw/mthca/mthca_memfree.c | 4 +- drivers/infiniband/hw/mthca/mthca_memfree.h | 4 +- drivers/infiniband/hw/mthca/mthca_mr.c | 32 +-- drivers/infiniband/hw/mthca/mthca_provider.c | 16 +- drivers/infiniband/hw/mthca/mthca_provider.h | 6 +- drivers/infiniband/hw/mthca/mthca_qp.c | 187 +++++++++-------- drivers/infiniband/include/ib_cm.h | 89 ++++---- drivers/infiniband/include/ib_mad.h | 22 +- drivers/infiniband/include/ib_sa.h | 18 +- drivers/infiniband/include/ib_smi.h | 18 +- drivers/infiniband/include/ib_user_cm.h | 28 +-- drivers/infiniband/include/ib_user_mad.h | 10 +- drivers/infiniband/include/ib_user_verbs.h | 4 +- drivers/infiniband/include/ib_verbs.h | 14 +- drivers/infiniband/ulp/ipoib/ipoib.h | 4 +- drivers/infiniband/ulp/ipoib/ipoib_fs.c | 2 +- drivers/infiniband/ulp/ipoib/ipoib_main.c | 4 +- .../infiniband/ulp/ipoib/ipoib_multicast.c | 6 +- 34 files changed, 565 insertions(+), 562 deletions(-) diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c index 403ed125d8f4..781be773a186 100644 --- a/drivers/infiniband/core/cm.c +++ b/drivers/infiniband/core/cm.c @@ -83,7 +83,7 @@ struct cm_port { struct cm_device { struct 
list_head list; struct ib_device *device; - u64 ca_guid; + __be64 ca_guid; struct cm_port port[0]; }; @@ -100,8 +100,8 @@ struct cm_work { struct list_head list; struct cm_port *port; struct ib_mad_recv_wc *mad_recv_wc; /* Received MADs */ - u32 local_id; /* Established / timewait */ - u32 remote_id; + __be32 local_id; /* Established / timewait */ + __be32 remote_id; struct ib_cm_event cm_event; struct ib_sa_path_rec path[0]; }; @@ -110,8 +110,8 @@ struct cm_timewait_info { struct cm_work work; /* Must be first. */ struct rb_node remote_qp_node; struct rb_node remote_id_node; - u64 remote_ca_guid; - u32 remote_qpn; + __be64 remote_ca_guid; + __be32 remote_qpn; u8 inserted_remote_qp; u8 inserted_remote_id; }; @@ -132,11 +132,11 @@ struct cm_id_private { struct cm_av alt_av; void *private_data; - u64 tid; - u32 local_qpn; - u32 remote_qpn; - u32 sq_psn; - u32 rq_psn; + __be64 tid; + __be32 local_qpn; + __be32 remote_qpn; + __be32 sq_psn; + __be32 rq_psn; int timeout_ms; enum ib_mtu path_mtu; u8 private_data_len; @@ -253,7 +253,7 @@ static void cm_set_ah_attr(struct ib_ah_attr *ah_attr, u8 port_num, u16 dlid, u8 sl, u16 src_path_bits) { memset(ah_attr, 0, sizeof ah_attr); - ah_attr->dlid = be16_to_cpu(dlid); + ah_attr->dlid = dlid; ah_attr->sl = sl; ah_attr->src_path_bits = src_path_bits; ah_attr->port_num = port_num; @@ -264,7 +264,7 @@ static void cm_init_av_for_response(struct cm_port *port, { av->port = port; av->pkey_index = wc->pkey_index; - cm_set_ah_attr(&av->ah_attr, port->port_num, cpu_to_be16(wc->slid), + cm_set_ah_attr(&av->ah_attr, port->port_num, wc->slid, wc->sl, wc->dlid_path_bits); } @@ -295,8 +295,9 @@ static int cm_init_av_by_path(struct ib_sa_path_rec *path, struct cm_av *av) return ret; av->port = port; - cm_set_ah_attr(&av->ah_attr, av->port->port_num, path->dlid, - path->sl, path->slid & 0x7F); + cm_set_ah_attr(&av->ah_attr, av->port->port_num, + be16_to_cpu(path->dlid), path->sl, + be16_to_cpu(path->slid) & 0x7F); av->packet_life_time = 
path->packet_life_time; return 0; } @@ -309,26 +310,26 @@ static int cm_alloc_id(struct cm_id_private *cm_id_priv) do { spin_lock_irqsave(&cm.lock, flags); ret = idr_get_new_above(&cm.local_id_table, cm_id_priv, 1, - (int *) &cm_id_priv->id.local_id); + (__force int *) &cm_id_priv->id.local_id); spin_unlock_irqrestore(&cm.lock, flags); } while( (ret == -EAGAIN) && idr_pre_get(&cm.local_id_table, GFP_KERNEL) ); return ret; } -static void cm_free_id(u32 local_id) +static void cm_free_id(__be32 local_id) { unsigned long flags; spin_lock_irqsave(&cm.lock, flags); - idr_remove(&cm.local_id_table, (int) local_id); + idr_remove(&cm.local_id_table, (__force int) local_id); spin_unlock_irqrestore(&cm.lock, flags); } -static struct cm_id_private * cm_get_id(u32 local_id, u32 remote_id) +static struct cm_id_private * cm_get_id(__be32 local_id, __be32 remote_id) { struct cm_id_private *cm_id_priv; - cm_id_priv = idr_find(&cm.local_id_table, (int) local_id); + cm_id_priv = idr_find(&cm.local_id_table, (__force int) local_id); if (cm_id_priv) { if (cm_id_priv->id.remote_id == remote_id) atomic_inc(&cm_id_priv->refcount); @@ -339,7 +340,7 @@ static struct cm_id_private * cm_get_id(u32 local_id, u32 remote_id) return cm_id_priv; } -static struct cm_id_private * cm_acquire_id(u32 local_id, u32 remote_id) +static struct cm_id_private * cm_acquire_id(__be32 local_id, __be32 remote_id) { struct cm_id_private *cm_id_priv; unsigned long flags; @@ -356,8 +357,8 @@ static struct cm_id_private * cm_insert_listen(struct cm_id_private *cm_id_priv) struct rb_node **link = &cm.listen_service_table.rb_node; struct rb_node *parent = NULL; struct cm_id_private *cur_cm_id_priv; - u64 service_id = cm_id_priv->id.service_id; - u64 service_mask = cm_id_priv->id.service_mask; + __be64 service_id = cm_id_priv->id.service_id; + __be64 service_mask = cm_id_priv->id.service_mask; while (*link) { parent = *link; @@ -376,7 +377,7 @@ static struct cm_id_private * cm_insert_listen(struct cm_id_private 
*cm_id_priv) return NULL; } -static struct cm_id_private * cm_find_listen(u64 service_id) +static struct cm_id_private * cm_find_listen(__be64 service_id) { struct rb_node *node = cm.listen_service_table.rb_node; struct cm_id_private *cm_id_priv; @@ -400,8 +401,8 @@ static struct cm_timewait_info * cm_insert_remote_id(struct cm_timewait_info struct rb_node **link = &cm.remote_id_table.rb_node; struct rb_node *parent = NULL; struct cm_timewait_info *cur_timewait_info; - u64 remote_ca_guid = timewait_info->remote_ca_guid; - u32 remote_id = timewait_info->work.remote_id; + __be64 remote_ca_guid = timewait_info->remote_ca_guid; + __be32 remote_id = timewait_info->work.remote_id; while (*link) { parent = *link; @@ -424,8 +425,8 @@ static struct cm_timewait_info * cm_insert_remote_id(struct cm_timewait_info return NULL; } -static struct cm_timewait_info * cm_find_remote_id(u64 remote_ca_guid, - u32 remote_id) +static struct cm_timewait_info * cm_find_remote_id(__be64 remote_ca_guid, + __be32 remote_id) { struct rb_node *node = cm.remote_id_table.rb_node; struct cm_timewait_info *timewait_info; @@ -453,8 +454,8 @@ static struct cm_timewait_info * cm_insert_remote_qpn(struct cm_timewait_info struct rb_node **link = &cm.remote_qp_table.rb_node; struct rb_node *parent = NULL; struct cm_timewait_info *cur_timewait_info; - u64 remote_ca_guid = timewait_info->remote_ca_guid; - u32 remote_qpn = timewait_info->remote_qpn; + __be64 remote_ca_guid = timewait_info->remote_ca_guid; + __be32 remote_qpn = timewait_info->remote_qpn; while (*link) { parent = *link; @@ -484,7 +485,7 @@ static struct cm_id_private * cm_insert_remote_sidr(struct cm_id_private struct rb_node *parent = NULL; struct cm_id_private *cur_cm_id_priv; union ib_gid *port_gid = &cm_id_priv->av.dgid; - u32 remote_id = cm_id_priv->id.remote_id; + __be32 remote_id = cm_id_priv->id.remote_id; while (*link) { parent = *link; @@ -598,7 +599,7 @@ static void cm_cleanup_timewait(struct cm_timewait_info *timewait_info) 
spin_unlock_irqrestore(&cm.lock, flags); } -static struct cm_timewait_info * cm_create_timewait_info(u32 local_id) +static struct cm_timewait_info * cm_create_timewait_info(__be32 local_id) { struct cm_timewait_info *timewait_info; @@ -715,14 +716,15 @@ retest: EXPORT_SYMBOL(ib_destroy_cm_id); int ib_cm_listen(struct ib_cm_id *cm_id, - u64 service_id, - u64 service_mask) + __be64 service_id, + __be64 service_mask) { struct cm_id_private *cm_id_priv, *cur_cm_id_priv; unsigned long flags; int ret = 0; - service_mask = service_mask ? service_mask : ~0ULL; + service_mask = service_mask ? service_mask : + __constant_cpu_to_be64(~0ULL); service_id &= service_mask; if ((service_id & IB_SERVICE_ID_AGN_MASK) == IB_CM_ASSIGN_SERVICE_ID && (service_id != IB_CM_ASSIGN_SERVICE_ID)) @@ -735,8 +737,8 @@ int ib_cm_listen(struct ib_cm_id *cm_id, spin_lock_irqsave(&cm.lock, flags); if (service_id == IB_CM_ASSIGN_SERVICE_ID) { - cm_id->service_id = __cpu_to_be64(cm.listen_service_id++); - cm_id->service_mask = ~0ULL; + cm_id->service_id = cpu_to_be64(cm.listen_service_id++); + cm_id->service_mask = __constant_cpu_to_be64(~0ULL); } else { cm_id->service_id = service_id; cm_id->service_mask = service_mask; @@ -752,18 +754,19 @@ int ib_cm_listen(struct ib_cm_id *cm_id, } EXPORT_SYMBOL(ib_cm_listen); -static u64 cm_form_tid(struct cm_id_private *cm_id_priv, - enum cm_msg_sequence msg_seq) +static __be64 cm_form_tid(struct cm_id_private *cm_id_priv, + enum cm_msg_sequence msg_seq) { u64 hi_tid, low_tid; hi_tid = ((u64) cm_id_priv->av.port->mad_agent->hi_tid) << 32; - low_tid = (u64) (cm_id_priv->id.local_id | (msg_seq << 30)); + low_tid = (u64) ((__force u32)cm_id_priv->id.local_id | + (msg_seq << 30)); return cpu_to_be64(hi_tid | low_tid); } static void cm_format_mad_hdr(struct ib_mad_hdr *hdr, - enum cm_msg_attr_id attr_id, u64 tid) + __be16 attr_id, __be64 tid) { hdr->base_version = IB_MGMT_BASE_VERSION; hdr->mgmt_class = IB_MGMT_CLASS_CM; @@ -896,7 +899,7 @@ int ib_send_cm_req(struct 
ib_cm_id *cm_id, goto error1; } cm_id->service_id = param->service_id; - cm_id->service_mask = ~0ULL; + cm_id->service_mask = __constant_cpu_to_be64(~0ULL); cm_id_priv->timeout_ms = cm_convert_to_ms( param->primary_path->packet_life_time) * 2 + cm_convert_to_ms( @@ -963,7 +966,7 @@ static int cm_issue_rej(struct cm_port *port, rej_msg->remote_comm_id = rcv_msg->local_comm_id; rej_msg->local_comm_id = rcv_msg->remote_comm_id; cm_rej_set_msg_rejected(rej_msg, msg_rejected); - rej_msg->reason = reason; + rej_msg->reason = cpu_to_be16(reason); if (ari && ari_length) { cm_rej_set_reject_info_len(rej_msg, ari_length); @@ -977,8 +980,8 @@ static int cm_issue_rej(struct cm_port *port, return ret; } -static inline int cm_is_active_peer(u64 local_ca_guid, u64 remote_ca_guid, - u32 local_qpn, u32 remote_qpn) +static inline int cm_is_active_peer(__be64 local_ca_guid, __be64 remote_ca_guid, + __be32 local_qpn, __be32 remote_qpn) { return (be64_to_cpu(local_ca_guid) > be64_to_cpu(remote_ca_guid) || ((local_ca_guid == remote_ca_guid) && @@ -1137,7 +1140,7 @@ static void cm_format_rej(struct cm_rej_msg *rej_msg, break; } - rej_msg->reason = reason; + rej_msg->reason = cpu_to_be16(reason); if (ari && ari_length) { cm_rej_set_reject_info_len(rej_msg, ari_length); memcpy(rej_msg->ari, ari, ari_length); @@ -1276,7 +1279,7 @@ static int cm_req_handler(struct cm_work *work) cm_id_priv->id.cm_handler = listen_cm_id_priv->id.cm_handler; cm_id_priv->id.context = listen_cm_id_priv->id.context; cm_id_priv->id.service_id = req_msg->service_id; - cm_id_priv->id.service_mask = ~0ULL; + cm_id_priv->id.service_mask = __constant_cpu_to_be64(~0ULL); cm_format_paths_from_req(req_msg, &work->path[0], &work->path[1]); ret = cm_init_av_by_path(&work->path[0], &cm_id_priv->av); @@ -1969,7 +1972,7 @@ static void cm_format_rej_event(struct cm_work *work) param = &work->cm_event.param.rej_rcvd; param->ari = rej_msg->ari; param->ari_length = cm_rej_get_reject_info_len(rej_msg); - param->reason = 
rej_msg->reason; + param->reason = __be16_to_cpu(rej_msg->reason); work->cm_event.private_data = &rej_msg->private_data; } @@ -1978,20 +1981,20 @@ static struct cm_id_private * cm_acquire_rejected_id(struct cm_rej_msg *rej_msg) struct cm_timewait_info *timewait_info; struct cm_id_private *cm_id_priv; unsigned long flags; - u32 remote_id; + __be32 remote_id; remote_id = rej_msg->local_comm_id; - if (rej_msg->reason == IB_CM_REJ_TIMEOUT) { + if (__be16_to_cpu(rej_msg->reason) == IB_CM_REJ_TIMEOUT) { spin_lock_irqsave(&cm.lock, flags); - timewait_info = cm_find_remote_id( *((u64 *) rej_msg->ari), + timewait_info = cm_find_remote_id( *((__be64 *) rej_msg->ari), remote_id); if (!timewait_info) { spin_unlock_irqrestore(&cm.lock, flags); return NULL; } cm_id_priv = idr_find(&cm.local_id_table, - (int) timewait_info->work.local_id); + (__force int) timewait_info->work.local_id); if (cm_id_priv) { if (cm_id_priv->id.remote_id == remote_id) atomic_inc(&cm_id_priv->refcount); @@ -2032,7 +2035,7 @@ static int cm_rej_handler(struct cm_work *work) /* fall through */ case IB_CM_REQ_RCVD: case IB_CM_MRA_REQ_SENT: - if (rej_msg->reason == IB_CM_REJ_STALE_CONN) + if (__be16_to_cpu(rej_msg->reason) == IB_CM_REJ_STALE_CONN) cm_enter_timewait(cm_id_priv); else cm_reset_to_idle(cm_id_priv); @@ -2553,7 +2556,7 @@ static void cm_format_sidr_req(struct cm_sidr_req_msg *sidr_req_msg, cm_format_mad_hdr(&sidr_req_msg->hdr, CM_SIDR_REQ_ATTR_ID, cm_form_tid(cm_id_priv, CM_MSG_SEQUENCE_SIDR)); sidr_req_msg->request_id = cm_id_priv->id.local_id; - sidr_req_msg->pkey = param->pkey; + sidr_req_msg->pkey = cpu_to_be16(param->pkey); sidr_req_msg->service_id = param->service_id; if (param->private_data && param->private_data_len) @@ -2580,7 +2583,7 @@ int ib_send_cm_sidr_req(struct ib_cm_id *cm_id, goto out; cm_id->service_id = param->service_id; - cm_id->service_mask = ~0ULL; + cm_id->service_mask = __constant_cpu_to_be64(~0ULL); cm_id_priv->timeout_ms = param->timeout_ms; cm_id_priv->max_cm_retries 
= param->max_cm_retries; ret = cm_alloc_msg(cm_id_priv, &msg); @@ -2621,7 +2624,7 @@ static void cm_format_sidr_req_event(struct cm_work *work, sidr_req_msg = (struct cm_sidr_req_msg *) work->mad_recv_wc->recv_buf.mad; param = &work->cm_event.param.sidr_req_rcvd; - param->pkey = sidr_req_msg->pkey; + param->pkey = __be16_to_cpu(sidr_req_msg->pkey); param->listen_id = listen_id; param->device = work->port->mad_agent->device; param->port = work->port->port_num; @@ -2645,7 +2648,7 @@ static int cm_sidr_req_handler(struct cm_work *work) sidr_req_msg = (struct cm_sidr_req_msg *) work->mad_recv_wc->recv_buf.mad; wc = work->mad_recv_wc->wc; - cm_id_priv->av.dgid.global.subnet_prefix = wc->slid; + cm_id_priv->av.dgid.global.subnet_prefix = cpu_to_be64(wc->slid); cm_id_priv->av.dgid.global.interface_id = 0; cm_init_av_for_response(work->port, work->mad_recv_wc->wc, &cm_id_priv->av); @@ -2673,7 +2676,7 @@ static int cm_sidr_req_handler(struct cm_work *work) cm_id_priv->id.cm_handler = cur_cm_id_priv->id.cm_handler; cm_id_priv->id.context = cur_cm_id_priv->id.context; cm_id_priv->id.service_id = sidr_req_msg->service_id; - cm_id_priv->id.service_mask = ~0ULL; + cm_id_priv->id.service_mask = __constant_cpu_to_be64(~0ULL); cm_format_sidr_req_event(work, &cur_cm_id_priv->id); cm_process_work(cm_id_priv, work); @@ -3175,10 +3178,10 @@ int ib_cm_init_qp_attr(struct ib_cm_id *cm_id, } EXPORT_SYMBOL(ib_cm_init_qp_attr); -static u64 cm_get_ca_guid(struct ib_device *device) +static __be64 cm_get_ca_guid(struct ib_device *device) { struct ib_device_attr *device_attr; - u64 guid; + __be64 guid; int ret; device_attr = kmalloc(sizeof *device_attr, GFP_KERNEL); diff --git a/drivers/infiniband/core/cm_msgs.h b/drivers/infiniband/core/cm_msgs.h index 15a309a77b2b..807a9fbb38f5 100644 --- a/drivers/infiniband/core/cm_msgs.h +++ b/drivers/infiniband/core/cm_msgs.h @@ -43,19 +43,17 @@ #define IB_CM_CLASS_VERSION 2 /* IB specification 1.2 */ -enum cm_msg_attr_id { - CM_REQ_ATTR_ID = 
__constant_htons(0x0010), - CM_MRA_ATTR_ID = __constant_htons(0x0011), - CM_REJ_ATTR_ID = __constant_htons(0x0012), - CM_REP_ATTR_ID = __constant_htons(0x0013), - CM_RTU_ATTR_ID = __constant_htons(0x0014), - CM_DREQ_ATTR_ID = __constant_htons(0x0015), - CM_DREP_ATTR_ID = __constant_htons(0x0016), - CM_SIDR_REQ_ATTR_ID = __constant_htons(0x0017), - CM_SIDR_REP_ATTR_ID = __constant_htons(0x0018), - CM_LAP_ATTR_ID = __constant_htons(0x0019), - CM_APR_ATTR_ID = __constant_htons(0x001A) -}; +#define CM_REQ_ATTR_ID __constant_htons(0x0010) +#define CM_MRA_ATTR_ID __constant_htons(0x0011) +#define CM_REJ_ATTR_ID __constant_htons(0x0012) +#define CM_REP_ATTR_ID __constant_htons(0x0013) +#define CM_RTU_ATTR_ID __constant_htons(0x0014) +#define CM_DREQ_ATTR_ID __constant_htons(0x0015) +#define CM_DREP_ATTR_ID __constant_htons(0x0016) +#define CM_SIDR_REQ_ATTR_ID __constant_htons(0x0017) +#define CM_SIDR_REP_ATTR_ID __constant_htons(0x0018) +#define CM_LAP_ATTR_ID __constant_htons(0x0019) +#define CM_APR_ATTR_ID __constant_htons(0x001A) enum cm_msg_sequence { CM_MSG_SEQUENCE_REQ, @@ -67,35 +65,35 @@ enum cm_msg_sequence { struct cm_req_msg { struct ib_mad_hdr hdr; - u32 local_comm_id; - u32 rsvd4; - u64 service_id; - u64 local_ca_guid; - u32 rsvd24; - u32 local_qkey; + __be32 local_comm_id; + __be32 rsvd4; + __be64 service_id; + __be64 local_ca_guid; + __be32 rsvd24; + __be32 local_qkey; /* local QPN:24, responder resources:8 */ - u32 offset32; + __be32 offset32; /* local EECN:24, initiator depth:8 */ - u32 offset36; + __be32 offset36; /* * remote EECN:24, remote CM response timeout:5, * transport service type:2, end-to-end flow control:1 */ - u32 offset40; + __be32 offset40; /* starting PSN:24, local CM response timeout:5, retry count:3 */ - u32 offset44; - u16 pkey; + __be32 offset44; + __be16 pkey; /* path MTU:4, RDC exists:1, RNR retry count:3. 
*/ u8 offset50; /* max CM Retries:4, SRQ:1, rsvd:3 */ u8 offset51; - u16 primary_local_lid; - u16 primary_remote_lid; + __be16 primary_local_lid; + __be16 primary_remote_lid; union ib_gid primary_local_gid; union ib_gid primary_remote_gid; /* flow label:20, rsvd:6, packet rate:6 */ - u32 primary_offset88; + __be32 primary_offset88; u8 primary_traffic_class; u8 primary_hop_limit; /* SL:4, subnet local:1, rsvd:3 */ @@ -103,12 +101,12 @@ struct cm_req_msg { /* local ACK timeout:5, rsvd:3 */ u8 primary_offset95; - u16 alt_local_lid; - u16 alt_remote_lid; + __be16 alt_local_lid; + __be16 alt_remote_lid; union ib_gid alt_local_gid; union ib_gid alt_remote_gid; /* flow label:20, rsvd:6, packet rate:6 */ - u32 alt_offset132; + __be32 alt_offset132; u8 alt_traffic_class; u8 alt_hop_limit; /* SL:4, subnet local:1, rsvd:3 */ @@ -120,12 +118,12 @@ struct cm_req_msg { } __attribute__ ((packed)); -static inline u32 cm_req_get_local_qpn(struct cm_req_msg *req_msg) +static inline __be32 cm_req_get_local_qpn(struct cm_req_msg *req_msg) { return cpu_to_be32(be32_to_cpu(req_msg->offset32) >> 8); } -static inline void cm_req_set_local_qpn(struct cm_req_msg *req_msg, u32 qpn) +static inline void cm_req_set_local_qpn(struct cm_req_msg *req_msg, __be32 qpn) { req_msg->offset32 = cpu_to_be32((be32_to_cpu(qpn) << 8) | (be32_to_cpu(req_msg->offset32) & @@ -208,13 +206,13 @@ static inline void cm_req_set_flow_ctrl(struct cm_req_msg *req_msg, 0xFFFFFFFE)); } -static inline u32 cm_req_get_starting_psn(struct cm_req_msg *req_msg) +static inline __be32 cm_req_get_starting_psn(struct cm_req_msg *req_msg) { return cpu_to_be32(be32_to_cpu(req_msg->offset44) >> 8); } static inline void cm_req_set_starting_psn(struct cm_req_msg *req_msg, - u32 starting_psn) + __be32 starting_psn) { req_msg->offset44 = cpu_to_be32((be32_to_cpu(starting_psn) << 8) | (be32_to_cpu(req_msg->offset44) & 0x000000FF)); @@ -288,13 +286,13 @@ static inline void cm_req_set_srq(struct cm_req_msg *req_msg, u8 srq) ((srq & 0x1) << 
3)); } -static inline u32 cm_req_get_primary_flow_label(struct cm_req_msg *req_msg) +static inline __be32 cm_req_get_primary_flow_label(struct cm_req_msg *req_msg) { - return cpu_to_be32((be32_to_cpu(req_msg->primary_offset88) >> 12)); + return cpu_to_be32(be32_to_cpu(req_msg->primary_offset88) >> 12); } static inline void cm_req_set_primary_flow_label(struct cm_req_msg *req_msg, - u32 flow_label) + __be32 flow_label) { req_msg->primary_offset88 = cpu_to_be32( (be32_to_cpu(req_msg->primary_offset88) & @@ -350,13 +348,13 @@ static inline void cm_req_set_primary_local_ack_timeout(struct cm_req_msg *req_m (local_ack_timeout << 3)); } -static inline u32 cm_req_get_alt_flow_label(struct cm_req_msg *req_msg) +static inline __be32 cm_req_get_alt_flow_label(struct cm_req_msg *req_msg) { - return cpu_to_be32((be32_to_cpu(req_msg->alt_offset132) >> 12)); + return cpu_to_be32(be32_to_cpu(req_msg->alt_offset132) >> 12); } static inline void cm_req_set_alt_flow_label(struct cm_req_msg *req_msg, - u32 flow_label) + __be32 flow_label) { req_msg->alt_offset132 = cpu_to_be32( (be32_to_cpu(req_msg->alt_offset132) & @@ -422,8 +420,8 @@ enum cm_msg_response { struct cm_mra_msg { struct ib_mad_hdr hdr; - u32 local_comm_id; - u32 remote_comm_id; + __be32 local_comm_id; + __be32 remote_comm_id; /* message MRAed:2, rsvd:6 */ u8 offset8; /* service timeout:5, rsvd:3 */ @@ -458,13 +456,13 @@ static inline void cm_mra_set_service_timeout(struct cm_mra_msg *mra_msg, struct cm_rej_msg { struct ib_mad_hdr hdr; - u32 local_comm_id; - u32 remote_comm_id; + __be32 local_comm_id; + __be32 remote_comm_id; /* message REJected:2, rsvd:6 */ u8 offset8; /* reject info length:7, rsvd:1. 
*/ u8 offset9; - u16 reason; + __be16 reason; u8 ari[IB_CM_REJ_ARI_LENGTH]; u8 private_data[IB_CM_REJ_PRIVATE_DATA_SIZE]; @@ -495,45 +493,45 @@ static inline void cm_rej_set_reject_info_len(struct cm_rej_msg *rej_msg, struct cm_rep_msg { struct ib_mad_hdr hdr; - u32 local_comm_id; - u32 remote_comm_id; - u32 local_qkey; + __be32 local_comm_id; + __be32 remote_comm_id; + __be32 local_qkey; /* local QPN:24, rsvd:8 */ - u32 offset12; + __be32 offset12; /* local EECN:24, rsvd:8 */ - u32 offset16; + __be32 offset16; /* starting PSN:24 rsvd:8 */ - u32 offset20; + __be32 offset20; u8 resp_resources; u8 initiator_depth; /* target ACK delay:5, failover accepted:2, end-to-end flow control:1 */ u8 offset26; /* RNR retry count:3, SRQ:1, rsvd:5 */ u8 offset27; - u64 local_ca_guid; + __be64 local_ca_guid; u8 private_data[IB_CM_REP_PRIVATE_DATA_SIZE]; } __attribute__ ((packed)); -static inline u32 cm_rep_get_local_qpn(struct cm_rep_msg *rep_msg) +static inline __be32 cm_rep_get_local_qpn(struct cm_rep_msg *rep_msg) { return cpu_to_be32(be32_to_cpu(rep_msg->offset12) >> 8); } -static inline void cm_rep_set_local_qpn(struct cm_rep_msg *rep_msg, u32 qpn) +static inline void cm_rep_set_local_qpn(struct cm_rep_msg *rep_msg, __be32 qpn) { rep_msg->offset12 = cpu_to_be32((be32_to_cpu(qpn) << 8) | (be32_to_cpu(rep_msg->offset12) & 0x000000FF)); } -static inline u32 cm_rep_get_starting_psn(struct cm_rep_msg *rep_msg) +static inline __be32 cm_rep_get_starting_psn(struct cm_rep_msg *rep_msg) { return cpu_to_be32(be32_to_cpu(rep_msg->offset20) >> 8); } static inline void cm_rep_set_starting_psn(struct cm_rep_msg *rep_msg, - u32 starting_psn) + __be32 starting_psn) { rep_msg->offset20 = cpu_to_be32((be32_to_cpu(starting_psn) << 8) | (be32_to_cpu(rep_msg->offset20) & 0x000000FF)); @@ -600,8 +598,8 @@ static inline void cm_rep_set_srq(struct cm_rep_msg *rep_msg, u8 srq) struct cm_rtu_msg { struct ib_mad_hdr hdr; - u32 local_comm_id; - u32 remote_comm_id; + __be32 local_comm_id; + __be32 
remote_comm_id; u8 private_data[IB_CM_RTU_PRIVATE_DATA_SIZE]; @@ -610,21 +608,21 @@ struct cm_rtu_msg { struct cm_dreq_msg { struct ib_mad_hdr hdr; - u32 local_comm_id; - u32 remote_comm_id; + __be32 local_comm_id; + __be32 remote_comm_id; /* remote QPN/EECN:24, rsvd:8 */ - u32 offset8; + __be32 offset8; u8 private_data[IB_CM_DREQ_PRIVATE_DATA_SIZE]; } __attribute__ ((packed)); -static inline u32 cm_dreq_get_remote_qpn(struct cm_dreq_msg *dreq_msg) +static inline __be32 cm_dreq_get_remote_qpn(struct cm_dreq_msg *dreq_msg) { return cpu_to_be32(be32_to_cpu(dreq_msg->offset8) >> 8); } -static inline void cm_dreq_set_remote_qpn(struct cm_dreq_msg *dreq_msg, u32 qpn) +static inline void cm_dreq_set_remote_qpn(struct cm_dreq_msg *dreq_msg, __be32 qpn) { dreq_msg->offset8 = cpu_to_be32((be32_to_cpu(qpn) << 8) | (be32_to_cpu(dreq_msg->offset8) & 0x000000FF)); @@ -633,8 +631,8 @@ static inline void cm_dreq_set_remote_qpn(struct cm_dreq_msg *dreq_msg, u32 qpn) struct cm_drep_msg { struct ib_mad_hdr hdr; - u32 local_comm_id; - u32 remote_comm_id; + __be32 local_comm_id; + __be32 remote_comm_id; u8 private_data[IB_CM_DREP_PRIVATE_DATA_SIZE]; @@ -643,37 +641,37 @@ struct cm_drep_msg { struct cm_lap_msg { struct ib_mad_hdr hdr; - u32 local_comm_id; - u32 remote_comm_id; + __be32 local_comm_id; + __be32 remote_comm_id; - u32 rsvd8; + __be32 rsvd8; /* remote QPN/EECN:24, remote CM response timeout:5, rsvd:3 */ - u32 offset12; - u32 rsvd16; + __be32 offset12; + __be32 rsvd16; - u16 alt_local_lid; - u16 alt_remote_lid; + __be16 alt_local_lid; + __be16 alt_remote_lid; union ib_gid alt_local_gid; union ib_gid alt_remote_gid; /* flow label:20, rsvd:4, traffic class:8 */ - u32 offset56; + __be32 offset56; u8 alt_hop_limit; /* rsvd:2, packet rate:6 */ - uint8_t offset61; + u8 offset61; /* SL:4, subnet local:1, rsvd:3 */ - uint8_t offset62; + u8 offset62; /* local ACK timeout:5, rsvd:3 */ - uint8_t offset63; + u8 offset63; u8 private_data[IB_CM_LAP_PRIVATE_DATA_SIZE]; } __attribute__ 
((packed)); -static inline u32 cm_lap_get_remote_qpn(struct cm_lap_msg *lap_msg) +static inline __be32 cm_lap_get_remote_qpn(struct cm_lap_msg *lap_msg) { return cpu_to_be32(be32_to_cpu(lap_msg->offset12) >> 8); } -static inline void cm_lap_set_remote_qpn(struct cm_lap_msg *lap_msg, u32 qpn) +static inline void cm_lap_set_remote_qpn(struct cm_lap_msg *lap_msg, __be32 qpn) { lap_msg->offset12 = cpu_to_be32((be32_to_cpu(qpn) << 8) | (be32_to_cpu(lap_msg->offset12) & @@ -693,17 +691,17 @@ static inline void cm_lap_set_remote_resp_timeout(struct cm_lap_msg *lap_msg, 0xFFFFFF07)); } -static inline u32 cm_lap_get_flow_label(struct cm_lap_msg *lap_msg) +static inline __be32 cm_lap_get_flow_label(struct cm_lap_msg *lap_msg) { - return be32_to_cpu(lap_msg->offset56) >> 12; + return cpu_to_be32(be32_to_cpu(lap_msg->offset56) >> 12); } static inline void cm_lap_set_flow_label(struct cm_lap_msg *lap_msg, - u32 flow_label) + __be32 flow_label) { - lap_msg->offset56 = cpu_to_be32((flow_label << 12) | - (be32_to_cpu(lap_msg->offset56) & - 0x00000FFF)); + lap_msg->offset56 = cpu_to_be32( + (be32_to_cpu(lap_msg->offset56) & 0x00000FFF) | + (be32_to_cpu(flow_label) << 12)); } static inline u8 cm_lap_get_traffic_class(struct cm_lap_msg *lap_msg) @@ -766,8 +764,8 @@ static inline void cm_lap_set_local_ack_timeout(struct cm_lap_msg *lap_msg, struct cm_apr_msg { struct ib_mad_hdr hdr; - u32 local_comm_id; - u32 remote_comm_id; + __be32 local_comm_id; + __be32 remote_comm_id; u8 info_length; u8 ap_status; @@ -779,10 +777,10 @@ struct cm_apr_msg { struct cm_sidr_req_msg { struct ib_mad_hdr hdr; - u32 request_id; - u16 pkey; - u16 rsvd; - u64 service_id; + __be32 request_id; + __be16 pkey; + __be16 rsvd; + __be64 service_id; u8 private_data[IB_CM_SIDR_REQ_PRIVATE_DATA_SIZE]; } __attribute__ ((packed)); @@ -790,26 +788,26 @@ struct cm_sidr_req_msg { struct cm_sidr_rep_msg { struct ib_mad_hdr hdr; - u32 request_id; + __be32 request_id; u8 status; u8 info_length; - u16 rsvd; + __be16 rsvd; /* 
QPN:24, rsvd:8 */ - u32 offset8; - u64 service_id; - u32 qkey; + __be32 offset8; + __be64 service_id; + __be32 qkey; u8 info[IB_CM_SIDR_REP_INFO_LENGTH]; u8 private_data[IB_CM_SIDR_REP_PRIVATE_DATA_SIZE]; } __attribute__ ((packed)); -static inline u32 cm_sidr_rep_get_qpn(struct cm_sidr_rep_msg *sidr_rep_msg) +static inline __be32 cm_sidr_rep_get_qpn(struct cm_sidr_rep_msg *sidr_rep_msg) { return cpu_to_be32(be32_to_cpu(sidr_rep_msg->offset8) >> 8); } static inline void cm_sidr_rep_set_qpn(struct cm_sidr_rep_msg *sidr_rep_msg, - u32 qpn) + __be32 qpn) { sidr_rep_msg->offset8 = cpu_to_be32((be32_to_cpu(qpn) << 8) | (be32_to_cpu(sidr_rep_msg->offset8) & diff --git a/drivers/infiniband/core/mad.c b/drivers/infiniband/core/mad.c index b97e210ce9c8..214493cb3a0b 100644 --- a/drivers/infiniband/core/mad.c +++ b/drivers/infiniband/core/mad.c @@ -693,7 +693,8 @@ static int handle_outgoing_dr_smp(struct ib_mad_agent_private *mad_agent_priv, goto out; } - build_smp_wc(send_wr->wr_id, smp->dr_slid, send_wr->wr.ud.pkey_index, + build_smp_wc(send_wr->wr_id, be16_to_cpu(smp->dr_slid), + send_wr->wr.ud.pkey_index, send_wr->wr.ud.port_num, &mad_wc); /* No GRH for DR SMP */ @@ -1554,7 +1555,7 @@ static int is_data_mad(struct ib_mad_agent_private *mad_agent_priv, } struct ib_mad_send_wr_private* -ib_find_send_mad(struct ib_mad_agent_private *mad_agent_priv, u64 tid) +ib_find_send_mad(struct ib_mad_agent_private *mad_agent_priv, __be64 tid) { struct ib_mad_send_wr_private *mad_send_wr; @@ -1597,7 +1598,7 @@ static void ib_mad_complete_recv(struct ib_mad_agent_private *mad_agent_priv, struct ib_mad_send_wr_private *mad_send_wr; struct ib_mad_send_wc mad_send_wc; unsigned long flags; - u64 tid; + __be64 tid; INIT_LIST_HEAD(&mad_recv_wc->rmpp_list); list_add(&mad_recv_wc->recv_buf.list, &mad_recv_wc->rmpp_list); @@ -2165,7 +2166,8 @@ static void local_completions(void *data) * Defined behavior is to complete response * before request */ - build_smp_wc(local->wr_id, IB_LID_PERMISSIVE, + 
build_smp_wc(local->wr_id, + be16_to_cpu(IB_LID_PERMISSIVE), 0 /* pkey index */, recv_mad_agent->agent.port_num, &wc); diff --git a/drivers/infiniband/core/mad_priv.h b/drivers/infiniband/core/mad_priv.h index 568da10b05ab..807b0f366353 100644 --- a/drivers/infiniband/core/mad_priv.h +++ b/drivers/infiniband/core/mad_priv.h @@ -121,7 +121,7 @@ struct ib_mad_send_wr_private { struct ib_send_wr send_wr; struct ib_sge sg_list[IB_MAD_SEND_REQ_MAX_SG]; u64 wr_id; /* client WR ID */ - u64 tid; + __be64 tid; unsigned long timeout; int retries; int retry; @@ -144,7 +144,7 @@ struct ib_mad_local_private { struct ib_send_wr send_wr; struct ib_sge sg_list[IB_MAD_SEND_REQ_MAX_SG]; u64 wr_id; /* client WR ID */ - u64 tid; + __be64 tid; }; struct ib_mad_mgmt_method_table { @@ -210,7 +210,7 @@ extern kmem_cache_t *ib_mad_cache; int ib_send_mad(struct ib_mad_send_wr_private *mad_send_wr); struct ib_mad_send_wr_private * -ib_find_send_mad(struct ib_mad_agent_private *mad_agent_priv, u64 tid); +ib_find_send_mad(struct ib_mad_agent_private *mad_agent_priv, __be64 tid); void ib_mad_complete_send_wr(struct ib_mad_send_wr_private *mad_send_wr, struct ib_mad_send_wc *mad_send_wc); diff --git a/drivers/infiniband/core/mad_rmpp.c b/drivers/infiniband/core/mad_rmpp.c index 8f1eb80e421f..d68bf7e220f9 100644 --- a/drivers/infiniband/core/mad_rmpp.c +++ b/drivers/infiniband/core/mad_rmpp.c @@ -61,7 +61,7 @@ struct mad_rmpp_recv { int seg_num; int newwin; - u64 tid; + __be64 tid; u32 src_qp; u16 slid; u8 mgmt_class; diff --git a/drivers/infiniband/core/sysfs.c b/drivers/infiniband/core/sysfs.c index b2e779996cbe..bf7334e7fac6 100644 --- a/drivers/infiniband/core/sysfs.c +++ b/drivers/infiniband/core/sysfs.c @@ -255,14 +255,14 @@ static ssize_t show_port_gid(struct ib_port *p, struct port_attribute *attr, return ret; return sprintf(buf, "%04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x\n", - be16_to_cpu(((u16 *) gid.raw)[0]), - be16_to_cpu(((u16 *) gid.raw)[1]), - be16_to_cpu(((u16 *) gid.raw)[2]), - 
be16_to_cpu(((u16 *) gid.raw)[3]), - be16_to_cpu(((u16 *) gid.raw)[4]), - be16_to_cpu(((u16 *) gid.raw)[5]), - be16_to_cpu(((u16 *) gid.raw)[6]), - be16_to_cpu(((u16 *) gid.raw)[7])); + be16_to_cpu(((__be16 *) gid.raw)[0]), + be16_to_cpu(((__be16 *) gid.raw)[1]), + be16_to_cpu(((__be16 *) gid.raw)[2]), + be16_to_cpu(((__be16 *) gid.raw)[3]), + be16_to_cpu(((__be16 *) gid.raw)[4]), + be16_to_cpu(((__be16 *) gid.raw)[5]), + be16_to_cpu(((__be16 *) gid.raw)[6]), + be16_to_cpu(((__be16 *) gid.raw)[7])); } static ssize_t show_port_pkey(struct ib_port *p, struct port_attribute *attr, @@ -334,11 +334,11 @@ static ssize_t show_pma_counter(struct ib_port *p, struct port_attribute *attr, break; case 16: ret = sprintf(buf, "%u\n", - be16_to_cpup((u16 *)(out_mad->data + 40 + offset / 8))); + be16_to_cpup((__be16 *)(out_mad->data + 40 + offset / 8))); break; case 32: ret = sprintf(buf, "%u\n", - be32_to_cpup((u32 *)(out_mad->data + 40 + offset / 8))); + be32_to_cpup((__be32 *)(out_mad->data + 40 + offset / 8))); break; default: ret = 0; @@ -600,10 +600,10 @@ static ssize_t show_sys_image_guid(struct class_device *cdev, char *buf) return ret; return sprintf(buf, "%04x:%04x:%04x:%04x\n", - be16_to_cpu(((u16 *) &attr.sys_image_guid)[0]), - be16_to_cpu(((u16 *) &attr.sys_image_guid)[1]), - be16_to_cpu(((u16 *) &attr.sys_image_guid)[2]), - be16_to_cpu(((u16 *) &attr.sys_image_guid)[3])); + be16_to_cpu(((__be16 *) &attr.sys_image_guid)[0]), + be16_to_cpu(((__be16 *) &attr.sys_image_guid)[1]), + be16_to_cpu(((__be16 *) &attr.sys_image_guid)[2]), + be16_to_cpu(((__be16 *) &attr.sys_image_guid)[3])); } static ssize_t show_node_guid(struct class_device *cdev, char *buf) @@ -617,10 +617,10 @@ static ssize_t show_node_guid(struct class_device *cdev, char *buf) return ret; return sprintf(buf, "%04x:%04x:%04x:%04x\n", - be16_to_cpu(((u16 *) &attr.node_guid)[0]), - be16_to_cpu(((u16 *) &attr.node_guid)[1]), - be16_to_cpu(((u16 *) &attr.node_guid)[2]), - be16_to_cpu(((u16 *) 
&attr.node_guid)[3])); + be16_to_cpu(((__be16 *) &attr.node_guid)[0]), + be16_to_cpu(((__be16 *) &attr.node_guid)[1]), + be16_to_cpu(((__be16 *) &attr.node_guid)[2]), + be16_to_cpu(((__be16 *) &attr.node_guid)[3])); } static CLASS_DEVICE_ATTR(node_type, S_IRUGO, show_node_type, NULL); diff --git a/drivers/infiniband/core/ud_header.c b/drivers/infiniband/core/ud_header.c index b32d43ec0a33..89cd76d7c5a5 100644 --- a/drivers/infiniband/core/ud_header.c +++ b/drivers/infiniband/core/ud_header.c @@ -195,6 +195,7 @@ void ib_ud_header_init(int payload_bytes, struct ib_ud_header *header) { int header_len; + u16 packet_length; memset(header, 0, sizeof *header); @@ -209,7 +210,7 @@ void ib_ud_header_init(int payload_bytes, header->lrh.link_version = 0; header->lrh.link_next_header = grh_present ? IB_LNH_IBA_GLOBAL : IB_LNH_IBA_LOCAL; - header->lrh.packet_length = (IB_LRH_BYTES + + packet_length = (IB_LRH_BYTES + IB_BTH_BYTES + IB_DETH_BYTES + payload_bytes + @@ -218,8 +219,7 @@ void ib_ud_header_init(int payload_bytes, header->grh_present = grh_present; if (grh_present) { - header->lrh.packet_length += IB_GRH_BYTES / 4; - + packet_length += IB_GRH_BYTES / 4; header->grh.ip_version = 6; header->grh.payload_length = cpu_to_be16((IB_BTH_BYTES + @@ -230,7 +230,7 @@ void ib_ud_header_init(int payload_bytes, header->grh.next_header = 0x1b; } - cpu_to_be16s(&header->lrh.packet_length); + header->lrh.packet_length = cpu_to_be16(packet_length); if (header->immediate_present) header->bth.opcode = IB_OPCODE_UD_SEND_ONLY_WITH_IMMEDIATE; diff --git a/drivers/infiniband/core/user_mad.c b/drivers/infiniband/core/user_mad.c index 8a19dd4d38f8..16d91f187758 100644 --- a/drivers/infiniband/core/user_mad.c +++ b/drivers/infiniband/core/user_mad.c @@ -271,7 +271,7 @@ static ssize_t ib_umad_write(struct file *filp, const char __user *buf, struct ib_send_wr *bad_wr; struct ib_rmpp_mad *rmpp_mad; u8 method; - u64 *tid; + __be64 *tid; int ret, length, hdr_len, data_len, rmpp_hdr_size; int 
rmpp_active = 0; @@ -316,7 +316,7 @@ static ssize_t ib_umad_write(struct file *filp, const char __user *buf, if (packet->mad.hdr.grh_present) { ah_attr.ah_flags = IB_AH_GRH; memcpy(ah_attr.grh.dgid.raw, packet->mad.hdr.gid, 16); - ah_attr.grh.flow_label = packet->mad.hdr.flow_label; + ah_attr.grh.flow_label = be32_to_cpu(packet->mad.hdr.flow_label); ah_attr.grh.hop_limit = packet->mad.hdr.hop_limit; ah_attr.grh.traffic_class = packet->mad.hdr.traffic_class; } diff --git a/drivers/infiniband/hw/mthca/mthca_av.c b/drivers/infiniband/hw/mthca/mthca_av.c index d58dcbe66488..e596210f11b3 100644 --- a/drivers/infiniband/hw/mthca/mthca_av.c +++ b/drivers/infiniband/hw/mthca/mthca_av.c @@ -41,16 +41,16 @@ #include "mthca_dev.h" struct mthca_av { - u32 port_pd; - u8 reserved1; - u8 g_slid; - u16 dlid; - u8 reserved2; - u8 gid_index; - u8 msg_sr; - u8 hop_limit; - u32 sl_tclass_flowlabel; - u32 dgid[4]; + __be32 port_pd; + u8 reserved1; + u8 g_slid; + __be16 dlid; + u8 reserved2; + u8 gid_index; + u8 msg_sr; + u8 hop_limit; + __be32 sl_tclass_flowlabel; + __be32 dgid[4]; }; int mthca_create_ah(struct mthca_dev *dev, @@ -128,7 +128,7 @@ on_hca_fail: av, (unsigned long) ah->avdma); for (j = 0; j < 8; ++j) printk(KERN_DEBUG " [%2x] %08x\n", - j * 4, be32_to_cpu(((u32 *) av)[j])); + j * 4, be32_to_cpu(((__be32 *) av)[j])); } if (ah->type == MTHCA_AH_ON_HCA) { @@ -169,7 +169,7 @@ int mthca_read_ah(struct mthca_dev *dev, struct mthca_ah *ah, header->lrh.service_level = be32_to_cpu(ah->av->sl_tclass_flowlabel) >> 28; header->lrh.destination_lid = ah->av->dlid; - header->lrh.source_lid = ah->av->g_slid & 0x7f; + header->lrh.source_lid = cpu_to_be16(ah->av->g_slid & 0x7f); if (ah->av->g_slid & 0x80) { header->grh_present = 1; header->grh.traffic_class = diff --git a/drivers/infiniband/hw/mthca/mthca_cmd.c b/drivers/infiniband/hw/mthca/mthca_cmd.c index 0ff5900e0930..1e60487ecd7f 100644 --- a/drivers/infiniband/hw/mthca/mthca_cmd.c +++ b/drivers/infiniband/hw/mthca/mthca_cmd.c @@ 
-220,20 +220,20 @@ static int mthca_cmd_post(struct mthca_dev *dev, * (and some architectures such as ia64 implement memcpy_toio * in terms of writeb). */ - __raw_writel(cpu_to_be32(in_param >> 32), dev->hcr + 0 * 4); - __raw_writel(cpu_to_be32(in_param & 0xfffffffful), dev->hcr + 1 * 4); - __raw_writel(cpu_to_be32(in_modifier), dev->hcr + 2 * 4); - __raw_writel(cpu_to_be32(out_param >> 32), dev->hcr + 3 * 4); - __raw_writel(cpu_to_be32(out_param & 0xfffffffful), dev->hcr + 4 * 4); - __raw_writel(cpu_to_be32(token << 16), dev->hcr + 5 * 4); + __raw_writel((__force u32) cpu_to_be32(in_param >> 32), dev->hcr + 0 * 4); + __raw_writel((__force u32) cpu_to_be32(in_param & 0xfffffffful), dev->hcr + 1 * 4); + __raw_writel((__force u32) cpu_to_be32(in_modifier), dev->hcr + 2 * 4); + __raw_writel((__force u32) cpu_to_be32(out_param >> 32), dev->hcr + 3 * 4); + __raw_writel((__force u32) cpu_to_be32(out_param & 0xfffffffful), dev->hcr + 4 * 4); + __raw_writel((__force u32) cpu_to_be32(token << 16), dev->hcr + 5 * 4); /* __raw_writel may not order writes. */ wmb(); - __raw_writel(cpu_to_be32((1 << HCR_GO_BIT) | - (event ? (1 << HCA_E_BIT) : 0) | - (op_modifier << HCR_OPMOD_SHIFT) | - op), dev->hcr + 6 * 4); + __raw_writel((__force u32) cpu_to_be32((1 << HCR_GO_BIT) | + (event ? 
(1 << HCA_E_BIT) : 0) | + (op_modifier << HCR_OPMOD_SHIFT) | + op), dev->hcr + 6 * 4); out: up(&dev->cmd.hcr_sem); @@ -274,12 +274,14 @@ static int mthca_cmd_poll(struct mthca_dev *dev, goto out; } - if (out_is_imm) { - memcpy_fromio(out_param, dev->hcr + HCR_OUT_PARAM_OFFSET, sizeof (u64)); - be64_to_cpus(out_param); - } + if (out_is_imm) + *out_param = + (u64) be32_to_cpu((__force __be32) + __raw_readl(dev->hcr + HCR_OUT_PARAM_OFFSET)) << 32 | + (u64) be32_to_cpu((__force __be32) + __raw_readl(dev->hcr + HCR_OUT_PARAM_OFFSET + 4)); - *status = be32_to_cpu(__raw_readl(dev->hcr + HCR_STATUS_OFFSET)) >> 24; + *status = be32_to_cpu((__force __be32) __raw_readl(dev->hcr + HCR_STATUS_OFFSET)) >> 24; out: up(&dev->cmd.poll_sem); @@ -1122,7 +1124,7 @@ int mthca_INIT_HCA(struct mthca_dev *dev, u8 *status) { struct mthca_mailbox *mailbox; - u32 *inbox; + __be32 *inbox; int err; #define INIT_HCA_IN_SIZE 0x200 @@ -1343,7 +1345,7 @@ int mthca_MAP_ICM(struct mthca_dev *dev, struct mthca_icm *icm, u64 virt, u8 *st int mthca_MAP_ICM_page(struct mthca_dev *dev, u64 dma_addr, u64 virt, u8 *status) { struct mthca_mailbox *mailbox; - u64 *inbox; + __be64 *inbox; int err; mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL); @@ -1514,7 +1516,7 @@ int mthca_MODIFY_QP(struct mthca_dev *dev, int trans, u32 num, if (i % 8 == 0) printk(" [%02x] ", i * 4); printk(" %08x", - be32_to_cpu(((u32 *) mailbox->buf)[i + 2])); + be32_to_cpu(((__be32 *) mailbox->buf)[i + 2])); if ((i + 1) % 8 == 0) printk("\n"); } @@ -1534,7 +1536,7 @@ int mthca_MODIFY_QP(struct mthca_dev *dev, int trans, u32 num, if (i % 8 == 0) printk("[%02x] ", i * 4); printk(" %08x", - be32_to_cpu(((u32 *) mailbox->buf)[i + 2])); + be32_to_cpu(((__be32 *) mailbox->buf)[i + 2])); if ((i + 1) % 8 == 0) printk("\n"); } diff --git a/drivers/infiniband/hw/mthca/mthca_cq.c b/drivers/infiniband/hw/mthca/mthca_cq.c index bd7807cec50c..907867d1f2e0 100644 --- a/drivers/infiniband/hw/mthca/mthca_cq.c +++ b/drivers/infiniband/hw/mthca/mthca_cq.c 
@@ -57,21 +57,21 @@ enum { * Must be packed because start is 64 bits but only aligned to 32 bits. */ struct mthca_cq_context { - u32 flags; - u64 start; - u32 logsize_usrpage; - u32 error_eqn; /* Tavor only */ - u32 comp_eqn; - u32 pd; - u32 lkey; - u32 last_notified_index; - u32 solicit_producer_index; - u32 consumer_index; - u32 producer_index; - u32 cqn; - u32 ci_db; /* Arbel only */ - u32 state_db; /* Arbel only */ - u32 reserved; + __be32 flags; + __be64 start; + __be32 logsize_usrpage; + __be32 error_eqn; /* Tavor only */ + __be32 comp_eqn; + __be32 pd; + __be32 lkey; + __be32 last_notified_index; + __be32 solicit_producer_index; + __be32 consumer_index; + __be32 producer_index; + __be32 cqn; + __be32 ci_db; /* Arbel only */ + __be32 state_db; /* Arbel only */ + u32 reserved; } __attribute__((packed)); #define MTHCA_CQ_STATUS_OK ( 0 << 28) @@ -110,31 +110,31 @@ enum { }; struct mthca_cqe { - u32 my_qpn; - u32 my_ee; - u32 rqpn; - u16 sl_g_mlpath; - u16 rlid; - u32 imm_etype_pkey_eec; - u32 byte_cnt; - u32 wqe; - u8 opcode; - u8 is_send; - u8 reserved; - u8 owner; + __be32 my_qpn; + __be32 my_ee; + __be32 rqpn; + __be16 sl_g_mlpath; + __be16 rlid; + __be32 imm_etype_pkey_eec; + __be32 byte_cnt; + __be32 wqe; + u8 opcode; + u8 is_send; + u8 reserved; + u8 owner; }; struct mthca_err_cqe { - u32 my_qpn; - u32 reserved1[3]; - u8 syndrome; - u8 reserved2; - u16 db_cnt; - u32 reserved3; - u32 wqe; - u8 opcode; - u8 reserved4[2]; - u8 owner; + __be32 my_qpn; + u32 reserved1[3]; + u8 syndrome; + u8 reserved2; + __be16 db_cnt; + u32 reserved3; + __be32 wqe; + u8 opcode; + u8 reserved4[2]; + u8 owner; }; #define MTHCA_CQ_ENTRY_OWNER_SW (0 << 7) @@ -193,7 +193,7 @@ static void dump_cqe(struct mthca_dev *dev, void *cqe_ptr) static inline void update_cons_index(struct mthca_dev *dev, struct mthca_cq *cq, int incr) { - u32 doorbell[2]; + __be32 doorbell[2]; if (mthca_is_memfree(dev)) { *cq->set_ci_db = cpu_to_be32(cq->cons_index); @@ -293,7 +293,7 @@ static int 
handle_error_cqe(struct mthca_dev *dev, struct mthca_cq *cq, { int err; int dbd; - u32 new_wqe; + __be32 new_wqe; if (cqe->syndrome == SYNDROME_LOCAL_QP_OP_ERR) { mthca_dbg(dev, "local QP operation err " @@ -586,13 +586,13 @@ int mthca_poll_cq(struct ib_cq *ibcq, int num_entries, int mthca_tavor_arm_cq(struct ib_cq *cq, enum ib_cq_notify notify) { - u32 doorbell[2]; + __be32 doorbell[2]; doorbell[0] = cpu_to_be32((notify == IB_CQ_SOLICITED ? MTHCA_TAVOR_CQ_DB_REQ_NOT_SOL : MTHCA_TAVOR_CQ_DB_REQ_NOT) | to_mcq(cq)->cqn); - doorbell[1] = 0xffffffff; + doorbell[1] = (__force __be32) 0xffffffff; mthca_write64(doorbell, to_mdev(cq->device)->kar + MTHCA_CQ_DOORBELL, @@ -604,9 +604,9 @@ int mthca_tavor_arm_cq(struct ib_cq *cq, enum ib_cq_notify notify) int mthca_arbel_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify notify) { struct mthca_cq *cq = to_mcq(ibcq); - u32 doorbell[2]; + __be32 doorbell[2]; u32 sn; - u32 ci; + __be32 ci; sn = cq->arm_sn & 3; ci = cpu_to_be32(cq->cons_index); @@ -813,7 +813,6 @@ int mthca_init_cq(struct mthca_dev *dev, int nent, cq_context->flags = cpu_to_be32(MTHCA_CQ_STATUS_OK | MTHCA_CQ_STATE_DISARMED | MTHCA_CQ_FLAG_TR); - cq_context->start = cpu_to_be64(0); cq_context->logsize_usrpage = cpu_to_be32((ffs(nent) - 1) << 24); if (ctx) cq_context->logsize_usrpage |= cpu_to_be32(ctx->uar.index); @@ -906,7 +905,7 @@ void mthca_free_cq(struct mthca_dev *dev, mthca_warn(dev, "HW2SW_CQ returned status 0x%02x\n", status); if (0) { - u32 *ctx = mailbox->buf; + __be32 *ctx = mailbox->buf; int j; printk(KERN_ERR "context for CQN %x (cons index %x, next sw %d)\n", diff --git a/drivers/infiniband/hw/mthca/mthca_dev.h b/drivers/infiniband/hw/mthca/mthca_dev.h index 33162a960c72..3519ca4e086c 100644 --- a/drivers/infiniband/hw/mthca/mthca_dev.h +++ b/drivers/infiniband/hw/mthca/mthca_dev.h @@ -333,14 +333,13 @@ extern void __buggy_use_of_MTHCA_PUT(void); #define MTHCA_PUT(dest, source, offset) \ do { \ - __typeof__(source) *__p = \ - (__typeof__(source) *) ((char 
*) (dest) + (offset)); \ + void *__d = ((char *) (dest) + (offset)); \ switch (sizeof(source)) { \ - case 1: *__p = (source); break; \ - case 2: *__p = cpu_to_be16(source); break; \ - case 4: *__p = cpu_to_be32(source); break; \ - case 8: *__p = cpu_to_be64(source); break; \ - default: __buggy_use_of_MTHCA_PUT(); \ + case 1: *(u8 *) __d = (source); break; \ + case 2: *(__be16 *) __d = cpu_to_be16(source); break; \ + case 4: *(__be32 *) __d = cpu_to_be32(source); break; \ + case 8: *(__be64 *) __d = cpu_to_be64(source); break; \ + default: __buggy_use_of_MTHCA_PUT(); \ } \ } while (0) @@ -435,7 +434,7 @@ int mthca_arbel_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, int mthca_arbel_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr, struct ib_recv_wr **bad_wr); int mthca_free_err_wqe(struct mthca_dev *dev, struct mthca_qp *qp, int is_send, - int index, int *dbd, u32 *new_wqe); + int index, int *dbd, __be32 *new_wqe); int mthca_alloc_qp(struct mthca_dev *dev, struct mthca_pd *pd, struct mthca_cq *send_cq, diff --git a/drivers/infiniband/hw/mthca/mthca_doorbell.h b/drivers/infiniband/hw/mthca/mthca_doorbell.h index 3be4a4a606a2..dd9a44d170c9 100644 --- a/drivers/infiniband/hw/mthca/mthca_doorbell.h +++ b/drivers/infiniband/hw/mthca/mthca_doorbell.h @@ -58,13 +58,13 @@ static inline void mthca_write64_raw(__be64 val, void __iomem *dest) __raw_writeq((__force u64) val, dest); } -static inline void mthca_write64(u32 val[2], void __iomem *dest, +static inline void mthca_write64(__be32 val[2], void __iomem *dest, spinlock_t *doorbell_lock) { __raw_writeq(*(u64 *) val, dest); } -static inline void mthca_write_db_rec(u32 val[2], u32 *db) +static inline void mthca_write_db_rec(__be32 val[2], __be32 *db) { *(u64 *) db = *(u64 *) val; } @@ -87,18 +87,18 @@ static inline void mthca_write64_raw(__be64 val, void __iomem *dest) __raw_writel(((__force u32 *) &val)[1], dest + 4); } -static inline void mthca_write64(u32 val[2], void __iomem *dest, +static inline void 
mthca_write64(__be32 val[2], void __iomem *dest, spinlock_t *doorbell_lock) { unsigned long flags; spin_lock_irqsave(doorbell_lock, flags); - __raw_writel(val[0], dest); - __raw_writel(val[1], dest + 4); + __raw_writel((__force u32) val[0], dest); + __raw_writel((__force u32) val[1], dest + 4); spin_unlock_irqrestore(doorbell_lock, flags); } -static inline void mthca_write_db_rec(u32 val[2], u32 *db) +static inline void mthca_write_db_rec(__be32 val[2], __be32 *db) { db[0] = val[0]; wmb(); diff --git a/drivers/infiniband/hw/mthca/mthca_eq.c b/drivers/infiniband/hw/mthca/mthca_eq.c index 54a809adab6d..18f0981eb0c1 100644 --- a/drivers/infiniband/hw/mthca/mthca_eq.c +++ b/drivers/infiniband/hw/mthca/mthca_eq.c @@ -52,18 +52,18 @@ enum { * Must be packed because start is 64 bits but only aligned to 32 bits. */ struct mthca_eq_context { - u32 flags; - u64 start; - u32 logsize_usrpage; - u32 tavor_pd; /* reserved for Arbel */ - u8 reserved1[3]; - u8 intr; - u32 arbel_pd; /* lost_count for Tavor */ - u32 lkey; - u32 reserved2[2]; - u32 consumer_index; - u32 producer_index; - u32 reserved3[4]; + __be32 flags; + __be64 start; + __be32 logsize_usrpage; + __be32 tavor_pd; /* reserved for Arbel */ + u8 reserved1[3]; + u8 intr; + __be32 arbel_pd; /* lost_count for Tavor */ + __be32 lkey; + u32 reserved2[2]; + __be32 consumer_index; + __be32 producer_index; + u32 reserved3[4]; } __attribute__((packed)); #define MTHCA_EQ_STATUS_OK ( 0 << 28) @@ -128,28 +128,28 @@ struct mthca_eqe { union { u32 raw[6]; struct { - u32 cqn; + __be32 cqn; } __attribute__((packed)) comp; struct { - u16 reserved1; - u16 token; - u32 reserved2; - u8 reserved3[3]; - u8 status; - u64 out_param; + u16 reserved1; + __be16 token; + u32 reserved2; + u8 reserved3[3]; + u8 status; + __be64 out_param; } __attribute__((packed)) cmd; struct { - u32 qpn; + __be32 qpn; } __attribute__((packed)) qp; struct { - u32 cqn; - u32 reserved1; - u8 reserved2[3]; - u8 syndrome; + __be32 cqn; + u32 reserved1; + u8 
reserved2[3]; + u8 syndrome; } __attribute__((packed)) cq_err; struct { - u32 reserved1[2]; - u32 port; + u32 reserved1[2]; + __be32 port; } __attribute__((packed)) port_change; } event; u8 reserved3[3]; @@ -168,7 +168,7 @@ static inline u64 async_mask(struct mthca_dev *dev) static inline void tavor_set_eq_ci(struct mthca_dev *dev, struct mthca_eq *eq, u32 ci) { - u32 doorbell[2]; + __be32 doorbell[2]; doorbell[0] = cpu_to_be32(MTHCA_EQ_DB_SET_CI | eq->eqn); doorbell[1] = cpu_to_be32(ci & (eq->nent - 1)); @@ -191,8 +191,8 @@ static inline void arbel_set_eq_ci(struct mthca_dev *dev, struct mthca_eq *eq, u { /* See comment in tavor_set_eq_ci() above. */ wmb(); - __raw_writel(cpu_to_be32(ci), dev->eq_regs.arbel.eq_set_ci_base + - eq->eqn * 8); + __raw_writel((__force u32) cpu_to_be32(ci), + dev->eq_regs.arbel.eq_set_ci_base + eq->eqn * 8); /* We still want ordering, just not swabbing, so add a barrier */ mb(); } @@ -207,7 +207,7 @@ static inline void set_eq_ci(struct mthca_dev *dev, struct mthca_eq *eq, u32 ci) static inline void tavor_eq_req_not(struct mthca_dev *dev, int eqn) { - u32 doorbell[2]; + __be32 doorbell[2]; doorbell[0] = cpu_to_be32(MTHCA_EQ_DB_REQ_NOT | eqn); doorbell[1] = 0; @@ -225,7 +225,7 @@ static inline void arbel_eq_req_not(struct mthca_dev *dev, u32 eqn_mask) static inline void disarm_cq(struct mthca_dev *dev, int eqn, int cqn) { if (!mthca_is_memfree(dev)) { - u32 doorbell[2]; + __be32 doorbell[2]; doorbell[0] = cpu_to_be32(MTHCA_EQ_DB_DISARM_CQ | eqn); doorbell[1] = cpu_to_be32(cqn); diff --git a/drivers/infiniband/hw/mthca/mthca_mad.c b/drivers/infiniband/hw/mthca/mthca_mad.c index 3c7fae6cb12f..64fa78722cf6 100644 --- a/drivers/infiniband/hw/mthca/mthca_mad.c +++ b/drivers/infiniband/hw/mthca/mthca_mad.c @@ -194,7 +194,7 @@ int mthca_process_mad(struct ib_device *ibdev, { int err; u8 status; - u16 slid = in_wc ? in_wc->slid : IB_LID_PERMISSIVE; + u16 slid = in_wc ? 
in_wc->slid : be16_to_cpu(IB_LID_PERMISSIVE); /* Forward locally generated traps to the SM */ if (in_mad->mad_hdr.method == IB_MGMT_METHOD_TRAP && diff --git a/drivers/infiniband/hw/mthca/mthca_mcg.c b/drivers/infiniband/hw/mthca/mthca_mcg.c index 5be7d949dbf6..a2707605f4c8 100644 --- a/drivers/infiniband/hw/mthca/mthca_mcg.c +++ b/drivers/infiniband/hw/mthca/mthca_mcg.c @@ -42,10 +42,10 @@ enum { }; struct mthca_mgm { - u32 next_gid_index; - u32 reserved[3]; - u8 gid[16]; - u32 qp[MTHCA_QP_PER_MGM]; + __be32 next_gid_index; + u32 reserved[3]; + u8 gid[16]; + __be32 qp[MTHCA_QP_PER_MGM]; }; static const u8 zero_gid[16]; /* automatically initialized to 0 */ @@ -94,10 +94,14 @@ static int find_mgm(struct mthca_dev *dev, if (0) mthca_dbg(dev, "Hash for %04x:%04x:%04x:%04x:" "%04x:%04x:%04x:%04x is %04x\n", - be16_to_cpu(((u16 *) gid)[0]), be16_to_cpu(((u16 *) gid)[1]), - be16_to_cpu(((u16 *) gid)[2]), be16_to_cpu(((u16 *) gid)[3]), - be16_to_cpu(((u16 *) gid)[4]), be16_to_cpu(((u16 *) gid)[5]), - be16_to_cpu(((u16 *) gid)[6]), be16_to_cpu(((u16 *) gid)[7]), + be16_to_cpu(((__be16 *) gid)[0]), + be16_to_cpu(((__be16 *) gid)[1]), + be16_to_cpu(((__be16 *) gid)[2]), + be16_to_cpu(((__be16 *) gid)[3]), + be16_to_cpu(((__be16 *) gid)[4]), + be16_to_cpu(((__be16 *) gid)[5]), + be16_to_cpu(((__be16 *) gid)[6]), + be16_to_cpu(((__be16 *) gid)[7]), *hash); *index = *hash; @@ -258,14 +262,14 @@ int mthca_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) if (index == -1) { mthca_err(dev, "MGID %04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x " "not found\n", - be16_to_cpu(((u16 *) gid->raw)[0]), - be16_to_cpu(((u16 *) gid->raw)[1]), - be16_to_cpu(((u16 *) gid->raw)[2]), - be16_to_cpu(((u16 *) gid->raw)[3]), - be16_to_cpu(((u16 *) gid->raw)[4]), - be16_to_cpu(((u16 *) gid->raw)[5]), - be16_to_cpu(((u16 *) gid->raw)[6]), - be16_to_cpu(((u16 *) gid->raw)[7])); + be16_to_cpu(((__be16 *) gid->raw)[0]), + be16_to_cpu(((__be16 *) gid->raw)[1]), + be16_to_cpu(((__be16 *) 
gid->raw)[2]), + be16_to_cpu(((__be16 *) gid->raw)[3]), + be16_to_cpu(((__be16 *) gid->raw)[4]), + be16_to_cpu(((__be16 *) gid->raw)[5]), + be16_to_cpu(((__be16 *) gid->raw)[6]), + be16_to_cpu(((__be16 *) gid->raw)[7])); err = -EINVAL; goto out; } diff --git a/drivers/infiniband/hw/mthca/mthca_memfree.c b/drivers/infiniband/hw/mthca/mthca_memfree.c index 9efb0322c761..fba0a53ba6ea 100644 --- a/drivers/infiniband/hw/mthca/mthca_memfree.c +++ b/drivers/infiniband/hw/mthca/mthca_memfree.c @@ -482,7 +482,7 @@ void mthca_cleanup_user_db_tab(struct mthca_dev *dev, struct mthca_uar *uar, } } -int mthca_alloc_db(struct mthca_dev *dev, int type, u32 qn, u32 **db) +int mthca_alloc_db(struct mthca_dev *dev, int type, u32 qn, __be32 **db) { int group; int start, end, dir; @@ -565,7 +565,7 @@ found: page->db_rec[j] = cpu_to_be64((qn << 8) | (type << 5)); - *db = (u32 *) &page->db_rec[j]; + *db = (__be32 *) &page->db_rec[j]; out: up(&dev->db_tab->mutex); diff --git a/drivers/infiniband/hw/mthca/mthca_memfree.h b/drivers/infiniband/hw/mthca/mthca_memfree.h index 59c2f555b13b..bafa51544aa3 100644 --- a/drivers/infiniband/hw/mthca/mthca_memfree.h +++ b/drivers/infiniband/hw/mthca/mthca_memfree.h @@ -138,7 +138,7 @@ enum { struct mthca_db_page { DECLARE_BITMAP(used, MTHCA_DB_REC_PER_PAGE); - u64 *db_rec; + __be64 *db_rec; dma_addr_t mapping; }; @@ -173,7 +173,7 @@ void mthca_cleanup_user_db_tab(struct mthca_dev *dev, struct mthca_uar *uar, int mthca_init_db_tab(struct mthca_dev *dev); void mthca_cleanup_db_tab(struct mthca_dev *dev); -int mthca_alloc_db(struct mthca_dev *dev, int type, u32 qn, u32 **db); +int mthca_alloc_db(struct mthca_dev *dev, int type, u32 qn, __be32 **db); void mthca_free_db(struct mthca_dev *dev, int type, int db_index); #endif /* MTHCA_MEMFREE_H */ diff --git a/drivers/infiniband/hw/mthca/mthca_mr.c b/drivers/infiniband/hw/mthca/mthca_mr.c index 15d9f8f290a0..0965e66061b7 100644 --- a/drivers/infiniband/hw/mthca/mthca_mr.c +++ 
b/drivers/infiniband/hw/mthca/mthca_mr.c @@ -51,18 +51,18 @@ struct mthca_mtt { * Must be packed because mtt_seg is 64 bits but only aligned to 32 bits. */ struct mthca_mpt_entry { - u32 flags; - u32 page_size; - u32 key; - u32 pd; - u64 start; - u64 length; - u32 lkey; - u32 window_count; - u32 window_count_limit; - u64 mtt_seg; - u32 mtt_sz; /* Arbel only */ - u32 reserved[2]; + __be32 flags; + __be32 page_size; + __be32 key; + __be32 pd; + __be64 start; + __be64 length; + __be32 lkey; + __be32 window_count; + __be32 window_count_limit; + __be64 mtt_seg; + __be32 mtt_sz; /* Arbel only */ + u32 reserved[2]; } __attribute__((packed)); #define MTHCA_MPT_FLAG_SW_OWNS (0xfUL << 28) @@ -248,7 +248,7 @@ int mthca_write_mtt(struct mthca_dev *dev, struct mthca_mtt *mtt, int start_index, u64 *buffer_list, int list_len) { struct mthca_mailbox *mailbox; - u64 *mtt_entry; + __be64 *mtt_entry; int err = 0; u8 status; int i; @@ -390,7 +390,7 @@ int mthca_mr_alloc(struct mthca_dev *dev, u32 pd, int buffer_size_shift, for (i = 0; i < sizeof (struct mthca_mpt_entry) / 4; ++i) { if (i % 4 == 0) printk("[%02x] ", i * 4); - printk(" %08x", be32_to_cpu(((u32 *) mpt_entry)[i])); + printk(" %08x", be32_to_cpu(((__be32 *) mpt_entry)[i])); if ((i + 1) % 4 == 0) printk("\n"); } @@ -563,7 +563,7 @@ int mthca_fmr_alloc(struct mthca_dev *dev, u32 pd, for (i = 0; i < sizeof (struct mthca_mpt_entry) / 4; ++i) { if (i % 4 == 0) printk("[%02x] ", i * 4); - printk(" %08x", be32_to_cpu(((u32 *) mpt_entry)[i])); + printk(" %08x", be32_to_cpu(((__be32 *) mpt_entry)[i])); if ((i + 1) % 4 == 0) printk("\n"); } @@ -670,7 +670,7 @@ int mthca_tavor_map_phys_fmr(struct ib_fmr *ibfmr, u64 *page_list, mpt_entry.length = cpu_to_be64(list_len * (1ull << fmr->attr.page_size)); mpt_entry.start = cpu_to_be64(iova); - writel(mpt_entry.lkey, &fmr->mem.tavor.mpt->key); + __raw_writel((__force u32) mpt_entry.lkey, &fmr->mem.tavor.mpt->key); memcpy_toio(&fmr->mem.tavor.mpt->start, &mpt_entry.start, offsetof(struct 
mthca_mpt_entry, window_count) - offsetof(struct mthca_mpt_entry, start)); diff --git a/drivers/infiniband/hw/mthca/mthca_provider.c b/drivers/infiniband/hw/mthca/mthca_provider.c index 34e6b8685ba3..e2db5e001869 100644 --- a/drivers/infiniband/hw/mthca/mthca_provider.c +++ b/drivers/infiniband/hw/mthca/mthca_provider.c @@ -81,10 +81,10 @@ static int mthca_query_device(struct ib_device *ibdev, } props->device_cap_flags = mdev->device_cap_flags; - props->vendor_id = be32_to_cpup((u32 *) (out_mad->data + 36)) & + props->vendor_id = be32_to_cpup((__be32 *) (out_mad->data + 36)) & 0xffffff; - props->vendor_part_id = be16_to_cpup((u16 *) (out_mad->data + 30)); - props->hw_ver = be16_to_cpup((u16 *) (out_mad->data + 32)); + props->vendor_part_id = be16_to_cpup((__be16 *) (out_mad->data + 30)); + props->hw_ver = be16_to_cpup((__be16 *) (out_mad->data + 32)); memcpy(&props->sys_image_guid, out_mad->data + 4, 8); memcpy(&props->node_guid, out_mad->data + 12, 8); @@ -138,16 +138,16 @@ static int mthca_query_port(struct ib_device *ibdev, goto out; } - props->lid = be16_to_cpup((u16 *) (out_mad->data + 16)); + props->lid = be16_to_cpup((__be16 *) (out_mad->data + 16)); props->lmc = out_mad->data[34] & 0x7; - props->sm_lid = be16_to_cpup((u16 *) (out_mad->data + 18)); + props->sm_lid = be16_to_cpup((__be16 *) (out_mad->data + 18)); props->sm_sl = out_mad->data[36] & 0xf; props->state = out_mad->data[32] & 0xf; props->phys_state = out_mad->data[33] >> 4; - props->port_cap_flags = be32_to_cpup((u32 *) (out_mad->data + 20)); + props->port_cap_flags = be32_to_cpup((__be32 *) (out_mad->data + 20)); props->gid_tbl_len = to_mdev(ibdev)->limits.gid_table_len; props->pkey_tbl_len = to_mdev(ibdev)->limits.pkey_table_len; - props->qkey_viol_cntr = be16_to_cpup((u16 *) (out_mad->data + 48)); + props->qkey_viol_cntr = be16_to_cpup((__be16 *) (out_mad->data + 48)); props->active_width = out_mad->data[31] & 0xf; props->active_speed = out_mad->data[35] >> 4; @@ -223,7 +223,7 @@ static int 
mthca_query_pkey(struct ib_device *ibdev, goto out; } - *pkey = be16_to_cpu(((u16 *) out_mad->data)[index % 32]); + *pkey = be16_to_cpu(((__be16 *) out_mad->data)[index % 32]); out: kfree(in_mad); diff --git a/drivers/infiniband/hw/mthca/mthca_provider.h b/drivers/infiniband/hw/mthca/mthca_provider.h index 727aad8d4f33..624651edf577 100644 --- a/drivers/infiniband/hw/mthca/mthca_provider.h +++ b/drivers/infiniband/hw/mthca/mthca_provider.h @@ -182,9 +182,9 @@ struct mthca_cq { /* Next fields are Arbel only */ int set_ci_db_index; - u32 *set_ci_db; + __be32 *set_ci_db; int arm_db_index; - u32 *arm_db; + __be32 *arm_db; int arm_sn; union { @@ -207,7 +207,7 @@ struct mthca_wq { int wqe_shift; int db_index; /* Arbel only */ - u32 *db; + __be32 *db; }; struct mthca_qp { diff --git a/drivers/infiniband/hw/mthca/mthca_qp.c b/drivers/infiniband/hw/mthca/mthca_qp.c index 2f429815d195..8fbb4f1f5398 100644 --- a/drivers/infiniband/hw/mthca/mthca_qp.c +++ b/drivers/infiniband/hw/mthca/mthca_qp.c @@ -97,62 +97,62 @@ enum { }; struct mthca_qp_path { - u32 port_pkey; - u8 rnr_retry; - u8 g_mylmc; - u16 rlid; - u8 ackto; - u8 mgid_index; - u8 static_rate; - u8 hop_limit; - u32 sl_tclass_flowlabel; - u8 rgid[16]; + __be32 port_pkey; + u8 rnr_retry; + u8 g_mylmc; + __be16 rlid; + u8 ackto; + u8 mgid_index; + u8 static_rate; + u8 hop_limit; + __be32 sl_tclass_flowlabel; + u8 rgid[16]; } __attribute__((packed)); struct mthca_qp_context { - u32 flags; - u32 tavor_sched_queue; /* Reserved on Arbel */ - u8 mtu_msgmax; - u8 rq_size_stride; /* Reserved on Tavor */ - u8 sq_size_stride; /* Reserved on Tavor */ - u8 rlkey_arbel_sched_queue; /* Reserved on Tavor */ - u32 usr_page; - u32 local_qpn; - u32 remote_qpn; - u32 reserved1[2]; + __be32 flags; + __be32 tavor_sched_queue; /* Reserved on Arbel */ + u8 mtu_msgmax; + u8 rq_size_stride; /* Reserved on Tavor */ + u8 sq_size_stride; /* Reserved on Tavor */ + u8 rlkey_arbel_sched_queue; /* Reserved on Tavor */ + __be32 usr_page; + __be32 
local_qpn; + __be32 remote_qpn; + u32 reserved1[2]; struct mthca_qp_path pri_path; struct mthca_qp_path alt_path; - u32 rdd; - u32 pd; - u32 wqe_base; - u32 wqe_lkey; - u32 params1; - u32 reserved2; - u32 next_send_psn; - u32 cqn_snd; - u32 snd_wqe_base_l; /* Next send WQE on Tavor */ - u32 snd_db_index; /* (debugging only entries) */ - u32 last_acked_psn; - u32 ssn; - u32 params2; - u32 rnr_nextrecvpsn; - u32 ra_buff_indx; - u32 cqn_rcv; - u32 rcv_wqe_base_l; /* Next recv WQE on Tavor */ - u32 rcv_db_index; /* (debugging only entries) */ - u32 qkey; - u32 srqn; - u32 rmsn; - u16 rq_wqe_counter; /* reserved on Tavor */ - u16 sq_wqe_counter; /* reserved on Tavor */ - u32 reserved3[18]; + __be32 rdd; + __be32 pd; + __be32 wqe_base; + __be32 wqe_lkey; + __be32 params1; + __be32 reserved2; + __be32 next_send_psn; + __be32 cqn_snd; + __be32 snd_wqe_base_l; /* Next send WQE on Tavor */ + __be32 snd_db_index; /* (debugging only entries) */ + __be32 last_acked_psn; + __be32 ssn; + __be32 params2; + __be32 rnr_nextrecvpsn; + __be32 ra_buff_indx; + __be32 cqn_rcv; + __be32 rcv_wqe_base_l; /* Next recv WQE on Tavor */ + __be32 rcv_db_index; /* (debugging only entries) */ + __be32 qkey; + __be32 srqn; + __be32 rmsn; + __be16 rq_wqe_counter; /* reserved on Tavor */ + __be16 sq_wqe_counter; /* reserved on Tavor */ + u32 reserved3[18]; } __attribute__((packed)); struct mthca_qp_param { - u32 opt_param_mask; - u32 reserved1; + __be32 opt_param_mask; + u32 reserved1; struct mthca_qp_context context; - u32 reserved2[62]; + u32 reserved2[62]; } __attribute__((packed)); enum { @@ -191,62 +191,62 @@ enum { }; struct mthca_next_seg { - u32 nda_op; /* [31:6] next WQE [4:0] next opcode */ - u32 ee_nds; /* [31:8] next EE [7] DBD [6] F [5:0] next WQE size */ - u32 flags; /* [3] CQ [2] Event [1] Solicit */ - u32 imm; /* immediate data */ + __be32 nda_op; /* [31:6] next WQE [4:0] next opcode */ + __be32 ee_nds; /* [31:8] next EE [7] DBD [6] F [5:0] next WQE size */ + __be32 flags; /* [3] CQ 
[2] Event [1] Solicit */ + __be32 imm; /* immediate data */ }; struct mthca_tavor_ud_seg { - u32 reserved1; - u32 lkey; - u64 av_addr; - u32 reserved2[4]; - u32 dqpn; - u32 qkey; - u32 reserved3[2]; + u32 reserved1; + __be32 lkey; + __be64 av_addr; + u32 reserved2[4]; + __be32 dqpn; + __be32 qkey; + u32 reserved3[2]; }; struct mthca_arbel_ud_seg { - u32 av[8]; - u32 dqpn; - u32 qkey; - u32 reserved[2]; + __be32 av[8]; + __be32 dqpn; + __be32 qkey; + u32 reserved[2]; }; struct mthca_bind_seg { - u32 flags; /* [31] Atomic [30] rem write [29] rem read */ - u32 reserved; - u32 new_rkey; - u32 lkey; - u64 addr; - u64 length; + __be32 flags; /* [31] Atomic [30] rem write [29] rem read */ + u32 reserved; + __be32 new_rkey; + __be32 lkey; + __be64 addr; + __be64 length; }; struct mthca_raddr_seg { - u64 raddr; - u32 rkey; - u32 reserved; + __be64 raddr; + __be32 rkey; + u32 reserved; }; struct mthca_atomic_seg { - u64 swap_add; - u64 compare; + __be64 swap_add; + __be64 compare; }; struct mthca_data_seg { - u32 byte_count; - u32 lkey; - u64 addr; + __be32 byte_count; + __be32 lkey; + __be64 addr; }; struct mthca_mlx_seg { - u32 nda_op; - u32 nds; - u32 flags; /* [17] VL15 [16] SLR [14:12] static rate + __be32 nda_op; + __be32 nds; + __be32 flags; /* [17] VL15 [16] SLR [14:12] static rate [11:8] SL [3] C [2] E */ - u16 rlid; - u16 vcrc; + __be16 rlid; + __be16 vcrc; }; static const u8 mthca_opcode[] = { @@ -1459,6 +1459,7 @@ static int build_mlx_header(struct mthca_dev *dev, struct mthca_sqp *sqp, { int header_size; int err; + u16 pkey; ib_ud_header_init(256, /* assume a MAD */ sqp->ud_header.grh_present, @@ -1469,8 +1470,8 @@ static int build_mlx_header(struct mthca_dev *dev, struct mthca_sqp *sqp, return err; mlx->flags &= ~cpu_to_be32(MTHCA_NEXT_SOLICIT | 1); mlx->flags |= cpu_to_be32((!sqp->qp.ibqp.qp_num ? MTHCA_MLX_VL15 : 0) | - (sqp->ud_header.lrh.destination_lid == 0xffff ? - MTHCA_MLX_SLR : 0) | + (sqp->ud_header.lrh.destination_lid == + IB_LID_PERMISSIVE ? 
MTHCA_MLX_SLR : 0) | (sqp->ud_header.lrh.service_level << 8)); mlx->rlid = sqp->ud_header.lrh.destination_lid; mlx->vcrc = 0; @@ -1490,18 +1491,16 @@ static int build_mlx_header(struct mthca_dev *dev, struct mthca_sqp *sqp, } sqp->ud_header.lrh.virtual_lane = !sqp->qp.ibqp.qp_num ? 15 : 0; - if (sqp->ud_header.lrh.destination_lid == 0xffff) - sqp->ud_header.lrh.source_lid = 0xffff; + if (sqp->ud_header.lrh.destination_lid == IB_LID_PERMISSIVE) + sqp->ud_header.lrh.source_lid = IB_LID_PERMISSIVE; sqp->ud_header.bth.solicited_event = !!(wr->send_flags & IB_SEND_SOLICITED); if (!sqp->qp.ibqp.qp_num) ib_get_cached_pkey(&dev->ib_dev, sqp->port, - sqp->pkey_index, - &sqp->ud_header.bth.pkey); + sqp->pkey_index, &pkey); else ib_get_cached_pkey(&dev->ib_dev, sqp->port, - wr->wr.ud.pkey_index, - &sqp->ud_header.bth.pkey); - cpu_to_be16s(&sqp->ud_header.bth.pkey); + wr->wr.ud.pkey_index, &pkey); + sqp->ud_header.bth.pkey = cpu_to_be16(pkey); sqp->ud_header.bth.destination_qpn = cpu_to_be32(wr->wr.ud.remote_qpn); sqp->ud_header.bth.psn = cpu_to_be32((sqp->send_psn++) & ((1 << 24) - 1)); sqp->ud_header.deth.qkey = cpu_to_be32(wr->wr.ud.remote_qkey & 0x80000000 ? 
@@ -1744,7 +1743,7 @@ int mthca_tavor_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, out: if (likely(nreq)) { - u32 doorbell[2]; + __be32 doorbell[2]; doorbell[0] = cpu_to_be32(((qp->sq.next_ind << qp->sq.wqe_shift) + qp->send_wqe_offset) | f0 | op0); @@ -1845,7 +1844,7 @@ int mthca_tavor_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr, out: if (likely(nreq)) { - u32 doorbell[2]; + __be32 doorbell[2]; doorbell[0] = cpu_to_be32((qp->rq.next_ind << qp->rq.wqe_shift) | size0); doorbell[1] = cpu_to_be32((qp->qpn << 8) | nreq); @@ -2066,7 +2065,7 @@ int mthca_arbel_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, out: if (likely(nreq)) { - u32 doorbell[2]; + __be32 doorbell[2]; doorbell[0] = cpu_to_be32((nreq << 24) | ((qp->sq.head & 0xffff) << 8) | @@ -2176,7 +2175,7 @@ out: } int mthca_free_err_wqe(struct mthca_dev *dev, struct mthca_qp *qp, int is_send, - int index, int *dbd, u32 *new_wqe) + int index, int *dbd, __be32 *new_wqe) { struct mthca_next_seg *next; diff --git a/drivers/infiniband/include/ib_cm.h b/drivers/infiniband/include/ib_cm.h index da650115e79a..8202ad2e6435 100644 --- a/drivers/infiniband/include/ib_cm.h +++ b/drivers/infiniband/include/ib_cm.h @@ -115,7 +115,7 @@ struct ib_cm_req_event_param { struct ib_sa_path_rec *primary_path; struct ib_sa_path_rec *alternate_path; - u64 remote_ca_guid; + __be64 remote_ca_guid; u32 remote_qkey; u32 remote_qpn; enum ib_qp_type qp_type; @@ -132,7 +132,7 @@ struct ib_cm_req_event_param { }; struct ib_cm_rep_event_param { - u64 remote_ca_guid; + __be64 remote_ca_guid; u32 remote_qkey; u32 remote_qpn; u32 starting_psn; @@ -146,39 +146,39 @@ struct ib_cm_rep_event_param { }; enum ib_cm_rej_reason { - IB_CM_REJ_NO_QP = __constant_htons(1), - IB_CM_REJ_NO_EEC = __constant_htons(2), - IB_CM_REJ_NO_RESOURCES = __constant_htons(3), - IB_CM_REJ_TIMEOUT = __constant_htons(4), - IB_CM_REJ_UNSUPPORTED = __constant_htons(5), - IB_CM_REJ_INVALID_COMM_ID = __constant_htons(6), - IB_CM_REJ_INVALID_COMM_INSTANCE = 
__constant_htons(7), - IB_CM_REJ_INVALID_SERVICE_ID = __constant_htons(8), - IB_CM_REJ_INVALID_TRANSPORT_TYPE = __constant_htons(9), - IB_CM_REJ_STALE_CONN = __constant_htons(10), - IB_CM_REJ_RDC_NOT_EXIST = __constant_htons(11), - IB_CM_REJ_INVALID_GID = __constant_htons(12), - IB_CM_REJ_INVALID_LID = __constant_htons(13), - IB_CM_REJ_INVALID_SL = __constant_htons(14), - IB_CM_REJ_INVALID_TRAFFIC_CLASS = __constant_htons(15), - IB_CM_REJ_INVALID_HOP_LIMIT = __constant_htons(16), - IB_CM_REJ_INVALID_PACKET_RATE = __constant_htons(17), - IB_CM_REJ_INVALID_ALT_GID = __constant_htons(18), - IB_CM_REJ_INVALID_ALT_LID = __constant_htons(19), - IB_CM_REJ_INVALID_ALT_SL = __constant_htons(20), - IB_CM_REJ_INVALID_ALT_TRAFFIC_CLASS = __constant_htons(21), - IB_CM_REJ_INVALID_ALT_HOP_LIMIT = __constant_htons(22), - IB_CM_REJ_INVALID_ALT_PACKET_RATE = __constant_htons(23), - IB_CM_REJ_PORT_CM_REDIRECT = __constant_htons(24), - IB_CM_REJ_PORT_REDIRECT = __constant_htons(25), - IB_CM_REJ_INVALID_MTU = __constant_htons(26), - IB_CM_REJ_INSUFFICIENT_RESP_RESOURCES = __constant_htons(27), - IB_CM_REJ_CONSUMER_DEFINED = __constant_htons(28), - IB_CM_REJ_INVALID_RNR_RETRY = __constant_htons(29), - IB_CM_REJ_DUPLICATE_LOCAL_COMM_ID = __constant_htons(30), - IB_CM_REJ_INVALID_CLASS_VERSION = __constant_htons(31), - IB_CM_REJ_INVALID_FLOW_LABEL = __constant_htons(32), - IB_CM_REJ_INVALID_ALT_FLOW_LABEL = __constant_htons(33) + IB_CM_REJ_NO_QP = 1, + IB_CM_REJ_NO_EEC = 2, + IB_CM_REJ_NO_RESOURCES = 3, + IB_CM_REJ_TIMEOUT = 4, + IB_CM_REJ_UNSUPPORTED = 5, + IB_CM_REJ_INVALID_COMM_ID = 6, + IB_CM_REJ_INVALID_COMM_INSTANCE = 7, + IB_CM_REJ_INVALID_SERVICE_ID = 8, + IB_CM_REJ_INVALID_TRANSPORT_TYPE = 9, + IB_CM_REJ_STALE_CONN = 10, + IB_CM_REJ_RDC_NOT_EXIST = 11, + IB_CM_REJ_INVALID_GID = 12, + IB_CM_REJ_INVALID_LID = 13, + IB_CM_REJ_INVALID_SL = 14, + IB_CM_REJ_INVALID_TRAFFIC_CLASS = 15, + IB_CM_REJ_INVALID_HOP_LIMIT = 16, + IB_CM_REJ_INVALID_PACKET_RATE = 17, + IB_CM_REJ_INVALID_ALT_GID 
= 18, + IB_CM_REJ_INVALID_ALT_LID = 19, + IB_CM_REJ_INVALID_ALT_SL = 20, + IB_CM_REJ_INVALID_ALT_TRAFFIC_CLASS = 21, + IB_CM_REJ_INVALID_ALT_HOP_LIMIT = 22, + IB_CM_REJ_INVALID_ALT_PACKET_RATE = 23, + IB_CM_REJ_PORT_CM_REDIRECT = 24, + IB_CM_REJ_PORT_REDIRECT = 25, + IB_CM_REJ_INVALID_MTU = 26, + IB_CM_REJ_INSUFFICIENT_RESP_RESOURCES = 27, + IB_CM_REJ_CONSUMER_DEFINED = 28, + IB_CM_REJ_INVALID_RNR_RETRY = 29, + IB_CM_REJ_DUPLICATE_LOCAL_COMM_ID = 30, + IB_CM_REJ_INVALID_CLASS_VERSION = 31, + IB_CM_REJ_INVALID_FLOW_LABEL = 32, + IB_CM_REJ_INVALID_ALT_FLOW_LABEL = 33 }; struct ib_cm_rej_event_param { @@ -222,8 +222,7 @@ struct ib_cm_sidr_req_event_param { struct ib_cm_id *listen_id; struct ib_device *device; u8 port; - - u16 pkey; + u16 pkey; }; enum ib_cm_sidr_status { @@ -285,12 +284,12 @@ typedef int (*ib_cm_handler)(struct ib_cm_id *cm_id, struct ib_cm_id { ib_cm_handler cm_handler; void *context; - u64 service_id; - u64 service_mask; + __be64 service_id; + __be64 service_mask; enum ib_cm_state state; /* internal CM/debug use */ enum ib_cm_lap_state lap_state; /* internal CM/debug use */ - u32 local_id; - u32 remote_id; + __be32 local_id; + __be32 remote_id; }; /** @@ -330,13 +329,13 @@ void ib_destroy_cm_id(struct ib_cm_id *cm_id); * IB_CM_ASSIGN_SERVICE_ID. 
*/ int ib_cm_listen(struct ib_cm_id *cm_id, - u64 service_id, - u64 service_mask); + __be64 service_id, + __be64 service_mask); struct ib_cm_req_param { struct ib_sa_path_rec *primary_path; struct ib_sa_path_rec *alternate_path; - u64 service_id; + __be64 service_id; u32 qp_num; enum ib_qp_type qp_type; u32 starting_psn; @@ -528,7 +527,7 @@ int ib_send_cm_apr(struct ib_cm_id *cm_id, struct ib_cm_sidr_req_param { struct ib_sa_path_rec *path; - u64 service_id; + __be64 service_id; int timeout_ms; const void *private_data; u8 private_data_len; diff --git a/drivers/infiniband/include/ib_mad.h b/drivers/infiniband/include/ib_mad.h index 491b6f25b3b8..63237805d6af 100644 --- a/drivers/infiniband/include/ib_mad.h +++ b/drivers/infiniband/include/ib_mad.h @@ -111,12 +111,12 @@ struct ib_mad_hdr { u8 mgmt_class; u8 class_version; u8 method; - u16 status; - u16 class_specific; - u64 tid; - u16 attr_id; - u16 resv; - u32 attr_mod; + __be16 status; + __be16 class_specific; + __be64 tid; + __be16 attr_id; + __be16 resv; + __be32 attr_mod; }; struct ib_rmpp_hdr { @@ -124,8 +124,8 @@ struct ib_rmpp_hdr { u8 rmpp_type; u8 rmpp_rtime_flags; u8 rmpp_status; - u32 seg_num; - u32 paylen_newwin; + __be32 seg_num; + __be32 paylen_newwin; }; typedef u64 __bitwise ib_sa_comp_mask; @@ -139,9 +139,9 @@ typedef u64 __bitwise ib_sa_comp_mask; * the wire so we can't change the layout) */ struct ib_sa_hdr { - u64 sm_key; - u16 attr_offset; - u16 reserved; + __be64 sm_key; + __be16 attr_offset; + __be16 reserved; ib_sa_comp_mask comp_mask; } __attribute__ ((packed)); diff --git a/drivers/infiniband/include/ib_sa.h b/drivers/infiniband/include/ib_sa.h index 6d999f7b5d93..326159c04aca 100644 --- a/drivers/infiniband/include/ib_sa.h +++ b/drivers/infiniband/include/ib_sa.h @@ -133,16 +133,16 @@ struct ib_sa_path_rec { /* reserved */ union ib_gid dgid; union ib_gid sgid; - u16 dlid; - u16 slid; + __be16 dlid; + __be16 slid; int raw_traffic; /* reserved */ - u32 flow_label; + __be32 flow_label; u8 
hop_limit; u8 traffic_class; int reversible; u8 numb_path; - u16 pkey; + __be16 pkey; /* reserved */ u8 sl; u8 mtu_selector; @@ -176,18 +176,18 @@ struct ib_sa_path_rec { struct ib_sa_mcmember_rec { union ib_gid mgid; union ib_gid port_gid; - u32 qkey; - u16 mlid; + __be32 qkey; + __be16 mlid; u8 mtu_selector; u8 mtu; u8 traffic_class; - u16 pkey; + __be16 pkey; u8 rate_selector; u8 rate; u8 packet_life_time_selector; u8 packet_life_time; u8 sl; - u32 flow_label; + __be32 flow_label; u8 hop_limit; u8 scope; u8 join_state; @@ -238,7 +238,7 @@ struct ib_sa_mcmember_rec { struct ib_sa_service_rec { u64 id; union ib_gid gid; - u16 pkey; + __be16 pkey; /* reserved */ u32 lease; u8 key[16]; diff --git a/drivers/infiniband/include/ib_smi.h b/drivers/infiniband/include/ib_smi.h index ca8216514963..c07b31cb9499 100644 --- a/drivers/infiniband/include/ib_smi.h +++ b/drivers/infiniband/include/ib_smi.h @@ -41,8 +41,6 @@ #include -#define IB_LID_PERMISSIVE 0xFFFF - #define IB_SMP_DATA_SIZE 64 #define IB_SMP_MAX_PATH_HOPS 64 @@ -51,16 +49,16 @@ struct ib_smp { u8 mgmt_class; u8 class_version; u8 method; - u16 status; + __be16 status; u8 hop_ptr; u8 hop_cnt; - u64 tid; - u16 attr_id; - u16 resv; - u32 attr_mod; - u64 mkey; - u16 dr_slid; - u16 dr_dlid; + __be64 tid; + __be16 attr_id; + __be16 resv; + __be32 attr_mod; + __be64 mkey; + __be16 dr_slid; + __be16 dr_dlid; u8 reserved[28]; u8 data[IB_SMP_DATA_SIZE]; u8 initial_path[IB_SMP_MAX_PATH_HOPS]; diff --git a/drivers/infiniband/include/ib_user_cm.h b/drivers/infiniband/include/ib_user_cm.h index 500b1af6ff77..72182d16778b 100644 --- a/drivers/infiniband/include/ib_user_cm.h +++ b/drivers/infiniband/include/ib_user_cm.h @@ -88,15 +88,15 @@ struct ib_ucm_attr_id { }; struct ib_ucm_attr_id_resp { - __u64 service_id; - __u64 service_mask; - __u32 local_id; - __u32 remote_id; + __be64 service_id; + __be64 service_mask; + __be32 local_id; + __be32 remote_id; }; struct ib_ucm_listen { - __u64 service_id; - __u64 service_mask; + 
__be64 service_id; + __be64 service_mask; __u32 id; }; @@ -114,13 +114,13 @@ struct ib_ucm_private_data { struct ib_ucm_path_rec { __u8 dgid[16]; __u8 sgid[16]; - __u16 dlid; - __u16 slid; + __be16 dlid; + __be16 slid; __u32 raw_traffic; - __u32 flow_label; + __be32 flow_label; __u32 reversible; __u32 mtu; - __u16 pkey; + __be16 pkey; __u8 hop_limit; __u8 traffic_class; __u8 numb_path; @@ -138,7 +138,7 @@ struct ib_ucm_req { __u32 qpn; __u32 qp_type; __u32 psn; - __u64 sid; + __be64 sid; __u64 data; __u64 primary_path; __u64 alternate_path; @@ -200,7 +200,7 @@ struct ib_ucm_lap { struct ib_ucm_sidr_req { __u32 id; __u32 timeout; - __u64 sid; + __be64 sid; __u64 data; __u64 path; __u16 pkey; @@ -237,7 +237,7 @@ struct ib_ucm_req_event_resp { /* port */ struct ib_ucm_path_rec primary_path; struct ib_ucm_path_rec alternate_path; - __u64 remote_ca_guid; + __be64 remote_ca_guid; __u32 remote_qkey; __u32 remote_qpn; __u32 qp_type; @@ -253,7 +253,7 @@ struct ib_ucm_req_event_resp { }; struct ib_ucm_rep_event_resp { - __u64 remote_ca_guid; + __be64 remote_ca_guid; __u32 remote_qkey; __u32 remote_qpn; __u32 starting_psn; diff --git a/drivers/infiniband/include/ib_user_mad.h b/drivers/infiniband/include/ib_user_mad.h index a9a56b50aacc..44537aa32e62 100644 --- a/drivers/infiniband/include/ib_user_mad.h +++ b/drivers/infiniband/include/ib_user_mad.h @@ -70,8 +70,6 @@ * @traffic_class - Traffic class in GRH * @gid - Remote GID in GRH * @flow_label - Flow label in GRH - * - * All multi-byte quantities are stored in network (big endian) byte order. 
*/ struct ib_user_mad_hdr { __u32 id; @@ -79,9 +77,9 @@ struct ib_user_mad_hdr { __u32 timeout_ms; __u32 retries; __u32 length; - __u32 qpn; - __u32 qkey; - __u16 lid; + __be32 qpn; + __be32 qkey; + __be16 lid; __u8 sl; __u8 path_bits; __u8 grh_present; @@ -89,7 +87,7 @@ struct ib_user_mad_hdr { __u8 hop_limit; __u8 traffic_class; __u8 gid[16]; - __u32 flow_label; + __be32 flow_label; }; /** diff --git a/drivers/infiniband/include/ib_user_verbs.h b/drivers/infiniband/include/ib_user_verbs.h index 7c613706af72..35857857aa3e 100644 --- a/drivers/infiniband/include/ib_user_verbs.h +++ b/drivers/infiniband/include/ib_user_verbs.h @@ -143,8 +143,8 @@ struct ib_uverbs_query_device { struct ib_uverbs_query_device_resp { __u64 fw_ver; - __u64 node_guid; - __u64 sys_image_guid; + __be64 node_guid; + __be64 sys_image_guid; __u64 max_mr_size; __u64 page_size_cap; __u32 vendor_id; diff --git a/drivers/infiniband/include/ib_verbs.h b/drivers/infiniband/include/ib_verbs.h index 8d5ea9568337..042a7d11fbcc 100644 --- a/drivers/infiniband/include/ib_verbs.h +++ b/drivers/infiniband/include/ib_verbs.h @@ -51,8 +51,8 @@ union ib_gid { u8 raw[16]; struct { - u64 subnet_prefix; - u64 interface_id; + __be64 subnet_prefix; + __be64 interface_id; } global; }; @@ -88,8 +88,8 @@ enum ib_atomic_cap { struct ib_device_attr { u64 fw_ver; - u64 node_guid; - u64 sys_image_guid; + __be64 node_guid; + __be64 sys_image_guid; u64 max_mr_size; u64 page_size_cap; u32 vendor_id; @@ -291,8 +291,8 @@ struct ib_global_route { }; struct ib_grh { - u32 version_tclass_flow; - u16 paylen; + __be32 version_tclass_flow; + __be16 paylen; u8 next_hdr; u8 hop_limit; union ib_gid sgid; @@ -303,6 +303,8 @@ enum { IB_MULTICAST_QPN = 0xffffff }; +#define IB_LID_PERMISSIVE __constant_htons(0xFFFF) + enum ib_ah_flags { IB_AH_GRH = 1 }; diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h index b91d3ef01b92..e23041c7be8f 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib.h +++ 
b/drivers/infiniband/ulp/ipoib/ipoib.h @@ -90,8 +90,8 @@ enum { /* structs */ struct ipoib_header { - u16 proto; - u16 reserved; + __be16 proto; + u16 reserved; }; struct ipoib_pseudoheader { diff --git a/drivers/infiniband/ulp/ipoib/ipoib_fs.c b/drivers/infiniband/ulp/ipoib/ipoib_fs.c index a84e5fe0f193..38b150f775e7 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_fs.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_fs.c @@ -97,7 +97,7 @@ static int ipoib_mcg_seq_show(struct seq_file *file, void *iter_ptr) for (n = 0, i = 0; i < sizeof mgid / 2; ++i) { n += sprintf(gid_buf + n, "%x", - be16_to_cpu(((u16 *)mgid.raw)[i])); + be16_to_cpu(((__be16 *) mgid.raw)[i])); if (i < sizeof mgid / 2 - 1) gid_buf[n++] = ':'; } diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c index d4300e4a36d8..7f349693b40a 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_main.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c @@ -609,8 +609,8 @@ static int ipoib_start_xmit(struct sk_buff *skb, struct net_device *dev) ipoib_warn(priv, "Unicast, no %s: type %04x, QPN %06x " IPOIB_GID_FMT "\n", skb->dst ? 
"neigh" : "dst", - be16_to_cpup((u16 *) skb->data), - be32_to_cpup((u32 *) phdr->hwaddr), + be16_to_cpup((__be16 *) skb->data), + be32_to_cpup((__be32 *) phdr->hwaddr), IPOIB_GID_ARG(*(union ib_gid *) (phdr->hwaddr + 4))); dev_kfree_skb_any(skb); ++priv->stats.tx_dropped; diff --git a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c index e03b070d5222..aca7aea18a69 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c @@ -359,7 +359,7 @@ static int ipoib_mcast_sendonly_join(struct ipoib_mcast *mcast) rec.mgid = mcast->mcmember.mgid; rec.port_gid = priv->local_gid; - rec.pkey = be16_to_cpu(priv->pkey); + rec.pkey = cpu_to_be16(priv->pkey); ret = ib_sa_mcmember_rec_set(priv->ca, priv->port, &rec, IB_SA_MCMEMBER_REC_MGID | @@ -459,7 +459,7 @@ static void ipoib_mcast_join(struct net_device *dev, struct ipoib_mcast *mcast, rec.mgid = mcast->mcmember.mgid; rec.port_gid = priv->local_gid; - rec.pkey = be16_to_cpu(priv->pkey); + rec.pkey = cpu_to_be16(priv->pkey); comp_mask = IB_SA_MCMEMBER_REC_MGID | @@ -648,7 +648,7 @@ static int ipoib_mcast_leave(struct net_device *dev, struct ipoib_mcast *mcast) rec.mgid = mcast->mcmember.mgid; rec.port_gid = priv->local_gid; - rec.pkey = be16_to_cpu(priv->pkey); + rec.pkey = cpu_to_be16(priv->pkey); /* Remove ourselves from the multicast group */ ret = ipoib_mcast_detach(dev, be16_to_cpu(mcast->mcmember.mlid), From 2e8b981c5d5c6fe5479ad47c44e3e76ebb5408ef Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Sat, 13 Aug 2005 21:19:38 -0700 Subject: [PATCH 178/584] [PATCH] IB/mthca: add HCA board ID to sysfs info Add support for reporting HCA board ID returned from QUERY_ADAPTER firmware command through sysfs. Signed-off-by: Michael S. 
Tsirkin Signed-off-by: Roland Dreier --- drivers/infiniband/hw/mthca/mthca_cmd.c | 32 +++++ drivers/infiniband/hw/mthca/mthca_cmd.h | 9 +- drivers/infiniband/hw/mthca/mthca_dev.h | 5 + drivers/infiniband/hw/mthca/mthca_main.c | 122 +++++++++---------- drivers/infiniband/hw/mthca/mthca_provider.c | 10 +- 5 files changed, 109 insertions(+), 69 deletions(-) diff --git a/drivers/infiniband/hw/mthca/mthca_cmd.c b/drivers/infiniband/hw/mthca/mthca_cmd.c index 1e60487ecd7f..e15c1e2deab4 100644 --- a/drivers/infiniband/hw/mthca/mthca_cmd.c +++ b/drivers/infiniband/hw/mthca/mthca_cmd.c @@ -1085,6 +1085,34 @@ out: return err; } +static void get_board_id(void *vsd, char *board_id) +{ + int i; + +#define VSD_OFFSET_SIG1 0x00 +#define VSD_OFFSET_SIG2 0xde +#define VSD_OFFSET_MLX_BOARD_ID 0xd0 +#define VSD_OFFSET_TS_BOARD_ID 0x20 + +#define VSD_SIGNATURE_TOPSPIN 0x5ad + + memset(board_id, 0, MTHCA_BOARD_ID_LEN); + + if (be16_to_cpup(vsd + VSD_OFFSET_SIG1) == VSD_SIGNATURE_TOPSPIN && + be16_to_cpup(vsd + VSD_OFFSET_SIG2) == VSD_SIGNATURE_TOPSPIN) { + strlcpy(board_id, vsd + VSD_OFFSET_TS_BOARD_ID, MTHCA_BOARD_ID_LEN); + } else { + /* + * The board ID is a string but the firmware byte + * swaps each 4-byte word before passing it back to + * us. Therefore we need to swab it before printing. 
+ */ + for (i = 0; i < 4; ++i) + ((u32 *) board_id)[i] = + swab32(*(u32 *) (vsd + VSD_OFFSET_MLX_BOARD_ID + i * 4)); + } +} + int mthca_QUERY_ADAPTER(struct mthca_dev *dev, struct mthca_adapter *adapter, u8 *status) { @@ -1097,6 +1125,7 @@ int mthca_QUERY_ADAPTER(struct mthca_dev *dev, #define QUERY_ADAPTER_DEVICE_ID_OFFSET 0x04 #define QUERY_ADAPTER_REVISION_ID_OFFSET 0x08 #define QUERY_ADAPTER_INTA_PIN_OFFSET 0x10 +#define QUERY_ADAPTER_VSD_OFFSET 0x20 mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL); if (IS_ERR(mailbox)) @@ -1114,6 +1143,9 @@ int mthca_QUERY_ADAPTER(struct mthca_dev *dev, MTHCA_GET(adapter->revision_id, outbox, QUERY_ADAPTER_REVISION_ID_OFFSET); MTHCA_GET(adapter->inta_pin, outbox, QUERY_ADAPTER_INTA_PIN_OFFSET); + get_board_id(outbox + QUERY_ADAPTER_VSD_OFFSET / 4, + adapter->board_id); + out: mthca_free_mailbox(dev, mailbox); return err; diff --git a/drivers/infiniband/hw/mthca/mthca_cmd.h b/drivers/infiniband/hw/mthca/mthca_cmd.h index 75a629639445..4e0062778ff9 100644 --- a/drivers/infiniband/hw/mthca/mthca_cmd.h +++ b/drivers/infiniband/hw/mthca/mthca_cmd.h @@ -184,10 +184,11 @@ struct mthca_dev_lim { }; struct mthca_adapter { - u32 vendor_id; - u32 device_id; - u32 revision_id; - u8 inta_pin; + u32 vendor_id; + u32 device_id; + u32 revision_id; + char board_id[MTHCA_BOARD_ID_LEN]; + u8 inta_pin; }; struct mthca_init_hca_param { diff --git a/drivers/infiniband/hw/mthca/mthca_dev.h b/drivers/infiniband/hw/mthca/mthca_dev.h index 3519ca4e086c..c8f67c034183 100644 --- a/drivers/infiniband/hw/mthca/mthca_dev.h +++ b/drivers/infiniband/hw/mthca/mthca_dev.h @@ -68,6 +68,10 @@ enum { MTHCA_MAX_PORTS = 2 }; +enum { + MTHCA_BOARD_ID_LEN = 64 +}; + enum { MTHCA_EQ_CONTEXT_SIZE = 0x40, MTHCA_CQ_CONTEXT_SIZE = 0x40, @@ -248,6 +252,7 @@ struct mthca_dev { unsigned long device_cap_flags; u32 rev_id; + char board_id[MTHCA_BOARD_ID_LEN]; /* firmware info */ u64 fw_ver; diff --git a/drivers/infiniband/hw/mthca/mthca_main.c 
b/drivers/infiniband/hw/mthca/mthca_main.c index 2d539403bdac..2f039680239c 100644 --- a/drivers/infiniband/hw/mthca/mthca_main.c +++ b/drivers/infiniband/hw/mthca/mthca_main.c @@ -213,7 +213,6 @@ static int __devinit mthca_init_tavor(struct mthca_dev *mdev) struct mthca_dev_lim dev_lim; struct mthca_profile profile; struct mthca_init_hca_param init_hca; - struct mthca_adapter adapter; err = mthca_SYS_EN(mdev, &status); if (err) { @@ -271,26 +270,8 @@ static int __devinit mthca_init_tavor(struct mthca_dev *mdev) goto err_disable; } - err = mthca_QUERY_ADAPTER(mdev, &adapter, &status); - if (err) { - mthca_err(mdev, "QUERY_ADAPTER command failed, aborting.\n"); - goto err_close; - } - if (status) { - mthca_err(mdev, "QUERY_ADAPTER returned status 0x%02x, " - "aborting.\n", status); - err = -EINVAL; - goto err_close; - } - - mdev->eq_table.inta_pin = adapter.inta_pin; - mdev->rev_id = adapter.revision_id; - return 0; -err_close: - mthca_CLOSE_HCA(mdev, 0, &status); - err_disable: mthca_SYS_DIS(mdev, &status); @@ -507,7 +488,6 @@ static int __devinit mthca_init_arbel(struct mthca_dev *mdev) struct mthca_dev_lim dev_lim; struct mthca_profile profile; struct mthca_init_hca_param init_hca; - struct mthca_adapter adapter; u64 icm_size; u8 status; int err; @@ -575,21 +555,6 @@ static int __devinit mthca_init_arbel(struct mthca_dev *mdev) goto err_free_icm; } - err = mthca_QUERY_ADAPTER(mdev, &adapter, &status); - if (err) { - mthca_err(mdev, "QUERY_ADAPTER command failed, aborting.\n"); - goto err_free_icm; - } - if (status) { - mthca_err(mdev, "QUERY_ADAPTER returned status 0x%02x, " - "aborting.\n", status); - err = -EINVAL; - goto err_free_icm; - } - - mdev->eq_table.inta_pin = adapter.inta_pin; - mdev->rev_id = adapter.revision_id; - return 0; err_free_icm: @@ -615,12 +580,68 @@ err_disable: return err; } +static void mthca_close_hca(struct mthca_dev *mdev) +{ + u8 status; + + mthca_CLOSE_HCA(mdev, 0, &status); + + if (mthca_is_memfree(mdev)) { + 
mthca_free_icm_table(mdev, mdev->cq_table.table); + mthca_free_icm_table(mdev, mdev->qp_table.rdb_table); + mthca_free_icm_table(mdev, mdev->qp_table.eqp_table); + mthca_free_icm_table(mdev, mdev->qp_table.qp_table); + mthca_free_icm_table(mdev, mdev->mr_table.mpt_table); + mthca_free_icm_table(mdev, mdev->mr_table.mtt_table); + mthca_unmap_eq_icm(mdev); + + mthca_UNMAP_ICM_AUX(mdev, &status); + mthca_free_icm(mdev, mdev->fw.arbel.aux_icm); + + mthca_UNMAP_FA(mdev, &status); + mthca_free_icm(mdev, mdev->fw.arbel.fw_icm); + + if (!(mdev->mthca_flags & MTHCA_FLAG_NO_LAM)) + mthca_DISABLE_LAM(mdev, &status); + } else + mthca_SYS_DIS(mdev, &status); +} + static int __devinit mthca_init_hca(struct mthca_dev *mdev) { + u8 status; + int err; + struct mthca_adapter adapter; + if (mthca_is_memfree(mdev)) - return mthca_init_arbel(mdev); + err = mthca_init_arbel(mdev); else - return mthca_init_tavor(mdev); + err = mthca_init_tavor(mdev); + + if (err) + return err; + + err = mthca_QUERY_ADAPTER(mdev, &adapter, &status); + if (err) { + mthca_err(mdev, "QUERY_ADAPTER command failed, aborting.\n"); + goto err_close; + } + if (status) { + mthca_err(mdev, "QUERY_ADAPTER returned status 0x%02x, " + "aborting.\n", status); + err = -EINVAL; + goto err_close; + } + + mdev->eq_table.inta_pin = adapter.inta_pin; + mdev->rev_id = adapter.revision_id; + memcpy(mdev->board_id, adapter.board_id, sizeof mdev->board_id); + + return 0; + +err_close: + mthca_close_hca(mdev); + return err; } static int __devinit mthca_setup_hca(struct mthca_dev *dev) @@ -845,33 +866,6 @@ static int __devinit mthca_enable_msi_x(struct mthca_dev *mdev) return 0; } -static void mthca_close_hca(struct mthca_dev *mdev) -{ - u8 status; - - mthca_CLOSE_HCA(mdev, 0, &status); - - if (mthca_is_memfree(mdev)) { - mthca_free_icm_table(mdev, mdev->cq_table.table); - mthca_free_icm_table(mdev, mdev->qp_table.rdb_table); - mthca_free_icm_table(mdev, mdev->qp_table.eqp_table); - mthca_free_icm_table(mdev, 
mdev->qp_table.qp_table); - mthca_free_icm_table(mdev, mdev->mr_table.mpt_table); - mthca_free_icm_table(mdev, mdev->mr_table.mtt_table); - mthca_unmap_eq_icm(mdev); - - mthca_UNMAP_ICM_AUX(mdev, &status); - mthca_free_icm(mdev, mdev->fw.arbel.aux_icm); - - mthca_UNMAP_FA(mdev, &status); - mthca_free_icm(mdev, mdev->fw.arbel.fw_icm); - - if (!(mdev->mthca_flags & MTHCA_FLAG_NO_LAM)) - mthca_DISABLE_LAM(mdev, &status); - } else - mthca_SYS_DIS(mdev, &status); -} - /* Types of supported HCA */ enum { TAVOR, /* MT23108 */ diff --git a/drivers/infiniband/hw/mthca/mthca_provider.c b/drivers/infiniband/hw/mthca/mthca_provider.c index e2db5e001869..f5e135f1dc59 100644 --- a/drivers/infiniband/hw/mthca/mthca_provider.c +++ b/drivers/infiniband/hw/mthca/mthca_provider.c @@ -958,14 +958,22 @@ static ssize_t show_hca(struct class_device *cdev, char *buf) } } +static ssize_t show_board(struct class_device *cdev, char *buf) +{ + struct mthca_dev *dev = container_of(cdev, struct mthca_dev, ib_dev.class_dev); + return sprintf(buf, "%.*s\n", MTHCA_BOARD_ID_LEN, dev->board_id); +} + static CLASS_DEVICE_ATTR(hw_rev, S_IRUGO, show_rev, NULL); static CLASS_DEVICE_ATTR(fw_ver, S_IRUGO, show_fw_ver, NULL); static CLASS_DEVICE_ATTR(hca_type, S_IRUGO, show_hca, NULL); +static CLASS_DEVICE_ATTR(board_id, S_IRUGO, show_board, NULL); static struct class_device_attribute *mthca_class_attributes[] = { &class_device_attr_hw_rev, &class_device_attr_fw_ver, - &class_device_attr_hca_type + &class_device_attr_hca_type, + &class_device_attr_board_id }; int mthca_register_device(struct mthca_dev *dev) From ffbf4c34f1916fa1e0554269c94c57da4a21a348 Mon Sep 17 00:00:00 2001 From: Roland Dreier Date: Mon, 15 Aug 2005 07:35:16 -0700 Subject: [PATCH 179/584] [PATCH] IB: unmap FMRs when destroying FMR pool Make sure that all FMRs are unmapped before we deallocate them so that we don't leak references to our protection domain when destroying an FMR pool. 
(Bug reported by Guy German ) Signed-off-by: Roland Dreier --- drivers/infiniband/core/fmr_pool.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/infiniband/core/fmr_pool.c b/drivers/infiniband/core/fmr_pool.c index 7763b31abba7..1f7374927f38 100644 --- a/drivers/infiniband/core/fmr_pool.c +++ b/drivers/infiniband/core/fmr_pool.c @@ -334,6 +334,7 @@ void ib_destroy_fmr_pool(struct ib_fmr_pool *pool) { struct ib_pool_fmr *fmr; struct ib_pool_fmr *tmp; + LIST_HEAD(fmr_list); int i; kthread_stop(pool->thread); @@ -341,6 +342,11 @@ void ib_destroy_fmr_pool(struct ib_fmr_pool *pool) i = 0; list_for_each_entry_safe(fmr, tmp, &pool->free_list, list) { + if (fmr->remap_count) { + INIT_LIST_HEAD(&fmr_list); + list_add_tail(&fmr->fmr->list, &fmr_list); + ib_unmap_fmr(&fmr_list); + } ib_dealloc_fmr(fmr->fmr); list_del(&fmr->list); kfree(fmr); From 7f9f2dba729cee6ea10596ccb07447d467705b08 Mon Sep 17 00:00:00 2001 From: Guy German Date: Mon, 15 Aug 2005 07:38:50 -0700 Subject: [PATCH 180/584] [PATCH] IB/mthca: use generic function instead of arbel_ version in mthca_free_region() Use the generic key_to_hw_index() function instead of the Arbel-specific version in mthca_free_region(). 
Signed-off-by: Guy German Signed-off-by: Roland Dreier --- drivers/infiniband/hw/mthca/mthca_mr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/mthca/mthca_mr.c b/drivers/infiniband/hw/mthca/mthca_mr.c index 0965e66061b7..1f97a44477f5 100644 --- a/drivers/infiniband/hw/mthca/mthca_mr.c +++ b/drivers/infiniband/hw/mthca/mthca_mr.c @@ -459,7 +459,7 @@ int mthca_mr_alloc_phys(struct mthca_dev *dev, u32 pd, static void mthca_free_region(struct mthca_dev *dev, u32 lkey) { mthca_table_put(dev, dev->mr_table.mpt_table, - arbel_key_to_hw_index(lkey)); + key_to_hw_index(dev, lkey)); mthca_free(&dev->mr_table.mpt_alloc, key_to_hw_index(dev, lkey)); } From 5dd2ce1200f4b12687d74de89a527f99e16c344e Mon Sep 17 00:00:00 2001 From: Hal Rosenstock Date: Mon, 15 Aug 2005 14:16:36 -0700 Subject: [PATCH 181/584] [PATCH] IB: Fix ib_mad_thread_completion_handler declaration Change ib_mad_thread_completion_handler to conform to ib_comp_handler declaration. Signed-off-by: Hal Rosenstock Signed-off-by: Roland Dreier --- drivers/infiniband/core/mad.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/infiniband/core/mad.c b/drivers/infiniband/core/mad.c index 214493cb3a0b..a4a4d9c1eef3 100644 --- a/drivers/infiniband/core/mad.c +++ b/drivers/infiniband/core/mad.c @@ -2296,7 +2296,7 @@ static void timeout_sends(void *data) spin_unlock_irqrestore(&mad_agent_priv->lock, flags); } -static void ib_mad_thread_completion_handler(struct ib_cq *cq) +static void ib_mad_thread_completion_handler(struct ib_cq *cq, void *arg) { struct ib_mad_port_private *port_priv = cq->cq_context; @@ -2576,8 +2576,7 @@ static int ib_mad_port_open(struct ib_device *device, cq_size = (IB_MAD_QP_SEND_SIZE + IB_MAD_QP_RECV_SIZE) * 2; port_priv->cq = ib_create_cq(port_priv->device, - (ib_comp_handler) - ib_mad_thread_completion_handler, + ib_mad_thread_completion_handler, NULL, port_priv, cq_size); if (IS_ERR(port_priv->cq)) { printk(KERN_ERR PFX 
"Couldn't create ib_mad CQ\n"); From 2aeba9a03b0d249fc710b9939fc089ce53d8cd30 Mon Sep 17 00:00:00 2001 From: Olaf Hering Date: Mon, 15 Aug 2005 14:29:03 -0700 Subject: [PATCH 182/584] [PATCH] IB: Remove unnecessary includes of <linux/version.h> changing CONFIG_LOCALVERSION rebuilds too much, for no apparent reason. Remove unneeded includes of <linux/version.h>. Signed-off-by: Olaf Hering Signed-off-by: Roland Dreier --- drivers/infiniband/core/cache.c | 1 - drivers/infiniband/hw/mthca/mthca_main.c | 1 - drivers/infiniband/ulp/ipoib/ipoib_main.c | 1 - drivers/infiniband/ulp/ipoib/ipoib_vlan.c | 1 - 4 files changed, 4 deletions(-) diff --git a/drivers/infiniband/core/cache.c b/drivers/infiniband/core/cache.c index 3a129db5ec27..9376e53f50f2 100644 --- a/drivers/infiniband/core/cache.c +++ b/drivers/infiniband/core/cache.c @@ -35,7 +35,6 @@ * $Id: cache.c 1349 2004-12-16 21:09:43Z roland $ */ -#include #include #include #include diff --git a/drivers/infiniband/hw/mthca/mthca_main.c b/drivers/infiniband/hw/mthca/mthca_main.c index 2f039680239c..279f158aa12b 100644 --- a/drivers/infiniband/hw/mthca/mthca_main.c +++ b/drivers/infiniband/hw/mthca/mthca_main.c @@ -35,7 +35,6 @@ */ #include -#include #include #include #include diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c index 7f349693b40a..968b27947f8d 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_main.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c @@ -36,7 +36,6 @@ #include "ipoib.h" -#include #include #include diff --git a/drivers/infiniband/ulp/ipoib/ipoib_vlan.c b/drivers/infiniband/ulp/ipoib/ipoib_vlan.c index 94b8ea812fef..332d730e60c2 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_vlan.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_vlan.c @@ -32,7 +32,6 @@ * $Id: ipoib_vlan.c 1349 2004-12-16 21:09:43Z roland $ */ -#include #include #include From da6561c285a6e28a075b97fd5a1560a2b0ce843e Mon Sep 17 00:00:00 2001 From: Roland Dreier Date: Wed, 17 Aug 2005 07:39:10 -0700 Subject: [PATCH 183/584] 
[PATCH] IB/mthca: Use correct port width capability value When we call the INIT_IB firmware command to bring up a port, use the actual port width capability returned by the QUERY_DEV_LIM command instead of always trying to enable both 1X and 4X. This fixes breakage seen when the firmware is built to allow 4X only. Signed-off-by: Roland Dreier --- drivers/infiniband/hw/mthca/mthca_cmd.c | 7 ++----- drivers/infiniband/hw/mthca/mthca_cmd.h | 3 +-- drivers/infiniband/hw/mthca/mthca_dev.h | 1 + drivers/infiniband/hw/mthca/mthca_main.c | 1 + drivers/infiniband/hw/mthca/mthca_qp.c | 11 +++++------ 5 files changed, 10 insertions(+), 13 deletions(-) diff --git a/drivers/infiniband/hw/mthca/mthca_cmd.c b/drivers/infiniband/hw/mthca/mthca_cmd.c index e15c1e2deab4..c258c1b7022e 100644 --- a/drivers/infiniband/hw/mthca/mthca_cmd.c +++ b/drivers/infiniband/hw/mthca/mthca_cmd.c @@ -1282,10 +1282,8 @@ int mthca_INIT_IB(struct mthca_dev *dev, #define INIT_IB_FLAG_SIG (1 << 18) #define INIT_IB_FLAG_NG (1 << 17) #define INIT_IB_FLAG_G0 (1 << 16) -#define INIT_IB_FLAG_1X (1 << 8) -#define INIT_IB_FLAG_4X (1 << 9) -#define INIT_IB_FLAG_12X (1 << 11) #define INIT_IB_VL_SHIFT 4 +#define INIT_IB_PORT_WIDTH_SHIFT 8 #define INIT_IB_MTU_SHIFT 12 #define INIT_IB_MAX_GID_OFFSET 0x06 #define INIT_IB_MAX_PKEY_OFFSET 0x0a @@ -1301,12 +1299,11 @@ int mthca_INIT_IB(struct mthca_dev *dev, memset(inbox, 0, INIT_IB_IN_SIZE); flags = 0; - flags |= param->enable_1x ? INIT_IB_FLAG_1X : 0; - flags |= param->enable_4x ? INIT_IB_FLAG_4X : 0; flags |= param->set_guid0 ? INIT_IB_FLAG_G0 : 0; flags |= param->set_node_guid ? INIT_IB_FLAG_NG : 0; flags |= param->set_si_guid ? 
INIT_IB_FLAG_SIG : 0; flags |= param->vl_cap << INIT_IB_VL_SHIFT; + flags |= param->port_width << INIT_IB_PORT_WIDTH_SHIFT; flags |= param->mtu_cap << INIT_IB_MTU_SHIFT; MTHCA_PUT(inbox, flags, INIT_IB_FLAGS_OFFSET); diff --git a/drivers/infiniband/hw/mthca/mthca_cmd.h b/drivers/infiniband/hw/mthca/mthca_cmd.h index 4e0062778ff9..11f02a61432a 100644 --- a/drivers/infiniband/hw/mthca/mthca_cmd.h +++ b/drivers/infiniband/hw/mthca/mthca_cmd.h @@ -220,8 +220,7 @@ struct mthca_init_hca_param { }; struct mthca_init_ib_param { - int enable_1x; - int enable_4x; + int port_width; int vl_cap; int mtu_cap; u16 gid_cap; diff --git a/drivers/infiniband/hw/mthca/mthca_dev.h b/drivers/infiniband/hw/mthca/mthca_dev.h index c8f67c034183..0f90a173ecee 100644 --- a/drivers/infiniband/hw/mthca/mthca_dev.h +++ b/drivers/infiniband/hw/mthca/mthca_dev.h @@ -148,6 +148,7 @@ struct mthca_limits { int reserved_mcgs; int num_pds; int reserved_pds; + u8 port_width_cap; }; struct mthca_alloc { diff --git a/drivers/infiniband/hw/mthca/mthca_main.c b/drivers/infiniband/hw/mthca/mthca_main.c index 279f158aa12b..16c5d4a805f0 100644 --- a/drivers/infiniband/hw/mthca/mthca_main.c +++ b/drivers/infiniband/hw/mthca/mthca_main.c @@ -171,6 +171,7 @@ static int __devinit mthca_dev_lim(struct mthca_dev *mdev, struct mthca_dev_lim mdev->limits.reserved_mrws = dev_lim->reserved_mrws; mdev->limits.reserved_uars = dev_lim->reserved_uars; mdev->limits.reserved_pds = dev_lim->reserved_pds; + mdev->limits.port_width_cap = dev_lim->max_port_width; /* IB_DEVICE_RESIZE_MAX_WR not supported by driver. May be doable since hardware supports it for SRQ. 
diff --git a/drivers/infiniband/hw/mthca/mthca_qp.c b/drivers/infiniband/hw/mthca/mthca_qp.c index 8fbb4f1f5398..b7e3d2342799 100644 --- a/drivers/infiniband/hw/mthca/mthca_qp.c +++ b/drivers/infiniband/hw/mthca/mthca_qp.c @@ -575,12 +575,11 @@ static void init_port(struct mthca_dev *dev, int port) memset(¶m, 0, sizeof param); - param.enable_1x = 1; - param.enable_4x = 1; - param.vl_cap = dev->limits.vl_cap; - param.mtu_cap = dev->limits.mtu_cap; - param.gid_cap = dev->limits.gid_table_len; - param.pkey_cap = dev->limits.pkey_table_len; + param.port_width = dev->limits.port_width_cap; + param.vl_cap = dev->limits.vl_cap; + param.mtu_cap = dev->limits.mtu_cap; + param.gid_cap = dev->limits.gid_table_len; + param.pkey_cap = dev->limits.pkey_table_len; err = mthca_INIT_IB(dev, ¶m, port, &status); if (err) From d1887ec2125988adccbd8bf0de638c41440bf80e Mon Sep 17 00:00:00 2001 From: Roland Dreier Date: Thu, 18 Aug 2005 12:14:11 -0700 Subject: [PATCH 184/584] [PATCH] IB/mthca: Report correct max_msg_sz Set the max_msg_sz port property correctly in mthca's port_query function. Also zero out the attr struct so that we don't leave any other members uninitialized. 
Signed-off-by: Roland Dreier --- drivers/infiniband/hw/mthca/mthca_provider.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/infiniband/hw/mthca/mthca_provider.c b/drivers/infiniband/hw/mthca/mthca_provider.c index f5e135f1dc59..08a7340e19ff 100644 --- a/drivers/infiniband/hw/mthca/mthca_provider.c +++ b/drivers/infiniband/hw/mthca/mthca_provider.c @@ -120,6 +120,8 @@ static int mthca_query_port(struct ib_device *ibdev, if (!in_mad || !out_mad) goto out; + memset(props, 0, sizeof *props); + memset(in_mad, 0, sizeof *in_mad); in_mad->base_version = 1; in_mad->mgmt_class = IB_MGMT_CLASS_SUBN_LID_ROUTED; @@ -146,6 +148,7 @@ static int mthca_query_port(struct ib_device *ibdev, props->phys_state = out_mad->data[33] >> 4; props->port_cap_flags = be32_to_cpup((__be32 *) (out_mad->data + 20)); props->gid_tbl_len = to_mdev(ibdev)->limits.gid_table_len; + props->max_msg_sz = 0x80000000; props->pkey_tbl_len = to_mdev(ibdev)->limits.pkey_table_len; props->qkey_viol_cntr = be16_to_cpup((__be16 *) (out_mad->data + 48)); props->active_width = out_mad->data[31] & 0xf; From d41fcc6705eddd04f7218c985b6da35435ed73cc Mon Sep 17 00:00:00 2001 From: Roland Dreier Date: Thu, 18 Aug 2005 12:23:08 -0700 Subject: [PATCH 185/584] [PATCH] IB: Add SRQ support to midlayer Make the required core API additions and changes for shared receive queues (SRQs). 
Signed-off-by: Roland Dreier --- drivers/infiniband/core/verbs.c | 60 +++++++++++++++ drivers/infiniband/include/ib_verbs.h | 103 ++++++++++++++++++++++++-- 2 files changed, 158 insertions(+), 5 deletions(-) diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c index c301a2c41f34..c035510c5a36 100644 --- a/drivers/infiniband/core/verbs.c +++ b/drivers/infiniband/core/verbs.c @@ -154,6 +154,66 @@ int ib_destroy_ah(struct ib_ah *ah) } EXPORT_SYMBOL(ib_destroy_ah); +/* Shared receive queues */ + +struct ib_srq *ib_create_srq(struct ib_pd *pd, + struct ib_srq_init_attr *srq_init_attr) +{ + struct ib_srq *srq; + + if (!pd->device->create_srq) + return ERR_PTR(-ENOSYS); + + srq = pd->device->create_srq(pd, srq_init_attr, NULL); + + if (!IS_ERR(srq)) { + srq->device = pd->device; + srq->pd = pd; + srq->uobject = NULL; + srq->event_handler = srq_init_attr->event_handler; + srq->srq_context = srq_init_attr->srq_context; + atomic_inc(&pd->usecnt); + atomic_set(&srq->usecnt, 0); + } + + return srq; +} +EXPORT_SYMBOL(ib_create_srq); + +int ib_modify_srq(struct ib_srq *srq, + struct ib_srq_attr *srq_attr, + enum ib_srq_attr_mask srq_attr_mask) +{ + return srq->device->modify_srq(srq, srq_attr, srq_attr_mask); +} +EXPORT_SYMBOL(ib_modify_srq); + +int ib_query_srq(struct ib_srq *srq, + struct ib_srq_attr *srq_attr) +{ + return srq->device->query_srq ? 
+ srq->device->query_srq(srq, srq_attr) : -ENOSYS; +} +EXPORT_SYMBOL(ib_query_srq); + +int ib_destroy_srq(struct ib_srq *srq) +{ + struct ib_pd *pd; + int ret; + + if (atomic_read(&srq->usecnt)) + return -EBUSY; + + pd = srq->pd; + + ret = srq->device->destroy_srq(srq); + if (!ret) + atomic_dec(&pd->usecnt); + + return ret; +} +EXPORT_SYMBOL(ib_destroy_srq); + /* Queue pairs */ struct ib_qp *ib_create_qp(struct ib_pd *pd, diff --git a/drivers/infiniband/include/ib_verbs.h b/drivers/infiniband/include/ib_verbs.h index 042a7d11fbcc..e16cf94870f2 100644 --- a/drivers/infiniband/include/ib_verbs.h +++ b/drivers/infiniband/include/ib_verbs.h @@ -256,7 +256,10 @@ enum ib_event_type { IB_EVENT_PORT_ERR, IB_EVENT_LID_CHANGE, IB_EVENT_PKEY_CHANGE, - IB_EVENT_SM_CHANGE + IB_EVENT_SM_CHANGE, + IB_EVENT_SRQ_ERR, + IB_EVENT_SRQ_LIMIT_REACHED, + IB_EVENT_QP_LAST_WQE_REACHED }; struct ib_event { @@ -264,6 +267,7 @@ struct ib_event { union { struct ib_cq *cq; struct ib_qp *qp; + struct ib_srq *srq; u8 port_num; } element; enum ib_event_type event; @@ -386,6 +390,23 @@ enum ib_cq_notify { IB_CQ_NEXT_COMP }; +enum ib_srq_attr_mask { + IB_SRQ_MAX_WR = 1 << 0, + IB_SRQ_LIMIT = 1 << 1, +}; + +struct ib_srq_attr { + u32 max_wr; + u32 max_sge; + u32 srq_limit; +}; + +struct ib_srq_init_attr { + void (*event_handler)(struct ib_event *, void *); + void *srq_context; + struct ib_srq_attr attr; +}; + struct ib_qp_cap { u32 max_send_wr; u32 max_recv_wr; @@ -713,10 +734,11 @@ struct ib_cq { }; struct ib_srq { - struct ib_device *device; - struct ib_uobject *uobject; - struct ib_pd *pd; - void *srq_context; + struct ib_device *device; + struct ib_pd *pd; + struct ib_uobject *uobject; + void (*event_handler)(struct ib_event *, void *); + void *srq_context; atomic_t usecnt; }; @@ -830,6 +852,18 @@ struct ib_device { int (*query_ah)(struct ib_ah *ah, struct ib_ah_attr *ah_attr); int (*destroy_ah)(struct ib_ah *ah); + struct ib_srq * (*create_srq)(struct ib_pd *pd, + struct ib_srq_init_attr 
*srq_init_attr, + struct ib_udata *udata); + int (*modify_srq)(struct ib_srq *srq, + struct ib_srq_attr *srq_attr, + enum ib_srq_attr_mask srq_attr_mask); + int (*query_srq)(struct ib_srq *srq, + struct ib_srq_attr *srq_attr); + int (*destroy_srq)(struct ib_srq *srq); + int (*post_srq_recv)(struct ib_srq *srq, + struct ib_recv_wr *recv_wr, + struct ib_recv_wr **bad_recv_wr); struct ib_qp * (*create_qp)(struct ib_pd *pd, struct ib_qp_init_attr *qp_init_attr, struct ib_udata *udata); @@ -1041,6 +1075,65 @@ int ib_query_ah(struct ib_ah *ah, struct ib_ah_attr *ah_attr); */ int ib_destroy_ah(struct ib_ah *ah); +/** + * ib_create_srq - Creates a SRQ associated with the specified protection + * domain. + * @pd: The protection domain associated with the SRQ. + * @srq_init_attr: A list of initial attributes required to create the SRQ. + * + * srq_attr->max_wr and srq_attr->max_sge are read the determine the + * requested size of the SRQ, and set to the actual values allocated + * on return. If ib_create_srq() succeeds, then max_wr and max_sge + * will always be at least as large as the requested values. + */ +struct ib_srq *ib_create_srq(struct ib_pd *pd, + struct ib_srq_init_attr *srq_init_attr); + +/** + * ib_modify_srq - Modifies the attributes for the specified SRQ. + * @srq: The SRQ to modify. + * @srq_attr: On input, specifies the SRQ attributes to modify. On output, + * the current values of selected SRQ attributes are returned. + * @srq_attr_mask: A bit-mask used to specify which attributes of the SRQ + * are being modified. + * + * The mask may contain IB_SRQ_MAX_WR to resize the SRQ and/or + * IB_SRQ_LIMIT to set the SRQ's limit and request notification when + * the number of receives queued drops below the limit. + */ +int ib_modify_srq(struct ib_srq *srq, + struct ib_srq_attr *srq_attr, + enum ib_srq_attr_mask srq_attr_mask); + +/** + * ib_query_srq - Returns the attribute list and current values for the + * specified SRQ. + * @srq: The SRQ to query. 
+ * @srq_attr: The attributes of the specified SRQ. + */ +int ib_query_srq(struct ib_srq *srq, + struct ib_srq_attr *srq_attr); + +/** + * ib_destroy_srq - Destroys the specified SRQ. + * @srq: The SRQ to destroy. + */ +int ib_destroy_srq(struct ib_srq *srq); + +/** + * ib_post_srq_recv - Posts a list of work requests to the specified SRQ. + * @srq: The SRQ to post the work request on. + * @recv_wr: A list of work requests to post on the receive queue. + * @bad_recv_wr: On an immediate failure, this parameter will reference + * the work request that failed to be posted on the QP. + */ +static inline int ib_post_srq_recv(struct ib_srq *srq, + struct ib_recv_wr *recv_wr, + struct ib_recv_wr **bad_recv_wr) +{ + return srq->device->post_srq_recv(srq, recv_wr, bad_recv_wr); +} + /** * ib_create_qp - Creates a QP associated with the specified protection * domain. From f520ba5aa48e2891c3fb3e364eeaaab4212c7c45 Mon Sep 17 00:00:00 2001 From: Roland Dreier Date: Thu, 18 Aug 2005 12:24:13 -0700 Subject: [PATCH 186/584] [PATCH] IB: userspace SRQ support Add SRQ support to userspace verbs module. This adds several commands and associated structures, but it's OK to do this without bumping the ABI version because the commands are added at the end of the list so they don't change the existing numbering. There are two cases to worry about: 1. New kernel, old userspace. This is OK because old userspace simply won't try to use the new SRQ commands. None of the old commands are changed. 2. Old kernel, new userspace. This works perfectly as long as userspace doesn't try to use SRQ commands. If userspace tries to use SRQ commands, it will get EINVAL, which is perfectly reasonable: the kernel doesn't support SRQs, so we couldn't do any better. 
Signed-off-by: Roland Dreier --- drivers/infiniband/core/uverbs.h | 5 + drivers/infiniband/core/uverbs_cmd.c | 182 ++++++++++++++++++++- drivers/infiniband/core/uverbs_main.c | 20 ++- drivers/infiniband/include/ib_user_verbs.h | 35 +++- 4 files changed, 238 insertions(+), 4 deletions(-) diff --git a/drivers/infiniband/core/uverbs.h b/drivers/infiniband/core/uverbs.h index 3e158f5acfc6..db161810c0c0 100644 --- a/drivers/infiniband/core/uverbs.h +++ b/drivers/infiniband/core/uverbs.h @@ -99,10 +99,12 @@ extern struct idr ib_uverbs_mw_idr; extern struct idr ib_uverbs_ah_idr; extern struct idr ib_uverbs_cq_idr; extern struct idr ib_uverbs_qp_idr; +extern struct idr ib_uverbs_srq_idr; void ib_uverbs_comp_handler(struct ib_cq *cq, void *cq_context); void ib_uverbs_cq_event_handler(struct ib_event *event, void *context_ptr); void ib_uverbs_qp_event_handler(struct ib_event *event, void *context_ptr); +void ib_uverbs_srq_event_handler(struct ib_event *event, void *context_ptr); int ib_umem_get(struct ib_device *dev, struct ib_umem *mem, void *addr, size_t size, int write); @@ -131,5 +133,8 @@ IB_UVERBS_DECLARE_CMD(modify_qp); IB_UVERBS_DECLARE_CMD(destroy_qp); IB_UVERBS_DECLARE_CMD(attach_mcast); IB_UVERBS_DECLARE_CMD(detach_mcast); +IB_UVERBS_DECLARE_CMD(create_srq); +IB_UVERBS_DECLARE_CMD(modify_srq); +IB_UVERBS_DECLARE_CMD(destroy_srq); #endif /* UVERBS_H */ diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index 5f2bbcda4c73..ebccf9f38af9 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -724,6 +724,7 @@ ssize_t ib_uverbs_create_qp(struct ib_uverbs_file *file, struct ib_uobject *uobj; struct ib_pd *pd; struct ib_cq *scq, *rcq; + struct ib_srq *srq; struct ib_qp *qp; struct ib_qp_init_attr attr; int ret; @@ -747,10 +748,12 @@ ssize_t ib_uverbs_create_qp(struct ib_uverbs_file *file, pd = idr_find(&ib_uverbs_pd_idr, cmd.pd_handle); scq = idr_find(&ib_uverbs_cq_idr, cmd.send_cq_handle); rcq 
= idr_find(&ib_uverbs_cq_idr, cmd.recv_cq_handle); + srq = cmd.is_srq ? idr_find(&ib_uverbs_srq_idr, cmd.srq_handle) : NULL; if (!pd || pd->uobject->context != file->ucontext || !scq || scq->uobject->context != file->ucontext || - !rcq || rcq->uobject->context != file->ucontext) { + !rcq || rcq->uobject->context != file->ucontext || + (cmd.is_srq && (!srq || srq->uobject->context != file->ucontext))) { ret = -EINVAL; goto err_up; } @@ -759,7 +762,7 @@ ssize_t ib_uverbs_create_qp(struct ib_uverbs_file *file, attr.qp_context = file; attr.send_cq = scq; attr.recv_cq = rcq; - attr.srq = NULL; + attr.srq = srq; attr.sq_sig_type = cmd.sq_sig_all ? IB_SIGNAL_ALL_WR : IB_SIGNAL_REQ_WR; attr.qp_type = cmd.qp_type; @@ -1004,3 +1007,178 @@ ssize_t ib_uverbs_detach_mcast(struct ib_uverbs_file *file, return ret ? ret : in_len; } + +ssize_t ib_uverbs_create_srq(struct ib_uverbs_file *file, + const char __user *buf, int in_len, + int out_len) +{ + struct ib_uverbs_create_srq cmd; + struct ib_uverbs_create_srq_resp resp; + struct ib_udata udata; + struct ib_uobject *uobj; + struct ib_pd *pd; + struct ib_srq *srq; + struct ib_srq_init_attr attr; + int ret; + + if (out_len < sizeof resp) + return -ENOSPC; + + if (copy_from_user(&cmd, buf, sizeof cmd)) + return -EFAULT; + + INIT_UDATA(&udata, buf + sizeof cmd, + (unsigned long) cmd.response + sizeof resp, + in_len - sizeof cmd, out_len - sizeof resp); + + uobj = kmalloc(sizeof *uobj, GFP_KERNEL); + if (!uobj) + return -ENOMEM; + + down(&ib_uverbs_idr_mutex); + + pd = idr_find(&ib_uverbs_pd_idr, cmd.pd_handle); + + if (!pd || pd->uobject->context != file->ucontext) { + ret = -EINVAL; + goto err_up; + } + + attr.event_handler = ib_uverbs_srq_event_handler; + attr.srq_context = file; + attr.attr.max_wr = cmd.max_wr; + attr.attr.max_sge = cmd.max_sge; + attr.attr.srq_limit = cmd.srq_limit; + + uobj->user_handle = cmd.user_handle; + uobj->context = file->ucontext; + + srq = pd->device->create_srq(pd, &attr, &udata); + if (IS_ERR(srq)) { + 
ret = PTR_ERR(srq); + goto err_up; + } + + srq->device = pd->device; + srq->pd = pd; + srq->uobject = uobj; + srq->event_handler = attr.event_handler; + srq->srq_context = attr.srq_context; + atomic_inc(&pd->usecnt); + atomic_set(&srq->usecnt, 0); + + memset(&resp, 0, sizeof resp); + +retry: + if (!idr_pre_get(&ib_uverbs_srq_idr, GFP_KERNEL)) { + ret = -ENOMEM; + goto err_destroy; + } + + ret = idr_get_new(&ib_uverbs_srq_idr, srq, &uobj->id); + + if (ret == -EAGAIN) + goto retry; + if (ret) + goto err_destroy; + + resp.srq_handle = uobj->id; + + spin_lock_irq(&file->ucontext->lock); + list_add_tail(&uobj->list, &file->ucontext->srq_list); + spin_unlock_irq(&file->ucontext->lock); + + if (copy_to_user((void __user *) (unsigned long) cmd.response, + &resp, sizeof resp)) { + ret = -EFAULT; + goto err_list; + } + + up(&ib_uverbs_idr_mutex); + + return in_len; + +err_list: + spin_lock_irq(&file->ucontext->lock); + list_del(&uobj->list); + spin_unlock_irq(&file->ucontext->lock); + +err_destroy: + ib_destroy_srq(srq); + +err_up: + up(&ib_uverbs_idr_mutex); + + kfree(uobj); + return ret; +} + +ssize_t ib_uverbs_modify_srq(struct ib_uverbs_file *file, + const char __user *buf, int in_len, + int out_len) +{ + struct ib_uverbs_modify_srq cmd; + struct ib_srq *srq; + struct ib_srq_attr attr; + int ret; + + if (copy_from_user(&cmd, buf, sizeof cmd)) + return -EFAULT; + + down(&ib_uverbs_idr_mutex); + + srq = idr_find(&ib_uverbs_srq_idr, cmd.srq_handle); + if (!srq || srq->uobject->context != file->ucontext) { + ret = -EINVAL; + goto out; + } + + attr.max_wr = cmd.max_wr; + attr.max_sge = cmd.max_sge; + attr.srq_limit = cmd.srq_limit; + + ret = ib_modify_srq(srq, &attr, cmd.attr_mask); + +out: + up(&ib_uverbs_idr_mutex); + + return ret ? 
ret : in_len; +} + +ssize_t ib_uverbs_destroy_srq(struct ib_uverbs_file *file, + const char __user *buf, int in_len, + int out_len) +{ + struct ib_uverbs_destroy_srq cmd; + struct ib_srq *srq; + struct ib_uobject *uobj; + int ret = -EINVAL; + + if (copy_from_user(&cmd, buf, sizeof cmd)) + return -EFAULT; + + down(&ib_uverbs_idr_mutex); + + srq = idr_find(&ib_uverbs_srq_idr, cmd.srq_handle); + if (!srq || srq->uobject->context != file->ucontext) + goto out; + + uobj = srq->uobject; + + ret = ib_destroy_srq(srq); + if (ret) + goto out; + + idr_remove(&ib_uverbs_srq_idr, cmd.srq_handle); + + spin_lock_irq(&file->ucontext->lock); + list_del(&uobj->list); + spin_unlock_irq(&file->ucontext->lock); + + kfree(uobj); + +out: + up(&ib_uverbs_idr_mutex); + + return ret ? ret : in_len; +} diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c index fd8e96359304..09caf5b1ef36 100644 --- a/drivers/infiniband/core/uverbs_main.c +++ b/drivers/infiniband/core/uverbs_main.c @@ -69,6 +69,7 @@ DEFINE_IDR(ib_uverbs_mw_idr); DEFINE_IDR(ib_uverbs_ah_idr); DEFINE_IDR(ib_uverbs_cq_idr); DEFINE_IDR(ib_uverbs_qp_idr); +DEFINE_IDR(ib_uverbs_srq_idr); static spinlock_t map_lock; static DECLARE_BITMAP(dev_map, IB_UVERBS_MAX_DEVICES); @@ -93,6 +94,9 @@ static ssize_t (*uverbs_cmd_table[])(struct ib_uverbs_file *file, [IB_USER_VERBS_CMD_DESTROY_QP] = ib_uverbs_destroy_qp, [IB_USER_VERBS_CMD_ATTACH_MCAST] = ib_uverbs_attach_mcast, [IB_USER_VERBS_CMD_DETACH_MCAST] = ib_uverbs_detach_mcast, + [IB_USER_VERBS_CMD_CREATE_SRQ] = ib_uverbs_create_srq, + [IB_USER_VERBS_CMD_MODIFY_SRQ] = ib_uverbs_modify_srq, + [IB_USER_VERBS_CMD_DESTROY_SRQ] = ib_uverbs_destroy_srq, }; static struct vfsmount *uverbs_event_mnt; @@ -127,7 +131,14 @@ static int ib_dealloc_ucontext(struct ib_ucontext *context) kfree(uobj); } - /* XXX Free SRQs */ + list_for_each_entry_safe(uobj, tmp, &context->srq_list, list) { + struct ib_srq *srq = idr_find(&ib_uverbs_srq_idr, uobj->id); + 
idr_remove(&ib_uverbs_srq_idr, uobj->id); + ib_destroy_srq(srq); + list_del(&uobj->list); + kfree(uobj); + } + /* XXX Free MWs */ list_for_each_entry_safe(uobj, tmp, &context->mr_list, list) { @@ -346,6 +357,13 @@ void ib_uverbs_qp_event_handler(struct ib_event *event, void *context_ptr) event->event); } +void ib_uverbs_srq_event_handler(struct ib_event *event, void *context_ptr) +{ + ib_uverbs_async_handler(context_ptr, + event->element.srq->uobject->user_handle, + event->event); +} + static void ib_uverbs_event_handler(struct ib_event_handler *handler, struct ib_event *event) { diff --git a/drivers/infiniband/include/ib_user_verbs.h b/drivers/infiniband/include/ib_user_verbs.h index 35857857aa3e..7ebb01c8f996 100644 --- a/drivers/infiniband/include/ib_user_verbs.h +++ b/drivers/infiniband/include/ib_user_verbs.h @@ -78,7 +78,12 @@ enum { IB_USER_VERBS_CMD_POST_SEND, IB_USER_VERBS_CMD_POST_RECV, IB_USER_VERBS_CMD_ATTACH_MCAST, - IB_USER_VERBS_CMD_DETACH_MCAST + IB_USER_VERBS_CMD_DETACH_MCAST, + IB_USER_VERBS_CMD_CREATE_SRQ, + IB_USER_VERBS_CMD_MODIFY_SRQ, + IB_USER_VERBS_CMD_QUERY_SRQ, + IB_USER_VERBS_CMD_DESTROY_SRQ, + IB_USER_VERBS_CMD_POST_SRQ_RECV }; /* @@ -386,4 +391,32 @@ struct ib_uverbs_detach_mcast { __u64 driver_data[0]; }; +struct ib_uverbs_create_srq { + __u64 response; + __u64 user_handle; + __u32 pd_handle; + __u32 max_wr; + __u32 max_sge; + __u32 srq_limit; + __u64 driver_data[0]; +}; + +struct ib_uverbs_create_srq_resp { + __u32 srq_handle; +}; + +struct ib_uverbs_modify_srq { + __u32 srq_handle; + __u32 attr_mask; + __u32 max_wr; + __u32 max_sge; + __u32 srq_limit; + __u32 reserved; + __u64 driver_data[0]; +}; + +struct ib_uverbs_destroy_srq { + __u32 srq_handle; +}; + #endif /* IB_USER_VERBS_H */ From 87b816706bb2b79fbaff8e0b8e279e783273383e Mon Sep 17 00:00:00 2001 From: Roland Dreier Date: Thu, 18 Aug 2005 13:39:31 -0700 Subject: [PATCH 187/584] [PATCH] IB/mthca: Factor out common queue alloc code Clean up the allocation of memory for queues by 
factoring out the common code into mthca_buf_alloc() and mthca_buf_free(). Now CQs and QPs share the same queue allocation code, which we'll also use for SRQs. Signed-off-by: Roland Dreier --- drivers/infiniband/hw/mthca/mthca_allocator.c | 116 +++++++++++++++++ drivers/infiniband/hw/mthca/mthca_cq.c | 118 +----------------- drivers/infiniband/hw/mthca/mthca_dev.h | 5 + drivers/infiniband/hw/mthca/mthca_provider.h | 15 ++- drivers/infiniband/hw/mthca/mthca_qp.c | 111 ++-------------- 5 files changed, 141 insertions(+), 224 deletions(-) diff --git a/drivers/infiniband/hw/mthca/mthca_allocator.c b/drivers/infiniband/hw/mthca/mthca_allocator.c index b1db48dd91d6..9ba3211cef7c 100644 --- a/drivers/infiniband/hw/mthca/mthca_allocator.c +++ b/drivers/infiniband/hw/mthca/mthca_allocator.c @@ -177,3 +177,119 @@ void mthca_array_cleanup(struct mthca_array *array, int nent) kfree(array->page_list); } + +/* + * Handling for queue buffers -- we allocate a bunch of memory and + * register it in a memory region at HCA virtual address 0. If the + * requested size is > max_direct, we split the allocation into + * multiple pages, so we don't require too much contiguous memory. 
+ */ + +int mthca_buf_alloc(struct mthca_dev *dev, int size, int max_direct, + union mthca_buf *buf, int *is_direct, struct mthca_pd *pd, + int hca_write, struct mthca_mr *mr) +{ + int err = -ENOMEM; + int npages, shift; + u64 *dma_list = NULL; + dma_addr_t t; + int i; + + if (size <= max_direct) { + *is_direct = 1; + npages = 1; + shift = get_order(size) + PAGE_SHIFT; + + buf->direct.buf = dma_alloc_coherent(&dev->pdev->dev, + size, &t, GFP_KERNEL); + if (!buf->direct.buf) + return -ENOMEM; + + pci_unmap_addr_set(&buf->direct, mapping, t); + + memset(buf->direct.buf, 0, size); + + while (t & ((1 << shift) - 1)) { + --shift; + npages *= 2; + } + + dma_list = kmalloc(npages * sizeof *dma_list, GFP_KERNEL); + if (!dma_list) + goto err_free; + + for (i = 0; i < npages; ++i) + dma_list[i] = t + i * (1 << shift); + } else { + *is_direct = 0; + npages = (size + PAGE_SIZE - 1) / PAGE_SIZE; + shift = PAGE_SHIFT; + + dma_list = kmalloc(npages * sizeof *dma_list, GFP_KERNEL); + if (!dma_list) + return -ENOMEM; + + buf->page_list = kmalloc(npages * sizeof *buf->page_list, + GFP_KERNEL); + if (!buf->page_list) + goto err_out; + + for (i = 0; i < npages; ++i) + buf->page_list[i].buf = NULL; + + for (i = 0; i < npages; ++i) { + buf->page_list[i].buf = + dma_alloc_coherent(&dev->pdev->dev, PAGE_SIZE, + &t, GFP_KERNEL); + if (!buf->page_list[i].buf) + goto err_free; + + dma_list[i] = t; + pci_unmap_addr_set(&buf->page_list[i], mapping, t); + + memset(buf->page_list[i].buf, 0, PAGE_SIZE); + } + } + + err = mthca_mr_alloc_phys(dev, pd->pd_num, + dma_list, shift, npages, + 0, size, + MTHCA_MPT_FLAG_LOCAL_READ | + (hca_write ? 
MTHCA_MPT_FLAG_LOCAL_WRITE : 0), + mr); + if (err) + goto err_free; + + kfree(dma_list); + + return 0; + +err_free: + mthca_buf_free(dev, size, buf, *is_direct, NULL); + +err_out: + kfree(dma_list); + + return err; +} + +void mthca_buf_free(struct mthca_dev *dev, int size, union mthca_buf *buf, + int is_direct, struct mthca_mr *mr) +{ + int i; + + if (mr) + mthca_free_mr(dev, mr); + + if (is_direct) + dma_free_coherent(&dev->pdev->dev, size, buf->direct.buf, + pci_unmap_addr(&buf->direct, mapping)); + else { + for (i = 0; i < (size + PAGE_SIZE - 1) / PAGE_SIZE; ++i) + dma_free_coherent(&dev->pdev->dev, PAGE_SIZE, + buf->page_list[i].buf, + pci_unmap_addr(&buf->page_list[i], + mapping)); + kfree(buf->page_list); + } +} diff --git a/drivers/infiniband/hw/mthca/mthca_cq.c b/drivers/infiniband/hw/mthca/mthca_cq.c index 907867d1f2e0..8afb9ee2fbc6 100644 --- a/drivers/infiniband/hw/mthca/mthca_cq.c +++ b/drivers/infiniband/hw/mthca/mthca_cq.c @@ -639,113 +639,8 @@ int mthca_arbel_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify notify) static void mthca_free_cq_buf(struct mthca_dev *dev, struct mthca_cq *cq) { - int i; - int size; - - if (cq->is_direct) - dma_free_coherent(&dev->pdev->dev, - (cq->ibcq.cqe + 1) * MTHCA_CQ_ENTRY_SIZE, - cq->queue.direct.buf, - pci_unmap_addr(&cq->queue.direct, - mapping)); - else { - size = (cq->ibcq.cqe + 1) * MTHCA_CQ_ENTRY_SIZE; - for (i = 0; i < (size + PAGE_SIZE - 1) / PAGE_SIZE; ++i) - if (cq->queue.page_list[i].buf) - dma_free_coherent(&dev->pdev->dev, PAGE_SIZE, - cq->queue.page_list[i].buf, - pci_unmap_addr(&cq->queue.page_list[i], - mapping)); - - kfree(cq->queue.page_list); - } -} - -static int mthca_alloc_cq_buf(struct mthca_dev *dev, int size, - struct mthca_cq *cq) -{ - int err = -ENOMEM; - int npages, shift; - u64 *dma_list = NULL; - dma_addr_t t; - int i; - - if (size <= MTHCA_MAX_DIRECT_CQ_SIZE) { - cq->is_direct = 1; - npages = 1; - shift = get_order(size) + PAGE_SHIFT; - - cq->queue.direct.buf = 
dma_alloc_coherent(&dev->pdev->dev, - size, &t, GFP_KERNEL); - if (!cq->queue.direct.buf) - return -ENOMEM; - - pci_unmap_addr_set(&cq->queue.direct, mapping, t); - - memset(cq->queue.direct.buf, 0, size); - - while (t & ((1 << shift) - 1)) { - --shift; - npages *= 2; - } - - dma_list = kmalloc(npages * sizeof *dma_list, GFP_KERNEL); - if (!dma_list) - goto err_free; - - for (i = 0; i < npages; ++i) - dma_list[i] = t + i * (1 << shift); - } else { - cq->is_direct = 0; - npages = (size + PAGE_SIZE - 1) / PAGE_SIZE; - shift = PAGE_SHIFT; - - dma_list = kmalloc(npages * sizeof *dma_list, GFP_KERNEL); - if (!dma_list) - return -ENOMEM; - - cq->queue.page_list = kmalloc(npages * sizeof *cq->queue.page_list, - GFP_KERNEL); - if (!cq->queue.page_list) - goto err_out; - - for (i = 0; i < npages; ++i) - cq->queue.page_list[i].buf = NULL; - - for (i = 0; i < npages; ++i) { - cq->queue.page_list[i].buf = - dma_alloc_coherent(&dev->pdev->dev, PAGE_SIZE, - &t, GFP_KERNEL); - if (!cq->queue.page_list[i].buf) - goto err_free; - - dma_list[i] = t; - pci_unmap_addr_set(&cq->queue.page_list[i], mapping, t); - - memset(cq->queue.page_list[i].buf, 0, PAGE_SIZE); - } - } - - err = mthca_mr_alloc_phys(dev, dev->driver_pd.pd_num, - dma_list, shift, npages, - 0, size, - MTHCA_MPT_FLAG_LOCAL_WRITE | - MTHCA_MPT_FLAG_LOCAL_READ, - &cq->mr); - if (err) - goto err_free; - - kfree(dma_list); - - return 0; - -err_free: - mthca_free_cq_buf(dev, cq); - -err_out: - kfree(dma_list); - - return err; + mthca_buf_free(dev, (cq->ibcq.cqe + 1) * MTHCA_CQ_ENTRY_SIZE, + &cq->queue, cq->is_direct, &cq->mr); } int mthca_init_cq(struct mthca_dev *dev, int nent, @@ -797,7 +692,9 @@ int mthca_init_cq(struct mthca_dev *dev, int nent, cq_context = mailbox->buf; if (cq->is_kernel) { - err = mthca_alloc_cq_buf(dev, size, cq); + err = mthca_buf_alloc(dev, size, MTHCA_MAX_DIRECT_CQ_SIZE, + &cq->queue, &cq->is_direct, + &dev->driver_pd, 1, &cq->mr); if (err) goto err_out_mailbox; @@ -858,10 +755,8 @@ int 
mthca_init_cq(struct mthca_dev *dev, int nent, return 0; err_out_free_mr: - if (cq->is_kernel) { - mthca_free_mr(dev, &cq->mr); + if (cq->is_kernel) mthca_free_cq_buf(dev, cq); - } err_out_mailbox: mthca_free_mailbox(dev, mailbox); @@ -929,7 +824,6 @@ void mthca_free_cq(struct mthca_dev *dev, wait_event(cq->wait, !atomic_read(&cq->refcount)); if (cq->is_kernel) { - mthca_free_mr(dev, &cq->mr); mthca_free_cq_buf(dev, cq); if (mthca_is_memfree(dev)) { mthca_free_db(dev, MTHCA_DB_TYPE_CQ_ARM, cq->arm_db_index); diff --git a/drivers/infiniband/hw/mthca/mthca_dev.h b/drivers/infiniband/hw/mthca/mthca_dev.h index 0f90a173ecee..cb78b5d07201 100644 --- a/drivers/infiniband/hw/mthca/mthca_dev.h +++ b/drivers/infiniband/hw/mthca/mthca_dev.h @@ -361,6 +361,11 @@ int mthca_array_set(struct mthca_array *array, int index, void *value); void mthca_array_clear(struct mthca_array *array, int index); int mthca_array_init(struct mthca_array *array, int nent); void mthca_array_cleanup(struct mthca_array *array, int nent); +int mthca_buf_alloc(struct mthca_dev *dev, int size, int max_direct, + union mthca_buf *buf, int *is_direct, struct mthca_pd *pd, + int hca_write, struct mthca_mr *mr); +void mthca_buf_free(struct mthca_dev *dev, int size, union mthca_buf *buf, + int is_direct, struct mthca_mr *mr); int mthca_init_uar_table(struct mthca_dev *dev); int mthca_init_pd_table(struct mthca_dev *dev); diff --git a/drivers/infiniband/hw/mthca/mthca_provider.h b/drivers/infiniband/hw/mthca/mthca_provider.h index 624651edf577..b95249ee46cf 100644 --- a/drivers/infiniband/hw/mthca/mthca_provider.h +++ b/drivers/infiniband/hw/mthca/mthca_provider.h @@ -51,6 +51,11 @@ struct mthca_buf_list { DECLARE_PCI_UNMAP_ADDR(mapping) }; +union mthca_buf { + struct mthca_buf_list direct; + struct mthca_buf_list *page_list; +}; + struct mthca_uar { unsigned long pfn; int index; @@ -187,10 +192,7 @@ struct mthca_cq { __be32 *arm_db; int arm_sn; - union { - struct mthca_buf_list direct; - struct mthca_buf_list 
*page_list; - } queue; + union mthca_buf queue; struct mthca_mr mr; wait_queue_head_t wait; }; @@ -228,10 +230,7 @@ struct mthca_qp { int send_wqe_offset; u64 *wrid; - union { - struct mthca_buf_list direct; - struct mthca_buf_list *page_list; - } queue; + union mthca_buf queue; wait_queue_head_t wait; }; diff --git a/drivers/infiniband/hw/mthca/mthca_qp.c b/drivers/infiniband/hw/mthca/mthca_qp.c index b7e3d2342799..b5a0bef15b7e 100644 --- a/drivers/infiniband/hw/mthca/mthca_qp.c +++ b/drivers/infiniband/hw/mthca/mthca_qp.c @@ -926,10 +926,6 @@ static int mthca_alloc_wqe_buf(struct mthca_dev *dev, struct mthca_qp *qp) { int size; - int i; - int npages, shift; - dma_addr_t t; - u64 *dma_list = NULL; int err = -ENOMEM; size = sizeof (struct mthca_next_seg) + @@ -979,116 +975,24 @@ static int mthca_alloc_wqe_buf(struct mthca_dev *dev, if (!qp->wrid) goto err_out; - if (size <= MTHCA_MAX_DIRECT_QP_SIZE) { - qp->is_direct = 1; - npages = 1; - shift = get_order(size) + PAGE_SHIFT; - - if (0) - mthca_dbg(dev, "Creating direct QP of size %d (shift %d)\n", - size, shift); - - qp->queue.direct.buf = dma_alloc_coherent(&dev->pdev->dev, size, - &t, GFP_KERNEL); - if (!qp->queue.direct.buf) - goto err_out; - - pci_unmap_addr_set(&qp->queue.direct, mapping, t); - - memset(qp->queue.direct.buf, 0, size); - - while (t & ((1 << shift) - 1)) { - --shift; - npages *= 2; - } - - dma_list = kmalloc(npages * sizeof *dma_list, GFP_KERNEL); - if (!dma_list) - goto err_out_free; - - for (i = 0; i < npages; ++i) - dma_list[i] = t + i * (1 << shift); - } else { - qp->is_direct = 0; - npages = size / PAGE_SIZE; - shift = PAGE_SHIFT; - - if (0) - mthca_dbg(dev, "Creating indirect QP with %d pages\n", npages); - - dma_list = kmalloc(npages * sizeof *dma_list, GFP_KERNEL); - if (!dma_list) - goto err_out; - - qp->queue.page_list = kmalloc(npages * - sizeof *qp->queue.page_list, - GFP_KERNEL); - if (!qp->queue.page_list) - goto err_out; - - for (i = 0; i < npages; ++i) { - 
qp->queue.page_list[i].buf = - dma_alloc_coherent(&dev->pdev->dev, PAGE_SIZE, - &t, GFP_KERNEL); - if (!qp->queue.page_list[i].buf) - goto err_out_free; - - memset(qp->queue.page_list[i].buf, 0, PAGE_SIZE); - - pci_unmap_addr_set(&qp->queue.page_list[i], mapping, t); - dma_list[i] = t; - } - } - - err = mthca_mr_alloc_phys(dev, pd->pd_num, dma_list, shift, - npages, 0, size, - MTHCA_MPT_FLAG_LOCAL_READ, - &qp->mr); + err = mthca_buf_alloc(dev, size, MTHCA_MAX_DIRECT_QP_SIZE, + &qp->queue, &qp->is_direct, pd, 0, &qp->mr); if (err) - goto err_out_free; + goto err_out; - kfree(dma_list); return 0; - err_out_free: - if (qp->is_direct) { - dma_free_coherent(&dev->pdev->dev, size, qp->queue.direct.buf, - pci_unmap_addr(&qp->queue.direct, mapping)); - } else - for (i = 0; i < npages; ++i) { - if (qp->queue.page_list[i].buf) - dma_free_coherent(&dev->pdev->dev, PAGE_SIZE, - qp->queue.page_list[i].buf, - pci_unmap_addr(&qp->queue.page_list[i], - mapping)); - - } - - err_out: +err_out: kfree(qp->wrid); - kfree(dma_list); return err; } static void mthca_free_wqe_buf(struct mthca_dev *dev, struct mthca_qp *qp) { - int i; - int size = PAGE_ALIGN(qp->send_wqe_offset + - (qp->sq.max << qp->sq.wqe_shift)); - - if (qp->is_direct) { - dma_free_coherent(&dev->pdev->dev, size, qp->queue.direct.buf, - pci_unmap_addr(&qp->queue.direct, mapping)); - } else { - for (i = 0; i < size / PAGE_SIZE; ++i) { - dma_free_coherent(&dev->pdev->dev, PAGE_SIZE, - qp->queue.page_list[i].buf, - pci_unmap_addr(&qp->queue.page_list[i], - mapping)); - } - } - + mthca_buf_free(dev, PAGE_ALIGN(qp->send_wqe_offset + + (qp->sq.max << qp->sq.wqe_shift)), + &qp->queue, qp->is_direct, &qp->mr); kfree(qp->wrid); } @@ -1433,7 +1337,6 @@ void mthca_free_qp(struct mthca_dev *dev, if (qp->ibqp.send_cq != qp->ibqp.recv_cq) mthca_cq_clean(dev, to_mcq(qp->ibqp.recv_cq)->cqn, qp->qpn); - mthca_free_mr(dev, &qp->mr); mthca_free_memfree(dev, qp); mthca_free_wqe_buf(dev, qp); } From 288bdeb4bc5b89befd7ee2f0f0183604034ff6c5 
Mon Sep 17 00:00:00 2001 From: Roland Dreier Date: Fri, 19 Aug 2005 09:19:05 -0700 Subject: [PATCH 188/584] [PATCH] IB/mthca: Simplify handling of completions with error Mem-free HCAs never generate error CQEs that complete multiple WQEs, so just skip the call to mthca_free_err_wqe() for them rather than having logic to handle the mem-free case in mthca_free_err_wqe(). Signed-off-by: Roland Dreier --- drivers/infiniband/hw/mthca/mthca_cq.c | 13 ++++++++----- drivers/infiniband/hw/mthca/mthca_qp.c | 5 +---- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/drivers/infiniband/hw/mthca/mthca_cq.c b/drivers/infiniband/hw/mthca/mthca_cq.c index 8afb9ee2fbc6..5dee908c2f34 100644 --- a/drivers/infiniband/hw/mthca/mthca_cq.c +++ b/drivers/infiniband/hw/mthca/mthca_cq.c @@ -367,6 +367,13 @@ static int handle_error_cqe(struct mthca_dev *dev, struct mthca_cq *cq, break; } + /* + * Mem-free HCAs always generate one CQE per WQE, even in the + * error case, so we don't have to check the doorbell count, etc. + */ + if (mthca_is_memfree(dev)) + return 0; + err = mthca_free_err_wqe(dev, qp, is_send, wqe_index, &dbd, &new_wqe); if (err) return err; @@ -375,12 +382,8 @@ static int handle_error_cqe(struct mthca_dev *dev, struct mthca_cq *cq, * If we're at the end of the WQE chain, or we've used up our * doorbell count, free the CQE. Otherwise just update it for * the next poll operation. - * - * This does not apply to mem-free HCAs: they don't use the - * doorbell count field, and so we should always free the CQE. 
*/ - if (mthca_is_memfree(dev) || - !(new_wqe & cpu_to_be32(0x3f)) || (!cqe->db_cnt && dbd)) + if (!(new_wqe & cpu_to_be32(0x3f)) || (!cqe->db_cnt && dbd)) return 0; cqe->db_cnt = cpu_to_be16(be16_to_cpu(cqe->db_cnt) - dbd); diff --git a/drivers/infiniband/hw/mthca/mthca_qp.c b/drivers/infiniband/hw/mthca/mthca_qp.c index b5a0bef15b7e..43af076acd5f 100644 --- a/drivers/infiniband/hw/mthca/mthca_qp.c +++ b/drivers/infiniband/hw/mthca/mthca_qp.c @@ -2086,10 +2086,7 @@ int mthca_free_err_wqe(struct mthca_dev *dev, struct mthca_qp *qp, int is_send, else next = get_recv_wqe(qp, index); - if (mthca_is_memfree(dev)) - *dbd = 1; - else - *dbd = !!(next->ee_nds & cpu_to_be32(MTHCA_NEXT_DBD)); + *dbd = !!(next->ee_nds & cpu_to_be32(MTHCA_NEXT_DBD)); if (next->ee_nds & cpu_to_be32(0x3f)) *new_wqe = (next->nda_op & cpu_to_be32(~0x3f)) | (next->ee_nds & cpu_to_be32(0x3f)); From c04bc3d1f417a8a90eef9ab46523dfd44858b28d Mon Sep 17 00:00:00 2001 From: Roland Dreier Date: Fri, 19 Aug 2005 10:33:35 -0700 Subject: [PATCH 189/584] [PATCH] IB/mthca: Move WQE structures into their own header Move the definitions of the WQE structures from mthca_qp.c into mthca_wqe.h, so that we'll be able to share them when we add the SRQ code in mthca_srq.c. 
Signed-off-by: Roland Dreier --- drivers/infiniband/hw/mthca/mthca_qp.c | 75 +--------------- drivers/infiniband/hw/mthca/mthca_wqe.h | 114 ++++++++++++++++++++++++ 2 files changed, 115 insertions(+), 74 deletions(-) create mode 100644 drivers/infiniband/hw/mthca/mthca_wqe.h diff --git a/drivers/infiniband/hw/mthca/mthca_qp.c b/drivers/infiniband/hw/mthca/mthca_qp.c index 43af076acd5f..ebb8f4a3dd80 100644 --- a/drivers/infiniband/hw/mthca/mthca_qp.c +++ b/drivers/infiniband/hw/mthca/mthca_qp.c @@ -44,6 +44,7 @@ #include "mthca_dev.h" #include "mthca_cmd.h" #include "mthca_memfree.h" +#include "mthca_wqe.h" enum { MTHCA_MAX_DIRECT_QP_SIZE = 4 * PAGE_SIZE, @@ -175,80 +176,6 @@ enum { MTHCA_QP_OPTPAR_SCHED_QUEUE = 1 << 16 }; -enum { - MTHCA_NEXT_DBD = 1 << 7, - MTHCA_NEXT_FENCE = 1 << 6, - MTHCA_NEXT_CQ_UPDATE = 1 << 3, - MTHCA_NEXT_EVENT_GEN = 1 << 2, - MTHCA_NEXT_SOLICIT = 1 << 1, - - MTHCA_MLX_VL15 = 1 << 17, - MTHCA_MLX_SLR = 1 << 16 -}; - -enum { - MTHCA_INVAL_LKEY = 0x100 -}; - -struct mthca_next_seg { - __be32 nda_op; /* [31:6] next WQE [4:0] next opcode */ - __be32 ee_nds; /* [31:8] next EE [7] DBD [6] F [5:0] next WQE size */ - __be32 flags; /* [3] CQ [2] Event [1] Solicit */ - __be32 imm; /* immediate data */ -}; - -struct mthca_tavor_ud_seg { - u32 reserved1; - __be32 lkey; - __be64 av_addr; - u32 reserved2[4]; - __be32 dqpn; - __be32 qkey; - u32 reserved3[2]; -}; - -struct mthca_arbel_ud_seg { - __be32 av[8]; - __be32 dqpn; - __be32 qkey; - u32 reserved[2]; -}; - -struct mthca_bind_seg { - __be32 flags; /* [31] Atomic [30] rem write [29] rem read */ - u32 reserved; - __be32 new_rkey; - __be32 lkey; - __be64 addr; - __be64 length; -}; - -struct mthca_raddr_seg { - __be64 raddr; - __be32 rkey; - u32 reserved; -}; - -struct mthca_atomic_seg { - __be64 swap_add; - __be64 compare; -}; - -struct mthca_data_seg { - __be32 byte_count; - __be32 lkey; - __be64 addr; -}; - -struct mthca_mlx_seg { - __be32 nda_op; - __be32 nds; - __be32 flags; /* [17] VL15 [16] SLR 
[14:12] static rate - [11:8] SL [3] C [2] E */ - __be16 rlid; - __be16 vcrc; -}; - static const u8 mthca_opcode[] = { [IB_WR_SEND] = MTHCA_OPCODE_SEND, [IB_WR_SEND_WITH_IMM] = MTHCA_OPCODE_SEND_IMM, diff --git a/drivers/infiniband/hw/mthca/mthca_wqe.h b/drivers/infiniband/hw/mthca/mthca_wqe.h new file mode 100644 index 000000000000..1f4c0ff28f79 --- /dev/null +++ b/drivers/infiniband/hw/mthca/mthca_wqe.h @@ -0,0 +1,114 @@ +/* + * Copyright (c) 2005 Cisco Systems. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * $Id: mthca_wqe.h 3047 2005-08-10 03:59:35Z roland $ + */ + +#ifndef MTHCA_WQE_H +#define MTHCA_WQE_H + +#include <linux/types.h> + +enum { + MTHCA_NEXT_DBD = 1 << 7, + MTHCA_NEXT_FENCE = 1 << 6, + MTHCA_NEXT_CQ_UPDATE = 1 << 3, + MTHCA_NEXT_EVENT_GEN = 1 << 2, + MTHCA_NEXT_SOLICIT = 1 << 1, + + MTHCA_MLX_VL15 = 1 << 17, + MTHCA_MLX_SLR = 1 << 16 +}; + +enum { + MTHCA_INVAL_LKEY = 0x100 +}; + +struct mthca_next_seg { + __be32 nda_op; /* [31:6] next WQE [4:0] next opcode */ + __be32 ee_nds; /* [31:8] next EE [7] DBD [6] F [5:0] next WQE size */ + __be32 flags; /* [3] CQ [2] Event [1] Solicit */ + __be32 imm; /* immediate data */ +}; + +struct mthca_tavor_ud_seg { + u32 reserved1; + __be32 lkey; + __be64 av_addr; + u32 reserved2[4]; + __be32 dqpn; + __be32 qkey; + u32 reserved3[2]; +}; + +struct mthca_arbel_ud_seg { + __be32 av[8]; + __be32 dqpn; + __be32 qkey; + u32 reserved[2]; +}; + +struct mthca_bind_seg { + __be32 flags; /* [31] Atomic [30] rem write [29] rem read */ + u32 reserved; + __be32 new_rkey; + __be32 lkey; + __be64 addr; + __be64 length; +}; + +struct mthca_raddr_seg { + __be64 raddr; + __be32 rkey; + u32 reserved; +}; + +struct mthca_atomic_seg { + __be64 swap_add; + __be64 compare; +}; + +struct mthca_data_seg { + __be32 byte_count; + __be32 lkey; + __be64 addr; +}; + +struct mthca_mlx_seg { + __be32 nda_op; + __be32 nds; + __be32 flags; /* [17] VL15 [16] SLR [14:12] static rate + [11:8] SL [3] C [2] E */ + __be16 rlid; + __be16 vcrc; +}; + +#endif /* MTHCA_WQE_H */ From d20a40192868082eff6fec729b311cb8463b4a21 Mon Sep 17 00:00:00 2001 From: Roland Dreier Date: Fri, 19 Aug 2005 10:36:11 -0700 Subject: [PATCH 190/584] [PATCH] IB/mthca: Handle context tables smaller than our chunk size When creating a table in context memory where the table is smaller than our chunk size, we don't want to allocate and map a full chunk. Instead, allocate just enough memory to cover the table.
This can be pretty simple because all tables are a power-of-2 size, so either the table is a multiple of the chunk size, or it's smaller than one chunk. Signed-off-by: Roland Dreier --- drivers/infiniband/hw/mthca/mthca_memfree.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/mthca/mthca_memfree.c b/drivers/infiniband/hw/mthca/mthca_memfree.c index fba0a53ba6ea..1827400f189b 100644 --- a/drivers/infiniband/hw/mthca/mthca_memfree.c +++ b/drivers/infiniband/hw/mthca/mthca_memfree.c @@ -286,6 +286,7 @@ struct mthca_icm_table *mthca_alloc_icm_table(struct mthca_dev *dev, { struct mthca_icm_table *table; int num_icm; + unsigned chunk_size; int i; u8 status; @@ -306,7 +307,11 @@ struct mthca_icm_table *mthca_alloc_icm_table(struct mthca_dev *dev, table->icm[i] = NULL; for (i = 0; i * MTHCA_TABLE_CHUNK_SIZE < reserved * obj_size; ++i) { - table->icm[i] = mthca_alloc_icm(dev, MTHCA_TABLE_CHUNK_SIZE >> PAGE_SHIFT, + chunk_size = MTHCA_TABLE_CHUNK_SIZE; + if ((i + 1) * MTHCA_TABLE_CHUNK_SIZE > nobj * obj_size) + chunk_size = nobj * obj_size - i * MTHCA_TABLE_CHUNK_SIZE; + + table->icm[i] = mthca_alloc_icm(dev, chunk_size >> PAGE_SHIFT, (use_lowmem ? GFP_KERNEL : GFP_HIGHUSER) | __GFP_NOWARN); if (!table->icm[i]) From ec34a922d243c3401a694450734e9effb2bafbfe Mon Sep 17 00:00:00 2001 From: Roland Dreier Date: Fri, 19 Aug 2005 10:59:31 -0700 Subject: [PATCH 191/584] [PATCH] IB/mthca: Add SRQ implementation Add mthca support for shared receive queues (SRQs), including userspace SRQs. 
Signed-off-by: Roland Dreier --- drivers/infiniband/hw/mthca/Makefile | 2 +- drivers/infiniband/hw/mthca/mthca_cmd.c | 24 + drivers/infiniband/hw/mthca/mthca_cmd.h | 5 + drivers/infiniband/hw/mthca/mthca_cq.c | 32 +- drivers/infiniband/hw/mthca/mthca_dev.h | 24 +- drivers/infiniband/hw/mthca/mthca_main.c | 48 +- drivers/infiniband/hw/mthca/mthca_profile.c | 1 + drivers/infiniband/hw/mthca/mthca_profile.h | 1 + drivers/infiniband/hw/mthca/mthca_provider.c | 82 +++ drivers/infiniband/hw/mthca/mthca_provider.h | 28 + drivers/infiniband/hw/mthca/mthca_qp.c | 33 +- drivers/infiniband/hw/mthca/mthca_srq.c | 591 +++++++++++++++++++ drivers/infiniband/hw/mthca/mthca_user.h | 11 + 13 files changed, 857 insertions(+), 25 deletions(-) create mode 100644 drivers/infiniband/hw/mthca/mthca_srq.c diff --git a/drivers/infiniband/hw/mthca/Makefile b/drivers/infiniband/hw/mthca/Makefile index 5dcbd43073e2..1eb87408e069 100644 --- a/drivers/infiniband/hw/mthca/Makefile +++ b/drivers/infiniband/hw/mthca/Makefile @@ -9,4 +9,4 @@ obj-$(CONFIG_INFINIBAND_MTHCA) += ib_mthca.o ib_mthca-y := mthca_main.o mthca_cmd.o mthca_profile.o mthca_reset.o \ mthca_allocator.o mthca_eq.o mthca_pd.o mthca_cq.o \ mthca_mr.o mthca_qp.o mthca_av.o mthca_mcg.o mthca_mad.o \ - mthca_provider.o mthca_memfree.o mthca_uar.o + mthca_provider.o mthca_memfree.o mthca_uar.o mthca_srq.o diff --git a/drivers/infiniband/hw/mthca/mthca_cmd.c b/drivers/infiniband/hw/mthca/mthca_cmd.c index c258c1b7022e..60e4b213635a 100644 --- a/drivers/infiniband/hw/mthca/mthca_cmd.c +++ b/drivers/infiniband/hw/mthca/mthca_cmd.c @@ -109,6 +109,7 @@ enum { CMD_SW2HW_SRQ = 0x35, CMD_HW2SW_SRQ = 0x36, CMD_QUERY_SRQ = 0x37, + CMD_ARM_SRQ = 0x40, /* QP/EE commands */ CMD_RST2INIT_QPEE = 0x19, @@ -1032,6 +1033,8 @@ int mthca_QUERY_DEV_LIM(struct mthca_dev *dev, mthca_dbg(dev, "Max QPs: %d, reserved QPs: %d, entry size: %d\n", dev_lim->max_qps, dev_lim->reserved_qps, dev_lim->qpc_entry_sz); + mthca_dbg(dev, "Max SRQs: %d, reserved SRQs: %d, 
entry size: %d\n", + dev_lim->max_srqs, dev_lim->reserved_srqs, dev_lim->srq_entry_sz); mthca_dbg(dev, "Max CQs: %d, reserved CQs: %d, entry size: %d\n", dev_lim->max_cqs, dev_lim->reserved_cqs, dev_lim->cqc_entry_sz); mthca_dbg(dev, "Max EQs: %d, reserved EQs: %d, entry size: %d\n", @@ -1500,6 +1503,27 @@ int mthca_HW2SW_CQ(struct mthca_dev *dev, struct mthca_mailbox *mailbox, CMD_TIME_CLASS_A, status); } +int mthca_SW2HW_SRQ(struct mthca_dev *dev, struct mthca_mailbox *mailbox, + int srq_num, u8 *status) +{ + return mthca_cmd(dev, mailbox->dma, srq_num, 0, CMD_SW2HW_SRQ, + CMD_TIME_CLASS_A, status); +} + +int mthca_HW2SW_SRQ(struct mthca_dev *dev, struct mthca_mailbox *mailbox, + int srq_num, u8 *status) +{ + return mthca_cmd_box(dev, 0, mailbox->dma, srq_num, 0, + CMD_HW2SW_SRQ, + CMD_TIME_CLASS_A, status); +} + +int mthca_ARM_SRQ(struct mthca_dev *dev, int srq_num, int limit, u8 *status) +{ + return mthca_cmd(dev, limit, srq_num, 0, CMD_ARM_SRQ, + CMD_TIME_CLASS_B, status); +} + int mthca_MODIFY_QP(struct mthca_dev *dev, int trans, u32 num, int is_ee, struct mthca_mailbox *mailbox, u32 optmask, u8 *status) diff --git a/drivers/infiniband/hw/mthca/mthca_cmd.h b/drivers/infiniband/hw/mthca/mthca_cmd.h index 11f02a61432a..ef2a765d6953 100644 --- a/drivers/infiniband/hw/mthca/mthca_cmd.h +++ b/drivers/infiniband/hw/mthca/mthca_cmd.h @@ -298,6 +298,11 @@ int mthca_SW2HW_CQ(struct mthca_dev *dev, struct mthca_mailbox *mailbox, int cq_num, u8 *status); int mthca_HW2SW_CQ(struct mthca_dev *dev, struct mthca_mailbox *mailbox, int cq_num, u8 *status); +int mthca_SW2HW_SRQ(struct mthca_dev *dev, struct mthca_mailbox *mailbox, + int srq_num, u8 *status); +int mthca_HW2SW_SRQ(struct mthca_dev *dev, struct mthca_mailbox *mailbox, + int srq_num, u8 *status); +int mthca_ARM_SRQ(struct mthca_dev *dev, int srq_num, int limit, u8 *status); int mthca_MODIFY_QP(struct mthca_dev *dev, int trans, u32 num, int is_ee, struct mthca_mailbox *mailbox, u32 optmask, u8 *status); diff --git 
a/drivers/infiniband/hw/mthca/mthca_cq.c b/drivers/infiniband/hw/mthca/mthca_cq.c index 5dee908c2f34..5ece609c2ee0 100644 --- a/drivers/infiniband/hw/mthca/mthca_cq.c +++ b/drivers/infiniband/hw/mthca/mthca_cq.c @@ -224,7 +224,8 @@ void mthca_cq_event(struct mthca_dev *dev, u32 cqn) cq->ibcq.comp_handler(&cq->ibcq, cq->ibcq.cq_context); } -void mthca_cq_clean(struct mthca_dev *dev, u32 cqn, u32 qpn) +void mthca_cq_clean(struct mthca_dev *dev, u32 cqn, u32 qpn, + struct mthca_srq *srq) { struct mthca_cq *cq; struct mthca_cqe *cqe; @@ -265,8 +266,11 @@ void mthca_cq_clean(struct mthca_dev *dev, u32 cqn, u32 qpn) */ while (prod_index > cq->cons_index) { cqe = get_cqe(cq, (prod_index - 1) & cq->ibcq.cqe); - if (cqe->my_qpn == cpu_to_be32(qpn)) + if (cqe->my_qpn == cpu_to_be32(qpn)) { + if (srq) + mthca_free_srq_wqe(srq, be32_to_cpu(cqe->wqe)); ++nfreed; + } else if (nfreed) memcpy(get_cqe(cq, (prod_index - 1 + nfreed) & cq->ibcq.cqe), @@ -455,23 +459,27 @@ static inline int mthca_poll_one(struct mthca_dev *dev, >> wq->wqe_shift); entry->wr_id = (*cur_qp)->wrid[wqe_index + (*cur_qp)->rq.max]; + } else if ((*cur_qp)->ibqp.srq) { + struct mthca_srq *srq = to_msrq((*cur_qp)->ibqp.srq); + u32 wqe = be32_to_cpu(cqe->wqe); + wq = NULL; + wqe_index = wqe >> srq->wqe_shift; + entry->wr_id = srq->wrid[wqe_index]; + mthca_free_srq_wqe(srq, wqe); } else { wq = &(*cur_qp)->rq; wqe_index = be32_to_cpu(cqe->wqe) >> wq->wqe_shift; entry->wr_id = (*cur_qp)->wrid[wqe_index]; } - if (wq->last_comp < wqe_index) - wq->tail += wqe_index - wq->last_comp; - else - wq->tail += wqe_index + wq->max - wq->last_comp; + if (wq) { + if (wq->last_comp < wqe_index) + wq->tail += wqe_index - wq->last_comp; + else + wq->tail += wqe_index + wq->max - wq->last_comp; - wq->last_comp = wqe_index; - - if (0) - mthca_dbg(dev, "%s completion for QP %06x, index %d (nr %d)\n", - is_send ? 
"Send" : "Receive", - (*cur_qp)->qpn, wqe_index, wq->max); + wq->last_comp = wqe_index; + } if (is_error) { err = handle_error_cqe(dev, cq, *cur_qp, wqe_index, is_send, diff --git a/drivers/infiniband/hw/mthca/mthca_dev.h b/drivers/infiniband/hw/mthca/mthca_dev.h index cb78b5d07201..7bff5a8425f4 100644 --- a/drivers/infiniband/hw/mthca/mthca_dev.h +++ b/drivers/infiniband/hw/mthca/mthca_dev.h @@ -218,6 +218,13 @@ struct mthca_cq_table { struct mthca_icm_table *table; }; +struct mthca_srq_table { + struct mthca_alloc alloc; + spinlock_t lock; + struct mthca_array srq; + struct mthca_icm_table *table; +}; + struct mthca_qp_table { struct mthca_alloc alloc; u32 rdb_base; @@ -299,6 +306,7 @@ struct mthca_dev { struct mthca_mr_table mr_table; struct mthca_eq_table eq_table; struct mthca_cq_table cq_table; + struct mthca_srq_table srq_table; struct mthca_qp_table qp_table; struct mthca_av_table av_table; struct mthca_mcg_table mcg_table; @@ -372,6 +380,7 @@ int mthca_init_pd_table(struct mthca_dev *dev); int mthca_init_mr_table(struct mthca_dev *dev); int mthca_init_eq_table(struct mthca_dev *dev); int mthca_init_cq_table(struct mthca_dev *dev); +int mthca_init_srq_table(struct mthca_dev *dev); int mthca_init_qp_table(struct mthca_dev *dev); int mthca_init_av_table(struct mthca_dev *dev); int mthca_init_mcg_table(struct mthca_dev *dev); @@ -381,6 +390,7 @@ void mthca_cleanup_pd_table(struct mthca_dev *dev); void mthca_cleanup_mr_table(struct mthca_dev *dev); void mthca_cleanup_eq_table(struct mthca_dev *dev); void mthca_cleanup_cq_table(struct mthca_dev *dev); +void mthca_cleanup_srq_table(struct mthca_dev *dev); void mthca_cleanup_qp_table(struct mthca_dev *dev); void mthca_cleanup_av_table(struct mthca_dev *dev); void mthca_cleanup_mcg_table(struct mthca_dev *dev); @@ -431,7 +441,19 @@ int mthca_init_cq(struct mthca_dev *dev, int nent, void mthca_free_cq(struct mthca_dev *dev, struct mthca_cq *cq); void mthca_cq_event(struct mthca_dev *dev, u32 cqn); -void 
mthca_cq_clean(struct mthca_dev *dev, u32 cqn, u32 qpn); +void mthca_cq_clean(struct mthca_dev *dev, u32 cqn, u32 qpn, + struct mthca_srq *srq); + +int mthca_alloc_srq(struct mthca_dev *dev, struct mthca_pd *pd, + struct ib_srq_attr *attr, struct mthca_srq *srq); +void mthca_free_srq(struct mthca_dev *dev, struct mthca_srq *srq); +void mthca_srq_event(struct mthca_dev *dev, u32 srqn, + enum ib_event_type event_type); +void mthca_free_srq_wqe(struct mthca_srq *srq, u32 wqe_addr); +int mthca_tavor_post_srq_recv(struct ib_srq *srq, struct ib_recv_wr *wr, + struct ib_recv_wr **bad_wr); +int mthca_arbel_post_srq_recv(struct ib_srq *srq, struct ib_recv_wr *wr, + struct ib_recv_wr **bad_wr); void mthca_qp_event(struct mthca_dev *dev, u32 qpn, enum ib_event_type event_type); diff --git a/drivers/infiniband/hw/mthca/mthca_main.c b/drivers/infiniband/hw/mthca/mthca_main.c index 16c5d4a805f0..3241d6c9dc11 100644 --- a/drivers/infiniband/hw/mthca/mthca_main.c +++ b/drivers/infiniband/hw/mthca/mthca_main.c @@ -253,6 +253,8 @@ static int __devinit mthca_init_tavor(struct mthca_dev *mdev) profile = default_profile; profile.num_uar = dev_lim.uar_size / PAGE_SIZE; profile.uarc_size = 0; + if (mdev->mthca_flags & MTHCA_FLAG_SRQ) + profile.num_srq = dev_lim.max_srqs; err = mthca_make_profile(mdev, &profile, &dev_lim, &init_hca); if (err < 0) @@ -424,15 +426,29 @@ static int __devinit mthca_init_icm(struct mthca_dev *mdev, } mdev->cq_table.table = mthca_alloc_icm_table(mdev, init_hca->cqc_base, - dev_lim->cqc_entry_sz, - mdev->limits.num_cqs, - mdev->limits.reserved_cqs, 0); + dev_lim->cqc_entry_sz, + mdev->limits.num_cqs, + mdev->limits.reserved_cqs, 0); if (!mdev->cq_table.table) { mthca_err(mdev, "Failed to map CQ context memory, aborting.\n"); err = -ENOMEM; goto err_unmap_rdb; } + if (mdev->mthca_flags & MTHCA_FLAG_SRQ) { + mdev->srq_table.table = + mthca_alloc_icm_table(mdev, init_hca->srqc_base, + dev_lim->srq_entry_sz, + mdev->limits.num_srqs, + mdev->limits.reserved_srqs, 0); 
+ if (!mdev->srq_table.table) { + mthca_err(mdev, "Failed to map SRQ context memory, " + "aborting.\n"); + err = -ENOMEM; + goto err_unmap_cq; + } + } + /* * It's not strictly required, but for simplicity just map the * whole multicast group table now. The table isn't very big @@ -448,11 +464,15 @@ static int __devinit mthca_init_icm(struct mthca_dev *mdev, if (!mdev->mcg_table.table) { mthca_err(mdev, "Failed to map MCG context memory, aborting.\n"); err = -ENOMEM; - goto err_unmap_cq; + goto err_unmap_srq; } return 0; +err_unmap_srq: + if (mdev->mthca_flags & MTHCA_FLAG_SRQ) + mthca_free_icm_table(mdev, mdev->srq_table.table); + err_unmap_cq: mthca_free_icm_table(mdev, mdev->cq_table.table); @@ -532,6 +552,8 @@ static int __devinit mthca_init_arbel(struct mthca_dev *mdev) profile = default_profile; profile.num_uar = dev_lim.uar_size / PAGE_SIZE; profile.num_udav = 0; + if (mdev->mthca_flags & MTHCA_FLAG_SRQ) + profile.num_srq = dev_lim.max_srqs; icm_size = mthca_make_profile(mdev, &profile, &dev_lim, &init_hca); if ((int) icm_size < 0) { @@ -558,6 +580,8 @@ static int __devinit mthca_init_arbel(struct mthca_dev *mdev) return 0; err_free_icm: + if (mdev->mthca_flags & MTHCA_FLAG_SRQ) + mthca_free_icm_table(mdev, mdev->srq_table.table); mthca_free_icm_table(mdev, mdev->cq_table.table); mthca_free_icm_table(mdev, mdev->qp_table.rdb_table); mthca_free_icm_table(mdev, mdev->qp_table.eqp_table); @@ -587,6 +611,8 @@ static void mthca_close_hca(struct mthca_dev *mdev) mthca_CLOSE_HCA(mdev, 0, &status); if (mthca_is_memfree(mdev)) { + if (mdev->mthca_flags & MTHCA_FLAG_SRQ) + mthca_free_icm_table(mdev, mdev->srq_table.table); mthca_free_icm_table(mdev, mdev->cq_table.table); mthca_free_icm_table(mdev, mdev->qp_table.rdb_table); mthca_free_icm_table(mdev, mdev->qp_table.eqp_table); @@ -731,11 +757,18 @@ static int __devinit mthca_setup_hca(struct mthca_dev *dev) goto err_cmd_poll; } + err = mthca_init_srq_table(dev); + if (err) { + mthca_err(dev, "Failed to initialize " + 
"shared receive queue table, aborting.\n"); + goto err_cq_table_free; + } + err = mthca_init_qp_table(dev); if (err) { mthca_err(dev, "Failed to initialize " "queue pair table, aborting.\n"); - goto err_cq_table_free; + goto err_srq_table_free; } err = mthca_init_av_table(dev); @@ -760,6 +793,9 @@ err_av_table_free: err_qp_table_free: mthca_cleanup_qp_table(dev); +err_srq_table_free: + mthca_cleanup_srq_table(dev); + err_cq_table_free: mthca_cleanup_cq_table(dev); @@ -1046,6 +1082,7 @@ err_cleanup: mthca_cleanup_mcg_table(mdev); mthca_cleanup_av_table(mdev); mthca_cleanup_qp_table(mdev); + mthca_cleanup_srq_table(mdev); mthca_cleanup_cq_table(mdev); mthca_cmd_use_polling(mdev); mthca_cleanup_eq_table(mdev); @@ -1095,6 +1132,7 @@ static void __devexit mthca_remove_one(struct pci_dev *pdev) mthca_cleanup_mcg_table(mdev); mthca_cleanup_av_table(mdev); mthca_cleanup_qp_table(mdev); + mthca_cleanup_srq_table(mdev); mthca_cleanup_cq_table(mdev); mthca_cmd_use_polling(mdev); mthca_cleanup_eq_table(mdev); diff --git a/drivers/infiniband/hw/mthca/mthca_profile.c b/drivers/infiniband/hw/mthca/mthca_profile.c index 9b280661f2a1..0576056b34f4 100644 --- a/drivers/infiniband/hw/mthca/mthca_profile.c +++ b/drivers/infiniband/hw/mthca/mthca_profile.c @@ -102,6 +102,7 @@ u64 mthca_make_profile(struct mthca_dev *dev, profile[MTHCA_RES_UARC].size = request->uarc_size; profile[MTHCA_RES_QP].num = request->num_qp; + profile[MTHCA_RES_SRQ].num = request->num_srq; profile[MTHCA_RES_EQP].num = request->num_qp; profile[MTHCA_RES_RDB].num = request->num_qp * request->rdb_per_qp; profile[MTHCA_RES_CQ].num = request->num_cq; diff --git a/drivers/infiniband/hw/mthca/mthca_profile.h b/drivers/infiniband/hw/mthca/mthca_profile.h index 0d4f070a3fa1..94641808f97f 100644 --- a/drivers/infiniband/hw/mthca/mthca_profile.h +++ b/drivers/infiniband/hw/mthca/mthca_profile.h @@ -42,6 +42,7 @@ struct mthca_profile { int num_qp; int rdb_per_qp; + int num_srq; int num_cq; int num_mcg; int num_mpt; diff 
--git a/drivers/infiniband/hw/mthca/mthca_provider.c b/drivers/infiniband/hw/mthca/mthca_provider.c index 08a7340e19ff..23ceb26af8fe 100644 --- a/drivers/infiniband/hw/mthca/mthca_provider.c +++ b/drivers/infiniband/hw/mthca/mthca_provider.c @@ -425,6 +425,77 @@ static int mthca_ah_destroy(struct ib_ah *ah) return 0; } +static struct ib_srq *mthca_create_srq(struct ib_pd *pd, + struct ib_srq_init_attr *init_attr, + struct ib_udata *udata) +{ + struct mthca_create_srq ucmd; + struct mthca_ucontext *context = NULL; + struct mthca_srq *srq; + int err; + + srq = kmalloc(sizeof *srq, GFP_KERNEL); + if (!srq) + return ERR_PTR(-ENOMEM); + + if (pd->uobject) { + context = to_mucontext(pd->uobject->context); + + if (ib_copy_from_udata(&ucmd, udata, sizeof ucmd)) + return ERR_PTR(-EFAULT); + + err = mthca_map_user_db(to_mdev(pd->device), &context->uar, + context->db_tab, ucmd.db_index, + ucmd.db_page); + + if (err) + goto err_free; + + srq->mr.ibmr.lkey = ucmd.lkey; + srq->db_index = ucmd.db_index; + } + + err = mthca_alloc_srq(to_mdev(pd->device), to_mpd(pd), + &init_attr->attr, srq); + + if (err && pd->uobject) + mthca_unmap_user_db(to_mdev(pd->device), &context->uar, + context->db_tab, ucmd.db_index); + + if (err) + goto err_free; + + if (context && ib_copy_to_udata(udata, &srq->srqn, sizeof (__u32))) { + mthca_free_srq(to_mdev(pd->device), srq); + err = -EFAULT; + goto err_free; + } + + return &srq->ibsrq; + +err_free: + kfree(srq); + + return ERR_PTR(err); +} + +static int mthca_destroy_srq(struct ib_srq *srq) +{ + struct mthca_ucontext *context; + + if (srq->uobject) { + context = to_mucontext(srq->uobject->context); + + mthca_unmap_user_db(to_mdev(srq->device), &context->uar, + context->db_tab, to_msrq(srq)->db_index); + } + + mthca_free_srq(to_mdev(srq->device), to_msrq(srq)); + kfree(srq); + + return 0; +} + static struct ib_qp *mthca_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *init_attr, struct ib_udata *udata) @@ -1003,6 +1074,17 @@ int 
mthca_register_device(struct mthca_dev *dev) dev->ib_dev.dealloc_pd = mthca_dealloc_pd; dev->ib_dev.create_ah = mthca_ah_create; dev->ib_dev.destroy_ah = mthca_ah_destroy; + + if (dev->mthca_flags & MTHCA_FLAG_SRQ) { + dev->ib_dev.create_srq = mthca_create_srq; + dev->ib_dev.destroy_srq = mthca_destroy_srq; + + if (mthca_is_memfree(dev)) + dev->ib_dev.post_srq_recv = mthca_arbel_post_srq_recv; + else + dev->ib_dev.post_srq_recv = mthca_tavor_post_srq_recv; + } + dev->ib_dev.create_qp = mthca_create_qp; dev->ib_dev.modify_qp = mthca_modify_qp; dev->ib_dev.destroy_qp = mthca_destroy_qp; diff --git a/drivers/infiniband/hw/mthca/mthca_provider.h b/drivers/infiniband/hw/mthca/mthca_provider.h index b95249ee46cf..024015678c8a 100644 --- a/drivers/infiniband/hw/mthca/mthca_provider.h +++ b/drivers/infiniband/hw/mthca/mthca_provider.h @@ -197,6 +197,29 @@ struct mthca_cq { wait_queue_head_t wait; }; +struct mthca_srq { + struct ib_srq ibsrq; + spinlock_t lock; + atomic_t refcount; + int srqn; + int max; + int max_gs; + int wqe_shift; + int first_free; + int last_free; + u16 counter; /* Arbel only */ + int db_index; /* Arbel only */ + __be32 *db; /* Arbel only */ + void *last; + + int is_direct; + u64 *wrid; + union mthca_buf queue; + struct mthca_mr mr; + + wait_queue_head_t wait; +}; + struct mthca_wq { spinlock_t lock; int max; @@ -277,6 +300,11 @@ static inline struct mthca_cq *to_mcq(struct ib_cq *ibcq) return container_of(ibcq, struct mthca_cq, ibcq); } +static inline struct mthca_srq *to_msrq(struct ib_srq *ibsrq) +{ + return container_of(ibsrq, struct mthca_srq, ibsrq); +} + static inline struct mthca_qp *to_mqp(struct ib_qp *ibqp) { return container_of(ibqp, struct mthca_qp, ibqp); diff --git a/drivers/infiniband/hw/mthca/mthca_qp.c b/drivers/infiniband/hw/mthca/mthca_qp.c index ebb8f4a3dd80..7607b9800736 100644 --- a/drivers/infiniband/hw/mthca/mthca_qp.c +++ b/drivers/infiniband/hw/mthca/mthca_qp.c @@ -612,10 +612,13 @@ int mthca_modify_qp(struct ib_qp *ibqp, 
struct ib_qp_attr *attr, int attr_mask) qp_context->mtu_msgmax = (attr->path_mtu << 5) | 31; if (mthca_is_memfree(dev)) { - qp_context->rq_size_stride = - ((ffs(qp->rq.max) - 1) << 3) | (qp->rq.wqe_shift - 4); - qp_context->sq_size_stride = - ((ffs(qp->sq.max) - 1) << 3) | (qp->sq.wqe_shift - 4); + if (qp->rq.max) + qp_context->rq_size_stride = long_log2(qp->rq.max) << 3; + qp_context->rq_size_stride |= qp->rq.wqe_shift - 4; + + if (qp->sq.max) + qp_context->sq_size_stride = long_log2(qp->sq.max) << 3; + qp_context->sq_size_stride |= qp->sq.wqe_shift - 4; } /* leave arbel_sched_queue as 0 */ @@ -784,6 +787,9 @@ int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask) qp_context->params2 |= cpu_to_be32(MTHCA_QP_BIT_RSC); + if (ibqp->srq) + qp_context->params2 |= cpu_to_be32(MTHCA_QP_BIT_RIC); + if (attr_mask & IB_QP_MIN_RNR_TIMER) { qp_context->rnr_nextrecvpsn |= cpu_to_be32(attr->min_rnr_timer << 24); qp_param->opt_param_mask |= cpu_to_be32(MTHCA_QP_OPTPAR_RNR_TIMEOUT); @@ -806,6 +812,10 @@ int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask) qp_param->opt_param_mask |= cpu_to_be32(MTHCA_QP_OPTPAR_Q_KEY); } + if (ibqp->srq) + qp_context->srqn = cpu_to_be32(1 << 24 | + to_msrq(ibqp->srq)->srqn); + err = mthca_MODIFY_QP(dev, state_table[cur_state][new_state].trans, qp->qpn, 0, mailbox, 0, &status); if (status) { @@ -1260,9 +1270,11 @@ void mthca_free_qp(struct mthca_dev *dev, * unref the mem-free tables and free the QPN in our table. */ if (!qp->ibqp.uobject) { - mthca_cq_clean(dev, to_mcq(qp->ibqp.send_cq)->cqn, qp->qpn); + mthca_cq_clean(dev, to_mcq(qp->ibqp.send_cq)->cqn, qp->qpn, + qp->ibqp.srq ? to_msrq(qp->ibqp.srq) : NULL); if (qp->ibqp.send_cq != qp->ibqp.recv_cq) - mthca_cq_clean(dev, to_mcq(qp->ibqp.recv_cq)->cqn, qp->qpn); + mthca_cq_clean(dev, to_mcq(qp->ibqp.recv_cq)->cqn, qp->qpn, + qp->ibqp.srq ? 
to_msrq(qp->ibqp.srq) : NULL); mthca_free_memfree(dev, qp); mthca_free_wqe_buf(dev, qp); @@ -2008,6 +2020,15 @@ int mthca_free_err_wqe(struct mthca_dev *dev, struct mthca_qp *qp, int is_send, { struct mthca_next_seg *next; + /* + * For SRQs, all WQEs generate a CQE, so we're always at the + * end of the doorbell chain. + */ + if (qp->ibqp.srq) { + *new_wqe = 0; + return 0; + } + if (is_send) next = get_send_wqe(qp, index); else diff --git a/drivers/infiniband/hw/mthca/mthca_srq.c b/drivers/infiniband/hw/mthca/mthca_srq.c new file mode 100644 index 000000000000..75cd2d84ef12 --- /dev/null +++ b/drivers/infiniband/hw/mthca/mthca_srq.c @@ -0,0 +1,591 @@ +/* + * Copyright (c) 2005 Cisco Systems. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * $Id: mthca_srq.c 3047 2005-08-10 03:59:35Z roland $ + */ + +#include "mthca_dev.h" +#include "mthca_cmd.h" +#include "mthca_memfree.h" +#include "mthca_wqe.h" + +enum { + MTHCA_MAX_DIRECT_SRQ_SIZE = 4 * PAGE_SIZE +}; + +struct mthca_tavor_srq_context { + __be64 wqe_base_ds; /* low 6 bits is descriptor size */ + __be32 state_pd; + __be32 lkey; + __be32 uar; + __be32 wqe_cnt; + u32 reserved[2]; +}; + +struct mthca_arbel_srq_context { + __be32 state_logsize_srqn; + __be32 lkey; + __be32 db_index; + __be32 logstride_usrpage; + __be64 wqe_base; + __be32 eq_pd; + __be16 limit_watermark; + __be16 wqe_cnt; + u16 reserved1; + __be16 wqe_counter; + u32 reserved2[3]; +}; + +static void *get_wqe(struct mthca_srq *srq, int n) +{ + if (srq->is_direct) + return srq->queue.direct.buf + (n << srq->wqe_shift); + else + return srq->queue.page_list[(n << srq->wqe_shift) >> PAGE_SHIFT].buf + + ((n << srq->wqe_shift) & (PAGE_SIZE - 1)); +} + +/* + * Return a pointer to the location within a WQE that we're using as a + * link when the WQE is in the free list. We use an offset of 4 + * because in the Tavor case, posting a WQE may overwrite the first + * four bytes of the previous WQE. The offset avoids corrupting our + * free list if the WQE has already completed and been put on the free + * list when we post the next WQE. 
+ */ +static inline int *wqe_to_link(void *wqe) +{ + return (int *) (wqe + 4); +} + +static void mthca_tavor_init_srq_context(struct mthca_dev *dev, + struct mthca_pd *pd, + struct mthca_srq *srq, + struct mthca_tavor_srq_context *context) +{ + memset(context, 0, sizeof *context); + + context->wqe_base_ds = cpu_to_be64(1 << (srq->wqe_shift - 4)); + context->state_pd = cpu_to_be32(pd->pd_num); + context->lkey = cpu_to_be32(srq->mr.ibmr.lkey); + + if (pd->ibpd.uobject) + context->uar = + cpu_to_be32(to_mucontext(pd->ibpd.uobject->context)->uar.index); + else + context->uar = cpu_to_be32(dev->driver_uar.index); +} + +static void mthca_arbel_init_srq_context(struct mthca_dev *dev, + struct mthca_pd *pd, + struct mthca_srq *srq, + struct mthca_arbel_srq_context *context) +{ + int logsize; + + memset(context, 0, sizeof *context); + + logsize = long_log2(srq->max) + srq->wqe_shift; + context->state_logsize_srqn = cpu_to_be32(logsize << 24 | srq->srqn); + context->lkey = cpu_to_be32(srq->mr.ibmr.lkey); + context->db_index = cpu_to_be32(srq->db_index); + context->logstride_usrpage = cpu_to_be32((srq->wqe_shift - 4) << 29); + if (pd->ibpd.uobject) + context->logstride_usrpage |= + cpu_to_be32(to_mucontext(pd->ibpd.uobject->context)->uar.index); + else + context->logstride_usrpage |= cpu_to_be32(dev->driver_uar.index); + context->eq_pd = cpu_to_be32(MTHCA_EQ_ASYNC << 24 | pd->pd_num); +} + +static void mthca_free_srq_buf(struct mthca_dev *dev, struct mthca_srq *srq) +{ + mthca_buf_free(dev, srq->max << srq->wqe_shift, &srq->queue, + srq->is_direct, &srq->mr); + kfree(srq->wrid); +} + +static int mthca_alloc_srq_buf(struct mthca_dev *dev, struct mthca_pd *pd, + struct mthca_srq *srq) +{ + struct mthca_data_seg *scatter; + void *wqe; + int err; + int i; + + if (pd->ibpd.uobject) + return 0; + + srq->wrid = kmalloc(srq->max * sizeof (u64), GFP_KERNEL); + if (!srq->wrid) + return -ENOMEM; + + err = mthca_buf_alloc(dev, srq->max << srq->wqe_shift, + MTHCA_MAX_DIRECT_SRQ_SIZE, + 
&srq->queue, &srq->is_direct, pd, 1, &srq->mr); + if (err) { + kfree(srq->wrid); + return err; + } + + /* + * Now initialize the SRQ buffer so that all of the WQEs are + * linked into the list of free WQEs. In addition, set the + * scatter list L_Keys to the sentry value of 0x100. + */ + for (i = 0; i < srq->max; ++i) { + wqe = get_wqe(srq, i); + + *wqe_to_link(wqe) = i < srq->max - 1 ? i + 1 : -1; + + for (scatter = wqe + sizeof (struct mthca_next_seg); + (void *) scatter < wqe + (1 << srq->wqe_shift); + ++scatter) + scatter->lkey = cpu_to_be32(MTHCA_INVAL_LKEY); + } + + return 0; +} + +int mthca_alloc_srq(struct mthca_dev *dev, struct mthca_pd *pd, + struct ib_srq_attr *attr, struct mthca_srq *srq) +{ + struct mthca_mailbox *mailbox; + u8 status; + int ds; + int err; + + /* Sanity check SRQ size before proceeding */ + if (attr->max_wr > 16 << 20 || attr->max_sge > 64) + return -EINVAL; + + srq->max = attr->max_wr; + srq->max_gs = attr->max_sge; + srq->last = NULL; + srq->counter = 0; + + if (mthca_is_memfree(dev)) + srq->max = roundup_pow_of_two(srq->max + 1); + + ds = min(64UL, + roundup_pow_of_two(sizeof (struct mthca_next_seg) + + srq->max_gs * sizeof (struct mthca_data_seg))); + srq->wqe_shift = long_log2(ds); + + srq->srqn = mthca_alloc(&dev->srq_table.alloc); + if (srq->srqn == -1) + return -ENOMEM; + + if (mthca_is_memfree(dev)) { + err = mthca_table_get(dev, dev->srq_table.table, srq->srqn); + if (err) + goto err_out; + + if (!pd->ibpd.uobject) { + srq->db_index = mthca_alloc_db(dev, MTHCA_DB_TYPE_SRQ, + srq->srqn, &srq->db); + if (srq->db_index < 0) { + err = -ENOMEM; + goto err_out_icm; + } + } + } + + mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL); + if (IS_ERR(mailbox)) { + err = PTR_ERR(mailbox); + goto err_out_db; + } + + err = mthca_alloc_srq_buf(dev, pd, srq); + if (err) + goto err_out_mailbox; + + spin_lock_init(&srq->lock); + atomic_set(&srq->refcount, 1); + init_waitqueue_head(&srq->wait); + + if (mthca_is_memfree(dev)) + 
mthca_arbel_init_srq_context(dev, pd, srq, mailbox->buf); + else + mthca_tavor_init_srq_context(dev, pd, srq, mailbox->buf); + + err = mthca_SW2HW_SRQ(dev, mailbox, srq->srqn, &status); + + if (err) { + mthca_warn(dev, "SW2HW_SRQ failed (%d)\n", err); + goto err_out_free_buf; + } + if (status) { + mthca_warn(dev, "SW2HW_SRQ returned status 0x%02x\n", + status); + err = -EINVAL; + goto err_out_free_buf; + } + + spin_lock_irq(&dev->srq_table.lock); + if (mthca_array_set(&dev->srq_table.srq, + srq->srqn & (dev->limits.num_srqs - 1), + srq)) { + spin_unlock_irq(&dev->srq_table.lock); + goto err_out_free_srq; + } + spin_unlock_irq(&dev->srq_table.lock); + + mthca_free_mailbox(dev, mailbox); + + srq->first_free = 0; + srq->last_free = srq->max - 1; + + return 0; + +err_out_free_srq: + err = mthca_HW2SW_SRQ(dev, mailbox, srq->srqn, &status); + if (err) + mthca_warn(dev, "HW2SW_SRQ failed (%d)\n", err); + else if (status) + mthca_warn(dev, "HW2SW_SRQ returned status 0x%02x\n", status); + +err_out_free_buf: + if (!pd->ibpd.uobject) + mthca_free_srq_buf(dev, srq); + +err_out_mailbox: + mthca_free_mailbox(dev, mailbox); + +err_out_db: + if (!pd->ibpd.uobject && mthca_is_memfree(dev)) + mthca_free_db(dev, MTHCA_DB_TYPE_SRQ, srq->db_index); + +err_out_icm: + mthca_table_put(dev, dev->srq_table.table, srq->srqn); + +err_out: + mthca_free(&dev->srq_table.alloc, srq->srqn); + + return err; +} + +void mthca_free_srq(struct mthca_dev *dev, struct mthca_srq *srq) +{ + struct mthca_mailbox *mailbox; + int err; + u8 status; + + mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL); + if (IS_ERR(mailbox)) { + mthca_warn(dev, "No memory for mailbox to free SRQ.\n"); + return; + } + + err = mthca_HW2SW_SRQ(dev, mailbox, srq->srqn, &status); + if (err) + mthca_warn(dev, "HW2SW_SRQ failed (%d)\n", err); + else if (status) + mthca_warn(dev, "HW2SW_SRQ returned status 0x%02x\n", status); + + spin_lock_irq(&dev->srq_table.lock); + mthca_array_clear(&dev->srq_table.srq, + srq->srqn & 
(dev->limits.num_srqs - 1)); + spin_unlock_irq(&dev->srq_table.lock); + + atomic_dec(&srq->refcount); + wait_event(srq->wait, !atomic_read(&srq->refcount)); + + if (!srq->ibsrq.uobject) { + mthca_free_srq_buf(dev, srq); + if (mthca_is_memfree(dev)) + mthca_free_db(dev, MTHCA_DB_TYPE_SRQ, srq->db_index); + } + + mthca_table_put(dev, dev->srq_table.table, srq->srqn); + mthca_free(&dev->srq_table.alloc, srq->srqn); + mthca_free_mailbox(dev, mailbox); +} + +void mthca_srq_event(struct mthca_dev *dev, u32 srqn, + enum ib_event_type event_type) +{ + struct mthca_srq *srq; + struct ib_event event; + + spin_lock(&dev->srq_table.lock); + srq = mthca_array_get(&dev->srq_table.srq, srqn & (dev->limits.num_srqs - 1)); + if (srq) + atomic_inc(&srq->refcount); + spin_unlock(&dev->srq_table.lock); + + if (!srq) { + mthca_warn(dev, "Async event for bogus SRQ %08x\n", srqn); + return; + } + + if (!srq->ibsrq.event_handler) + goto out; + + event.device = &dev->ib_dev; + event.event = event_type; + event.element.srq = &srq->ibsrq; + srq->ibsrq.event_handler(&event, srq->ibsrq.srq_context); + +out: + if (atomic_dec_and_test(&srq->refcount)) + wake_up(&srq->wait); +} + +/* + * This function must be called with IRQs disabled. 
+ */ +void mthca_free_srq_wqe(struct mthca_srq *srq, u32 wqe_addr) +{ + int ind; + + ind = wqe_addr >> srq->wqe_shift; + + spin_lock(&srq->lock); + + if (likely(srq->first_free >= 0)) + *wqe_to_link(get_wqe(srq, srq->last_free)) = ind; + else + srq->first_free = ind; + + *wqe_to_link(get_wqe(srq, ind)) = -1; + srq->last_free = ind; + + spin_unlock(&srq->lock); +} + +int mthca_tavor_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr, + struct ib_recv_wr **bad_wr) +{ + struct mthca_dev *dev = to_mdev(ibsrq->device); + struct mthca_srq *srq = to_msrq(ibsrq); + unsigned long flags; + int err = 0; + int first_ind; + int ind; + int next_ind; + int nreq; + int i; + void *wqe; + void *prev_wqe; + + spin_lock_irqsave(&srq->lock, flags); + + first_ind = srq->first_free; + + for (nreq = 0; wr; ++nreq, wr = wr->next) { + ind = srq->first_free; + + if (ind < 0) { + mthca_err(dev, "SRQ %06x full\n", srq->srqn); + err = -ENOMEM; + *bad_wr = wr; + return nreq; + } + + wqe = get_wqe(srq, ind); + next_ind = *wqe_to_link(wqe); + prev_wqe = srq->last; + srq->last = wqe; + + ((struct mthca_next_seg *) wqe)->nda_op = 0; + ((struct mthca_next_seg *) wqe)->ee_nds = 0; + /* flags field will always remain 0 */ + + wqe += sizeof (struct mthca_next_seg); + + if (unlikely(wr->num_sge > srq->max_gs)) { + err = -EINVAL; + *bad_wr = wr; + srq->last = prev_wqe; + return nreq; + } + + for (i = 0; i < wr->num_sge; ++i) { + ((struct mthca_data_seg *) wqe)->byte_count = + cpu_to_be32(wr->sg_list[i].length); + ((struct mthca_data_seg *) wqe)->lkey = + cpu_to_be32(wr->sg_list[i].lkey); + ((struct mthca_data_seg *) wqe)->addr = + cpu_to_be64(wr->sg_list[i].addr); + wqe += sizeof (struct mthca_data_seg); + } + + if (i < srq->max_gs) { + ((struct mthca_data_seg *) wqe)->byte_count = 0; + ((struct mthca_data_seg *) wqe)->lkey = cpu_to_be32(MTHCA_INVAL_LKEY); + ((struct mthca_data_seg *) wqe)->addr = 0; + } + + if (likely(prev_wqe)) { + ((struct mthca_next_seg *) prev_wqe)->nda_op = + cpu_to_be32((ind 
<< srq->wqe_shift) | 1); + wmb(); + ((struct mthca_next_seg *) prev_wqe)->ee_nds = + cpu_to_be32(MTHCA_NEXT_DBD); + } + + srq->wrid[ind] = wr->wr_id; + srq->first_free = next_ind; + } + + return nreq; + + if (likely(nreq)) { + __be32 doorbell[2]; + + doorbell[0] = cpu_to_be32(first_ind << srq->wqe_shift); + doorbell[1] = cpu_to_be32((srq->srqn << 8) | nreq); + + /* + * Make sure that descriptors are written before + * doorbell is rung. + */ + wmb(); + + mthca_write64(doorbell, + dev->kar + MTHCA_RECEIVE_DOORBELL, + MTHCA_GET_DOORBELL_LOCK(&dev->doorbell_lock)); + } + + spin_unlock_irqrestore(&srq->lock, flags); + return err; +} + +int mthca_arbel_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr, + struct ib_recv_wr **bad_wr) +{ + struct mthca_dev *dev = to_mdev(ibsrq->device); + struct mthca_srq *srq = to_msrq(ibsrq); + unsigned long flags; + int err = 0; + int ind; + int next_ind; + int nreq; + int i; + void *wqe; + + spin_lock_irqsave(&srq->lock, flags); + + for (nreq = 0; wr; ++nreq, wr = wr->next) { + ind = srq->first_free; + + if (ind < 0) { + mthca_err(dev, "SRQ %06x full\n", srq->srqn); + err = -ENOMEM; + *bad_wr = wr; + return nreq; + } + + wqe = get_wqe(srq, ind); + next_ind = *wqe_to_link(wqe); + + ((struct mthca_next_seg *) wqe)->nda_op = + cpu_to_be32((next_ind << srq->wqe_shift) | 1); + ((struct mthca_next_seg *) wqe)->ee_nds = 0; + /* flags field will always remain 0 */ + + wqe += sizeof (struct mthca_next_seg); + + if (unlikely(wr->num_sge > srq->max_gs)) { + err = -EINVAL; + *bad_wr = wr; + return nreq; + } + + for (i = 0; i < wr->num_sge; ++i) { + ((struct mthca_data_seg *) wqe)->byte_count = + cpu_to_be32(wr->sg_list[i].length); + ((struct mthca_data_seg *) wqe)->lkey = + cpu_to_be32(wr->sg_list[i].lkey); + ((struct mthca_data_seg *) wqe)->addr = + cpu_to_be64(wr->sg_list[i].addr); + wqe += sizeof (struct mthca_data_seg); + } + + if (i < srq->max_gs) { + ((struct mthca_data_seg *) wqe)->byte_count = 0; + ((struct mthca_data_seg *) 
wqe)->lkey = cpu_to_be32(MTHCA_INVAL_LKEY); + ((struct mthca_data_seg *) wqe)->addr = 0; + } + + srq->wrid[ind] = wr->wr_id; + srq->first_free = next_ind; + } + + if (likely(nreq)) { + srq->counter += nreq; + + /* + * Make sure that descriptors are written before + * we write doorbell record. + */ + wmb(); + *srq->db = cpu_to_be32(srq->counter); + } + + spin_unlock_irqrestore(&srq->lock, flags); + return err; +} + +int __devinit mthca_init_srq_table(struct mthca_dev *dev) +{ + int err; + + if (!(dev->mthca_flags & MTHCA_FLAG_SRQ)) + return 0; + + spin_lock_init(&dev->srq_table.lock); + + err = mthca_alloc_init(&dev->srq_table.alloc, + dev->limits.num_srqs, + dev->limits.num_srqs - 1, + dev->limits.reserved_srqs); + if (err) + return err; + + err = mthca_array_init(&dev->srq_table.srq, + dev->limits.num_srqs); + if (err) + mthca_alloc_cleanup(&dev->srq_table.alloc); + + return err; +} + +void __devexit mthca_cleanup_srq_table(struct mthca_dev *dev) +{ + if (!(dev->mthca_flags & MTHCA_FLAG_SRQ)) + return; + + mthca_array_cleanup(&dev->srq_table.srq, dev->limits.num_srqs); + mthca_alloc_cleanup(&dev->srq_table.alloc); +} diff --git a/drivers/infiniband/hw/mthca/mthca_user.h b/drivers/infiniband/hw/mthca/mthca_user.h index 3024c1b4547d..41613ec8a04e 100644 --- a/drivers/infiniband/hw/mthca/mthca_user.h +++ b/drivers/infiniband/hw/mthca/mthca_user.h @@ -69,6 +69,17 @@ struct mthca_create_cq_resp { __u32 reserved; }; +struct mthca_create_srq { + __u32 lkey; + __u32 db_index; + __u64 db_page; +}; + +struct mthca_create_srq_resp { + __u32 srqn; + __u32 reserved; +}; + struct mthca_create_qp { __u32 lkey; __u32 reserved; From 4ce059378c04b40c2e9f658b1c6a2e9078b85c7c Mon Sep 17 00:00:00 2001 From: Roland Dreier Date: Fri, 19 Aug 2005 12:03:17 -0700 Subject: [PATCH 192/584] [PATCH] IPoIB: Set full membership bit in P_Keys Always make sure that the full membership bit is set in the P_Keys that IPoIB uses. 
This makes sure that all hosts join the correct multicast groups so that hosts that are partial partition members can talk to the rest of the network. Signed-off-by: Roland Dreier --- drivers/infiniband/ulp/ipoib/ipoib_main.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c index 968b27947f8d..57c3ac98991f 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_main.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c @@ -883,6 +883,12 @@ static ssize_t create_child(struct class_device *cdev, if (pkey < 0 || pkey > 0xffff) return -EINVAL; + /* + * Set the full membership bit, so that we join the right + * broadcast group, etc. + */ + pkey |= 0x8000; + ret = ipoib_vlan_add(container_of(cdev, struct net_device, class_dev), pkey); @@ -935,6 +941,12 @@ static struct net_device *ipoib_add_port(const char *format, goto alloc_mem_failed; } + /* + * Set the full membership bit, so that we join the right + * broadcast group, etc. + */ + priv->pkey |= 0x8000; + priv->dev->broadcast[8] = priv->pkey >> 8; priv->dev->broadcast[9] = priv->pkey & 0xff; From b9ef520f9caf20aba8ac7cb2bbba45b52ff19d53 Mon Sep 17 00:00:00 2001 From: Sean Hefty Date: Fri, 19 Aug 2005 13:46:34 -0700 Subject: [PATCH 193/584] [PATCH] IB: fix userspace CM deadlock Fix deadlock condition resulting from trying to destroy a cm_id from the context of a CM thread. The synchronization around the ucm context structure is simplified as a result, and some simple code cleanup is included. 
Signed-off-by: Sean Hefty Signed-off-by: Roland Dreier --- drivers/infiniband/core/ucm.c | 464 ++++++++++++---------------------- drivers/infiniband/core/ucm.h | 9 +- 2 files changed, 160 insertions(+), 313 deletions(-) diff --git a/drivers/infiniband/core/ucm.c b/drivers/infiniband/core/ucm.c index 61d07c732f49..79595826ccc7 100644 --- a/drivers/infiniband/core/ucm.c +++ b/drivers/infiniband/core/ucm.c @@ -1,5 +1,6 @@ /* * Copyright (c) 2005 Topspin Communications. All rights reserved. + * Copyright (c) 2005 Intel Corporation. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@ -73,14 +74,18 @@ static struct semaphore ctx_id_mutex; static struct idr ctx_id_table; static int ctx_id_rover = 0; -static struct ib_ucm_context *ib_ucm_ctx_get(int id) +static struct ib_ucm_context *ib_ucm_ctx_get(struct ib_ucm_file *file, int id) { struct ib_ucm_context *ctx; down(&ctx_id_mutex); ctx = idr_find(&ctx_id_table, id); - if (ctx) - ctx->ref++; + if (!ctx) + ctx = ERR_PTR(-ENOENT); + else if (ctx->file != file) + ctx = ERR_PTR(-EINVAL); + else + atomic_inc(&ctx->ref); up(&ctx_id_mutex); return ctx; @@ -88,21 +93,37 @@ static struct ib_ucm_context *ib_ucm_ctx_get(int id) static void ib_ucm_ctx_put(struct ib_ucm_context *ctx) { + if (atomic_dec_and_test(&ctx->ref)) + wake_up(&ctx->wait); +} + +static ssize_t ib_ucm_destroy_ctx(struct ib_ucm_file *file, int id) +{ + struct ib_ucm_context *ctx; struct ib_ucm_event *uevent; down(&ctx_id_mutex); - - ctx->ref--; - if (!ctx->ref) + ctx = idr_find(&ctx_id_table, id); + if (!ctx) + ctx = ERR_PTR(-ENOENT); + else if (ctx->file != file) + ctx = ERR_PTR(-EINVAL); + else idr_remove(&ctx_id_table, ctx->id); - up(&ctx_id_mutex); - if (ctx->ref) - return; + if (IS_ERR(ctx)) + return PTR_ERR(ctx); - down(&ctx->file->mutex); + atomic_dec(&ctx->ref); + wait_event(ctx->wait, !atomic_read(&ctx->ref)); + /* No new events will be generated 
after destroying the cm_id. */ + if (!IS_ERR(ctx->cm_id)) + ib_destroy_cm_id(ctx->cm_id); + + /* Cleanup events not yet reported to the user. */ + down(&file->mutex); list_del(&ctx->file_list); while (!list_empty(&ctx->events)) { @@ -117,13 +138,10 @@ static void ib_ucm_ctx_put(struct ib_ucm_context *ctx) kfree(uevent); } + up(&file->mutex); - up(&ctx->file->mutex); - - ucm_dbg("Destroyed CM ID <%d>\n", ctx->id); - - ib_destroy_cm_id(ctx->cm_id); kfree(ctx); + return 0; } static struct ib_ucm_context *ib_ucm_ctx_alloc(struct ib_ucm_file *file) @@ -135,11 +153,11 @@ static struct ib_ucm_context *ib_ucm_ctx_alloc(struct ib_ucm_file *file) if (!ctx) return NULL; - ctx->ref = 1; /* user reference */ + atomic_set(&ctx->ref, 1); + init_waitqueue_head(&ctx->wait); ctx->file = file; INIT_LIST_HEAD(&ctx->events); - init_MUTEX(&ctx->mutex); list_add_tail(&ctx->file_list, &file->ctxs); @@ -177,8 +195,8 @@ static void ib_ucm_event_path_get(struct ib_ucm_path_rec *upath, if (!kpath || !upath) return; - memcpy(upath->dgid, kpath->dgid.raw, sizeof(union ib_gid)); - memcpy(upath->sgid, kpath->sgid.raw, sizeof(union ib_gid)); + memcpy(upath->dgid, kpath->dgid.raw, sizeof *upath->dgid); + memcpy(upath->sgid, kpath->sgid.raw, sizeof *upath->sgid); upath->dlid = kpath->dlid; upath->slid = kpath->slid; @@ -201,10 +219,11 @@ static void ib_ucm_event_path_get(struct ib_ucm_path_rec *upath, kpath->packet_life_time_selector; } -static void ib_ucm_event_req_get(struct ib_ucm_req_event_resp *ureq, +static void ib_ucm_event_req_get(struct ib_ucm_context *ctx, + struct ib_ucm_req_event_resp *ureq, struct ib_cm_req_event_param *kreq) { - ureq->listen_id = (long)kreq->listen_id->context; + ureq->listen_id = ctx->id; ureq->remote_ca_guid = kreq->remote_ca_guid; ureq->remote_qkey = kreq->remote_qkey; @@ -240,34 +259,11 @@ static void ib_ucm_event_rep_get(struct ib_ucm_rep_event_resp *urep, urep->srq = krep->srq; } -static void ib_ucm_event_rej_get(struct ib_ucm_rej_event_resp *urej, - struct 
ib_cm_rej_event_param *krej) -{ - urej->reason = krej->reason; -} - -static void ib_ucm_event_mra_get(struct ib_ucm_mra_event_resp *umra, - struct ib_cm_mra_event_param *kmra) -{ - umra->timeout = kmra->service_timeout; -} - -static void ib_ucm_event_lap_get(struct ib_ucm_lap_event_resp *ulap, - struct ib_cm_lap_event_param *klap) -{ - ib_ucm_event_path_get(&ulap->path, klap->alternate_path); -} - -static void ib_ucm_event_apr_get(struct ib_ucm_apr_event_resp *uapr, - struct ib_cm_apr_event_param *kapr) -{ - uapr->status = kapr->ap_status; -} - -static void ib_ucm_event_sidr_req_get(struct ib_ucm_sidr_req_event_resp *ureq, +static void ib_ucm_event_sidr_req_get(struct ib_ucm_context *ctx, + struct ib_ucm_sidr_req_event_resp *ureq, struct ib_cm_sidr_req_event_param *kreq) { - ureq->listen_id = (long)kreq->listen_id->context; + ureq->listen_id = ctx->id; ureq->pkey = kreq->pkey; } @@ -279,19 +275,18 @@ static void ib_ucm_event_sidr_rep_get(struct ib_ucm_sidr_rep_event_resp *urep, urep->qpn = krep->qpn; }; -static int ib_ucm_event_process(struct ib_cm_event *evt, +static int ib_ucm_event_process(struct ib_ucm_context *ctx, + struct ib_cm_event *evt, struct ib_ucm_event *uvt) { void *info = NULL; - int result; switch (evt->event) { case IB_CM_REQ_RECEIVED: - ib_ucm_event_req_get(&uvt->resp.u.req_resp, + ib_ucm_event_req_get(ctx, &uvt->resp.u.req_resp, &evt->param.req_rcvd); uvt->data_len = IB_CM_REQ_PRIVATE_DATA_SIZE; - uvt->resp.present |= (evt->param.req_rcvd.primary_path ? - IB_UCM_PRES_PRIMARY : 0); + uvt->resp.present = IB_UCM_PRES_PRIMARY; uvt->resp.present |= (evt->param.req_rcvd.alternate_path ? 
IB_UCM_PRES_ALTERNATE : 0); break; @@ -299,57 +294,46 @@ static int ib_ucm_event_process(struct ib_cm_event *evt, ib_ucm_event_rep_get(&uvt->resp.u.rep_resp, &evt->param.rep_rcvd); uvt->data_len = IB_CM_REP_PRIVATE_DATA_SIZE; - break; case IB_CM_RTU_RECEIVED: uvt->data_len = IB_CM_RTU_PRIVATE_DATA_SIZE; uvt->resp.u.send_status = evt->param.send_status; - break; case IB_CM_DREQ_RECEIVED: uvt->data_len = IB_CM_DREQ_PRIVATE_DATA_SIZE; uvt->resp.u.send_status = evt->param.send_status; - break; case IB_CM_DREP_RECEIVED: uvt->data_len = IB_CM_DREP_PRIVATE_DATA_SIZE; uvt->resp.u.send_status = evt->param.send_status; - break; case IB_CM_MRA_RECEIVED: - ib_ucm_event_mra_get(&uvt->resp.u.mra_resp, - &evt->param.mra_rcvd); + uvt->resp.u.mra_resp.timeout = + evt->param.mra_rcvd.service_timeout; uvt->data_len = IB_CM_MRA_PRIVATE_DATA_SIZE; - break; case IB_CM_REJ_RECEIVED: - ib_ucm_event_rej_get(&uvt->resp.u.rej_resp, - &evt->param.rej_rcvd); + uvt->resp.u.rej_resp.reason = evt->param.rej_rcvd.reason; uvt->data_len = IB_CM_REJ_PRIVATE_DATA_SIZE; uvt->info_len = evt->param.rej_rcvd.ari_length; info = evt->param.rej_rcvd.ari; - break; case IB_CM_LAP_RECEIVED: - ib_ucm_event_lap_get(&uvt->resp.u.lap_resp, - &evt->param.lap_rcvd); + ib_ucm_event_path_get(&uvt->resp.u.lap_resp.path, + evt->param.lap_rcvd.alternate_path); uvt->data_len = IB_CM_LAP_PRIVATE_DATA_SIZE; - uvt->resp.present |= (evt->param.lap_rcvd.alternate_path ? 
- IB_UCM_PRES_ALTERNATE : 0); + uvt->resp.present = IB_UCM_PRES_ALTERNATE; break; case IB_CM_APR_RECEIVED: - ib_ucm_event_apr_get(&uvt->resp.u.apr_resp, - &evt->param.apr_rcvd); + uvt->resp.u.apr_resp.status = evt->param.apr_rcvd.ap_status; uvt->data_len = IB_CM_APR_PRIVATE_DATA_SIZE; uvt->info_len = evt->param.apr_rcvd.info_len; info = evt->param.apr_rcvd.apr_info; - break; case IB_CM_SIDR_REQ_RECEIVED: - ib_ucm_event_sidr_req_get(&uvt->resp.u.sidr_req_resp, + ib_ucm_event_sidr_req_get(ctx, &uvt->resp.u.sidr_req_resp, &evt->param.sidr_req_rcvd); uvt->data_len = IB_CM_SIDR_REQ_PRIVATE_DATA_SIZE; - break; case IB_CM_SIDR_REP_RECEIVED: ib_ucm_event_sidr_rep_get(&uvt->resp.u.sidr_rep_resp, @@ -357,43 +341,35 @@ static int ib_ucm_event_process(struct ib_cm_event *evt, uvt->data_len = IB_CM_SIDR_REP_PRIVATE_DATA_SIZE; uvt->info_len = evt->param.sidr_rep_rcvd.info_len; info = evt->param.sidr_rep_rcvd.info; - break; default: uvt->resp.u.send_status = evt->param.send_status; - break; } - if (uvt->data_len && evt->private_data) { - + if (uvt->data_len) { uvt->data = kmalloc(uvt->data_len, GFP_KERNEL); - if (!uvt->data) { - result = -ENOMEM; - goto error; - } + if (!uvt->data) + goto err1; memcpy(uvt->data, evt->private_data, uvt->data_len); uvt->resp.present |= IB_UCM_PRES_DATA; } - if (uvt->info_len && info) { - + if (uvt->info_len) { uvt->info = kmalloc(uvt->info_len, GFP_KERNEL); - if (!uvt->info) { - result = -ENOMEM; - goto error; - } + if (!uvt->info) + goto err2; memcpy(uvt->info, info, uvt->info_len); uvt->resp.present |= IB_UCM_PRES_INFO; } - return 0; -error: - kfree(uvt->info); + +err2: kfree(uvt->data); - return result; +err1: + return -ENOMEM; } static int ib_ucm_event_handler(struct ib_cm_id *cm_id, @@ -403,63 +379,42 @@ static int ib_ucm_event_handler(struct ib_cm_id *cm_id, struct ib_ucm_context *ctx; int result = 0; int id; - /* - * lookup correct context based on event type. 
- */ - switch (event->event) { - case IB_CM_REQ_RECEIVED: - id = (long)event->param.req_rcvd.listen_id->context; - break; - case IB_CM_SIDR_REQ_RECEIVED: - id = (long)event->param.sidr_req_rcvd.listen_id->context; - break; - default: - id = (long)cm_id->context; - break; - } - ucm_dbg("Event. CM ID <%d> event <%d>\n", id, event->event); - - ctx = ib_ucm_ctx_get(id); - if (!ctx) - return -ENOENT; + ctx = cm_id->context; if (event->event == IB_CM_REQ_RECEIVED || event->event == IB_CM_SIDR_REQ_RECEIVED) id = IB_UCM_CM_ID_INVALID; + else + id = ctx->id; uevent = kmalloc(sizeof(*uevent), GFP_KERNEL); - if (!uevent) { - result = -ENOMEM; - goto done; - } + if (!uevent) + goto err1; memset(uevent, 0, sizeof(*uevent)); - uevent->resp.id = id; uevent->resp.event = event->event; - result = ib_ucm_event_process(event, uevent); + result = ib_ucm_event_process(ctx, event, uevent); if (result) - goto done; + goto err2; uevent->ctx = ctx; - uevent->cm_id = ((event->event == IB_CM_REQ_RECEIVED || - event->event == IB_CM_SIDR_REQ_RECEIVED ) ? - cm_id : NULL); + uevent->cm_id = (id == IB_UCM_CM_ID_INVALID) ? 
cm_id : NULL; down(&ctx->file->mutex); - list_add_tail(&uevent->file_list, &ctx->file->events); list_add_tail(&uevent->ctx_list, &ctx->events); - wake_up_interruptible(&ctx->file->poll_wait); - up(&ctx->file->mutex); -done: - ctx->error = result; - ib_ucm_ctx_put(ctx); /* func reference */ - return result; + return 0; + +err2: + kfree(uevent); +err1: + /* Destroy new cm_id's */ + return (id == IB_UCM_CM_ID_INVALID); } static ssize_t ib_ucm_event(struct ib_ucm_file *file, @@ -517,9 +472,8 @@ static ssize_t ib_ucm_event(struct ib_ucm_file *file, goto done; } - ctx->cm_id = uevent->cm_id; - ctx->cm_id->cm_handler = ib_ucm_event_handler; - ctx->cm_id->context = (void *)(unsigned long)ctx->id; + ctx->cm_id = uevent->cm_id; + ctx->cm_id->context = ctx; uevent->resp.id = ctx->id; @@ -585,30 +539,29 @@ static ssize_t ib_ucm_create_id(struct ib_ucm_file *file, if (copy_from_user(&cmd, inbuf, sizeof(cmd))) return -EFAULT; + down(&file->mutex); ctx = ib_ucm_ctx_alloc(file); + up(&file->mutex); if (!ctx) return -ENOMEM; - ctx->cm_id = ib_create_cm_id(ib_ucm_event_handler, - (void *)(unsigned long)ctx->id); - if (!ctx->cm_id) { - result = -ENOMEM; - goto err_cm; + ctx->cm_id = ib_create_cm_id(ib_ucm_event_handler, ctx); + if (IS_ERR(ctx->cm_id)) { + result = PTR_ERR(ctx->cm_id); + goto err; } resp.id = ctx->id; if (copy_to_user((void __user *)(unsigned long)cmd.response, &resp, sizeof(resp))) { result = -EFAULT; - goto err_ret; + goto err; } return 0; -err_ret: - ib_destroy_cm_id(ctx->cm_id); -err_cm: - ib_ucm_ctx_put(ctx); /* user reference */ +err: + ib_ucm_destroy_ctx(file, ctx->id); return result; } @@ -617,19 +570,11 @@ static ssize_t ib_ucm_destroy_id(struct ib_ucm_file *file, int in_len, int out_len) { struct ib_ucm_destroy_id cmd; - struct ib_ucm_context *ctx; if (copy_from_user(&cmd, inbuf, sizeof(cmd))) return -EFAULT; - ctx = ib_ucm_ctx_get(cmd.id); - if (!ctx) - return -ENOENT; - - ib_ucm_ctx_put(ctx); /* user reference */ - ib_ucm_ctx_put(ctx); /* func reference */ 
- - return 0; + return ib_ucm_destroy_ctx(file, cmd.id); } static ssize_t ib_ucm_attr_id(struct ib_ucm_file *file, @@ -647,15 +592,9 @@ static ssize_t ib_ucm_attr_id(struct ib_ucm_file *file, if (copy_from_user(&cmd, inbuf, sizeof(cmd))) return -EFAULT; - ctx = ib_ucm_ctx_get(cmd.id); - if (!ctx) - return -ENOENT; - - down(&ctx->file->mutex); - if (ctx->file != file) { - result = -EINVAL; - goto done; - } + ctx = ib_ucm_ctx_get(file, cmd.id); + if (IS_ERR(ctx)) + return PTR_ERR(ctx); resp.service_id = ctx->cm_id->service_id; resp.service_mask = ctx->cm_id->service_mask; @@ -666,9 +605,7 @@ static ssize_t ib_ucm_attr_id(struct ib_ucm_file *file, &resp, sizeof(resp))) result = -EFAULT; -done: - up(&ctx->file->mutex); - ib_ucm_ctx_put(ctx); /* func reference */ + ib_ucm_ctx_put(ctx); return result; } @@ -683,19 +620,12 @@ static ssize_t ib_ucm_listen(struct ib_ucm_file *file, if (copy_from_user(&cmd, inbuf, sizeof(cmd))) return -EFAULT; - ctx = ib_ucm_ctx_get(cmd.id); - if (!ctx) - return -ENOENT; + ctx = ib_ucm_ctx_get(file, cmd.id); + if (IS_ERR(ctx)) + return PTR_ERR(ctx); - down(&ctx->file->mutex); - if (ctx->file != file) - result = -EINVAL; - else - result = ib_cm_listen(ctx->cm_id, cmd.service_id, - cmd.service_mask); - - up(&ctx->file->mutex); - ib_ucm_ctx_put(ctx); /* func reference */ + result = ib_cm_listen(ctx->cm_id, cmd.service_id, cmd.service_mask); + ib_ucm_ctx_put(ctx); return result; } @@ -710,18 +640,12 @@ static ssize_t ib_ucm_establish(struct ib_ucm_file *file, if (copy_from_user(&cmd, inbuf, sizeof(cmd))) return -EFAULT; - ctx = ib_ucm_ctx_get(cmd.id); - if (!ctx) - return -ENOENT; + ctx = ib_ucm_ctx_get(file, cmd.id); + if (IS_ERR(ctx)) + return PTR_ERR(ctx); - down(&ctx->file->mutex); - if (ctx->file != file) - result = -EINVAL; - else - result = ib_cm_establish(ctx->cm_id); - - up(&ctx->file->mutex); - ib_ucm_ctx_put(ctx); /* func reference */ + result = ib_cm_establish(ctx->cm_id); + ib_ucm_ctx_put(ctx); return result; } @@ -768,8 +692,8 @@ 
static int ib_ucm_path_get(struct ib_sa_path_rec **path, u64 src) return -EFAULT; } - memcpy(sa_path->dgid.raw, ucm_path.dgid, sizeof(union ib_gid)); - memcpy(sa_path->sgid.raw, ucm_path.sgid, sizeof(union ib_gid)); + memcpy(sa_path->dgid.raw, ucm_path.dgid, sizeof sa_path->dgid); + memcpy(sa_path->sgid.raw, ucm_path.sgid, sizeof sa_path->sgid); sa_path->dlid = ucm_path.dlid; sa_path->slid = ucm_path.slid; @@ -839,25 +763,17 @@ static ssize_t ib_ucm_send_req(struct ib_ucm_file *file, param.max_cm_retries = cmd.max_cm_retries; param.srq = cmd.srq; - ctx = ib_ucm_ctx_get(cmd.id); - if (!ctx) { - result = -ENOENT; - goto done; - } - - down(&ctx->file->mutex); - if (ctx->file != file) - result = -EINVAL; - else + ctx = ib_ucm_ctx_get(file, cmd.id); + if (!IS_ERR(ctx)) { result = ib_send_cm_req(ctx->cm_id, &param); + ib_ucm_ctx_put(ctx); + } else + result = PTR_ERR(ctx); - up(&ctx->file->mutex); - ib_ucm_ctx_put(ctx); /* func reference */ done: kfree(param.private_data); kfree(param.primary_path); kfree(param.alternate_path); - return result; } @@ -890,23 +806,14 @@ static ssize_t ib_ucm_send_rep(struct ib_ucm_file *file, param.rnr_retry_count = cmd.rnr_retry_count; param.srq = cmd.srq; - ctx = ib_ucm_ctx_get(cmd.id); - if (!ctx) { - result = -ENOENT; - goto done; - } - - down(&ctx->file->mutex); - if (ctx->file != file) - result = -EINVAL; - else + ctx = ib_ucm_ctx_get(file, cmd.id); + if (!IS_ERR(ctx)) { result = ib_send_cm_rep(ctx->cm_id, &param); + ib_ucm_ctx_put(ctx); + } else + result = PTR_ERR(ctx); - up(&ctx->file->mutex); - ib_ucm_ctx_put(ctx); /* func reference */ -done: kfree(param.private_data); - return result; } @@ -928,23 +835,14 @@ static ssize_t ib_ucm_send_private_data(struct ib_ucm_file *file, if (result) return result; - ctx = ib_ucm_ctx_get(cmd.id); - if (!ctx) { - result = -ENOENT; - goto done; - } - - down(&ctx->file->mutex); - if (ctx->file != file) - result = -EINVAL; - else + ctx = ib_ucm_ctx_get(file, cmd.id); + if (!IS_ERR(ctx)) { result =
func(ctx->cm_id, private_data, cmd.len); + ib_ucm_ctx_put(ctx); + } else + result = PTR_ERR(ctx); - up(&ctx->file->mutex); - ib_ucm_ctx_put(ctx); /* func reference */ -done: kfree(private_data); - return result; } @@ -995,26 +893,17 @@ static ssize_t ib_ucm_send_info(struct ib_ucm_file *file, if (result) goto done; - ctx = ib_ucm_ctx_get(cmd.id); - if (!ctx) { - result = -ENOENT; - goto done; - } - - down(&ctx->file->mutex); - if (ctx->file != file) - result = -EINVAL; - else - result = func(ctx->cm_id, cmd.status, - info, cmd.info_len, + ctx = ib_ucm_ctx_get(file, cmd.id); + if (!IS_ERR(ctx)) { + result = func(ctx->cm_id, cmd.status, info, cmd.info_len, data, cmd.data_len); + ib_ucm_ctx_put(ctx); + } else + result = PTR_ERR(ctx); - up(&ctx->file->mutex); - ib_ucm_ctx_put(ctx); /* func reference */ done: kfree(data); kfree(info); - return result; } @@ -1048,24 +937,14 @@ static ssize_t ib_ucm_send_mra(struct ib_ucm_file *file, if (result) return result; - ctx = ib_ucm_ctx_get(cmd.id); - if (!ctx) { - result = -ENOENT; - goto done; - } + ctx = ib_ucm_ctx_get(file, cmd.id); + if (!IS_ERR(ctx)) { + result = ib_send_cm_mra(ctx->cm_id, cmd.timeout, data, cmd.len); + ib_ucm_ctx_put(ctx); + } else + result = PTR_ERR(ctx); - down(&ctx->file->mutex); - if (ctx->file != file) - result = -EINVAL; - else - result = ib_send_cm_mra(ctx->cm_id, cmd.timeout, - data, cmd.len); - - up(&ctx->file->mutex); - ib_ucm_ctx_put(ctx); /* func reference */ -done: kfree(data); - return result; } @@ -1090,24 +969,16 @@ static ssize_t ib_ucm_send_lap(struct ib_ucm_file *file, if (result) goto done; - ctx = ib_ucm_ctx_get(cmd.id); - if (!ctx) { - result = -ENOENT; - goto done; - } - - down(&ctx->file->mutex); - if (ctx->file != file) - result = -EINVAL; - else + ctx = ib_ucm_ctx_get(file, cmd.id); + if (!IS_ERR(ctx)) { result = ib_send_cm_lap(ctx->cm_id, path, data, cmd.len); + ib_ucm_ctx_put(ctx); + } else + result = PTR_ERR(ctx); - up(&ctx->file->mutex); - ib_ucm_ctx_put(ctx); /* func 
reference */ done: kfree(data); kfree(path); - return result; } @@ -1140,24 +1011,16 @@ static ssize_t ib_ucm_send_sidr_req(struct ib_ucm_file *file, param.max_cm_retries = cmd.max_cm_retries; param.pkey = cmd.pkey; - ctx = ib_ucm_ctx_get(cmd.id); - if (!ctx) { - result = -ENOENT; - goto done; - } - - down(&ctx->file->mutex); - if (ctx->file != file) - result = -EINVAL; - else + ctx = ib_ucm_ctx_get(file, cmd.id); + if (!IS_ERR(ctx)) { result = ib_send_cm_sidr_req(ctx->cm_id, &param); + ib_ucm_ctx_put(ctx); + } else + result = PTR_ERR(ctx); - up(&ctx->file->mutex); - ib_ucm_ctx_put(ctx); /* func reference */ done: kfree(param.private_data); kfree(param.path); - return result; } @@ -1184,30 +1047,22 @@ static ssize_t ib_ucm_send_sidr_rep(struct ib_ucm_file *file, if (result) goto done; - param.qp_num = cmd.qpn; - param.qkey = cmd.qkey; - param.status = cmd.status; - param.info_length = cmd.info_len; - param.private_data_len = cmd.data_len; + param.qp_num = cmd.qpn; + param.qkey = cmd.qkey; + param.status = cmd.status; + param.info_length = cmd.info_len; + param.private_data_len = cmd.data_len; - ctx = ib_ucm_ctx_get(cmd.id); - if (!ctx) { - result = -ENOENT; - goto done; - } - - down(&ctx->file->mutex); - if (ctx->file != file) - result = -EINVAL; - else + ctx = ib_ucm_ctx_get(file, cmd.id); + if (!IS_ERR(ctx)) { result = ib_send_cm_sidr_rep(ctx->cm_id, &param); + ib_ucm_ctx_put(ctx); + } else + result = PTR_ERR(ctx); - up(&ctx->file->mutex); - ib_ucm_ctx_put(ctx); /* func reference */ done: kfree(param.private_data); kfree(param.info); - return result; } @@ -1305,22 +1160,17 @@ static int ib_ucm_close(struct inode *inode, struct file *filp) struct ib_ucm_context *ctx; down(&file->mutex); - while (!list_empty(&file->ctxs)) { ctx = list_entry(file->ctxs.next, struct ib_ucm_context, file_list); - up(&ctx->file->mutex); - ib_ucm_ctx_put(ctx); /* user reference */ + up(&file->mutex); + ib_ucm_destroy_ctx(file, ctx->id); down(&file->mutex); } - up(&file->mutex); - kfree(file); -
- ucm_dbg("Deleted struct\n"); return 0; } diff --git a/drivers/infiniband/core/ucm.h b/drivers/infiniband/core/ucm.h index 6d36606151b2..039e8a3783c0 100644 --- a/drivers/infiniband/core/ucm.h +++ b/drivers/infiniband/core/ucm.h @@ -48,9 +48,7 @@ struct ib_ucm_file { struct semaphore mutex; struct file *filp; - /* - * list of pending events - */ + struct list_head ctxs; /* list of active connections */ struct list_head events; /* list of pending events */ wait_queue_head_t poll_wait; @@ -58,12 +56,11 @@ struct ib_ucm_file { struct ib_ucm_context { int id; - int ref; - int error; + wait_queue_head_t wait; + atomic_t ref; struct ib_ucm_file *file; struct ib_cm_id *cm_id; - struct semaphore mutex; struct list_head events; /* list of pending events. */ struct list_head file_list; /* member in file ctx list */ From fe9e08e17af414a5fd8f3141b0fd88677f81a883 Mon Sep 17 00:00:00 2001 From: Sean Hefty Date: Fri, 19 Aug 2005 13:50:33 -0700 Subject: [PATCH 194/584] [PATCH] IB: Add handling for ABORT and STOP RMPP MADs. Add handling for ABORT / STOP RMPP MADs. 
Signed-off-by: Sean Hefty Signed-off-by: Roland Dreier --- drivers/infiniband/core/mad_rmpp.c | 309 ++++++++++++++++++++++------ drivers/infiniband/include/ib_mad.h | 2 + 2 files changed, 246 insertions(+), 65 deletions(-) diff --git a/drivers/infiniband/core/mad_rmpp.c b/drivers/infiniband/core/mad_rmpp.c index d68bf7e220f9..43fd805e0265 100644 --- a/drivers/infiniband/core/mad_rmpp.c +++ b/drivers/infiniband/core/mad_rmpp.c @@ -100,6 +100,121 @@ void ib_cancel_rmpp_recvs(struct ib_mad_agent_private *agent) } } +static int data_offset(u8 mgmt_class) +{ + if (mgmt_class == IB_MGMT_CLASS_SUBN_ADM) + return offsetof(struct ib_sa_mad, data); + else if ((mgmt_class >= IB_MGMT_CLASS_VENDOR_RANGE2_START) && + (mgmt_class <= IB_MGMT_CLASS_VENDOR_RANGE2_END)) + return offsetof(struct ib_vendor_mad, data); + else + return offsetof(struct ib_rmpp_mad, data); +} + +static void format_ack(struct ib_rmpp_mad *ack, + struct ib_rmpp_mad *data, + struct mad_rmpp_recv *rmpp_recv) +{ + unsigned long flags; + + memcpy(&ack->mad_hdr, &data->mad_hdr, + data_offset(data->mad_hdr.mgmt_class)); + + ack->mad_hdr.method ^= IB_MGMT_METHOD_RESP; + ack->rmpp_hdr.rmpp_type = IB_MGMT_RMPP_TYPE_ACK; + ib_set_rmpp_flags(&ack->rmpp_hdr, IB_MGMT_RMPP_FLAG_ACTIVE); + + spin_lock_irqsave(&rmpp_recv->lock, flags); + rmpp_recv->last_ack = rmpp_recv->seg_num; + ack->rmpp_hdr.seg_num = cpu_to_be32(rmpp_recv->seg_num); + ack->rmpp_hdr.paylen_newwin = cpu_to_be32(rmpp_recv->newwin); + spin_unlock_irqrestore(&rmpp_recv->lock, flags); +} + +static void ack_recv(struct mad_rmpp_recv *rmpp_recv, + struct ib_mad_recv_wc *recv_wc) +{ + struct ib_mad_send_buf *msg; + struct ib_send_wr *bad_send_wr; + int hdr_len, ret; + + hdr_len = sizeof(struct ib_mad_hdr) + sizeof(struct ib_rmpp_hdr); + msg = ib_create_send_mad(&rmpp_recv->agent->agent, recv_wc->wc->src_qp, + recv_wc->wc->pkey_index, rmpp_recv->ah, 1, + hdr_len, sizeof(struct ib_rmpp_mad) - hdr_len, + GFP_KERNEL); + if (!msg) + return; + + format_ack((struct 
ib_rmpp_mad *) msg->mad, + (struct ib_rmpp_mad *) recv_wc->recv_buf.mad, rmpp_recv); + ret = ib_post_send_mad(&rmpp_recv->agent->agent, &msg->send_wr, + &bad_send_wr); + if (ret) + ib_free_send_mad(msg); +} + +static int alloc_response_msg(struct ib_mad_agent *agent, + struct ib_mad_recv_wc *recv_wc, + struct ib_mad_send_buf **msg) +{ + struct ib_mad_send_buf *m; + struct ib_ah *ah; + int hdr_len; + + ah = ib_create_ah_from_wc(agent->qp->pd, recv_wc->wc, + recv_wc->recv_buf.grh, agent->port_num); + if (IS_ERR(ah)) + return PTR_ERR(ah); + + hdr_len = sizeof(struct ib_mad_hdr) + sizeof(struct ib_rmpp_hdr); + m = ib_create_send_mad(agent, recv_wc->wc->src_qp, + recv_wc->wc->pkey_index, ah, 1, hdr_len, + sizeof(struct ib_rmpp_mad) - hdr_len, + GFP_KERNEL); + if (IS_ERR(m)) { + ib_destroy_ah(ah); + return PTR_ERR(m); + } + *msg = m; + return 0; +} + +static void free_msg(struct ib_mad_send_buf *msg) +{ + ib_destroy_ah(msg->send_wr.wr.ud.ah); + ib_free_send_mad(msg); +} + +static void nack_recv(struct ib_mad_agent_private *agent, + struct ib_mad_recv_wc *recv_wc, u8 rmpp_status) +{ + struct ib_mad_send_buf *msg; + struct ib_rmpp_mad *rmpp_mad; + struct ib_send_wr *bad_send_wr; + int ret; + + ret = alloc_response_msg(&agent->agent, recv_wc, &msg); + if (ret) + return; + + rmpp_mad = (struct ib_rmpp_mad *) msg->mad; + memcpy(rmpp_mad, recv_wc->recv_buf.mad, + data_offset(recv_wc->recv_buf.mad->mad_hdr.mgmt_class)); + + rmpp_mad->mad_hdr.method ^= IB_MGMT_METHOD_RESP; + rmpp_mad->rmpp_hdr.rmpp_version = IB_MGMT_RMPP_VERSION; + rmpp_mad->rmpp_hdr.rmpp_type = IB_MGMT_RMPP_TYPE_ABORT; + ib_set_rmpp_flags(&rmpp_mad->rmpp_hdr, IB_MGMT_RMPP_FLAG_ACTIVE); + rmpp_mad->rmpp_hdr.rmpp_status = rmpp_status; + rmpp_mad->rmpp_hdr.seg_num = 0; + rmpp_mad->rmpp_hdr.paylen_newwin = 0; + + ret = ib_post_send_mad(&agent->agent, &msg->send_wr, &bad_send_wr); + if (ret) + free_msg(msg); +} + static void recv_timeout_handler(void *data) { struct mad_rmpp_recv *rmpp_recv = data; @@ -115,8 +230,8 
@@ static void recv_timeout_handler(void *data) list_del(&rmpp_recv->list); spin_unlock_irqrestore(&rmpp_recv->agent->lock, flags); - /* TODO: send abort. */ rmpp_wc = rmpp_recv->rmpp_wc; + nack_recv(rmpp_recv->agent, rmpp_wc, IB_MGMT_RMPP_STATUS_T2L); destroy_rmpp_recv(rmpp_recv); ib_free_recv_mad(rmpp_wc); } @@ -230,60 +345,6 @@ insert_rmpp_recv(struct ib_mad_agent_private *agent, return cur_rmpp_recv; } -static int data_offset(u8 mgmt_class) -{ - if (mgmt_class == IB_MGMT_CLASS_SUBN_ADM) - return offsetof(struct ib_sa_mad, data); - else if ((mgmt_class >= IB_MGMT_CLASS_VENDOR_RANGE2_START) && - (mgmt_class <= IB_MGMT_CLASS_VENDOR_RANGE2_END)) - return offsetof(struct ib_vendor_mad, data); - else - return offsetof(struct ib_rmpp_mad, data); -} - -static void format_ack(struct ib_rmpp_mad *ack, - struct ib_rmpp_mad *data, - struct mad_rmpp_recv *rmpp_recv) -{ - unsigned long flags; - - memcpy(&ack->mad_hdr, &data->mad_hdr, - data_offset(data->mad_hdr.mgmt_class)); - - ack->mad_hdr.method ^= IB_MGMT_METHOD_RESP; - ack->rmpp_hdr.rmpp_type = IB_MGMT_RMPP_TYPE_ACK; - ib_set_rmpp_flags(&ack->rmpp_hdr, IB_MGMT_RMPP_FLAG_ACTIVE); - - spin_lock_irqsave(&rmpp_recv->lock, flags); - rmpp_recv->last_ack = rmpp_recv->seg_num; - ack->rmpp_hdr.seg_num = cpu_to_be32(rmpp_recv->seg_num); - ack->rmpp_hdr.paylen_newwin = cpu_to_be32(rmpp_recv->newwin); - spin_unlock_irqrestore(&rmpp_recv->lock, flags); -} - -static void ack_recv(struct mad_rmpp_recv *rmpp_recv, - struct ib_mad_recv_wc *recv_wc) -{ - struct ib_mad_send_buf *msg; - struct ib_send_wr *bad_send_wr; - int hdr_len, ret; - - hdr_len = sizeof(struct ib_mad_hdr) + sizeof(struct ib_rmpp_hdr); - msg = ib_create_send_mad(&rmpp_recv->agent->agent, recv_wc->wc->src_qp, - recv_wc->wc->pkey_index, rmpp_recv->ah, 1, - hdr_len, sizeof(struct ib_rmpp_mad) - hdr_len, - GFP_KERNEL); - if (!msg) - return; - - format_ack((struct ib_rmpp_mad *) msg->mad, - (struct ib_rmpp_mad *) recv_wc->recv_buf.mad, rmpp_recv); - ret = 
ib_post_send_mad(&rmpp_recv->agent->agent, &msg->send_wr, - &bad_send_wr); - if (ret) - ib_free_send_mad(msg); -} - static inline int get_last_flag(struct ib_mad_recv_buf *seg) { struct ib_rmpp_mad *rmpp_mad; @@ -559,6 +620,34 @@ static int send_next_seg(struct ib_mad_send_wr_private *mad_send_wr) return ib_send_mad(mad_send_wr); } +static void abort_send(struct ib_mad_agent_private *agent, __be64 tid, + u8 rmpp_status) +{ + struct ib_mad_send_wr_private *mad_send_wr; + struct ib_mad_send_wc wc; + unsigned long flags; + + spin_lock_irqsave(&agent->lock, flags); + mad_send_wr = ib_find_send_mad(agent, tid); + if (!mad_send_wr) + goto out; /* Unmatched send */ + + if ((mad_send_wr->last_ack == mad_send_wr->total_seg) || + (!mad_send_wr->timeout) || (mad_send_wr->status != IB_WC_SUCCESS)) + goto out; /* Send is already done */ + + ib_mark_mad_done(mad_send_wr); + spin_unlock_irqrestore(&agent->lock, flags); + + wc.status = IB_WC_REM_ABORT_ERR; + wc.vendor_err = rmpp_status; + wc.wr_id = mad_send_wr->wr_id; + ib_mad_complete_send_wr(mad_send_wr, &wc); + return; +out: + spin_unlock_irqrestore(&agent->lock, flags); +} + static void process_rmpp_ack(struct ib_mad_agent_private *agent, struct ib_mad_recv_wc *mad_recv_wc) { @@ -568,11 +657,21 @@ static void process_rmpp_ack(struct ib_mad_agent_private *agent, int seg_num, newwin, ret; rmpp_mad = (struct ib_rmpp_mad *)mad_recv_wc->recv_buf.mad; - if (rmpp_mad->rmpp_hdr.rmpp_status) + if (rmpp_mad->rmpp_hdr.rmpp_status) { + abort_send(agent, rmpp_mad->mad_hdr.tid, + IB_MGMT_RMPP_STATUS_BAD_STATUS); + nack_recv(agent, mad_recv_wc, IB_MGMT_RMPP_STATUS_BAD_STATUS); return; + } seg_num = be32_to_cpu(rmpp_mad->rmpp_hdr.seg_num); newwin = be32_to_cpu(rmpp_mad->rmpp_hdr.paylen_newwin); + if (newwin < seg_num) { + abort_send(agent, rmpp_mad->mad_hdr.tid, + IB_MGMT_RMPP_STATUS_W2S); + nack_recv(agent, mad_recv_wc, IB_MGMT_RMPP_STATUS_W2S); + return; + } spin_lock_irqsave(&agent->lock, flags); mad_send_wr = ib_find_send_mad(agent, 
rmpp_mad->mad_hdr.tid); @@ -583,8 +682,13 @@ static void process_rmpp_ack(struct ib_mad_agent_private *agent, (!mad_send_wr->timeout) || (mad_send_wr->status != IB_WC_SUCCESS)) goto out; /* Send is already done */ - if (seg_num > mad_send_wr->total_seg) - goto out; /* Bad ACK */ + if (seg_num > mad_send_wr->total_seg || seg_num > mad_send_wr->newwin) { + spin_unlock_irqrestore(&agent->lock, flags); + abort_send(agent, rmpp_mad->mad_hdr.tid, + IB_MGMT_RMPP_STATUS_S2B); + nack_recv(agent, mad_recv_wc, IB_MGMT_RMPP_STATUS_S2B); + return; + } if (newwin < mad_send_wr->newwin || seg_num < mad_send_wr->last_ack) goto out; /* Old ACK */ @@ -628,6 +732,72 @@ out: spin_unlock_irqrestore(&agent->lock, flags); } +static struct ib_mad_recv_wc * +process_rmpp_data(struct ib_mad_agent_private *agent, + struct ib_mad_recv_wc *mad_recv_wc) +{ + struct ib_rmpp_hdr *rmpp_hdr; + u8 rmpp_status; + + rmpp_hdr = &((struct ib_rmpp_mad *)mad_recv_wc->recv_buf.mad)->rmpp_hdr; + + if (rmpp_hdr->rmpp_status) { + rmpp_status = IB_MGMT_RMPP_STATUS_BAD_STATUS; + goto bad; + } + + if (rmpp_hdr->seg_num == __constant_htonl(1)) { + if (!(ib_get_rmpp_flags(rmpp_hdr) & IB_MGMT_RMPP_FLAG_FIRST)) { + rmpp_status = IB_MGMT_RMPP_STATUS_BAD_SEG; + goto bad; + } + return start_rmpp(agent, mad_recv_wc); + } else { + if (ib_get_rmpp_flags(rmpp_hdr) & IB_MGMT_RMPP_FLAG_FIRST) { + rmpp_status = IB_MGMT_RMPP_STATUS_BAD_SEG; + goto bad; + } + return continue_rmpp(agent, mad_recv_wc); + } +bad: + nack_recv(agent, mad_recv_wc, rmpp_status); + ib_free_recv_mad(mad_recv_wc); + return NULL; +} + +static void process_rmpp_stop(struct ib_mad_agent_private *agent, + struct ib_mad_recv_wc *mad_recv_wc) +{ + struct ib_rmpp_mad *rmpp_mad; + + rmpp_mad = (struct ib_rmpp_mad *)mad_recv_wc->recv_buf.mad; + + if (rmpp_mad->rmpp_hdr.rmpp_status != IB_MGMT_RMPP_STATUS_RESX) { + abort_send(agent, rmpp_mad->mad_hdr.tid, + IB_MGMT_RMPP_STATUS_BAD_STATUS); + nack_recv(agent, mad_recv_wc, IB_MGMT_RMPP_STATUS_BAD_STATUS); + } else + 
abort_send(agent, rmpp_mad->mad_hdr.tid, + rmpp_mad->rmpp_hdr.rmpp_status); +} + +static void process_rmpp_abort(struct ib_mad_agent_private *agent, + struct ib_mad_recv_wc *mad_recv_wc) +{ + struct ib_rmpp_mad *rmpp_mad; + + rmpp_mad = (struct ib_rmpp_mad *)mad_recv_wc->recv_buf.mad; + + if (rmpp_mad->rmpp_hdr.rmpp_status < IB_MGMT_RMPP_STATUS_ABORT_MIN || + rmpp_mad->rmpp_hdr.rmpp_status > IB_MGMT_RMPP_STATUS_ABORT_MAX) { + abort_send(agent, rmpp_mad->mad_hdr.tid, + IB_MGMT_RMPP_STATUS_BAD_STATUS); + nack_recv(agent, mad_recv_wc, IB_MGMT_RMPP_STATUS_BAD_STATUS); + } else + abort_send(agent, rmpp_mad->mad_hdr.tid, + rmpp_mad->rmpp_hdr.rmpp_status); +} + struct ib_mad_recv_wc * ib_process_rmpp_recv_wc(struct ib_mad_agent_private *agent, struct ib_mad_recv_wc *mad_recv_wc) @@ -638,23 +808,29 @@ ib_process_rmpp_recv_wc(struct ib_mad_agent_private *agent, if (!(rmpp_mad->rmpp_hdr.rmpp_rtime_flags & IB_MGMT_RMPP_FLAG_ACTIVE)) return mad_recv_wc; - if (rmpp_mad->rmpp_hdr.rmpp_version != IB_MGMT_RMPP_VERSION) + if (rmpp_mad->rmpp_hdr.rmpp_version != IB_MGMT_RMPP_VERSION) { + abort_send(agent, rmpp_mad->mad_hdr.tid, + IB_MGMT_RMPP_STATUS_UNV); + nack_recv(agent, mad_recv_wc, IB_MGMT_RMPP_STATUS_UNV); goto out; + } switch (rmpp_mad->rmpp_hdr.rmpp_type) { case IB_MGMT_RMPP_TYPE_DATA: - if (rmpp_mad->rmpp_hdr.seg_num == __constant_htonl(1)) - return start_rmpp(agent, mad_recv_wc); - else - return continue_rmpp(agent, mad_recv_wc); + return process_rmpp_data(agent, mad_recv_wc); case IB_MGMT_RMPP_TYPE_ACK: process_rmpp_ack(agent, mad_recv_wc); break; case IB_MGMT_RMPP_TYPE_STOP: + process_rmpp_stop(agent, mad_recv_wc); + break; case IB_MGMT_RMPP_TYPE_ABORT: - /* TODO: process_rmpp_nack(agent, mad_recv_wc); */ + process_rmpp_abort(agent, mad_recv_wc); break; default: + abort_send(agent, rmpp_mad->mad_hdr.tid, + IB_MGMT_RMPP_STATUS_BADT); + nack_recv(agent, mad_recv_wc, IB_MGMT_RMPP_STATUS_BADT); break; } out: @@ -714,7 +890,10 @@ int ib_process_rmpp_send_wc(struct 
ib_mad_send_wr_private *mad_send_wr, if (rmpp_mad->rmpp_hdr.rmpp_type != IB_MGMT_RMPP_TYPE_DATA) { msg = (struct ib_mad_send_buf *) (unsigned long) mad_send_wc->wr_id; - ib_free_send_mad(msg); + if (rmpp_mad->rmpp_hdr.rmpp_type == IB_MGMT_RMPP_TYPE_ACK) + ib_free_send_mad(msg); + else + free_msg(msg); return IB_RMPP_RESULT_INTERNAL; /* ACK, STOP, or ABORT */ } diff --git a/drivers/infiniband/include/ib_mad.h b/drivers/infiniband/include/ib_mad.h index 63237805d6af..9fcf6fc09035 100644 --- a/drivers/infiniband/include/ib_mad.h +++ b/drivers/infiniband/include/ib_mad.h @@ -90,6 +90,7 @@ #define IB_MGMT_RMPP_STATUS_SUCCESS 0 #define IB_MGMT_RMPP_STATUS_RESX 1 +#define IB_MGMT_RMPP_STATUS_ABORT_MIN 118 #define IB_MGMT_RMPP_STATUS_T2L 118 #define IB_MGMT_RMPP_STATUS_BAD_LEN 119 #define IB_MGMT_RMPP_STATUS_BAD_SEG 120 @@ -100,6 +101,7 @@ #define IB_MGMT_RMPP_STATUS_UNV 125 #define IB_MGMT_RMPP_STATUS_TMR 126 #define IB_MGMT_RMPP_STATUS_UNSPEC 127 +#define IB_MGMT_RMPP_STATUS_ABORT_MAX 127 #define IB_QP0 0 #define IB_QP1 __constant_htonl(1) From 1ad62a19f177e61d4dde111ba35fb4badd0c2106 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Wed, 24 Aug 2005 14:41:51 -0700 Subject: [PATCH 195/584] [PATCH] IPoIB: Fix device removal race Currently we may have work scheduled in default kernel workqueue when the device is going down. The device could get freed before this workqueue gets serviced. I am actually seeing this causing system hangs. The following patch fixes this by using ipoib_workqueue which gets flushed when the device is going down. Signed-off-by: Michael S. 
Tsirkin Signed-off-by: Roland Dreier --- drivers/infiniband/ulp/ipoib/ipoib_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c index 57c3ac98991f..0e8ac138e355 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_main.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c @@ -672,7 +672,7 @@ static void ipoib_set_mcast_list(struct net_device *dev) { struct ipoib_dev_priv *priv = netdev_priv(dev); - schedule_work(&priv->restart_task); + queue_work(ipoib_workqueue, &priv->restart_task); } static void ipoib_neigh_destructor(struct neighbour *n) From a4d61e84804f3b14cc35c5e2af768a07c0f64ef6 Mon Sep 17 00:00:00 2001 From: Roland Dreier Date: Thu, 25 Aug 2005 13:40:04 -0700 Subject: [PATCH 196/584] [PATCH] IB: move include files to include/rdma Move the InfiniBand headers from drivers/infiniband/include to include/rdma. This allows InfiniBand-using code to live elsewhere, and lets us remove the ugly EXTRA_CFLAGS include path from the InfiniBand Makefiles. 
Signed-off-by: Roland Dreier --- drivers/infiniband/core/Makefile | 2 -- drivers/infiniband/core/agent.c | 2 +- drivers/infiniband/core/cache.c | 2 +- drivers/infiniband/core/cm.c | 4 ++-- drivers/infiniband/core/cm_msgs.h | 2 +- drivers/infiniband/core/core_priv.h | 2 +- drivers/infiniband/core/fmr_pool.c | 2 +- drivers/infiniband/core/mad_priv.h | 4 ++-- drivers/infiniband/core/packer.c | 2 +- drivers/infiniband/core/sa_query.c | 4 ++-- drivers/infiniband/core/smi.c | 2 +- drivers/infiniband/core/sysfs.c | 2 +- drivers/infiniband/core/ucm.h | 4 ++-- drivers/infiniband/core/ud_header.c | 2 +- drivers/infiniband/core/user_mad.c | 4 ++-- drivers/infiniband/core/uverbs.h | 4 ++-- drivers/infiniband/core/verbs.c | 4 ++-- drivers/infiniband/hw/mthca/Makefile | 2 -- drivers/infiniband/hw/mthca/mthca_av.c | 4 ++-- drivers/infiniband/hw/mthca/mthca_cmd.c | 2 +- drivers/infiniband/hw/mthca/mthca_cmd.h | 2 +- drivers/infiniband/hw/mthca/mthca_cq.c | 2 +- drivers/infiniband/hw/mthca/mthca_mad.c | 6 +++--- drivers/infiniband/hw/mthca/mthca_provider.c | 2 +- drivers/infiniband/hw/mthca/mthca_provider.h | 4 ++-- drivers/infiniband/hw/mthca/mthca_qp.c | 6 +++--- drivers/infiniband/ulp/ipoib/Makefile | 2 -- drivers/infiniband/ulp/ipoib/ipoib.h | 6 +++--- drivers/infiniband/ulp/ipoib/ipoib_ib.c | 2 +- drivers/infiniband/ulp/ipoib/ipoib_verbs.c | 2 +- {drivers/infiniband/include => include/rdma}/ib_cache.h | 2 +- {drivers/infiniband/include => include/rdma}/ib_cm.h | 4 ++-- {drivers/infiniband/include => include/rdma}/ib_fmr_pool.h | 2 +- {drivers/infiniband/include => include/rdma}/ib_mad.h | 2 +- {drivers/infiniband/include => include/rdma}/ib_pack.h | 2 +- {drivers/infiniband/include => include/rdma}/ib_sa.h | 4 ++-- {drivers/infiniband/include => include/rdma}/ib_smi.h | 2 +- {drivers/infiniband/include => include/rdma}/ib_user_cm.h | 0 {drivers/infiniband/include => include/rdma}/ib_user_mad.h | 0 .../infiniband/include => include/rdma}/ib_user_verbs.h | 0 
{drivers/infiniband/include => include/rdma}/ib_verbs.h | 0 41 files changed, 51 insertions(+), 57 deletions(-) rename {drivers/infiniband/include => include/rdma}/ib_cache.h (99%) rename {drivers/infiniband/include => include/rdma}/ib_cm.h (99%) rename {drivers/infiniband/include => include/rdma}/ib_fmr_pool.h (99%) rename {drivers/infiniband/include => include/rdma}/ib_mad.h (99%) rename {drivers/infiniband/include => include/rdma}/ib_pack.h (99%) rename {drivers/infiniband/include => include/rdma}/ib_sa.h (99%) rename {drivers/infiniband/include => include/rdma}/ib_smi.h (99%) rename {drivers/infiniband/include => include/rdma}/ib_user_cm.h (100%) rename {drivers/infiniband/include => include/rdma}/ib_user_mad.h (100%) rename {drivers/infiniband/include => include/rdma}/ib_user_verbs.h (100%) rename {drivers/infiniband/include => include/rdma}/ib_verbs.h (100%) diff --git a/drivers/infiniband/core/Makefile b/drivers/infiniband/core/Makefile index 10be36731ed7..678a7e097f32 100644 --- a/drivers/infiniband/core/Makefile +++ b/drivers/infiniband/core/Makefile @@ -1,5 +1,3 @@ -EXTRA_CFLAGS += -Idrivers/infiniband/include - obj-$(CONFIG_INFINIBAND) += ib_core.o ib_mad.o ib_sa.o \ ib_cm.o ib_umad.o ib_ucm.o obj-$(CONFIG_INFINIBAND_USER_VERBS) += ib_uverbs.o diff --git a/drivers/infiniband/core/agent.c b/drivers/infiniband/core/agent.c index 3d36feb8c5ba..5ac86f566dc0 100644 --- a/drivers/infiniband/core/agent.c +++ b/drivers/infiniband/core/agent.c @@ -41,7 +41,7 @@ #include -#include +#include #include "smi.h" #include "agent_priv.h" diff --git a/drivers/infiniband/core/cache.c b/drivers/infiniband/core/cache.c index 9376e53f50f2..f014e639088c 100644 --- a/drivers/infiniband/core/cache.c +++ b/drivers/infiniband/core/cache.c @@ -39,7 +39,7 @@ #include #include -#include +#include #include "core_priv.h" diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c index 781be773a186..4de93ba274a6 100644 --- a/drivers/infiniband/core/cm.c +++ 
b/drivers/infiniband/core/cm.c @@ -43,8 +43,8 @@ #include #include -#include -#include +#include +#include #include "cm_msgs.h" MODULE_AUTHOR("Sean Hefty"); diff --git a/drivers/infiniband/core/cm_msgs.h b/drivers/infiniband/core/cm_msgs.h index 807a9fbb38f5..813ab70bf6d5 100644 --- a/drivers/infiniband/core/cm_msgs.h +++ b/drivers/infiniband/core/cm_msgs.h @@ -34,7 +34,7 @@ #if !defined(CM_MSGS_H) #define CM_MSGS_H -#include +#include /* * Parameters to routines below should be in network-byte order, and values diff --git a/drivers/infiniband/core/core_priv.h b/drivers/infiniband/core/core_priv.h index 797049626ff6..7ad47a4b166b 100644 --- a/drivers/infiniband/core/core_priv.h +++ b/drivers/infiniband/core/core_priv.h @@ -38,7 +38,7 @@ #include #include -#include +#include int ib_device_register_sysfs(struct ib_device *device); void ib_device_unregister_sysfs(struct ib_device *device); diff --git a/drivers/infiniband/core/fmr_pool.c b/drivers/infiniband/core/fmr_pool.c index 1f7374927f38..d34a6f1c4f4c 100644 --- a/drivers/infiniband/core/fmr_pool.c +++ b/drivers/infiniband/core/fmr_pool.c @@ -39,7 +39,7 @@ #include #include -#include +#include #include "core_priv.h" diff --git a/drivers/infiniband/core/mad_priv.h b/drivers/infiniband/core/mad_priv.h index 807b0f366353..f1ba794e0daa 100644 --- a/drivers/infiniband/core/mad_priv.h +++ b/drivers/infiniband/core/mad_priv.h @@ -40,8 +40,8 @@ #include #include #include -#include -#include +#include +#include #define PFX "ib_mad: " diff --git a/drivers/infiniband/core/packer.c b/drivers/infiniband/core/packer.c index ed1684b09f92..35df5010e723 100644 --- a/drivers/infiniband/core/packer.c +++ b/drivers/infiniband/core/packer.c @@ -33,7 +33,7 @@ * $Id: packer.c 1349 2004-12-16 21:09:43Z roland $ */ -#include +#include static u64 value_read(int offset, int size, void *structure) { diff --git a/drivers/infiniband/core/sa_query.c b/drivers/infiniband/core/sa_query.c index b03bed2ed87a..126ac80db7b8 100644 --- 
a/drivers/infiniband/core/sa_query.c +++ b/drivers/infiniband/core/sa_query.c @@ -44,8 +44,8 @@ #include #include -#include -#include +#include +#include MODULE_AUTHOR("Roland Dreier"); MODULE_DESCRIPTION("InfiniBand subnet administration query support"); diff --git a/drivers/infiniband/core/smi.c b/drivers/infiniband/core/smi.c index 1c0d733c3fce..35852e794e26 100644 --- a/drivers/infiniband/core/smi.c +++ b/drivers/infiniband/core/smi.c @@ -37,7 +37,7 @@ * $Id: smi.c 1389 2004-12-27 22:56:47Z roland $ */ -#include +#include #include "smi.h" /* diff --git a/drivers/infiniband/core/sysfs.c b/drivers/infiniband/core/sysfs.c index bf7334e7fac6..fae1c2dcee51 100644 --- a/drivers/infiniband/core/sysfs.c +++ b/drivers/infiniband/core/sysfs.c @@ -36,7 +36,7 @@ #include "core_priv.h" -#include +#include struct ib_port { struct kobject kobj; diff --git a/drivers/infiniband/core/ucm.h b/drivers/infiniband/core/ucm.h index 039e8a3783c0..c8819b928a1b 100644 --- a/drivers/infiniband/core/ucm.h +++ b/drivers/infiniband/core/ucm.h @@ -40,8 +40,8 @@ #include #include -#include -#include +#include +#include #define IB_UCM_CM_ID_INVALID 0xffffffff diff --git a/drivers/infiniband/core/ud_header.c b/drivers/infiniband/core/ud_header.c index 89cd76d7c5a5..527b23450ab3 100644 --- a/drivers/infiniband/core/ud_header.c +++ b/drivers/infiniband/core/ud_header.c @@ -35,7 +35,7 @@ #include -#include +#include #define STRUCT_FIELD(header, field) \ .struct_offset_bytes = offsetof(struct ib_unpacked_ ## header, field), \ diff --git a/drivers/infiniband/core/user_mad.c b/drivers/infiniband/core/user_mad.c index 16d91f187758..7c2f03057ddb 100644 --- a/drivers/infiniband/core/user_mad.c +++ b/drivers/infiniband/core/user_mad.c @@ -49,8 +49,8 @@ #include #include -#include -#include +#include +#include MODULE_AUTHOR("Roland Dreier"); MODULE_DESCRIPTION("InfiniBand userspace MAD packet access"); diff --git a/drivers/infiniband/core/uverbs.h b/drivers/infiniband/core/uverbs.h index 
db161810c0c0..180b3d4765e4 100644 --- a/drivers/infiniband/core/uverbs.h +++ b/drivers/infiniband/core/uverbs.h @@ -45,8 +45,8 @@ #include #include -#include -#include +#include +#include struct ib_uverbs_device { int devnum; diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c index c035510c5a36..5081d903e561 100644 --- a/drivers/infiniband/core/verbs.c +++ b/drivers/infiniband/core/verbs.c @@ -41,8 +41,8 @@ #include #include -#include -#include +#include +#include /* Protection domains */ diff --git a/drivers/infiniband/hw/mthca/Makefile b/drivers/infiniband/hw/mthca/Makefile index 1eb87408e069..c44f7bae5424 100644 --- a/drivers/infiniband/hw/mthca/Makefile +++ b/drivers/infiniband/hw/mthca/Makefile @@ -1,5 +1,3 @@ -EXTRA_CFLAGS += -Idrivers/infiniband/include - ifdef CONFIG_INFINIBAND_MTHCA_DEBUG EXTRA_CFLAGS += -DDEBUG endif diff --git a/drivers/infiniband/hw/mthca/mthca_av.c b/drivers/infiniband/hw/mthca/mthca_av.c index e596210f11b3..889e85096736 100644 --- a/drivers/infiniband/hw/mthca/mthca_av.c +++ b/drivers/infiniband/hw/mthca/mthca_av.c @@ -35,8 +35,8 @@ #include -#include -#include +#include +#include #include "mthca_dev.h" diff --git a/drivers/infiniband/hw/mthca/mthca_cmd.c b/drivers/infiniband/hw/mthca/mthca_cmd.c index 60e4b213635a..cc758a2d2bc6 100644 --- a/drivers/infiniband/hw/mthca/mthca_cmd.c +++ b/drivers/infiniband/hw/mthca/mthca_cmd.c @@ -37,7 +37,7 @@ #include #include #include -#include +#include #include "mthca_dev.h" #include "mthca_config_reg.h" diff --git a/drivers/infiniband/hw/mthca/mthca_cmd.h b/drivers/infiniband/hw/mthca/mthca_cmd.h index ef2a765d6953..65f976a13e02 100644 --- a/drivers/infiniband/hw/mthca/mthca_cmd.h +++ b/drivers/infiniband/hw/mthca/mthca_cmd.h @@ -36,7 +36,7 @@ #ifndef MTHCA_CMD_H #define MTHCA_CMD_H -#include +#include #define MTHCA_MAILBOX_SIZE 4096 diff --git a/drivers/infiniband/hw/mthca/mthca_cq.c b/drivers/infiniband/hw/mthca/mthca_cq.c index 5ece609c2ee0..8600b6c3e0c2 100644 --- 
a/drivers/infiniband/hw/mthca/mthca_cq.c +++ b/drivers/infiniband/hw/mthca/mthca_cq.c @@ -39,7 +39,7 @@ #include #include -#include +#include #include "mthca_dev.h" #include "mthca_cmd.h" diff --git a/drivers/infiniband/hw/mthca/mthca_mad.c b/drivers/infiniband/hw/mthca/mthca_mad.c index 64fa78722cf6..9804174f7f3c 100644 --- a/drivers/infiniband/hw/mthca/mthca_mad.c +++ b/drivers/infiniband/hw/mthca/mthca_mad.c @@ -34,9 +34,9 @@ * $Id: mthca_mad.c 1349 2004-12-16 21:09:43Z roland $ */ -#include -#include -#include +#include +#include +#include #include "mthca_dev.h" #include "mthca_cmd.h" diff --git a/drivers/infiniband/hw/mthca/mthca_provider.c b/drivers/infiniband/hw/mthca/mthca_provider.c index 23ceb26af8fe..1c1c2e230871 100644 --- a/drivers/infiniband/hw/mthca/mthca_provider.c +++ b/drivers/infiniband/hw/mthca/mthca_provider.c @@ -36,7 +36,7 @@ * $Id: mthca_provider.c 1397 2004-12-28 05:09:00Z roland $ */ -#include +#include #include #include "mthca_dev.h" diff --git a/drivers/infiniband/hw/mthca/mthca_provider.h b/drivers/infiniband/hw/mthca/mthca_provider.h index 024015678c8a..bcd4b01a339c 100644 --- a/drivers/infiniband/hw/mthca/mthca_provider.h +++ b/drivers/infiniband/hw/mthca/mthca_provider.h @@ -37,8 +37,8 @@ #ifndef MTHCA_PROVIDER_H #define MTHCA_PROVIDER_H -#include -#include +#include +#include #define MTHCA_MPT_FLAG_ATOMIC (1 << 14) #define MTHCA_MPT_FLAG_REMOTE_WRITE (1 << 13) diff --git a/drivers/infiniband/hw/mthca/mthca_qp.c b/drivers/infiniband/hw/mthca/mthca_qp.c index 7607b9800736..0164b84d4ec6 100644 --- a/drivers/infiniband/hw/mthca/mthca_qp.c +++ b/drivers/infiniband/hw/mthca/mthca_qp.c @@ -37,9 +37,9 @@ #include -#include -#include -#include +#include +#include +#include #include "mthca_dev.h" #include "mthca_cmd.h" diff --git a/drivers/infiniband/ulp/ipoib/Makefile b/drivers/infiniband/ulp/ipoib/Makefile index 394bc08abc6f..8935e74ae3f8 100644 --- a/drivers/infiniband/ulp/ipoib/Makefile +++ b/drivers/infiniband/ulp/ipoib/Makefile @@ -1,5 
+1,3 @@ -EXTRA_CFLAGS += -Idrivers/infiniband/include - obj-$(CONFIG_INFINIBAND_IPOIB) += ib_ipoib.o ib_ipoib-y := ipoib_main.o \ diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h index e23041c7be8f..bea960b8191f 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib.h +++ b/drivers/infiniband/ulp/ipoib/ipoib.h @@ -51,9 +51,9 @@ #include #include -#include -#include -#include +#include +#include +#include /* constants */ diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/drivers/infiniband/ulp/ipoib/ipoib_ib.c index cb4f8062677c..ef0e3894863c 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_ib.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c @@ -38,7 +38,7 @@ #include #include -#include +#include #include "ipoib.h" diff --git a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c index 21b58aa76fee..79f59d0563ed 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c @@ -33,7 +33,7 @@ * $Id: ipoib_verbs.c 1349 2004-12-16 21:09:43Z roland $ */ -#include +#include #include "ipoib.h" diff --git a/drivers/infiniband/include/ib_cache.h b/include/rdma/ib_cache.h similarity index 99% rename from drivers/infiniband/include/ib_cache.h rename to include/rdma/ib_cache.h index fff031bc95df..5bf9834f7dca 100644 --- a/drivers/infiniband/include/ib_cache.h +++ b/include/rdma/ib_cache.h @@ -37,7 +37,7 @@ #ifndef _IB_CACHE_H #define _IB_CACHE_H -#include +#include /** * ib_get_cached_gid - Returns a cached GID table entry diff --git a/drivers/infiniband/include/ib_cm.h b/include/rdma/ib_cm.h similarity index 99% rename from drivers/infiniband/include/ib_cm.h rename to include/rdma/ib_cm.h index 8202ad2e6435..77fe9039209b 100644 --- a/drivers/infiniband/include/ib_cm.h +++ b/include/rdma/ib_cm.h @@ -37,8 +37,8 @@ #if !defined(IB_CM_H) #define IB_CM_H -#include -#include +#include +#include enum ib_cm_state { IB_CM_IDLE, diff --git 
a/drivers/infiniband/include/ib_fmr_pool.h b/include/rdma/ib_fmr_pool.h similarity index 99% rename from drivers/infiniband/include/ib_fmr_pool.h rename to include/rdma/ib_fmr_pool.h index 6c9e24d6e144..86b7e93f198b 100644 --- a/drivers/infiniband/include/ib_fmr_pool.h +++ b/include/rdma/ib_fmr_pool.h @@ -36,7 +36,7 @@ #if !defined(IB_FMR_POOL_H) #define IB_FMR_POOL_H -#include +#include struct ib_fmr_pool; diff --git a/drivers/infiniband/include/ib_mad.h b/include/rdma/ib_mad.h similarity index 99% rename from drivers/infiniband/include/ib_mad.h rename to include/rdma/ib_mad.h index 9fcf6fc09035..fc6b1c18ffc6 100644 --- a/drivers/infiniband/include/ib_mad.h +++ b/include/rdma/ib_mad.h @@ -41,7 +41,7 @@ #include -#include +#include /* Management base version */ #define IB_MGMT_BASE_VERSION 1 diff --git a/drivers/infiniband/include/ib_pack.h b/include/rdma/ib_pack.h similarity index 99% rename from drivers/infiniband/include/ib_pack.h rename to include/rdma/ib_pack.h index fe480f3e8654..f926020d6331 100644 --- a/drivers/infiniband/include/ib_pack.h +++ b/include/rdma/ib_pack.h @@ -35,7 +35,7 @@ #ifndef IB_PACK_H #define IB_PACK_H -#include +#include enum { IB_LRH_BYTES = 8, diff --git a/drivers/infiniband/include/ib_sa.h b/include/rdma/ib_sa.h similarity index 99% rename from drivers/infiniband/include/ib_sa.h rename to include/rdma/ib_sa.h index 326159c04aca..c022edfc49da 100644 --- a/drivers/infiniband/include/ib_sa.h +++ b/include/rdma/ib_sa.h @@ -38,8 +38,8 @@ #include -#include -#include +#include +#include enum { IB_SA_CLASS_VERSION = 2, /* IB spec version 1.1/1.2 */ diff --git a/drivers/infiniband/include/ib_smi.h b/include/rdma/ib_smi.h similarity index 99% rename from drivers/infiniband/include/ib_smi.h rename to include/rdma/ib_smi.h index c07b31cb9499..87f60737f695 100644 --- a/drivers/infiniband/include/ib_smi.h +++ b/include/rdma/ib_smi.h @@ -39,7 +39,7 @@ #if !defined( IB_SMI_H ) #define IB_SMI_H -#include +#include #define IB_SMP_DATA_SIZE 64 #define 
IB_SMP_MAX_PATH_HOPS 64 diff --git a/drivers/infiniband/include/ib_user_cm.h b/include/rdma/ib_user_cm.h similarity index 100% rename from drivers/infiniband/include/ib_user_cm.h rename to include/rdma/ib_user_cm.h diff --git a/drivers/infiniband/include/ib_user_mad.h b/include/rdma/ib_user_mad.h similarity index 100% rename from drivers/infiniband/include/ib_user_mad.h rename to include/rdma/ib_user_mad.h diff --git a/drivers/infiniband/include/ib_user_verbs.h b/include/rdma/ib_user_verbs.h similarity index 100% rename from drivers/infiniband/include/ib_user_verbs.h rename to include/rdma/ib_user_verbs.h diff --git a/drivers/infiniband/include/ib_verbs.h b/include/rdma/ib_verbs.h similarity index 100% rename from drivers/infiniband/include/ib_verbs.h rename to include/rdma/ib_verbs.h From d18d36b4edbb980c9de7fe00724c3ded5de1b7a7 Mon Sep 17 00:00:00 2001 From: Jeff Garzik Date: Sat, 27 Aug 2005 04:13:52 -0400 Subject: [PATCH 197/584] libata: fix a few alan-isms --- drivers/scsi/sata_qstor.c | 2 +- include/linux/libata.h | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/scsi/sata_qstor.c b/drivers/scsi/sata_qstor.c index 2926846cdd6a..93fd06fb4f15 100644 --- a/drivers/scsi/sata_qstor.c +++ b/drivers/scsi/sata_qstor.c @@ -198,7 +198,7 @@ static int qs_check_atapi_dma(struct ata_queued_cmd *qc) return 1; /* ATAPI DMA not supported */ } -static void qs_bmdma_stop(struct ata_queud_cmd *qc) +static void qs_bmdma_stop(struct ata_queued_cmd *qc) { /* nothing */ } diff --git a/include/linux/libata.h b/include/linux/libata.h index 33f3ab4eb827..7c09540c52bc 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -424,7 +424,6 @@ extern unsigned int ata_dev_classify(struct ata_taskfile *tf); extern void ata_dev_id_string(u16 *id, unsigned char *s, unsigned int ofs, unsigned int len); extern void ata_dev_config(struct ata_port *ap, unsigned int i); -extern void ata_dev_set_protocol(struct ata_device *dev); extern void ata_bmdma_setup 
(struct ata_queued_cmd *qc); extern void ata_bmdma_start (struct ata_queued_cmd *qc); extern void ata_bmdma_stop(struct ata_queued_cmd *qc); From 135932651fd1eeb95eb6c5d4f6652aae73fe2c24 Mon Sep 17 00:00:00 2001 From: Jeff Garzik Date: Sat, 27 Aug 2005 04:20:12 -0400 Subject: [PATCH 198/584] [libata scsi] fix read/write translation edge cases Fix bugs for unlikely edge cases noticed by Douglas Gilbert: - When READ(6)/WRITE(6) sector count == 0, treat it as 256 sectors - For other READ(x)/WRITE(x), when sector count == 0, error. We don't support successfully completing zero-length transfers at this time. --- drivers/scsi/libata-scsi.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/scsi/libata-scsi.c b/drivers/scsi/libata-scsi.c index f58311b8c050..4074e7877ba3 100644 --- a/drivers/scsi/libata-scsi.c +++ b/drivers/scsi/libata-scsi.c @@ -630,11 +630,19 @@ static unsigned int ata_scsi_rw_xlat(struct ata_queued_cmd *qc, u8 *scsicmd) tf->lbah = scsicmd[3]; VPRINTK("ten-byte command\n"); + if (qc->nsect == 0) /* we don't support length==0 cmds */ + return 1; return 0; } if (scsicmd[0] == READ_6 || scsicmd[0] == WRITE_6) { qc->nsect = tf->nsect = scsicmd[4]; + if (!qc->nsect) { + qc->nsect = 256; + if (lba48) + tf->hob_nsect = 1; + } + tf->lbal = scsicmd[3]; tf->lbam = scsicmd[2]; tf->lbah = scsicmd[1] & 0x1f; /* mask out reserved bits */ @@ -674,6 +682,8 @@ static unsigned int ata_scsi_rw_xlat(struct ata_queued_cmd *qc, u8 *scsicmd) tf->lbah = scsicmd[7]; VPRINTK("sixteen-byte command\n"); + if (qc->nsect == 0) /* we don't support length==0 cmds */ + return 1; return 0; } From 84a2ea1c2cee0288f96e0c6aa4f975d4d26508c7 Mon Sep 17 00:00:00 2001 From: Ralf Baechle Date: Thu, 25 Aug 2005 19:38:30 +0100 Subject: [PATCH 199/584] [PATCH] 6pack Timer initialization I dropped the timer initialization bits by accident when sending the p-persistence fix. This patch gets the driver to work again on halfduplex links. 
Signed-off-by: Ralf Baechle DL5RB Signed-off-by: Jeff Garzik --- drivers/net/hamradio/6pack.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/hamradio/6pack.c b/drivers/net/hamradio/6pack.c index f9e3be96963c..b59c65b9645b 100644 --- a/drivers/net/hamradio/6pack.c +++ b/drivers/net/hamradio/6pack.c @@ -668,6 +668,9 @@ static int sixpack_open(struct tty_struct *tty) netif_start_queue(dev); init_timer(&sp->tx_t); + sp->tx_t.function = sp_xmit_on_air; + sp->tx_t.data = (unsigned long) sp; + init_timer(&sp->resync_t); spin_unlock_bh(&sp->lock); From 214838a2108b4b1e18abce2e28d37996e9bf7c68 Mon Sep 17 00:00:00 2001 From: Ralf Baechle Date: Wed, 24 Aug 2005 18:01:33 +0100 Subject: [PATCH 200/584] [PATCH] Fix 6pack setting of MAC address Don't check type of sax25_family; dev_set_mac_address has already done that before and anyway, the type to check against would have been ARPHRD_AX25. We only got away because AF_AX25 and ARPHRD_AX25 both happen to be defined to the same value. Don't check sax25_ndigis either; it's value is insignificant for the purpose of setting the MAC address and the check has shown to break some application software for no good reason. 
Signed-off-by: Ralf Baechle DL5RB Signed-off-by: Jeff Garzik --- drivers/net/hamradio/6pack.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/drivers/net/hamradio/6pack.c b/drivers/net/hamradio/6pack.c index b59c65b9645b..0b230222bfea 100644 --- a/drivers/net/hamradio/6pack.c +++ b/drivers/net/hamradio/6pack.c @@ -308,12 +308,6 @@ static int sp_set_mac_address(struct net_device *dev, void *addr) { struct sockaddr_ax25 *sa = addr; - if (sa->sax25_family != AF_AX25) - return -EINVAL; - - if (!sa->sax25_ndigis) - return -EINVAL; - spin_lock_irq(&dev->xmit_lock); memcpy(dev->dev_addr, &sa->sax25_call, AX25_ADDR_LEN); spin_unlock_irq(&dev->xmit_lock); From 815f62bf742718458ba822a7e1f51f285eb997f2 Mon Sep 17 00:00:00 2001 From: Ralf Baechle Date: Wed, 24 Aug 2005 18:06:36 +0100 Subject: [PATCH 201/584] [PATCH] SMP rewrite of mkiss Rewrite the mkiss driver to make it SMP-proof following the example of 6pack.c. Signed-off-by: Ralf Baechle DL5RB Signed-off-by: Jeff Garzik --- drivers/net/hamradio/Kconfig | 2 +- drivers/net/hamradio/mkiss.c | 1169 ++++++++++++++++------------------ 2 files changed, 566 insertions(+), 605 deletions(-) diff --git a/drivers/net/hamradio/Kconfig b/drivers/net/hamradio/Kconfig index 0cd54306e636..de087cd609d9 100644 --- a/drivers/net/hamradio/Kconfig +++ b/drivers/net/hamradio/Kconfig @@ -1,6 +1,6 @@ config MKISS tristate "Serial port KISS driver" - depends on AX25 && BROKEN_ON_SMP + depends on AX25 ---help--- KISS is a protocol used for the exchange of data between a computer and a Terminal Node Controller (a small embedded system commonly diff --git a/drivers/net/hamradio/mkiss.c b/drivers/net/hamradio/mkiss.c index e94952e799fe..63b1a2b86acb 100644 --- a/drivers/net/hamradio/mkiss.c +++ b/drivers/net/hamradio/mkiss.c @@ -1,30 +1,19 @@ /* - * MKISS Driver + * This program is free software; you can distribute it and/or modify it + * under the terms of the GNU General Public License (Version 2) as + * published by the Free Software 
Foundation. * - * This module: - * This module is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. * - * This module implements the AX.25 protocol for kernel-based - * devices like TTYs. It interfaces between a raw TTY, and the - * kernel's AX.25 protocol layers, just like slip.c. - * AX.25 needs to be separated from slip.c while slip.c is no - * longer a static kernel device since it is a module. - * This method clears the way to implement other kiss protocols - * like mkiss smack g8bpq ..... so far only mkiss is implemented. + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 59 Temple Place - Suite 330, Boston MA 02111-1307, USA. * - * Hans Alblas - * - * History - * Jonathan (G4KLX) Fixed to match Linux networking changes - 2.1.15. - * Matthias (DG2FEF) Added support for FlexNet CRC (on special request) - * Fixed bug in ax25_close(): dev_lock_wait() was - * called twice, causing a deadlock. - * Jeroen (PE1RXQ) Removed old MKISS_MAGIC stuff and calls to - * MOD_*_USE_COUNT - * Remove cli() and fix rtnl lock usage. 
+ * Copyright (C) Hans Alblas PE1AYX + * Copyright (C) 2004, 05 Ralf Baechle DL5RB */ #include @@ -50,174 +39,296 @@ #include -#include "mkiss.h" - #ifdef CONFIG_INET #include #include #endif -static char banner[] __initdata = KERN_INFO "mkiss: AX.25 Multikiss, Hans Albas PE1AYX\n"; +#define AX_MTU 236 -typedef struct ax25_ctrl { - struct ax_disp ctrl; /* */ - struct net_device dev; /* the device */ -} ax25_ctrl_t; +/* SLIP/KISS protocol characters. */ +#define END 0300 /* indicates end of frame */ +#define ESC 0333 /* indicates byte stuffing */ +#define ESC_END 0334 /* ESC ESC_END means END 'data' */ +#define ESC_ESC 0335 /* ESC ESC_ESC means ESC 'data' */ -static ax25_ctrl_t **ax25_ctrls; +struct mkiss { + struct tty_struct *tty; /* ptr to TTY structure */ + struct net_device *dev; /* easy for intr handling */ -int ax25_maxdev = AX25_MAXDEV; /* Can be overridden with insmod! */ + /* These are pointers to the malloc()ed frame buffers. */ + spinlock_t buflock;/* lock for rbuf and xbuf */ + unsigned char *rbuff; /* receiver buffer */ + int rcount; /* received chars counter */ + unsigned char *xbuff; /* transmitter buffer */ + unsigned char *xhead; /* pointer to next byte to XMIT */ + int xleft; /* bytes left in XMIT queue */ -static struct tty_ldisc ax_ldisc; + struct net_device_stats stats; -static int ax25_init(struct net_device *); -static int kiss_esc(unsigned char *, unsigned char *, int); -static int kiss_esc_crc(unsigned char *, unsigned char *, unsigned short, int); -static void kiss_unesc(struct ax_disp *, unsigned char); + /* Detailed SLIP statistics. */ + int mtu; /* Our mtu (to spot changes!) */ + int buffsize; /* Max buffers sizes */ -/*---------------------------------------------------------------------------*/ + unsigned long flags; /* Flag values/ mode etc */ + /* long req'd: used by set_bit --RR */ +#define AXF_INUSE 0 /* Channel in use */ +#define AXF_ESCAPE 1 /* ESC received */ +#define AXF_ERROR 2 /* Parity, etc. 
error */ +#define AXF_KEEPTEST 3 /* Keepalive test flag */ +#define AXF_OUTWAIT 4 /* is outpacket was flag */ -static const unsigned short Crc_flex_table[] = { - 0x0f87, 0x1e0e, 0x2c95, 0x3d1c, 0x49a3, 0x582a, 0x6ab1, 0x7b38, - 0x83cf, 0x9246, 0xa0dd, 0xb154, 0xc5eb, 0xd462, 0xe6f9, 0xf770, - 0x1f06, 0x0e8f, 0x3c14, 0x2d9d, 0x5922, 0x48ab, 0x7a30, 0x6bb9, - 0x934e, 0x82c7, 0xb05c, 0xa1d5, 0xd56a, 0xc4e3, 0xf678, 0xe7f1, - 0x2e85, 0x3f0c, 0x0d97, 0x1c1e, 0x68a1, 0x7928, 0x4bb3, 0x5a3a, - 0xa2cd, 0xb344, 0x81df, 0x9056, 0xe4e9, 0xf560, 0xc7fb, 0xd672, - 0x3e04, 0x2f8d, 0x1d16, 0x0c9f, 0x7820, 0x69a9, 0x5b32, 0x4abb, - 0xb24c, 0xa3c5, 0x915e, 0x80d7, 0xf468, 0xe5e1, 0xd77a, 0xc6f3, - 0x4d83, 0x5c0a, 0x6e91, 0x7f18, 0x0ba7, 0x1a2e, 0x28b5, 0x393c, - 0xc1cb, 0xd042, 0xe2d9, 0xf350, 0x87ef, 0x9666, 0xa4fd, 0xb574, - 0x5d02, 0x4c8b, 0x7e10, 0x6f99, 0x1b26, 0x0aaf, 0x3834, 0x29bd, - 0xd14a, 0xc0c3, 0xf258, 0xe3d1, 0x976e, 0x86e7, 0xb47c, 0xa5f5, - 0x6c81, 0x7d08, 0x4f93, 0x5e1a, 0x2aa5, 0x3b2c, 0x09b7, 0x183e, - 0xe0c9, 0xf140, 0xc3db, 0xd252, 0xa6ed, 0xb764, 0x85ff, 0x9476, - 0x7c00, 0x6d89, 0x5f12, 0x4e9b, 0x3a24, 0x2bad, 0x1936, 0x08bf, - 0xf048, 0xe1c1, 0xd35a, 0xc2d3, 0xb66c, 0xa7e5, 0x957e, 0x84f7, - 0x8b8f, 0x9a06, 0xa89d, 0xb914, 0xcdab, 0xdc22, 0xeeb9, 0xff30, - 0x07c7, 0x164e, 0x24d5, 0x355c, 0x41e3, 0x506a, 0x62f1, 0x7378, - 0x9b0e, 0x8a87, 0xb81c, 0xa995, 0xdd2a, 0xcca3, 0xfe38, 0xefb1, - 0x1746, 0x06cf, 0x3454, 0x25dd, 0x5162, 0x40eb, 0x7270, 0x63f9, - 0xaa8d, 0xbb04, 0x899f, 0x9816, 0xeca9, 0xfd20, 0xcfbb, 0xde32, - 0x26c5, 0x374c, 0x05d7, 0x145e, 0x60e1, 0x7168, 0x43f3, 0x527a, - 0xba0c, 0xab85, 0x991e, 0x8897, 0xfc28, 0xeda1, 0xdf3a, 0xceb3, - 0x3644, 0x27cd, 0x1556, 0x04df, 0x7060, 0x61e9, 0x5372, 0x42fb, - 0xc98b, 0xd802, 0xea99, 0xfb10, 0x8faf, 0x9e26, 0xacbd, 0xbd34, - 0x45c3, 0x544a, 0x66d1, 0x7758, 0x03e7, 0x126e, 0x20f5, 0x317c, - 0xd90a, 0xc883, 0xfa18, 0xeb91, 0x9f2e, 0x8ea7, 0xbc3c, 0xadb5, - 0x5542, 0x44cb, 0x7650, 0x67d9, 0x1366, 0x02ef, 0x3074, 
0x21fd, - 0xe889, 0xf900, 0xcb9b, 0xda12, 0xaead, 0xbf24, 0x8dbf, 0x9c36, - 0x64c1, 0x7548, 0x47d3, 0x565a, 0x22e5, 0x336c, 0x01f7, 0x107e, - 0xf808, 0xe981, 0xdb1a, 0xca93, 0xbe2c, 0xafa5, 0x9d3e, 0x8cb7, - 0x7440, 0x65c9, 0x5752, 0x46db, 0x3264, 0x23ed, 0x1176, 0x00ff + int mode; + int crcmode; /* MW: for FlexNet, SMACK etc. */ +#define CRC_MODE_NONE 0 +#define CRC_MODE_FLEX 1 +#define CRC_MODE_SMACK 2 + + atomic_t refcnt; + struct semaphore dead_sem; }; /*---------------------------------------------------------------------------*/ +static const unsigned short crc_flex_table[] = { + 0x0f87, 0x1e0e, 0x2c95, 0x3d1c, 0x49a3, 0x582a, 0x6ab1, 0x7b38, + 0x83cf, 0x9246, 0xa0dd, 0xb154, 0xc5eb, 0xd462, 0xe6f9, 0xf770, + 0x1f06, 0x0e8f, 0x3c14, 0x2d9d, 0x5922, 0x48ab, 0x7a30, 0x6bb9, + 0x934e, 0x82c7, 0xb05c, 0xa1d5, 0xd56a, 0xc4e3, 0xf678, 0xe7f1, + 0x2e85, 0x3f0c, 0x0d97, 0x1c1e, 0x68a1, 0x7928, 0x4bb3, 0x5a3a, + 0xa2cd, 0xb344, 0x81df, 0x9056, 0xe4e9, 0xf560, 0xc7fb, 0xd672, + 0x3e04, 0x2f8d, 0x1d16, 0x0c9f, 0x7820, 0x69a9, 0x5b32, 0x4abb, + 0xb24c, 0xa3c5, 0x915e, 0x80d7, 0xf468, 0xe5e1, 0xd77a, 0xc6f3, + 0x4d83, 0x5c0a, 0x6e91, 0x7f18, 0x0ba7, 0x1a2e, 0x28b5, 0x393c, + 0xc1cb, 0xd042, 0xe2d9, 0xf350, 0x87ef, 0x9666, 0xa4fd, 0xb574, + 0x5d02, 0x4c8b, 0x7e10, 0x6f99, 0x1b26, 0x0aaf, 0x3834, 0x29bd, + 0xd14a, 0xc0c3, 0xf258, 0xe3d1, 0x976e, 0x86e7, 0xb47c, 0xa5f5, + 0x6c81, 0x7d08, 0x4f93, 0x5e1a, 0x2aa5, 0x3b2c, 0x09b7, 0x183e, + 0xe0c9, 0xf140, 0xc3db, 0xd252, 0xa6ed, 0xb764, 0x85ff, 0x9476, + 0x7c00, 0x6d89, 0x5f12, 0x4e9b, 0x3a24, 0x2bad, 0x1936, 0x08bf, + 0xf048, 0xe1c1, 0xd35a, 0xc2d3, 0xb66c, 0xa7e5, 0x957e, 0x84f7, + 0x8b8f, 0x9a06, 0xa89d, 0xb914, 0xcdab, 0xdc22, 0xeeb9, 0xff30, + 0x07c7, 0x164e, 0x24d5, 0x355c, 0x41e3, 0x506a, 0x62f1, 0x7378, + 0x9b0e, 0x8a87, 0xb81c, 0xa995, 0xdd2a, 0xcca3, 0xfe38, 0xefb1, + 0x1746, 0x06cf, 0x3454, 0x25dd, 0x5162, 0x40eb, 0x7270, 0x63f9, + 0xaa8d, 0xbb04, 0x899f, 0x9816, 0xeca9, 0xfd20, 0xcfbb, 0xde32, + 0x26c5, 0x374c, 
0x05d7, 0x145e, 0x60e1, 0x7168, 0x43f3, 0x527a, + 0xba0c, 0xab85, 0x991e, 0x8897, 0xfc28, 0xeda1, 0xdf3a, 0xceb3, + 0x3644, 0x27cd, 0x1556, 0x04df, 0x7060, 0x61e9, 0x5372, 0x42fb, + 0xc98b, 0xd802, 0xea99, 0xfb10, 0x8faf, 0x9e26, 0xacbd, 0xbd34, + 0x45c3, 0x544a, 0x66d1, 0x7758, 0x03e7, 0x126e, 0x20f5, 0x317c, + 0xd90a, 0xc883, 0xfa18, 0xeb91, 0x9f2e, 0x8ea7, 0xbc3c, 0xadb5, + 0x5542, 0x44cb, 0x7650, 0x67d9, 0x1366, 0x02ef, 0x3074, 0x21fd, + 0xe889, 0xf900, 0xcb9b, 0xda12, 0xaead, 0xbf24, 0x8dbf, 0x9c36, + 0x64c1, 0x7548, 0x47d3, 0x565a, 0x22e5, 0x336c, 0x01f7, 0x107e, + 0xf808, 0xe981, 0xdb1a, 0xca93, 0xbe2c, 0xafa5, 0x9d3e, 0x8cb7, + 0x7440, 0x65c9, 0x5752, 0x46db, 0x3264, 0x23ed, 0x1176, 0x00ff +}; + static unsigned short calc_crc_flex(unsigned char *cp, int size) { - unsigned short crc = 0xffff; - - while (size--) - crc = (crc << 8) ^ Crc_flex_table[((crc >> 8) ^ *cp++) & 0xff]; + unsigned short crc = 0xffff; - return crc; + while (size--) + crc = (crc << 8) ^ crc_flex_table[((crc >> 8) ^ *cp++) & 0xff]; + + return crc; } -/*---------------------------------------------------------------------------*/ - static int check_crc_flex(unsigned char *cp, int size) { - unsigned short crc = 0xffff; + unsigned short crc = 0xffff; - if (size < 3) - return -1; + if (size < 3) + return -1; - while (size--) - crc = (crc << 8) ^ Crc_flex_table[((crc >> 8) ^ *cp++) & 0xff]; + while (size--) + crc = (crc << 8) ^ crc_flex_table[((crc >> 8) ^ *cp++) & 0xff]; - if ((crc & 0xffff) != 0x7070) - return -1; + if ((crc & 0xffff) != 0x7070) + return -1; - return 0; + return 0; +} + +/* + * Standard encapsulation + */ + +static int kiss_esc(unsigned char *s, unsigned char *d, int len) +{ + unsigned char *ptr = d; + unsigned char c; + + /* + * Send an initial END character to flush out any data that may have + * accumulated in the receiver due to line noise. 
+ */ + + *ptr++ = END; + + while (len-- > 0) { + switch (c = *s++) { + case END: + *ptr++ = ESC; + *ptr++ = ESC_END; + break; + case ESC: + *ptr++ = ESC; + *ptr++ = ESC_ESC; + break; + default: + *ptr++ = c; + break; + } + } + + *ptr++ = END; + + return ptr - d; +} + +/* + * MW: + * OK its ugly, but tell me a better solution without copying the + * packet to a temporary buffer :-) + */ +static int kiss_esc_crc(unsigned char *s, unsigned char *d, unsigned short crc, + int len) +{ + unsigned char *ptr = d; + unsigned char c=0; + + *ptr++ = END; + while (len > 0) { + if (len > 2) + c = *s++; + else if (len > 1) + c = crc >> 8; + else if (len > 0) + c = crc & 0xff; + + len--; + + switch (c) { + case END: + *ptr++ = ESC; + *ptr++ = ESC_END; + break; + case ESC: + *ptr++ = ESC; + *ptr++ = ESC_ESC; + break; + default: + *ptr++ = c; + break; + } + } + *ptr++ = END; + + return ptr - d; +} + +/* Send one completely decapsulated AX.25 packet to the AX.25 layer. */ +static void ax_bump(struct mkiss *ax) +{ + struct sk_buff *skb; + int count; + + spin_lock_bh(&ax->buflock); + if (ax->rbuff[0] > 0x0f) { + if (ax->rbuff[0] & 0x20) { + ax->crcmode = CRC_MODE_FLEX; + if (check_crc_flex(ax->rbuff, ax->rcount) < 0) { + ax->stats.rx_errors++; + return; + } + ax->rcount -= 2; + /* dl9sau bugfix: the trailling two bytes flexnet crc + * will not be passed to the kernel. 
thus we have + * to correct the kissparm signature, because it + * indicates a crc but there's none + */ + *ax->rbuff &= ~0x20; + } + } + spin_unlock_bh(&ax->buflock); + + count = ax->rcount; + + if ((skb = dev_alloc_skb(count)) == NULL) { + printk(KERN_ERR "mkiss: %s: memory squeeze, dropping packet.\n", + ax->dev->name); + ax->stats.rx_dropped++; + return; + } + + spin_lock_bh(&ax->buflock); + memcpy(skb_put(skb,count), ax->rbuff, count); + spin_unlock_bh(&ax->buflock); + skb->protocol = ax25_type_trans(skb, ax->dev); + netif_rx(skb); + ax->dev->last_rx = jiffies; + ax->stats.rx_packets++; + ax->stats.rx_bytes += count; +} + +static void kiss_unesc(struct mkiss *ax, unsigned char s) +{ + switch (s) { + case END: + /* drop keeptest bit = VSV */ + if (test_bit(AXF_KEEPTEST, &ax->flags)) + clear_bit(AXF_KEEPTEST, &ax->flags); + + if (!test_and_clear_bit(AXF_ERROR, &ax->flags) && (ax->rcount > 2)) + ax_bump(ax); + + clear_bit(AXF_ESCAPE, &ax->flags); + ax->rcount = 0; + return; + + case ESC: + set_bit(AXF_ESCAPE, &ax->flags); + return; + case ESC_ESC: + if (test_and_clear_bit(AXF_ESCAPE, &ax->flags)) + s = ESC; + break; + case ESC_END: + if (test_and_clear_bit(AXF_ESCAPE, &ax->flags)) + s = END; + break; + } + + spin_lock_bh(&ax->buflock); + if (!test_bit(AXF_ERROR, &ax->flags)) { + if (ax->rcount < ax->buffsize) { + ax->rbuff[ax->rcount++] = s; + spin_unlock_bh(&ax->buflock); + return; + } + + ax->stats.rx_over_errors++; + set_bit(AXF_ERROR, &ax->flags); + } + spin_unlock_bh(&ax->buflock); +} + +static int ax_set_mac_address(struct net_device *dev, void *addr) +{ + struct sockaddr_ax25 *sa = addr; + + spin_lock_irq(&dev->xmit_lock); + memcpy(dev->dev_addr, &sa->sax25_call, AX25_ADDR_LEN); + spin_unlock_irq(&dev->xmit_lock); + + return 0; } /*---------------------------------------------------------------------------*/ -/* Find a free channel, and link in this `tty' line. 
*/ -static inline struct ax_disp *ax_alloc(void) -{ - ax25_ctrl_t *axp=NULL; - int i; - - for (i = 0; i < ax25_maxdev; i++) { - axp = ax25_ctrls[i]; - - /* Not allocated ? */ - if (axp == NULL) - break; - - /* Not in use ? */ - if (!test_and_set_bit(AXF_INUSE, &axp->ctrl.flags)) - break; - } - - /* Sorry, too many, all slots in use */ - if (i >= ax25_maxdev) - return NULL; - - /* If no channels are available, allocate one */ - if (axp == NULL && (ax25_ctrls[i] = kmalloc(sizeof(ax25_ctrl_t), GFP_KERNEL)) != NULL) { - axp = ax25_ctrls[i]; - } - memset(axp, 0, sizeof(ax25_ctrl_t)); - - /* Initialize channel control data */ - set_bit(AXF_INUSE, &axp->ctrl.flags); - sprintf(axp->dev.name, "ax%d", i++); - axp->ctrl.tty = NULL; - axp->dev.base_addr = i; - axp->dev.priv = (void *)&axp->ctrl; - axp->dev.next = NULL; - axp->dev.init = ax25_init; - - if (axp != NULL) { - /* - * register device so that it can be ifconfig'ed - * ax25_init() will be called as a side-effect - * SIDE-EFFECT WARNING: ax25_init() CLEARS axp->ctrl ! - */ - if (register_netdev(&axp->dev) == 0) { - /* (Re-)Set the INUSE bit. Very Important! */ - set_bit(AXF_INUSE, &axp->ctrl.flags); - axp->ctrl.dev = &axp->dev; - axp->dev.priv = (void *) &axp->ctrl; - - return &axp->ctrl; - } else { - clear_bit(AXF_INUSE,&axp->ctrl.flags); - printk(KERN_ERR "mkiss: ax_alloc() - register_netdev() failure.\n"); - } - } - - return NULL; -} - -/* Free an AX25 channel. */ -static inline void ax_free(struct ax_disp *ax) -{ - /* Free all AX25 frame buffers. 
*/ - if (ax->rbuff) - kfree(ax->rbuff); - ax->rbuff = NULL; - if (ax->xbuff) - kfree(ax->xbuff); - ax->xbuff = NULL; - if (!test_and_clear_bit(AXF_INUSE, &ax->flags)) - printk(KERN_ERR "mkiss: %s: ax_free for already free unit.\n", ax->dev->name); -} - -static void ax_changedmtu(struct ax_disp *ax) +static void ax_changedmtu(struct mkiss *ax) { struct net_device *dev = ax->dev; unsigned char *xbuff, *rbuff, *oxbuff, *orbuff; @@ -237,7 +348,8 @@ static void ax_changedmtu(struct ax_disp *ax) rbuff = kmalloc(len + 4, GFP_ATOMIC); if (xbuff == NULL || rbuff == NULL) { - printk(KERN_ERR "mkiss: %s: unable to grow ax25 buffers, MTU change cancelled.\n", + printk(KERN_ERR "mkiss: %s: unable to grow ax25 buffers, " + "MTU change cancelled.\n", ax->dev->name); dev->mtu = ax->mtu; if (xbuff != NULL) @@ -259,7 +371,7 @@ static void ax_changedmtu(struct ax_disp *ax) memcpy(ax->xbuff, ax->xhead, ax->xleft); } else { ax->xleft = 0; - ax->tx_dropped++; + ax->stats.tx_dropped++; } } @@ -270,7 +382,7 @@ static void ax_changedmtu(struct ax_disp *ax) memcpy(ax->rbuff, orbuff, ax->rcount); } else { ax->rcount = 0; - ax->rx_over_errors++; + ax->stats.rx_over_errors++; set_bit(AXF_ERROR, &ax->flags); } } @@ -280,72 +392,14 @@ static void ax_changedmtu(struct ax_disp *ax) spin_unlock_bh(&ax->buflock); - if (oxbuff != NULL) - kfree(oxbuff); - if (orbuff != NULL) - kfree(orbuff); -} - - -/* Set the "sending" flag. This must be atomic. */ -static inline void ax_lock(struct ax_disp *ax) -{ - netif_stop_queue(ax->dev); -} - - -/* Clear the "sending" flag. This must be atomic. */ -static inline void ax_unlock(struct ax_disp *ax) -{ - netif_start_queue(ax->dev); -} - -/* Send one completely decapsulated AX.25 packet to the AX.25 layer. 
*/ -static void ax_bump(struct ax_disp *ax) -{ - struct sk_buff *skb; - int count; - - spin_lock_bh(&ax->buflock); - if (ax->rbuff[0] > 0x0f) { - if (ax->rbuff[0] & 0x20) { - ax->crcmode = CRC_MODE_FLEX; - if (check_crc_flex(ax->rbuff, ax->rcount) < 0) { - ax->rx_errors++; - return; - } - ax->rcount -= 2; - /* dl9sau bugfix: the trailling two bytes flexnet crc - * will not be passed to the kernel. thus we have - * to correct the kissparm signature, because it - * indicates a crc but there's none - */ - *ax->rbuff &= ~0x20; - } - } - spin_unlock_bh(&ax->buflock); - - count = ax->rcount; - - if ((skb = dev_alloc_skb(count)) == NULL) { - printk(KERN_ERR "mkiss: %s: memory squeeze, dropping packet.\n", ax->dev->name); - ax->rx_dropped++; - return; - } - - spin_lock_bh(&ax->buflock); - memcpy(skb_put(skb,count), ax->rbuff, count); - spin_unlock_bh(&ax->buflock); - skb->protocol = ax25_type_trans(skb, ax->dev); - netif_rx(skb); - ax->dev->last_rx = jiffies; - ax->rx_packets++; - ax->rx_bytes+=count; + kfree(oxbuff); + kfree(orbuff); } /* Encapsulate one AX.25 packet and stuff into a TTY queue. */ -static void ax_encaps(struct ax_disp *ax, unsigned char *icp, int len) +static void ax_encaps(struct net_device *dev, unsigned char *icp, int len) { + struct mkiss *ax = netdev_priv(dev); unsigned char *p; int actual, count; @@ -355,8 +409,8 @@ static void ax_encaps(struct ax_disp *ax, unsigned char *icp, int len) if (len > ax->mtu) { /* Sigh, shouldn't occur BUT ... 
*/ len = ax->mtu; printk(KERN_ERR "mkiss: %s: truncating oversized transmit packet!\n", ax->dev->name); - ax->tx_dropped++; - ax_unlock(ax); + ax->stats.tx_dropped++; + netif_start_queue(dev); return; } @@ -377,10 +431,11 @@ static void ax_encaps(struct ax_disp *ax, unsigned char *icp, int len) break; } - ax->tty->flags |= (1 << TTY_DO_WRITE_WAKEUP); + set_bit(TTY_DO_WRITE_WAKEUP, &ax->tty->flags); actual = ax->tty->driver->write(ax->tty, ax->xbuff, count); - ax->tx_packets++; - ax->tx_bytes+=actual; + ax->stats.tx_packets++; + ax->stats.tx_bytes += actual; + ax->dev->trans_start = jiffies; ax->xleft = count - actual; ax->xhead = ax->xbuff + actual; @@ -388,37 +443,10 @@ static void ax_encaps(struct ax_disp *ax, unsigned char *icp, int len) spin_unlock_bh(&ax->buflock); } -/* - * Called by the driver when there's room for more data. If we have - * more packets to send, we send them here. - */ -static void ax25_write_wakeup(struct tty_struct *tty) -{ - int actual; - struct ax_disp *ax = (struct ax_disp *) tty->disc_data; - - /* First make sure we're connected. */ - if (ax == NULL || ax->magic != AX25_MAGIC || !netif_running(ax->dev)) - return; - if (ax->xleft <= 0) { - /* Now serial buffer is almost free & we can start - * transmission of another packet - */ - tty->flags &= ~(1 << TTY_DO_WRITE_WAKEUP); - - netif_wake_queue(ax->dev); - return; - } - - actual = tty->driver->write(tty, ax->xhead, ax->xleft); - ax->xleft -= actual; - ax->xhead += actual; -} - /* Encapsulate an AX.25 packet and kick it into a TTY queue. 
*/ static int ax_xmit(struct sk_buff *skb, struct net_device *dev) { - struct ax_disp *ax = netdev_priv(dev); + struct mkiss *ax = netdev_priv(dev); if (!netif_running(dev)) { printk(KERN_ERR "mkiss: %s: xmit call when iface is down\n", dev->name); @@ -440,20 +468,30 @@ static int ax_xmit(struct sk_buff *skb, struct net_device *dev) "bad line quality" : "driver error"); ax->xleft = 0; - ax->tty->flags &= ~(1 << TTY_DO_WRITE_WAKEUP); - ax_unlock(ax); + clear_bit(TTY_DO_WRITE_WAKEUP, &ax->tty->flags); + netif_start_queue(dev); } /* We were not busy, so we are now... :-) */ if (skb != NULL) { - ax_lock(ax); - ax_encaps(ax, skb->data, skb->len); + netif_stop_queue(dev); + ax_encaps(dev, skb->data, skb->len); kfree_skb(skb); } return 0; } +static int ax_open_dev(struct net_device *dev) +{ + struct mkiss *ax = netdev_priv(dev); + + if (ax->tty == NULL) + return -ENODEV; + + return 0; +} + #if defined(CONFIG_AX25) || defined(CONFIG_AX25_MODULE) /* Return the frame type ID */ @@ -482,7 +520,7 @@ static int ax_rebuild_header(struct sk_buff *skb) /* Open the low-level part of the AX25 channel. Easy! */ static int ax_open(struct net_device *dev) { - struct ax_disp *ax = netdev_priv(dev); + struct mkiss *ax = netdev_priv(dev); unsigned long len; if (ax->tty == NULL) @@ -519,7 +557,6 @@ static int ax_open(struct net_device *dev) spin_lock_init(&ax->buflock); - netif_start_queue(dev); return 0; noxbuff: @@ -533,21 +570,223 @@ norbuff: /* Close the low-level part of the AX25 channel. Easy! 
*/ static int ax_close(struct net_device *dev) { - struct ax_disp *ax = netdev_priv(dev); + struct mkiss *ax = netdev_priv(dev); - if (ax->tty == NULL) - return -EBUSY; - - ax->tty->flags &= ~(1 << TTY_DO_WRITE_WAKEUP); + if (ax->tty) + clear_bit(TTY_DO_WRITE_WAKEUP, &ax->tty->flags); netif_stop_queue(dev); return 0; } -static int ax25_receive_room(struct tty_struct *tty) +static struct net_device_stats *ax_get_stats(struct net_device *dev) { - return 65536; /* We can handle an infinite amount of data. :-) */ + struct mkiss *ax = netdev_priv(dev); + + return &ax->stats; +} + +static void ax_setup(struct net_device *dev) +{ + static char ax25_bcast[AX25_ADDR_LEN] = + {'Q'<<1,'S'<<1,'T'<<1,' '<<1,' '<<1,' '<<1,'0'<<1}; + static char ax25_test[AX25_ADDR_LEN] = + {'L'<<1,'I'<<1,'N'<<1,'U'<<1,'X'<<1,' '<<1,'1'<<1}; + + /* Finish setting up the DEVICE info. */ + dev->mtu = AX_MTU; + dev->hard_start_xmit = ax_xmit; + dev->open = ax_open_dev; + dev->stop = ax_close; + dev->get_stats = ax_get_stats; + dev->set_mac_address = ax_set_mac_address; + dev->hard_header_len = 0; + dev->addr_len = 0; + dev->type = ARPHRD_AX25; + dev->tx_queue_len = 10; + dev->hard_header = ax_header; + dev->rebuild_header = ax_rebuild_header; + + memcpy(dev->broadcast, ax25_bcast, AX25_ADDR_LEN); + memcpy(dev->dev_addr, ax25_test, AX25_ADDR_LEN); + + dev->flags = IFF_BROADCAST | IFF_MULTICAST; +} + +/* + * We have a potential race on dereferencing tty->disc_data, because the tty + * layer provides no locking at all - thus one cpu could be running + * sixpack_receive_buf while another calls sixpack_close, which zeroes + * tty->disc_data and frees the memory that sixpack_receive_buf is using. The + * best way to fix this is to use a rwlock in the tty struct, but for now we + * use a single global rwlock for all ttys in ppp line discipline. 
+ */ +static rwlock_t disc_data_lock = RW_LOCK_UNLOCKED; + +static struct mkiss *mkiss_get(struct tty_struct *tty) +{ + struct mkiss *ax; + + read_lock(&disc_data_lock); + ax = tty->disc_data; + if (ax) + atomic_inc(&ax->refcnt); + read_unlock(&disc_data_lock); + + return ax; +} + +static void mkiss_put(struct mkiss *ax) +{ + if (atomic_dec_and_test(&ax->refcnt)) + up(&ax->dead_sem); +} + +static int mkiss_open(struct tty_struct *tty) +{ + struct net_device *dev; + struct mkiss *ax; + int err; + + if (!capable(CAP_NET_ADMIN)) + return -EPERM; + + dev = alloc_netdev(sizeof(struct mkiss), "ax%d", ax_setup); + if (!dev) { + err = -ENOMEM; + goto out; + } + + ax = netdev_priv(dev); + ax->dev = dev; + + spin_lock_init(&ax->buflock); + atomic_set(&ax->refcnt, 1); + init_MUTEX_LOCKED(&ax->dead_sem); + + ax->tty = tty; + tty->disc_data = ax; + + if (tty->driver->flush_buffer) + tty->driver->flush_buffer(tty); + + /* Restore default settings */ + dev->type = ARPHRD_AX25; + + /* Perform the low-level AX25 initialization. */ + if ((err = ax_open(ax->dev))) { + goto out_free_netdev; + } + + if (register_netdev(dev)) + goto out_free_buffers; + + netif_start_queue(dev); + + /* Done. We have linked the TTY line to a channel. */ + return 0; + +out_free_buffers: + kfree(ax->rbuff); + kfree(ax->xbuff); + +out_free_netdev: + free_netdev(dev); + +out: + return err; +} + +static void mkiss_close(struct tty_struct *tty) +{ + struct mkiss *ax; + + write_lock(&disc_data_lock); + ax = tty->disc_data; + tty->disc_data = NULL; + write_unlock(&disc_data_lock); + + if (ax == 0) + return; + + /* + * We have now ensured that nobody can start using ap from now on, but + * we have to wait for all existing users to finish. + */ + if (!atomic_dec_and_test(&ax->refcnt)) + down(&ax->dead_sem); + + unregister_netdev(ax->dev); + + /* Free all AX25 frame buffers. */ + kfree(ax->rbuff); + kfree(ax->xbuff); + + ax->tty = NULL; +} + +/* Perform I/O control on an active ax25 channel. 
*/ +static int mkiss_ioctl(struct tty_struct *tty, struct file *file, + unsigned int cmd, unsigned long arg) +{ + struct mkiss *ax = mkiss_get(tty); + struct net_device *dev = ax->dev; + unsigned int tmp, err; + + /* First make sure we're connected. */ + if (ax == NULL) + return -ENXIO; + + switch (cmd) { + case SIOCGIFNAME: + err = copy_to_user((void __user *) arg, ax->dev->name, + strlen(ax->dev->name) + 1) ? -EFAULT : 0; + break; + + case SIOCGIFENCAP: + err = put_user(4, (int __user *) arg); + break; + + case SIOCSIFENCAP: + if (get_user(tmp, (int __user *) arg)) { + err = -EFAULT; + break; + } + + ax->mode = tmp; + dev->addr_len = AX25_ADDR_LEN; + dev->hard_header_len = AX25_KISS_HEADER_LEN + + AX25_MAX_HEADER_LEN + 3; + dev->type = ARPHRD_AX25; + + err = 0; + break; + + case SIOCSIFHWADDR: { + char addr[AX25_ADDR_LEN]; +printk(KERN_INFO "In SIOCSIFHWADDR"); + + if (copy_from_user(&addr, + (void __user *) arg, AX25_ADDR_LEN)) { + err = -EFAULT; + break; + } + + spin_lock_irq(&dev->xmit_lock); + memcpy(dev->dev_addr, addr, AX25_ADDR_LEN); + spin_unlock_irq(&dev->xmit_lock); + + err = 0; + break; + } + default: + err = -ENOIOCTLCMD; + } + + mkiss_put(ax); + + return err; } /* @@ -556,11 +795,12 @@ static int ax25_receive_room(struct tty_struct *tty) * a block of data has been received, which can now be decapsulated * and sent on to the AX.25 layer for further processing. 
*/ -static void ax25_receive_buf(struct tty_struct *tty, const unsigned char *cp, char *fp, int count) +static void mkiss_receive_buf(struct tty_struct *tty, const unsigned char *cp, + char *fp, int count) { - struct ax_disp *ax = (struct ax_disp *) tty->disc_data; + struct mkiss *ax = mkiss_get(tty); - if (ax == NULL || ax->magic != AX25_MAGIC || !netif_running(ax->dev)) + if (!ax) return; /* @@ -574,312 +814,70 @@ static void ax25_receive_buf(struct tty_struct *tty, const unsigned char *cp, ch while (count--) { if (fp != NULL && *fp++) { if (!test_and_set_bit(AXF_ERROR, &ax->flags)) - ax->rx_errors++; + ax->stats.rx_errors++; cp++; continue; } kiss_unesc(ax, *cp++); } + + mkiss_put(ax); + if (test_and_clear_bit(TTY_THROTTLED, &tty->flags) + && tty->driver->unthrottle) + tty->driver->unthrottle(tty); } -static int ax25_open(struct tty_struct *tty) +static int mkiss_receive_room(struct tty_struct *tty) { - struct ax_disp *ax = (struct ax_disp *) tty->disc_data; - int err; - - /* First make sure we're not already connected. */ - if (ax && ax->magic == AX25_MAGIC) - return -EEXIST; - - /* OK. Find a free AX25 channel to use. */ - if ((ax = ax_alloc()) == NULL) - return -ENFILE; - - ax->tty = tty; - tty->disc_data = ax; - - if (tty->driver->flush_buffer) - tty->driver->flush_buffer(tty); - - /* Restore default settings */ - ax->dev->type = ARPHRD_AX25; - - /* Perform the low-level AX25 initialization. */ - if ((err = ax_open(ax->dev))) - return err; - - /* Done. We have linked the TTY line to a channel. */ - return ax->dev->base_addr; -} - -static void ax25_close(struct tty_struct *tty) -{ - struct ax_disp *ax = (struct ax_disp *) tty->disc_data; - - /* First make sure we're connected. 
*/ - if (ax == NULL || ax->magic != AX25_MAGIC) - return; - - unregister_netdev(ax->dev); - - tty->disc_data = NULL; - ax->tty = NULL; - - ax_free(ax); -} - - -static struct net_device_stats *ax_get_stats(struct net_device *dev) -{ - static struct net_device_stats stats; - struct ax_disp *ax = netdev_priv(dev); - - memset(&stats, 0, sizeof(struct net_device_stats)); - - stats.rx_packets = ax->rx_packets; - stats.tx_packets = ax->tx_packets; - stats.rx_bytes = ax->rx_bytes; - stats.tx_bytes = ax->tx_bytes; - stats.rx_dropped = ax->rx_dropped; - stats.tx_dropped = ax->tx_dropped; - stats.tx_errors = ax->tx_errors; - stats.rx_errors = ax->rx_errors; - stats.rx_over_errors = ax->rx_over_errors; - - return &stats; -} - - -/************************************************************************ - * STANDARD ENCAPSULATION * - ************************************************************************/ - -static int kiss_esc(unsigned char *s, unsigned char *d, int len) -{ - unsigned char *ptr = d; - unsigned char c; - - /* - * Send an initial END character to flush out any - * data that may have accumulated in the receiver - * due to line noise. - */ - - *ptr++ = END; - - while (len-- > 0) { - switch (c = *s++) { - case END: - *ptr++ = ESC; - *ptr++ = ESC_END; - break; - case ESC: - *ptr++ = ESC; - *ptr++ = ESC_ESC; - break; - default: - *ptr++ = c; - break; - } - } - - *ptr++ = END; - - return ptr - d; + return 65536; /* We can handle an infinite amount of data. :-) */ } /* - * MW: - * OK its ugly, but tell me a better solution without copying the - * packet to a temporary buffer :-) + * Called by the driver when there's room for more data. If we have + * more packets to send, we send them here. 
*/ -static int kiss_esc_crc(unsigned char *s, unsigned char *d, unsigned short crc, int len) +static void mkiss_write_wakeup(struct tty_struct *tty) { - unsigned char *ptr = d; - unsigned char c=0; + struct mkiss *ax = mkiss_get(tty); + int actual; - *ptr++ = END; - while (len > 0) { - if (len > 2) - c = *s++; - else if (len > 1) - c = crc >> 8; - else if (len > 0) - c = crc & 0xff; + if (!ax) + return; - len--; + if (ax->xleft <= 0) { + /* Now serial buffer is almost free & we can start + * transmission of another packet + */ + clear_bit(TTY_DO_WRITE_WAKEUP, &tty->flags); - switch (c) { - case END: - *ptr++ = ESC; - *ptr++ = ESC_END; - break; - case ESC: - *ptr++ = ESC; - *ptr++ = ESC_ESC; - break; - default: - *ptr++ = c; - break; - } - } - *ptr++ = END; - return ptr - d; -} - -static void kiss_unesc(struct ax_disp *ax, unsigned char s) -{ - switch (s) { - case END: - /* drop keeptest bit = VSV */ - if (test_bit(AXF_KEEPTEST, &ax->flags)) - clear_bit(AXF_KEEPTEST, &ax->flags); - - if (!test_and_clear_bit(AXF_ERROR, &ax->flags) && (ax->rcount > 2)) - ax_bump(ax); - - clear_bit(AXF_ESCAPE, &ax->flags); - ax->rcount = 0; - return; - - case ESC: - set_bit(AXF_ESCAPE, &ax->flags); - return; - case ESC_ESC: - if (test_and_clear_bit(AXF_ESCAPE, &ax->flags)) - s = ESC; - break; - case ESC_END: - if (test_and_clear_bit(AXF_ESCAPE, &ax->flags)) - s = END; - break; + netif_wake_queue(ax->dev); + goto out; } - spin_lock_bh(&ax->buflock); - if (!test_bit(AXF_ERROR, &ax->flags)) { - if (ax->rcount < ax->buffsize) { - ax->rbuff[ax->rcount++] = s; - spin_unlock_bh(&ax->buflock); - return; - } + actual = tty->driver->write(tty, ax->xhead, ax->xleft); + ax->xleft -= actual; + ax->xhead += actual; - ax->rx_over_errors++; - set_bit(AXF_ERROR, &ax->flags); - } - spin_unlock_bh(&ax->buflock); +out: + mkiss_put(ax); } +static struct tty_ldisc ax_ldisc = { + .magic = TTY_LDISC_MAGIC, + .name = "mkiss", + .open = mkiss_open, + .close = mkiss_close, + .ioctl = mkiss_ioctl, + .receive_buf 
= mkiss_receive_buf, + .receive_room = mkiss_receive_room, + .write_wakeup = mkiss_write_wakeup +}; -static int ax_set_mac_address(struct net_device *dev, void __user *addr) -{ - if (copy_from_user(dev->dev_addr, addr, AX25_ADDR_LEN)) - return -EFAULT; - return 0; -} - -static int ax_set_dev_mac_address(struct net_device *dev, void *addr) -{ - struct sockaddr *sa = addr; - - memcpy(dev->dev_addr, sa->sa_data, AX25_ADDR_LEN); - - return 0; -} - - -/* Perform I/O control on an active ax25 channel. */ -static int ax25_disp_ioctl(struct tty_struct *tty, void *file, int cmd, void __user *arg) -{ - struct ax_disp *ax = (struct ax_disp *) tty->disc_data; - unsigned int tmp; - - /* First make sure we're connected. */ - if (ax == NULL || ax->magic != AX25_MAGIC) - return -EINVAL; - - switch (cmd) { - case SIOCGIFNAME: - if (copy_to_user(arg, ax->dev->name, strlen(ax->dev->name) + 1)) - return -EFAULT; - return 0; - - case SIOCGIFENCAP: - return put_user(4, (int __user *)arg); - - case SIOCSIFENCAP: - if (get_user(tmp, (int __user *)arg)) - return -EFAULT; - ax->mode = tmp; - ax->dev->addr_len = AX25_ADDR_LEN; /* sizeof an AX.25 addr */ - ax->dev->hard_header_len = AX25_KISS_HEADER_LEN + AX25_MAX_HEADER_LEN + 3; - ax->dev->type = ARPHRD_AX25; - return 0; - - case SIOCSIFHWADDR: - return ax_set_mac_address(ax->dev, arg); - - default: - return -ENOIOCTLCMD; - } -} - -static int ax_open_dev(struct net_device *dev) -{ - struct ax_disp *ax = netdev_priv(dev); - - if (ax->tty == NULL) - return -ENODEV; - - return 0; -} - - -/* Initialize the driver. Called by network startup. */ -static int ax25_init(struct net_device *dev) -{ - struct ax_disp *ax = netdev_priv(dev); - - static char ax25_bcast[AX25_ADDR_LEN] = - {'Q'<<1,'S'<<1,'T'<<1,' '<<1,' '<<1,' '<<1,'0'<<1}; - static char ax25_test[AX25_ADDR_LEN] = - {'L'<<1,'I'<<1,'N'<<1,'U'<<1,'X'<<1,' '<<1,'1'<<1}; - - if (ax == NULL) /* Allocation failed ?? */ - return -ENODEV; - - /* Set up the "AX25 Control Block". 
(And clear statistics) */ - memset(ax, 0, sizeof (struct ax_disp)); - ax->magic = AX25_MAGIC; - ax->dev = dev; - - /* Finish setting up the DEVICE info. */ - dev->mtu = AX_MTU; - dev->hard_start_xmit = ax_xmit; - dev->open = ax_open_dev; - dev->stop = ax_close; - dev->get_stats = ax_get_stats; - dev->set_mac_address = ax_set_dev_mac_address; - dev->hard_header_len = 0; - dev->addr_len = 0; - dev->type = ARPHRD_AX25; - dev->tx_queue_len = 10; - dev->hard_header = ax_header; - dev->rebuild_header = ax_rebuild_header; - - memcpy(dev->broadcast, ax25_bcast, AX25_ADDR_LEN); - memcpy(dev->dev_addr, ax25_test, AX25_ADDR_LEN); - - /* New-style flags. */ - dev->flags = IFF_BROADCAST | IFF_MULTICAST; - - return 0; -} - - -/* ******************************************************************** */ -/* * Init MKISS driver * */ -/* ******************************************************************** */ +static char banner[] __initdata = KERN_INFO \ + "mkiss: AX.25 Multikiss, Hans Albas PE1AYX\n"; +static char msg_regfail[] __initdata = KERN_ERR \ + "mkiss: can't register line discipline (err = %d)\n"; static int __init mkiss_init_driver(void) { @@ -887,64 +885,27 @@ static int __init mkiss_init_driver(void) printk(banner); - if (ax25_maxdev < 4) - ax25_maxdev = 4; /* Sanity */ + if ((status = tty_register_ldisc(N_AX25, &ax_ldisc)) != 0) + printk(msg_regfail); - if ((ax25_ctrls = kmalloc(sizeof(void *) * ax25_maxdev, GFP_KERNEL)) == NULL) { - printk(KERN_ERR "mkiss: Can't allocate ax25_ctrls[] array!\n"); - return -ENOMEM; - } - - /* Clear the pointer array, we allocate devices when we need them */ - memset(ax25_ctrls, 0, sizeof(void*) * ax25_maxdev); /* Pointers */ - - /* Fill in our line protocol discipline, and register it */ - ax_ldisc.magic = TTY_LDISC_MAGIC; - ax_ldisc.name = "mkiss"; - ax_ldisc.open = ax25_open; - ax_ldisc.close = ax25_close; - ax_ldisc.ioctl = (int (*)(struct tty_struct *, struct file *, - unsigned int, unsigned long))ax25_disp_ioctl; - 
ax_ldisc.receive_buf = ax25_receive_buf; - ax_ldisc.receive_room = ax25_receive_room; - ax_ldisc.write_wakeup = ax25_write_wakeup; - - if ((status = tty_register_ldisc(N_AX25, &ax_ldisc)) != 0) { - printk(KERN_ERR "mkiss: can't register line discipline (err = %d)\n", status); - kfree(ax25_ctrls); - } return status; } +static const char msg_unregfail[] __exitdata = KERN_ERR \ + "mkiss: can't unregister line discipline (err = %d)\n"; + static void __exit mkiss_exit_driver(void) { - int i; + int ret; - for (i = 0; i < ax25_maxdev; i++) { - if (ax25_ctrls[i]) { - /* - * VSV = if dev->start==0, then device - * unregistered while close proc. - */ - if (netif_running(&ax25_ctrls[i]->dev)) - unregister_netdev(&ax25_ctrls[i]->dev); - kfree(ax25_ctrls[i]); - } - } - - kfree(ax25_ctrls); - ax25_ctrls = NULL; - - if ((i = tty_unregister_ldisc(N_AX25))) - printk(KERN_ERR "mkiss: can't unregister line discipline (err = %d)\n", i); + if ((ret = tty_unregister_ldisc(N_AX25))) + printk(msg_unregfail, ret); } -MODULE_AUTHOR("Hans Albas PE1AYX "); +MODULE_AUTHOR("Ralf Baechle DL5RB "); MODULE_DESCRIPTION("KISS driver for AX.25 over TTYs"); -MODULE_PARM(ax25_maxdev, "i"); -MODULE_PARM_DESC(ax25_maxdev, "number of MKISS devices"); MODULE_LICENSE("GPL"); MODULE_ALIAS_LDISC(N_AX25); + module_init(mkiss_init_driver); module_exit(mkiss_exit_driver); - From 86f0cd505781e42000763821ec6f70127a6abaae Mon Sep 17 00:00:00 2001 From: Francois Romieu Date: Wed, 24 Aug 2005 01:14:23 +0200 Subject: [PATCH 202/584] [PATCH] r8169: avoid conflict between revisions 2 and 3 of the Linksys EG1032 Both revisions share the same PCI device ID and vendor ID but revision 2 of the device uses SysKonnect's chipset whereas revision 3 of the device uses Realtek's 8169 chipset. Credit goes to Christiaan Lutzer for reporting the issue and giving the actual value for the different revisions. 
Signed-off-by: Francois Romieu Signed-off-by: Jeff Garzik --- drivers/net/r8169.c | 2 +- drivers/net/skge.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/r8169.c b/drivers/net/r8169.c index 2f9b3227243b..f0471d102e3c 100644 --- a/drivers/net/r8169.c +++ b/drivers/net/r8169.c @@ -186,8 +186,8 @@ const static struct { static struct pci_device_id rtl8169_pci_tbl[] = { { PCI_DEVICE(PCI_VENDOR_ID_REALTEK, 0x8169), }, { PCI_DEVICE(PCI_VENDOR_ID_DLINK, 0x4300), }, - { PCI_DEVICE(PCI_VENDOR_ID_LINKSYS, 0x1032), }, { PCI_DEVICE(0x16ec, 0x0116), }, + { PCI_VENDOR_ID_LINKSYS, 0x1032, PCI_ANY_ID, 0x0024, }, {0,}, }; diff --git a/drivers/net/skge.c b/drivers/net/skge.c index 48a43b84ea5f..d7c98515fdfd 100644 --- a/drivers/net/skge.c +++ b/drivers/net/skge.c @@ -79,8 +79,8 @@ static const struct pci_device_id skge_id_table[] = { { PCI_DEVICE(PCI_VENDOR_ID_MARVELL, 0x4320) }, { PCI_DEVICE(PCI_VENDOR_ID_MARVELL, 0x5005) }, /* Belkin */ { PCI_DEVICE(PCI_VENDOR_ID_CNET, PCI_DEVICE_ID_CNET_GIGACARD) }, - { PCI_DEVICE(PCI_VENDOR_ID_LINKSYS, PCI_DEVICE_ID_LINKSYS_EG1032) }, { PCI_DEVICE(PCI_VENDOR_ID_LINKSYS, PCI_DEVICE_ID_LINKSYS_EG1064) }, + { PCI_VENDOR_ID_LINKSYS, 0x1032, PCI_ANY_ID, 0x0015, }, { 0 } }; MODULE_DEVICE_TABLE(pci, skge_id_table); From 6a029a90f5b93e2b50bcbbaef05ef91fa0c1d6b3 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 27 Aug 2005 06:48:15 +0100 Subject: [PATCH 203/584] [PATCH] mmaper_kern.c fixes [buffer overruns] - copy_from_user() can fail; ->write() must check its return value. - severe buffer overruns both in ->read() and ->write() - lseek to the end (i.e. to mmapper_size) and if (count + *ppos > mmapper_size) count = count + *ppos - mmapper_size; will do absolutely nothing. Then it will call copy_to_user(buf,&v_buf[*ppos],count); with obvious results (similar for ->write()). Fixed by turning read to simple_read_from_buffer() and by doing normal limiting of count in ->write(). 
- gratuitous lock_kernel() in ->mmap() - it's useless there. - lots of gratuitous includes. Signed-off-by: Al Viro Signed-off-by: Linus Torvalds --- arch/um/drivers/mmapper_kern.c | 41 ++++++++-------------------------- 1 file changed, 9 insertions(+), 32 deletions(-) diff --git a/arch/um/drivers/mmapper_kern.c b/arch/um/drivers/mmapper_kern.c index a37a5ac13c22..022f67bb6873 100644 --- a/arch/um/drivers/mmapper_kern.c +++ b/arch/um/drivers/mmapper_kern.c @@ -9,19 +9,11 @@ * */ -#include -#include -#include -#include +#include #include #include -#include -#include -#include #include #include -#include -#include #include "mem_user.h" #include "user_util.h" @@ -31,35 +23,22 @@ static unsigned long p_buf = 0; static char *v_buf = NULL; static ssize_t -mmapper_read(struct file *file, char *buf, size_t count, loff_t *ppos) +mmapper_read(struct file *file, char __user *buf, size_t count, loff_t *ppos) { - if(*ppos > mmapper_size) - return -EINVAL; - - if(count + *ppos > mmapper_size) - count = count + *ppos - mmapper_size; - - if(count < 0) - return -EINVAL; - - copy_to_user(buf,&v_buf[*ppos],count); - - return count; + return simple_read_from_buffer(buf, count, ppos, v_buf, mmapper_size); } static ssize_t -mmapper_write(struct file *file, const char *buf, size_t count, loff_t *ppos) +mmapper_write(struct file *file, const char __user *buf, size_t count, loff_t *ppos) { - if(*ppos > mmapper_size) + if (*ppos > mmapper_size) return -EINVAL; - if(count + *ppos > mmapper_size) - count = count + *ppos - mmapper_size; + if (count > mmapper_size - *ppos) + count = mmapper_size - *ppos; - if(count < 0) - return -EINVAL; - - copy_from_user(&v_buf[*ppos],buf,count); + if (copy_from_user(&v_buf[*ppos], buf, count)) + return -EFAULT; return count; } @@ -77,7 +56,6 @@ mmapper_mmap(struct file *file, struct vm_area_struct * vma) int ret = -EINVAL; int size; - lock_kernel(); if (vma->vm_pgoff != 0) goto out; @@ -92,7 +70,6 @@ mmapper_mmap(struct file *file, struct vm_area_struct *
vma) goto out; ret = 0; out: - unlock_kernel(); return ret; } From 8dbddf17824861f2298de093549e6493d9844835 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Sat, 27 Aug 2005 00:56:18 -0600 Subject: [PATCH 204/584] [PATCH] acpi_shutdown: Only prepare for power off on power_off When acpi_sleep_prepare was moved into a shutdown method we started calling it for all shutdowns. It appears this triggers some systems to power off on reboot. Avoid this by only calling acpi_sleep_prepare if we are going to power off the system. Signed-off-by: Eric W. Biederman Signed-off-by: Linus Torvalds --- drivers/acpi/sleep/poweroff.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/acpi/sleep/poweroff.c b/drivers/acpi/sleep/poweroff.c index 186b182c5825..f93d2ee54800 100644 --- a/drivers/acpi/sleep/poweroff.c +++ b/drivers/acpi/sleep/poweroff.c @@ -55,7 +55,11 @@ void acpi_power_off(void) static int acpi_shutdown(struct sys_device *x) { - return acpi_sleep_prepare(ACPI_STATE_S5); + if (system_state == SYSTEM_POWER_OFF) { + /* Prepare if we are going to power off the system */ + return acpi_sleep_prepare(ACPI_STATE_S5); + } + return 0; } static struct sysdev_class acpi_sysclass = { From 820d220de400cfaaf846a2d8b5de93f9ea5a9b80 Mon Sep 17 00:00:00 2001 From: James Morris Date: Sat, 27 Aug 2005 13:47:06 +0200 Subject: [PATCH 205/584] [PATCH] Fix capifs bug in initialization error path. This fixes a bug in the capifs initialization code, where the filesystem is not unregistered if kern_mount() fails. 
Signed-off-by: James Morris Signed-off-by: Karsten Keil Signed-off-by: Linus Torvalds --- drivers/isdn/capi/capifs.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/isdn/capi/capifs.c b/drivers/isdn/capi/capifs.c index f8570fd9d2ab..3abd7fc6e5ef 100644 --- a/drivers/isdn/capi/capifs.c +++ b/drivers/isdn/capi/capifs.c @@ -191,8 +191,10 @@ static int __init capifs_init(void) err = register_filesystem(&capifs_fs_type); if (!err) { capifs_mnt = kern_mount(&capifs_fs_type); - if (IS_ERR(capifs_mnt)) + if (IS_ERR(capifs_mnt)) { err = PTR_ERR(capifs_mnt); + unregister_filesystem(&capifs_fs_type); + } } if (!err) printk(KERN_NOTICE "capifs: Rev %s\n", rev); From 8126fdbc76351bdf99c6737ef4fecf88a22fa538 Mon Sep 17 00:00:00 2001 From: Patrick Boettcher Date: Sat, 27 Aug 2005 19:30:30 +0200 Subject: [PATCH 206/584] [PATCH] fix for race problem in DVB USB drivers (dibusb) Fixed race between submitting streaming URBs in the driver and starting the actual transfer in hardware (demodulator and USB controller) which sometimes led to garbled data transfers. URBs are now submitted first, then the transfer is enabled. Dibusb devices and clones are now fully functional again.
Signed-off-by: Patrick Boettcher Signed-off-by: Linus Torvalds --- drivers/media/dvb/dvb-usb/dibusb-common.c | 19 ++++++++++++++----- drivers/media/dvb/dvb-usb/dvb-usb-dvb.c | 5 +++-- 2 files changed, 17 insertions(+), 7 deletions(-) diff --git a/drivers/media/dvb/dvb-usb/dibusb-common.c b/drivers/media/dvb/dvb-usb/dibusb-common.c index 63b626f70c81..9b9d6f8ee74e 100644 --- a/drivers/media/dvb/dvb-usb/dibusb-common.c +++ b/drivers/media/dvb/dvb-usb/dibusb-common.c @@ -70,13 +70,22 @@ EXPORT_SYMBOL(dibusb_power_ctrl); int dibusb2_0_streaming_ctrl(struct dvb_usb_device *d, int onoff) { - u8 b[2]; + u8 b[3] = { 0 }; + int ret; + + if ((ret = dibusb_streaming_ctrl(d,onoff)) < 0) + return ret; + + if (onoff) { + b[0] = DIBUSB_REQ_SET_STREAMING_MODE; + b[1] = 0x00; + if ((ret = dvb_usb_generic_write(d,b,2)) < 0) + return ret; + } + b[0] = DIBUSB_REQ_SET_IOCTL; b[1] = onoff ? DIBUSB_IOCTL_CMD_ENABLE_STREAM : DIBUSB_IOCTL_CMD_DISABLE_STREAM; - - dvb_usb_generic_write(d,b,3); - - return dibusb_streaming_ctrl(d,onoff); + return dvb_usb_generic_write(d,b,3); } EXPORT_SYMBOL(dibusb2_0_streaming_ctrl); diff --git a/drivers/media/dvb/dvb-usb/dvb-usb-dvb.c b/drivers/media/dvb/dvb-usb/dvb-usb-dvb.c index 3491ff40885c..6fa92100248b 100644 --- a/drivers/media/dvb/dvb-usb/dvb-usb-dvb.c +++ b/drivers/media/dvb/dvb-usb/dvb-usb-dvb.c @@ -23,12 +23,12 @@ static int dvb_usb_ctrl_feed(struct dvb_demux_feed *dvbdmxfeed, int onoff) */ if (newfeedcount == 0) { deb_ts("stop feeding\n"); + dvb_usb_urb_kill(d); if (d->props.streaming_ctrl != NULL) if ((ret = d->props.streaming_ctrl(d,0))) err("error while stopping stream."); - dvb_usb_urb_kill(d); } d->feedcount = newfeedcount; @@ -44,6 +44,8 @@ static int dvb_usb_ctrl_feed(struct dvb_demux_feed *dvbdmxfeed, int onoff) * for reception. 
*/ if (d->feedcount == onoff && d->feedcount > 0) { + deb_ts("submitting all URBs\n"); + dvb_usb_urb_submit(d); deb_ts("controlling pid parser\n"); if (d->props.caps & DVB_USB_HAS_PID_FILTER && @@ -59,7 +61,6 @@ static int dvb_usb_ctrl_feed(struct dvb_demux_feed *dvbdmxfeed, int onoff) return -ENODEV; } - dvb_usb_urb_submit(d); } return 0; } From 729d70f5dfd663b44bca68a4479c96bde7e535d6 Mon Sep 17 00:00:00 2001 From: Jan Blunck Date: Sat, 27 Aug 2005 11:07:52 -0700 Subject: [PATCH 207/584] [PATCH] sg.c: fix a memory leak in devices seq_file implementation I know that scsi procfs is legacy code but this is a fix for a memory leak. While reading through sg.c I realized that the implementation of /proc/scsi/sg/devices with seq_file is leaking memory due to freeing the pointer returned by the next() iterator method. Since next() might return NULL or an error this is wrong. This patch fixes it through using the seq_files private field for holding the reference to the iterator object. Here is a small bash script to trigger the leak. Use slabtop to watch the size-32 usage grow and grow. #!/bin/sh while true; do cat /proc/scsi/sg/devices > /dev/null done Signed-off-by: Jan Blunck Acked-by: James Bottomley Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/scsi/sg.c | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/drivers/scsi/sg.c b/drivers/scsi/sg.c index 51292f269ce5..e822ca0e97cf 100644 --- a/drivers/scsi/sg.c +++ b/drivers/scsi/sg.c @@ -2971,23 +2971,22 @@ static void * dev_seq_start(struct seq_file *s, loff_t *pos) { struct sg_proc_deviter * it = kmalloc(sizeof(*it), GFP_KERNEL); + s->private = it; if (! 
it) return NULL; + if (NULL == sg_dev_arr) - goto err1; + return NULL; it->index = *pos; it->max = sg_last_dev(); if (it->index >= it->max) - goto err1; + return NULL; return it; -err1: - kfree(it); - return NULL; } static void * dev_seq_next(struct seq_file *s, void *v, loff_t *pos) { - struct sg_proc_deviter * it = (struct sg_proc_deviter *) v; + struct sg_proc_deviter * it = s->private; *pos = ++it->index; return (it->index < it->max) ? it : NULL; @@ -2995,7 +2994,7 @@ static void * dev_seq_next(struct seq_file *s, void *v, loff_t *pos) static void dev_seq_stop(struct seq_file *s, void *v) { - kfree (v); + kfree(s->private); } static int sg_proc_open_dev(struct inode *inode, struct file *file) From 3859f6a248cbdfbe7b41663f3a2b51f48e30b281 Mon Sep 17 00:00:00 2001 From: Andreas Herrmann Date: Sat, 27 Aug 2005 11:07:54 -0700 Subject: [PATCH 208/584] [PATCH] zfcp: add rports to enable scsi_add_device to work again This patch fixes a severe problem with 2.6.13-rc7. Due to recent SCSI changes it is not possible to add any LUNs to the zfcp device driver anymore. With registration of remote ports this is fixed. 
Signed-off-by: Andreas Herrmann Acked-by: James Bottomley Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/s390/scsi/zfcp_aux.c | 29 +++++++---------------------- drivers/s390/scsi/zfcp_ccw.c | 10 ++++++++++ drivers/s390/scsi/zfcp_def.h | 2 +- drivers/s390/scsi/zfcp_erp.c | 25 ++++++++++++++++++++++--- drivers/s390/scsi/zfcp_ext.h | 2 ++ drivers/s390/scsi/zfcp_fsf.c | 1 + drivers/s390/scsi/zfcp_scsi.c | 25 ++++++++++++++++++++----- 7 files changed, 63 insertions(+), 31 deletions(-) diff --git a/drivers/s390/scsi/zfcp_aux.c b/drivers/s390/scsi/zfcp_aux.c index e17b4d58a9f6..6fed4a532ca3 100644 --- a/drivers/s390/scsi/zfcp_aux.c +++ b/drivers/s390/scsi/zfcp_aux.c @@ -1299,13 +1299,10 @@ struct zfcp_port * zfcp_port_enqueue(struct zfcp_adapter *adapter, wwn_t wwpn, u32 status, u32 d_id) { - struct zfcp_port *port, *tmp_port; + struct zfcp_port *port; int check_wwpn; - scsi_id_t scsi_id; - int found; check_wwpn = !(status & ZFCP_STATUS_PORT_NO_WWPN); - /* * check that there is no port with this WWPN already in list */ @@ -1368,7 +1365,7 @@ zfcp_port_enqueue(struct zfcp_adapter *adapter, wwn_t wwpn, u32 status, } else { snprintf(port->sysfs_device.bus_id, BUS_ID_SIZE, "0x%016llx", wwpn); - port->sysfs_device.parent = &adapter->ccw_device->dev; + port->sysfs_device.parent = &adapter->ccw_device->dev; } port->sysfs_device.release = zfcp_sysfs_port_release; dev_set_drvdata(&port->sysfs_device, port); @@ -1388,24 +1385,8 @@ zfcp_port_enqueue(struct zfcp_adapter *adapter, wwn_t wwpn, u32 status, zfcp_port_get(port); - scsi_id = 1; - found = 0; write_lock_irq(&zfcp_data.config_lock); - list_for_each_entry(tmp_port, &adapter->port_list_head, list) { - if (atomic_test_mask(ZFCP_STATUS_PORT_NO_SCSI_ID, - &tmp_port->status)) - continue; - if (tmp_port->scsi_id != scsi_id) { - found = 1; - break; - } - scsi_id++; - } - port->scsi_id = scsi_id; - if (found) - list_add_tail(&port->list, &tmp_port->list); - else - list_add_tail(&port->list, 
&adapter->port_list_head); + list_add_tail(&port->list, &adapter->port_list_head); atomic_clear_mask(ZFCP_STATUS_COMMON_REMOVE, &port->status); atomic_set_mask(ZFCP_STATUS_COMMON_RUNNING, &port->status); if (d_id == ZFCP_DID_DIRECTORY_SERVICE) @@ -1422,11 +1403,15 @@ zfcp_port_enqueue(struct zfcp_adapter *adapter, wwn_t wwpn, u32 status, void zfcp_port_dequeue(struct zfcp_port *port) { + struct fc_port *rport; + zfcp_port_wait(port); write_lock_irq(&zfcp_data.config_lock); list_del(&port->list); port->adapter->ports--; write_unlock_irq(&zfcp_data.config_lock); + if (port->rport) + fc_remote_port_delete(rport); zfcp_adapter_put(port->adapter); zfcp_sysfs_port_remove_files(&port->sysfs_device, atomic_read(&port->status)); diff --git a/drivers/s390/scsi/zfcp_ccw.c b/drivers/s390/scsi/zfcp_ccw.c index 0fc46381fc22..3c65aedaa97f 100644 --- a/drivers/s390/scsi/zfcp_ccw.c +++ b/drivers/s390/scsi/zfcp_ccw.c @@ -202,9 +202,19 @@ static int zfcp_ccw_set_offline(struct ccw_device *ccw_device) { struct zfcp_adapter *adapter; + struct zfcp_port *port; + struct fc_port *rport; down(&zfcp_data.config_sema); adapter = dev_get_drvdata(&ccw_device->dev); + /* might be racy, but we cannot take config_lock due to the fact that + fc_remote_port_delete might sleep */ + list_for_each_entry(port, &adapter->port_list_head, list) + if (port->rport) { + rport = port->rport; + port->rport = NULL; + fc_remote_port_delete(rport); + } zfcp_erp_adapter_shutdown(adapter, 0); zfcp_erp_wait(adapter); zfcp_adapter_scsi_unregister(adapter); diff --git a/drivers/s390/scsi/zfcp_def.h b/drivers/s390/scsi/zfcp_def.h index 4103b5be7683..455e902533a9 100644 --- a/drivers/s390/scsi/zfcp_def.h +++ b/drivers/s390/scsi/zfcp_def.h @@ -906,6 +906,7 @@ struct zfcp_adapter { */ struct zfcp_port { struct device sysfs_device; /* sysfs device */ + struct fc_rport *rport; /* rport of fc transport class */ struct list_head list; /* list of remote ports */ atomic_t refcount; /* reference count */ wait_queue_head_t 
remove_wq; /* can be used to wait for @@ -916,7 +917,6 @@ struct zfcp_port { list */ u32 units; /* # of logical units in list */ atomic_t status; /* status of this remote port */ - scsi_id_t scsi_id; /* own SCSI ID */ wwn_t wwnn; /* WWNN if known */ wwn_t wwpn; /* WWPN */ fc_id_t d_id; /* D_ID */ diff --git a/drivers/s390/scsi/zfcp_erp.c b/drivers/s390/scsi/zfcp_erp.c index 0cf31f7d1c0f..cb4f612550ba 100644 --- a/drivers/s390/scsi/zfcp_erp.c +++ b/drivers/s390/scsi/zfcp_erp.c @@ -3360,13 +3360,32 @@ zfcp_erp_action_cleanup(int action, struct zfcp_adapter *adapter, if ((result == ZFCP_ERP_SUCCEEDED) && (!atomic_test_mask(ZFCP_STATUS_UNIT_TEMPORARY, &unit->status)) - && (!unit->device)) - scsi_add_device(unit->port->adapter->scsi_host, 0, - unit->port->scsi_id, unit->scsi_lun); + && !unit->device + && port->rport) + scsi_add_device(port->adapter->scsi_host, 0, + port->rport->scsi_target_id, + unit->scsi_lun); zfcp_unit_put(unit); break; case ZFCP_ERP_ACTION_REOPEN_PORT_FORCED: case ZFCP_ERP_ACTION_REOPEN_PORT: + if ((result == ZFCP_ERP_SUCCEEDED) + && !atomic_test_mask(ZFCP_STATUS_PORT_NO_WWPN, + &port->status) + && !port->rport) { + struct fc_rport_identifiers ids; + ids.node_name = port->wwnn; + ids.port_name = port->wwpn; + ids.port_id = port->d_id; + ids.roles = FC_RPORT_ROLE_FCP_TARGET; + port->rport = + fc_remote_port_add(adapter->scsi_host, 0, &ids); + if (!port->rport) + ZFCP_LOG_NORMAL("failed registration of rport" + "(adapter %s, wwpn=0x%016Lx)\n", + zfcp_get_busid_by_port(port), + port->wwpn); + } zfcp_port_put(port); break; case ZFCP_ERP_ACTION_REOPEN_ADAPTER: diff --git a/drivers/s390/scsi/zfcp_ext.h b/drivers/s390/scsi/zfcp_ext.h index 42df7e57eeae..cd98a2de9f8f 100644 --- a/drivers/s390/scsi/zfcp_ext.h +++ b/drivers/s390/scsi/zfcp_ext.h @@ -143,6 +143,8 @@ extern int zfcp_scsi_command_async(struct zfcp_adapter *,struct zfcp_unit *, struct scsi_cmnd *, struct timer_list *); extern int zfcp_scsi_command_sync(struct zfcp_unit *, struct scsi_cmnd *, 
struct timer_list *); +extern void zfcp_set_fc_host_attrs(struct zfcp_adapter *); +extern void zfcp_set_fc_rport_attrs(struct zfcp_port *); extern struct scsi_transport_template *zfcp_transport_template; extern struct fc_function_template zfcp_transport_functions; diff --git a/drivers/s390/scsi/zfcp_fsf.c b/drivers/s390/scsi/zfcp_fsf.c index 0d9f20edc490..c007b6424e74 100644 --- a/drivers/s390/scsi/zfcp_fsf.c +++ b/drivers/s390/scsi/zfcp_fsf.c @@ -2062,6 +2062,7 @@ zfcp_fsf_exchange_config_evaluate(struct zfcp_fsf_req *fsf_req, int xchg_ok) zfcp_erp_adapter_shutdown(adapter, 0); return -EIO; } + zfcp_set_fc_host_attrs(adapter); return 0; } diff --git a/drivers/s390/scsi/zfcp_scsi.c b/drivers/s390/scsi/zfcp_scsi.c index b61d309352c3..31a76065cf28 100644 --- a/drivers/s390/scsi/zfcp_scsi.c +++ b/drivers/s390/scsi/zfcp_scsi.c @@ -389,7 +389,7 @@ zfcp_unit_lookup(struct zfcp_adapter *adapter, int channel, scsi_id_t id, struct zfcp_unit *unit, *retval = NULL; list_for_each_entry(port, &adapter->port_list_head, list) { - if (id != port->scsi_id) + if (!port->rport || (id != port->rport->scsi_target_id)) continue; list_for_each_entry(unit, &port->unit_list_head, list) { if (lun == unit->scsi_lun) { @@ -408,7 +408,7 @@ zfcp_port_lookup(struct zfcp_adapter *adapter, int channel, scsi_id_t id) struct zfcp_port *port; list_for_each_entry(port, &adapter->port_list_head, list) { - if (id == port->scsi_id) + if (port->rport && (id == port->rport->scsi_target_id)) return port; } return (struct zfcp_port *) NULL; @@ -634,7 +634,6 @@ zfcp_scsi_eh_device_reset_handler(struct scsi_cmnd *scpnt) { int retval; struct zfcp_unit *unit = (struct zfcp_unit *) scpnt->device->hostdata; - struct Scsi_Host *scsi_host = scpnt->device->host; if (!unit) { ZFCP_LOG_NORMAL("bug: Tried reset for nonexistent unit\n"); @@ -729,7 +728,6 @@ zfcp_scsi_eh_bus_reset_handler(struct scsi_cmnd *scpnt) { int retval = 0; struct zfcp_unit *unit; - struct Scsi_Host *scsi_host = scpnt->device->host; unit = (struct 
zfcp_unit *) scpnt->device->hostdata; ZFCP_LOG_NORMAL("bus reset because of problems with " @@ -753,7 +751,6 @@ zfcp_scsi_eh_host_reset_handler(struct scsi_cmnd *scpnt) { int retval = 0; struct zfcp_unit *unit; - struct Scsi_Host *scsi_host = scpnt->device->host; unit = (struct zfcp_unit *) scpnt->device->hostdata; ZFCP_LOG_NORMAL("host reset because of problems with " @@ -833,6 +830,7 @@ zfcp_adapter_scsi_unregister(struct zfcp_adapter *adapter) shost = adapter->scsi_host; if (!shost) return; + fc_remove_host(shost); scsi_remove_host(shost); scsi_host_put(shost); adapter->scsi_host = NULL; @@ -906,6 +904,18 @@ zfcp_get_node_name(struct scsi_target *starget) read_unlock_irqrestore(&zfcp_data.config_lock, flags); } +void +zfcp_set_fc_host_attrs(struct zfcp_adapter *adapter) +{ + struct Scsi_Host *shost = adapter->scsi_host; + + fc_host_node_name(shost) = adapter->wwnn; + fc_host_port_name(shost) = adapter->wwpn; + strncpy(fc_host_serial_number(shost), adapter->serial_number, + min(FC_SERIAL_NUMBER_SIZE, 32)); + fc_host_supported_classes(shost) = FC_COS_CLASS2 | FC_COS_CLASS3; +} + struct fc_function_template zfcp_transport_functions = { .get_starget_port_id = zfcp_get_port_id, .get_starget_port_name = zfcp_get_port_name, @@ -913,6 +923,11 @@ struct fc_function_template zfcp_transport_functions = { .show_starget_port_id = 1, .show_starget_port_name = 1, .show_starget_node_name = 1, + .show_rport_supported_classes = 1, + .show_host_node_name = 1, + .show_host_port_name = 1, + .show_host_supported_classes = 1, + .show_host_serial_number = 1, }; /** From f786648b89f00d4e66fe6b19beffd30e764651fc Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Sun, 28 Aug 2005 09:40:01 +1000 Subject: [PATCH 209/584] [PATCH] Remove race between con_open and con_close [ Same race and same patch also by Steven Rostedt ] I have a laptop (G3 powerbook) which will pretty reliably hit a race between con_open and con_close late in the boot process and oops in vt_ioctl due to tty->driver_data 
being NULL. What happens is this: process A opens /dev/tty6; it comes into con_open() (drivers/char/vt.c) and assigns a non-NULL value to tty->driver_data. Then process A closes that and concurrently process B opens /dev/tty6. Process A gets through con_close() and clears tty->driver_data, since tty->count == 1. However, before process A can decrement tty->count, we switch to process B (e.g. at the down(&tty_sem) call at drivers/char/tty_io.c line 1626). So process B gets to run and comes into con_open with tty->count == 2, as tty->count is incremented (in init_dev) before con_open is called. Because tty->count != 1, we don't set tty->driver_data. Then when the process tries to do anything with that fd, it oopses. The simple and effective fix for this is to test tty->driver_data rather than tty->count in con_open. The testing and setting of tty->driver_data is serialized with respect to the clearing of tty->driver_data in con_close by the console_sem. We can't get a situation where con_open sees tty->driver_data != NULL and then con_close on a different fd clears tty->driver_data, because tty->count is incremented before con_open is called. Thus this patch eliminates the race, and in fact with this patch my laptop doesn't oops. 
Signed-off-by: Paul Mackerras [ Same patch Signed-off-by: Steven Rostedt in http://marc.theaimsgroup.com/?l=linux-kernel&m=112450820432121&w=2 ] Signed-off-by: Linus Torvalds --- drivers/char/vt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/char/vt.c b/drivers/char/vt.c index 30d96739fb23..665103ccaee8 100644 --- a/drivers/char/vt.c +++ b/drivers/char/vt.c @@ -2433,7 +2433,7 @@ static int con_open(struct tty_struct *tty, struct file *filp) int ret = 0; acquire_console_sem(); - if (tty->count == 1) { + if (tty->driver_data == NULL) { ret = vc_allocate(currcons); if (ret == 0) { struct vc_data *vc = vc_cons[currcons].d; From 7f84f226389fc5f47b3cb36818972e2e171607de Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Sun, 28 Aug 2005 15:33:53 +0400 Subject: [PATCH 210/584] [PATCH] zfcp: fix compilation due to rports changes struct zfcp_port::scsi_id was removed by commit 3859f6a248cbdfbe7b41663f3a2b51f48e30b281 Signed-off-by: Alexey Dobriyan Signed-off-by: Linus Torvalds --- drivers/s390/scsi/zfcp_sysfs_port.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/s390/scsi/zfcp_sysfs_port.c b/drivers/s390/scsi/zfcp_sysfs_port.c index 7a84c7d474d9..c55e82d91deb 100644 --- a/drivers/s390/scsi/zfcp_sysfs_port.c +++ b/drivers/s390/scsi/zfcp_sysfs_port.c @@ -67,7 +67,6 @@ static DEVICE_ATTR(_name, S_IRUGO, zfcp_sysfs_port_##_name##_show, NULL); ZFCP_DEFINE_PORT_ATTR(status, "0x%08x\n", atomic_read(&port->status)); ZFCP_DEFINE_PORT_ATTR(wwnn, "0x%016llx\n", port->wwnn); ZFCP_DEFINE_PORT_ATTR(d_id, "0x%06x\n", port->d_id); -ZFCP_DEFINE_PORT_ATTR(scsi_id, "0x%x\n", port->scsi_id); ZFCP_DEFINE_PORT_ATTR(in_recovery, "%d\n", atomic_test_mask (ZFCP_STATUS_COMMON_ERP_INUSE, &port->status)); ZFCP_DEFINE_PORT_ATTR(access_denied, "%d\n", atomic_test_mask @@ -263,7 +262,6 @@ static struct attribute_group zfcp_port_common_attr_group = { static struct attribute *zfcp_port_no_ns_attrs[] = { &dev_attr_unit_add.attr, &dev_attr_unit_remove.attr, - 
&dev_attr_scsi_id.attr, NULL }; From 20b1730af3ae05450b0e03f5aed40c4313f65db6 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Sun, 28 Aug 2005 13:22:37 -0700 Subject: [PATCH 211/584] [PATCH] zfcp: bugfix and compile fixes Bugfix (usage of uninitialized pointer in zfcp_port_dequeue) and compile fixes for the zfcp device driver. Signed-off-by: Heiko Carstens Acked-by: James Bottomley Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/s390/scsi/zfcp_aux.c | 5 ++--- drivers/s390/scsi/zfcp_ccw.c | 2 +- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/s390/scsi/zfcp_aux.c b/drivers/s390/scsi/zfcp_aux.c index 6fed4a532ca3..bfe3ba73bc0f 100644 --- a/drivers/s390/scsi/zfcp_aux.c +++ b/drivers/s390/scsi/zfcp_aux.c @@ -1403,15 +1403,14 @@ zfcp_port_enqueue(struct zfcp_adapter *adapter, wwn_t wwpn, u32 status, void zfcp_port_dequeue(struct zfcp_port *port) { - struct fc_port *rport; - zfcp_port_wait(port); write_lock_irq(&zfcp_data.config_lock); list_del(&port->list); port->adapter->ports--; write_unlock_irq(&zfcp_data.config_lock); if (port->rport) - fc_remote_port_delete(rport); + fc_remote_port_delete(port->rport); + port->rport = NULL; zfcp_adapter_put(port->adapter); zfcp_sysfs_port_remove_files(&port->sysfs_device, atomic_read(&port->status)); diff --git a/drivers/s390/scsi/zfcp_ccw.c b/drivers/s390/scsi/zfcp_ccw.c index 3c65aedaa97f..b30abab77da3 100644 --- a/drivers/s390/scsi/zfcp_ccw.c +++ b/drivers/s390/scsi/zfcp_ccw.c @@ -203,7 +203,7 @@ zfcp_ccw_set_offline(struct ccw_device *ccw_device) { struct zfcp_adapter *adapter; struct zfcp_port *port; - struct fc_port *rport; + struct fc_rport *rport; down(&zfcp_data.config_sema); adapter = dev_get_drvdata(&ccw_device->dev); From 4cd426f24ff2d95dd9b3aa15fa5fca9736678ad5 Mon Sep 17 00:00:00 2001 From: Pavel Machek Date: Sun, 28 Aug 2005 22:39:08 +0100 Subject: [PATCH 212/584] [ARM] drop i386-isms from arm Kconfig This kills i386-specific stuff from arm Kconfig. 
Please apply, Signed-off-by: Pavel Machek Signed-off-by: Russell King --- arch/arm/Kconfig | 37 ------------------------------------- 1 file changed, 37 deletions(-) diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index 7bc4a583f4e1..f91c8d982ef7 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -635,10 +635,6 @@ config PM and the Battery Powered Linux mini-HOWTO, available from . - Note that, even if you say N here, Linux on the x86 architecture - will issue the hlt instruction if nothing is to be done, thereby - sending the processor to sleep and saving power. - config APM tristate "Advanced Power Management Emulation" depends on PM @@ -650,12 +646,6 @@ config APM battery status information, and user-space programs will receive notification of APM "events" (e.g. battery status change). - If you select "Y" here, you can disable actual use of the APM - BIOS by passing the "apm=off" option to the kernel at boot time. - - Note that the APM support is almost completely disabled for - machines with more than one CPU. - In order to use APM, you will need supporting software. For location and more information, read and the Battery Powered Linux mini-HOWTO, available from @@ -665,39 +655,12 @@ config APM manpage ("man 8 hdparm") for that), and it doesn't turn off VESA-compliant "green" monitors. - This driver does not support the TI 4000M TravelMate and the ACER - 486/DX4/75 because they don't have compliant BIOSes. Many "green" - desktop machines also don't have compliant BIOSes, and this driver - may cause those machines to panic during the boot phase. - Generally, if you don't have a battery in your machine, there isn't much point in using this driver and you should say N. If you get random kernel OOPSes or reboots that don't seem to be related to anything, try disabling/enabling this option (or disabling/enabling APM in your BIOS). 
- Some other things you should try when experiencing seemingly random, - "weird" problems: - - 1) make sure that you have enough swap space and that it is - enabled. - 2) pass the "no-hlt" option to the kernel - 3) switch on floating point emulation in the kernel and pass - the "no387" option to the kernel - 4) pass the "floppy=nodma" option to the kernel - 5) pass the "mem=4M" option to the kernel (thereby disabling - all but the first 4 MB of RAM) - 6) make sure that the CPU is not over clocked. - 7) read the sig11 FAQ at - 8) disable the cache from your BIOS settings - 9) install a fan for the video card or exchange video RAM - 10) install a better fan for the CPU - 11) exchange RAM chips - 12) exchange the motherboard. - - To compile this driver as a module, choose M here: the - module will be called apm. - endmenu source "net/Kconfig" From 02b3e4e2d71b6058ec11cc01c72ac651eb3ded2b Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 28 Aug 2005 16:41:01 -0700 Subject: [PATCH 213/584] Linux v2.6.13 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 300f61f6f6a2..5acd1fc68d17 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ VERSION = 2 PATCHLEVEL = 6 SUBLEVEL = 13 -EXTRAVERSION =-rc7 +EXTRAVERSION = NAME=Woozy Numbat # *DOCUMENTATION* From af36d7f0df56de3e3e4bbfb15d0915097ecb8cab Mon Sep 17 00:00:00 2001 From: Jeff Garzik Date: Sun, 28 Aug 2005 20:18:39 -0400 Subject: [PATCH 214/584] [libata] license change, other bits - changes license of all code from OSL+GPL to plain ole GPL - except for NVIDIA, who hasn't yet responded about sata_nv - copyright holders were already contacted privately - adds info in each driver about where hardware/protocol docs may be obtained - where I have made major contributions, updated copyright dates --- drivers/scsi/ahci.c | 40 ++++++++++++++---------- drivers/scsi/ata_piix.c | 58 ++++++++++++++++++++++------------ drivers/scsi/libata-core.c | 52 
++++++++++++++++++------------- drivers/scsi/libata-scsi.c | 53 ++++++++++++++++++------------- drivers/scsi/libata.h | 45 ++++++++++++++------------- drivers/scsi/sata_nv.c | 11 +++++++ drivers/scsi/sata_promise.c | 33 +++++++++++--------- drivers/scsi/sata_promise.h | 31 ++++++++++--------- drivers/scsi/sata_qstor.c | 31 ++++++++++--------- drivers/scsi/sata_sil.c | 33 +++++++++++--------- drivers/scsi/sata_sis.c | 33 +++++++++++--------- drivers/scsi/sata_svw.c | 33 +++++++++++--------- drivers/scsi/sata_sx4.c | 33 +++++++++++--------- drivers/scsi/sata_uli.c | 33 +++++++++++--------- drivers/scsi/sata_via.c | 62 ++++++++++++++++++++----------------- drivers/scsi/sata_vsc.c | 26 ++++++++++++++-- include/linux/ata.h | 43 +++++++++++++------------ include/linux/libata.h | 41 ++++++++++++------------ 18 files changed, 409 insertions(+), 282 deletions(-) diff --git a/drivers/scsi/ahci.c b/drivers/scsi/ahci.c index e3b9692b9688..7eaaf7a2744d 100644 --- a/drivers/scsi/ahci.c +++ b/drivers/scsi/ahci.c @@ -1,26 +1,34 @@ /* * ahci.c - AHCI SATA support * - * Copyright 2004 Red Hat, Inc. + * Maintained by: Jeff Garzik + * Please ALWAYS copy linux-ide@vger.kernel.org + * on emails. * - * The contents of this file are subject to the Open - * Software License version 1.1 that can be found at - * http://www.opensource.org/licenses/osl-1.1.txt and is included herein - * by reference. + * Copyright 2004-2005 Red Hat, Inc. * - * Alternatively, the contents of this file may be used under the terms - * of the GNU General Public License version 2 (the "GPL") as distributed - * in the kernel source COPYING file, in which case the provisions of - * the GPL are applicable instead of the above. 
If you wish to allow - * the use of your version of this file only under the terms of the - * GPL and not to allow others to use your version of this file under - * the OSL, indicate your decision by deleting the provisions above and - * replace them with the notice and other provisions required by the GPL. - * If you do not delete the provisions above, a recipient may use your - * version of this file under either the OSL or the GPL. * - * Version 1.0 of the AHCI specification: + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; see the file COPYING. If not, write to + * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. + * + * + * libata documentation is available via 'make {ps|pdf}docs', + * as Documentation/DocBook/libata.* + * + * AHCI hardware documentation: * http://www.intel.com/technology/serialata/pdf/rev1_0.pdf + * http://www.intel.com/technology/serialata/pdf/rev1_1.pdf * */ diff --git a/drivers/scsi/ata_piix.c b/drivers/scsi/ata_piix.c index d96ebf9d2228..6898b7f74389 100644 --- a/drivers/scsi/ata_piix.c +++ b/drivers/scsi/ata_piix.c @@ -1,24 +1,42 @@ /* - - ata_piix.c - Intel PATA/SATA controllers - - Maintained by: Jeff Garzik - Please ALWAYS copy linux-ide@vger.kernel.org - on emails. 
- - - Copyright 2003-2004 Red Hat Inc - Copyright 2003-2004 Jeff Garzik - - - Copyright header from piix.c: - - Copyright (C) 1998-1999 Andrzej Krzysztofowicz, Author and Maintainer - Copyright (C) 1998-2000 Andre Hedrick - Copyright (C) 2003 Red Hat Inc - - May be copied or modified under the terms of the GNU General Public License - + * ata_piix.c - Intel PATA/SATA controllers + * + * Maintained by: Jeff Garzik + * Please ALWAYS copy linux-ide@vger.kernel.org + * on emails. + * + * + * Copyright 2003-2005 Red Hat Inc + * Copyright 2003-2005 Jeff Garzik + * + * + * Copyright header from piix.c: + * + * Copyright (C) 1998-1999 Andrzej Krzysztofowicz, Author and Maintainer + * Copyright (C) 1998-2000 Andre Hedrick + * Copyright (C) 2003 Red Hat Inc + * + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; see the file COPYING. If not, write to + * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. + * + * + * libata documentation is available via 'make {ps|pdf}docs', + * as Documentation/DocBook/libata.* + * + * Hardware documentation available at http://developer.intel.com/ + * */ #include diff --git a/drivers/scsi/libata-core.c b/drivers/scsi/libata-core.c index f4e7dcb6492b..4154e5b6bad8 100644 --- a/drivers/scsi/libata-core.c +++ b/drivers/scsi/libata-core.c @@ -1,25 +1,35 @@ /* - libata-core.c - helper library for ATA - - Copyright 2003-2004 Red Hat, Inc. All rights reserved. 
- Copyright 2003-2004 Jeff Garzik - - The contents of this file are subject to the Open - Software License version 1.1 that can be found at - http://www.opensource.org/licenses/osl-1.1.txt and is included herein - by reference. - - Alternatively, the contents of this file may be used under the terms - of the GNU General Public License version 2 (the "GPL") as distributed - in the kernel source COPYING file, in which case the provisions of - the GPL are applicable instead of the above. If you wish to allow - the use of your version of this file only under the terms of the - GPL and not to allow others to use your version of this file under - the OSL, indicate your decision by deleting the provisions above and - replace them with the notice and other provisions required by the GPL. - If you do not delete the provisions above, a recipient may use your - version of this file under either the OSL or the GPL. - + * libata-core.c - helper library for ATA + * + * Maintained by: Jeff Garzik + * Please ALWAYS copy linux-ide@vger.kernel.org + * on emails. + * + * Copyright 2003-2004 Red Hat, Inc. All rights reserved. + * Copyright 2003-2004 Jeff Garzik + * + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; see the file COPYING. If not, write to + * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. 
+ * + * + * libata documentation is available via 'make {ps|pdf}docs', + * as Documentation/DocBook/libata.* + * + * Hardware documentation available from http://www.t13.org/ and + * http://www.sata-io.org/ + * */ #include diff --git a/drivers/scsi/libata-scsi.c b/drivers/scsi/libata-scsi.c index 6a75ec2187fd..c6aeab1630ee 100644 --- a/drivers/scsi/libata-scsi.c +++ b/drivers/scsi/libata-scsi.c @@ -1,25 +1,36 @@ /* - libata-scsi.c - helper library for ATA - - Copyright 2003-2004 Red Hat, Inc. All rights reserved. - Copyright 2003-2004 Jeff Garzik - - The contents of this file are subject to the Open - Software License version 1.1 that can be found at - http://www.opensource.org/licenses/osl-1.1.txt and is included herein - by reference. - - Alternatively, the contents of this file may be used under the terms - of the GNU General Public License version 2 (the "GPL") as distributed - in the kernel source COPYING file, in which case the provisions of - the GPL are applicable instead of the above. If you wish to allow - the use of your version of this file only under the terms of the - GPL and not to allow others to use your version of this file under - the OSL, indicate your decision by deleting the provisions above and - replace them with the notice and other provisions required by the GPL. - If you do not delete the provisions above, a recipient may use your - version of this file under either the OSL or the GPL. - + * libata-scsi.c - helper library for ATA + * + * Maintained by: Jeff Garzik + * Please ALWAYS copy linux-ide@vger.kernel.org + * on emails. + * + * Copyright 2003-2004 Red Hat, Inc. All rights reserved. + * Copyright 2003-2004 Jeff Garzik + * + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; see the file COPYING. If not, write to + * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. + * + * + * libata documentation is available via 'make {ps|pdf}docs', + * as Documentation/DocBook/libata.* + * + * Hardware documentation available from + * - http://www.t10.org/ + * - http://www.t13.org/ + * */ #include diff --git a/drivers/scsi/libata.h b/drivers/scsi/libata.h index 3e7f4843020f..c51d658903d7 100644 --- a/drivers/scsi/libata.h +++ b/drivers/scsi/libata.h @@ -1,25 +1,28 @@ /* - libata.h - helper library for ATA - - Copyright 2003-2004 Red Hat, Inc. All rights reserved. - Copyright 2003-2004 Jeff Garzik - - The contents of this file are subject to the Open - Software License version 1.1 that can be found at - http://www.opensource.org/licenses/osl-1.1.txt and is included herein - by reference. - - Alternatively, the contents of this file may be used under the terms - of the GNU General Public License version 2 (the "GPL") as distributed - in the kernel source COPYING file, in which case the provisions of - the GPL are applicable instead of the above. If you wish to allow - the use of your version of this file only under the terms of the - GPL and not to allow others to use your version of this file under - the OSL, indicate your decision by deleting the provisions above and - replace them with the notice and other provisions required by the GPL. - If you do not delete the provisions above, a recipient may use your - version of this file under either the OSL or the GPL. - + * libata.h - helper library for ATA + * + * Copyright 2003-2004 Red Hat, Inc. All rights reserved. 
+ * Copyright 2003-2004 Jeff Garzik + * + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; see the file COPYING. If not, write to + * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. + * + * + * libata documentation is available via 'make {ps|pdf}docs', + * as Documentation/DocBook/libata.* + * */ #ifndef __LIBATA_H__ diff --git a/drivers/scsi/sata_nv.c b/drivers/scsi/sata_nv.c index b0403ccd8a25..1e10370adc34 100644 --- a/drivers/scsi/sata_nv.c +++ b/drivers/scsi/sata_nv.c @@ -20,6 +20,17 @@ * If you do not delete the provisions above, a recipient may use your * version of this file under either the OSL or the GPL. * + * + * libata documentation is available via 'make {ps|pdf}docs', + * as Documentation/DocBook/libata.* + * + * No hardware documentation available outside of NVIDIA. + * This driver programs the NVIDIA SATA controller in a similar + * fashion as with other PCI IDE BMDMA controllers, with a few + * NV-specific details such as register offsets, SATA phy location, + * hotplug info, etc. + * + * * 0.06 * - Added generic SATA support by using a pci_device_id that filters on * the IDE storage class code. diff --git a/drivers/scsi/sata_promise.c b/drivers/scsi/sata_promise.c index 919fb314ad10..b27e2e20280b 100644 --- a/drivers/scsi/sata_promise.c +++ b/drivers/scsi/sata_promise.c @@ -7,21 +7,26 @@ * * Copyright 2003-2004 Red Hat, Inc. 
* - * The contents of this file are subject to the Open - * Software License version 1.1 that can be found at - * http://www.opensource.org/licenses/osl-1.1.txt and is included herein - * by reference. * - * Alternatively, the contents of this file may be used under the terms - * of the GNU General Public License version 2 (the "GPL") as distributed - * in the kernel source COPYING file, in which case the provisions of - * the GPL are applicable instead of the above. If you wish to allow - * the use of your version of this file only under the terms of the - * GPL and not to allow others to use your version of this file under - * the OSL, indicate your decision by deleting the provisions above and - * replace them with the notice and other provisions required by the GPL. - * If you do not delete the provisions above, a recipient may use your - * version of this file under either the OSL or the GPL. + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; see the file COPYING. If not, write to + * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. + * + * + * libata documentation is available via 'make {ps|pdf}docs', + * as Documentation/DocBook/libata.* + * + * Hardware information only available under NDA. 
* */ diff --git a/drivers/scsi/sata_promise.h b/drivers/scsi/sata_promise.h index 6e7e96b9ee13..6ee5e190262d 100644 --- a/drivers/scsi/sata_promise.h +++ b/drivers/scsi/sata_promise.h @@ -3,21 +3,24 @@ * * Copyright 2003-2004 Red Hat, Inc. * - * The contents of this file are subject to the Open - * Software License version 1.1 that can be found at - * http://www.opensource.org/licenses/osl-1.1.txt and is included herein - * by reference. * - * Alternatively, the contents of this file may be used under the terms - * of the GNU General Public License version 2 (the "GPL") as distributed - * in the kernel source COPYING file, in which case the provisions of - * the GPL are applicable instead of the above. If you wish to allow - * the use of your version of this file only under the terms of the - * GPL and not to allow others to use your version of this file under - * the OSL, indicate your decision by deleting the provisions above and - * replace them with the notice and other provisions required by the GPL. - * If you do not delete the provisions above, a recipient may use your - * version of this file under either the OSL or the GPL. + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; see the file COPYING. If not, write to + * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. 
+ * + * + * libata documentation is available via 'make {ps|pdf}docs', + * as Documentation/DocBook/libata.* * */ diff --git a/drivers/scsi/sata_qstor.c b/drivers/scsi/sata_qstor.c index 1383e8a28d72..f6b716f4fab8 100644 --- a/drivers/scsi/sata_qstor.c +++ b/drivers/scsi/sata_qstor.c @@ -6,21 +6,24 @@ * Copyright 2005 Pacific Digital Corporation. * (OSL/GPL code release authorized by Jalil Fadavi). * - * The contents of this file are subject to the Open - * Software License version 1.1 that can be found at - * http://www.opensource.org/licenses/osl-1.1.txt and is included herein - * by reference. * - * Alternatively, the contents of this file may be used under the terms - * of the GNU General Public License version 2 (the "GPL") as distributed - * in the kernel source COPYING file, in which case the provisions of - * the GPL are applicable instead of the above. If you wish to allow - * the use of your version of this file only under the terms of the - * GPL and not to allow others to use your version of this file under - * the OSL, indicate your decision by deleting the provisions above and - * replace them with the notice and other provisions required by the GPL. - * If you do not delete the provisions above, a recipient may use your - * version of this file under either the OSL or the GPL. + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; see the file COPYING. 
If not, write to + * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. + * + * + * libata documentation is available via 'make {ps|pdf}docs', + * as Documentation/DocBook/libata.* * */ diff --git a/drivers/scsi/sata_sil.c b/drivers/scsi/sata_sil.c index 49ed557a4b66..345e6f2d28a8 100644 --- a/drivers/scsi/sata_sil.c +++ b/drivers/scsi/sata_sil.c @@ -5,24 +5,27 @@ * Please ALWAYS copy linux-ide@vger.kernel.org * on emails. * - * Copyright 2003 Red Hat, Inc. + * Copyright 2003-2005 Red Hat, Inc. * Copyright 2003 Benjamin Herrenschmidt * - * The contents of this file are subject to the Open - * Software License version 1.1 that can be found at - * http://www.opensource.org/licenses/osl-1.1.txt and is included herein - * by reference. * - * Alternatively, the contents of this file may be used under the terms - * of the GNU General Public License version 2 (the "GPL") as distributed - * in the kernel source COPYING file, in which case the provisions of - * the GPL are applicable instead of the above. If you wish to allow - * the use of your version of this file only under the terms of the - * GPL and not to allow others to use your version of this file under - * the OSL, indicate your decision by deleting the provisions above and - * replace them with the notice and other provisions required by the GPL. - * If you do not delete the provisions above, a recipient may use your - * version of this file under either the OSL or the GPL. + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; see the file COPYING. If not, write to + * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. + * + * + * libata documentation is available via 'make {ps|pdf}docs', + * as Documentation/DocBook/libata.* * */ diff --git a/drivers/scsi/sata_sis.c b/drivers/scsi/sata_sis.c index e418b89c6b9d..6db8b09db401 100644 --- a/drivers/scsi/sata_sis.c +++ b/drivers/scsi/sata_sis.c @@ -7,21 +7,26 @@ * * Copyright 2004 Uwe Koziolek * - * The contents of this file are subject to the Open - * Software License version 1.1 that can be found at - * http://www.opensource.org/licenses/osl-1.1.txt and is included herein - * by reference. * - * Alternatively, the contents of this file may be used under the terms - * of the GNU General Public License version 2 (the "GPL") as distributed - * in the kernel source COPYING file, in which case the provisions of - * the GPL are applicable instead of the above. If you wish to allow - * the use of your version of this file only under the terms of the - * GPL and not to allow others to use your version of this file under - * the OSL, indicate your decision by deleting the provisions above and - * replace them with the notice and other provisions required by the GPL. - * If you do not delete the provisions above, a recipient may use your - * version of this file under either the OSL or the GPL. + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; see the file COPYING. If not, write to + * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. + * + * + * libata documentation is available via 'make {ps|pdf}docs', + * as Documentation/DocBook/libata.* + * + * Hardware documentation available under NDA. * */ diff --git a/drivers/scsi/sata_svw.c b/drivers/scsi/sata_svw.c index 858e07185dbd..3884a3cce75c 100644 --- a/drivers/scsi/sata_svw.c +++ b/drivers/scsi/sata_svw.c @@ -13,21 +13,26 @@ * This driver probably works with non-Apple versions of the * Broadcom chipset... * - * The contents of this file are subject to the Open - * Software License version 1.1 that can be found at - * http://www.opensource.org/licenses/osl-1.1.txt and is included herein - * by reference. * - * Alternatively, the contents of this file may be used under the terms - * of the GNU General Public License version 2 (the "GPL") as distributed - * in the kernel source COPYING file, in which case the provisions of - * the GPL are applicable instead of the above. If you wish to allow - * the use of your version of this file only under the terms of the - * GPL and not to allow others to use your version of this file under - * the OSL, indicate your decision by deleting the provisions above and - * replace them with the notice and other provisions required by the GPL. - * If you do not delete the provisions above, a recipient may use your - * version of this file under either the OSL or the GPL. + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; see the file COPYING. If not, write to + * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. + * + * + * libata documentation is available via 'make {ps|pdf}docs', + * as Documentation/DocBook/libata.* + * + * Hardware documentation available under NDA. * */ diff --git a/drivers/scsi/sata_sx4.c b/drivers/scsi/sata_sx4.c index efd7d7a61135..c7f6ec262a15 100644 --- a/drivers/scsi/sata_sx4.c +++ b/drivers/scsi/sata_sx4.c @@ -7,21 +7,26 @@ * * Copyright 2003-2004 Red Hat, Inc. * - * The contents of this file are subject to the Open - * Software License version 1.1 that can be found at - * http://www.opensource.org/licenses/osl-1.1.txt and is included herein - * by reference. * - * Alternatively, the contents of this file may be used under the terms - * of the GNU General Public License version 2 (the "GPL") as distributed - * in the kernel source COPYING file, in which case the provisions of - * the GPL are applicable instead of the above. If you wish to allow - * the use of your version of this file only under the terms of the - * GPL and not to allow others to use your version of this file under - * the OSL, indicate your decision by deleting the provisions above and - * replace them with the notice and other provisions required by the GPL. - * If you do not delete the provisions above, a recipient may use your - * version of this file under either the OSL or the GPL. + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; see the file COPYING. If not, write to + * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. + * + * + * libata documentation is available via 'make {ps|pdf}docs', + * as Documentation/DocBook/libata.* + * + * Hardware documentation available under NDA. * */ diff --git a/drivers/scsi/sata_uli.c b/drivers/scsi/sata_uli.c index a71fb54eebd3..fa1021980b23 100644 --- a/drivers/scsi/sata_uli.c +++ b/drivers/scsi/sata_uli.c @@ -1,21 +1,26 @@ /* * sata_uli.c - ULi Electronics SATA * - * The contents of this file are subject to the Open - * Software License version 1.1 that can be found at - * http://www.opensource.org/licenses/osl-1.1.txt and is included herein - * by reference. * - * Alternatively, the contents of this file may be used under the terms - * of the GNU General Public License version 2 (the "GPL") as distributed - * in the kernel source COPYING file, in which case the provisions of - * the GPL are applicable instead of the above. If you wish to allow - * the use of your version of this file only under the terms of the - * GPL and not to allow others to use your version of this file under - * the OSL, indicate your decision by deleting the provisions above and - * replace them with the notice and other provisions required by the GPL. - * If you do not delete the provisions above, a recipient may use your - * version of this file under either the OSL or the GPL. 
+ * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; see the file COPYING. If not, write to + * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. + * + * + * libata documentation is available via 'make {ps|pdf}docs', + * as Documentation/DocBook/libata.* + * + * Hardware documentation available under NDA. * */ diff --git a/drivers/scsi/sata_via.c b/drivers/scsi/sata_via.c index f43183c19a12..6653ffe956f5 100644 --- a/drivers/scsi/sata_via.c +++ b/drivers/scsi/sata_via.c @@ -1,34 +1,38 @@ /* - sata_via.c - VIA Serial ATA controllers - - Maintained by: Jeff Garzik - Please ALWAYS copy linux-ide@vger.kernel.org + * sata_via.c - VIA Serial ATA controllers + * + * Maintained by: Jeff Garzik + * Please ALWAYS copy linux-ide@vger.kernel.org on emails. - - Copyright 2003-2004 Red Hat, Inc. All rights reserved. - Copyright 2003-2004 Jeff Garzik - - The contents of this file are subject to the Open - Software License version 1.1 that can be found at - http://www.opensource.org/licenses/osl-1.1.txt and is included herein - by reference. - - Alternatively, the contents of this file may be used under the terms - of the GNU General Public License version 2 (the "GPL") as distributed - in the kernel source COPYING file, in which case the provisions of - the GPL are applicable instead of the above. 
If you wish to allow - the use of your version of this file only under the terms of the - GPL and not to allow others to use your version of this file under - the OSL, indicate your decision by deleting the provisions above and - replace them with the notice and other provisions required by the GPL. - If you do not delete the provisions above, a recipient may use your - version of this file under either the OSL or the GPL. - - ---------------------------------------------------------------------- - - To-do list: - * VT6421 PATA support - + * + * Copyright 2003-2004 Red Hat, Inc. All rights reserved. + * Copyright 2003-2004 Jeff Garzik + * + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; see the file COPYING. If not, write to + * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. + * + * + * libata documentation is available via 'make {ps|pdf}docs', + * as Documentation/DocBook/libata.* + * + * Hardware documentation available under NDA. + * + * + * To-do list: + * - VT6421 PATA support + * */ #include diff --git a/drivers/scsi/sata_vsc.c b/drivers/scsi/sata_vsc.c index c5e09dc6f3de..8bddb8228d58 100644 --- a/drivers/scsi/sata_vsc.c +++ b/drivers/scsi/sata_vsc.c @@ -9,9 +9,29 @@ * * Bits from Jeff Garzik, Copyright RedHat, Inc. * - * This file is subject to the terms and conditions of the GNU General Public - * License. See the file "COPYING" in the main directory of this archive - * for more details. 
+ * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; see the file COPYING. If not, write to + * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. + * + * + * libata documentation is available via 'make {ps|pdf}docs', + * as Documentation/DocBook/libata.* + * + * Vitesse hardware documentation presumably available under NDA. + * Intel 31244 (same hardware interface) documentation presumably + * available from http://developer.intel.com/ + * */ #include diff --git a/include/linux/ata.h b/include/linux/ata.h index ca5fcadf9981..19c3e2853f17 100644 --- a/include/linux/ata.h +++ b/include/linux/ata.h @@ -1,24 +1,29 @@ /* - Copyright 2003-2004 Red Hat, Inc. All rights reserved. - Copyright 2003-2004 Jeff Garzik - - The contents of this file are subject to the Open - Software License version 1.1 that can be found at - http://www.opensource.org/licenses/osl-1.1.txt and is included herein - by reference. - - Alternatively, the contents of this file may be used under the terms - of the GNU General Public License version 2 (the "GPL") as distributed - in the kernel source COPYING file, in which case the provisions of - the GPL are applicable instead of the above. 
If you wish to allow - the use of your version of this file only under the terms of the - GPL and not to allow others to use your version of this file under - the OSL, indicate your decision by deleting the provisions above and - replace them with the notice and other provisions required by the GPL. - If you do not delete the provisions above, a recipient may use your - version of this file under either the OSL or the GPL. - + * Copyright 2003-2004 Red Hat, Inc. All rights reserved. + * Copyright 2003-2004 Jeff Garzik + * + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; see the file COPYING. If not, write to + * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. + * + * + * libata documentation is available via 'make {ps|pdf}docs', + * as Documentation/DocBook/libata.* + * + * Hardware documentation available from http://www.t13.org/ + * */ #ifndef __LINUX_ATA_H__ diff --git a/include/linux/libata.h b/include/linux/libata.h index 6cd9ba63563b..51d2b20d34f2 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -1,23 +1,26 @@ /* - Copyright 2003-2004 Red Hat, Inc. All rights reserved. - Copyright 2003-2004 Jeff Garzik - - The contents of this file are subject to the Open - Software License version 1.1 that can be found at - http://www.opensource.org/licenses/osl-1.1.txt and is included herein - by reference. 
- - Alternatively, the contents of this file may be used under the terms - of the GNU General Public License version 2 (the "GPL") as distributed - in the kernel source COPYING file, in which case the provisions of - the GPL are applicable instead of the above. If you wish to allow - the use of your version of this file only under the terms of the - GPL and not to allow others to use your version of this file under - the OSL, indicate your decision by deleting the provisions above and - replace them with the notice and other provisions required by the GPL. - If you do not delete the provisions above, a recipient may use your - version of this file under either the OSL or the GPL. - + * Copyright 2003-2005 Red Hat, Inc. All rights reserved. + * Copyright 2003-2005 Jeff Garzik + * + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; see the file COPYING. If not, write to + * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. 
+ * + * + * libata documentation is available via 'make {ps|pdf}docs', + * as Documentation/DocBook/libata.* + * */ #ifndef __LINUX_LIBATA_H__ From e13934563db047043ccead26412f552375cea90c Mon Sep 17 00:00:00 2001 From: Andy Fleming Date: Wed, 24 Aug 2005 18:46:21 -0500 Subject: [PATCH 215/584] [PATCH] PHY Layer fixup This patch adds back the code that was taken out, thus re-enabling: * The PHY Layer to initialize without crashing * Drivers to actually connect to PHYs * The entire PHY Control Layer This patch is used by the gianfar driver, and other drivers which are in development. Signed-off-by: Andy Fleming Signed-off-by: Jeff Garzik --- drivers/net/phy/Kconfig | 8 + drivers/net/phy/Makefile | 11 +- drivers/net/phy/mdio_bus.c | 79 ++++++++- drivers/net/phy/phy.c | 325 +++++++++++++++++++++++++++-------- drivers/net/phy/phy_device.c | 172 +++++++++++++++--- include/linux/phy.h | 17 ++ 6 files changed, 510 insertions(+), 102 deletions(-) diff --git a/drivers/net/phy/Kconfig b/drivers/net/phy/Kconfig index 6450bd71deb4..6a2fe3583478 100644 --- a/drivers/net/phy/Kconfig +++ b/drivers/net/phy/Kconfig @@ -12,6 +12,14 @@ config PHYLIB devices. This option provides infrastructure for managing PHY devices. +config PHYCONTROL + bool " Support for automatically handling PHY state changes" + depends on PHYLIB + help + Adds code to perform all the work for keeping PHY link + state (speed/duplex/etc) up-to-date. Also handles + interrupts. 
+ comment "MII PHY device drivers" depends on PHYLIB diff --git a/drivers/net/phy/Makefile b/drivers/net/phy/Makefile index fb7cb385a659..e4116a5fbb4c 100644 --- a/drivers/net/phy/Makefile +++ b/drivers/net/phy/Makefile @@ -2,8 +2,9 @@ libphy-objs := phy.o phy_device.o mdio_bus.o -obj-$(CONFIG_MARVELL_PHY) += libphy.o marvell.o -obj-$(CONFIG_DAVICOM_PHY) += libphy.o davicom.o -obj-$(CONFIG_CICADA_PHY) += libphy.o cicada.o -obj-$(CONFIG_LXT_PHY) += libphy.o lxt.o -obj-$(CONFIG_QSEMI_PHY) += libphy.o qsemi.o +obj-$(CONFIG_PHYLIB) += libphy.o +obj-$(CONFIG_MARVELL_PHY) += marvell.o +obj-$(CONFIG_DAVICOM_PHY) += davicom.o +obj-$(CONFIG_CICADA_PHY) += cicada.o +obj-$(CONFIG_LXT_PHY) += lxt.o +obj-$(CONFIG_QSEMI_PHY) += qsemi.o diff --git a/drivers/net/phy/mdio_bus.c b/drivers/net/phy/mdio_bus.c index d5a05be28818..41f62c0c5fcb 100644 --- a/drivers/net/phy/mdio_bus.c +++ b/drivers/net/phy/mdio_bus.c @@ -38,6 +38,80 @@ #include #include +/* mdiobus_register + * + * description: Called by a bus driver to bring up all the PHYs + * on a given bus, and attach them to the bus + */ +int mdiobus_register(struct mii_bus *bus) +{ + int i; + int err = 0; + + if (NULL == bus || NULL == bus->name || + NULL == bus->read || + NULL == bus->write) + return -EINVAL; + + spin_lock_init(&bus->mdio_lock); /* only touch *bus once it is known to be non-NULL */ + + if (bus->reset) + bus->reset(bus); + + for (i = 0; i < PHY_MAX_ADDR; i++) { + struct phy_device *phydev; + + phydev = get_phy_device(bus, i); + + if (IS_ERR(phydev)) + return PTR_ERR(phydev); + + /* There's a PHY at this address + * We need to set: + * 1) IRQ + * 2) bus_id + * 3) parent + * 4) bus + * 5) mii_bus + * And, we need to register it */ + if (phydev) { + phydev->irq = bus->irq[i]; + + phydev->dev.parent = bus->dev; + phydev->dev.bus = &mdio_bus_type; + sprintf(phydev->dev.bus_id, "phy%d:%d", bus->id, i); + + phydev->bus = bus; + + err = device_register(&phydev->dev); + + if (err) + printk(KERN_ERR "phy %d failed to register\n", + i); + } + + bus->phy_map[i] = phydev; + } + +
pr_info("%s: probed\n", bus->name); + + return err; +} +EXPORT_SYMBOL(mdiobus_register); + +void mdiobus_unregister(struct mii_bus *bus) +{ + int i; + + for (i = 0; i < PHY_MAX_ADDR; i++) { + if (bus->phy_map[i]) { + device_unregister(&bus->phy_map[i]->dev); + kfree(bus->phy_map[i]); + } + } +} +EXPORT_SYMBOL(mdiobus_unregister); + /* mdio_bus_match * * description: Given a PHY device, and a PHY driver, return 1 if @@ -96,4 +170,7 @@ int __init mdio_bus_init(void) return bus_register(&mdio_bus_type); } - +void __exit mdio_bus_exit(void) +{ + bus_unregister(&mdio_bus_type); +} diff --git a/drivers/net/phy/phy.c b/drivers/net/phy/phy.c index d3e43631b89b..d9e11f93bf3a 100644 --- a/drivers/net/phy/phy.c +++ b/drivers/net/phy/phy.c @@ -39,10 +39,20 @@ #include #include -static void phy_timer(unsigned long data); -static int phy_disable_interrupts(struct phy_device *phydev); -static void phy_sanitize_settings(struct phy_device *phydev); -static int phy_stop_interrupts(struct phy_device *phydev); +/* Convenience function to print out the current phy status + */ +void phy_print_status(struct phy_device *phydev) +{ + pr_info("%s: Link is %s", phydev->dev.bus_id, + phydev->link ? "Up" : "Down"); + if (phydev->link) + printk(" - %d/%s", phydev->speed, + DUPLEX_FULL == phydev->duplex ? + "Full" : "Half"); + + printk("\n"); +} +EXPORT_SYMBOL(phy_print_status); /* Convenience functions for reading/writing a given PHY @@ -114,42 +124,6 @@ static inline int phy_aneg_done(struct phy_device *phydev) return (retval < 0) ? retval : (retval & BMSR_ANEGCOMPLETE); } -/* phy_start_aneg - * - * description: Calls the PHY driver's config_aneg, and then - * sets the PHY state to PHY_AN if auto-negotiation is enabled, - * and to PHY_FORCING if auto-negotiation is disabled. Unless - * the PHY is currently HALTED. 
- */ -static int phy_start_aneg(struct phy_device *phydev) -{ - int err; - - spin_lock(&phydev->lock); - - if (AUTONEG_DISABLE == phydev->autoneg) - phy_sanitize_settings(phydev); - - err = phydev->drv->config_aneg(phydev); - - if (err < 0) - goto out_unlock; - - if (phydev->state != PHY_HALTED) { - if (AUTONEG_ENABLE == phydev->autoneg) { - phydev->state = PHY_AN; - phydev->link_timeout = PHY_AN_TIMEOUT; - } else { - phydev->state = PHY_FORCING; - phydev->link_timeout = PHY_FORCE_TIMEOUT; - } - } - -out_unlock: - spin_unlock(&phydev->lock); - return err; -} - /* A structure for mapping a particular speed and duplex * combination to a particular SUPPORTED and ADVERTISED value */ struct phy_setting { @@ -241,7 +215,7 @@ static inline int phy_find_valid(int idx, u32 features) * duplexes. Drop down by one in this order: 1000/FULL, * 1000/HALF, 100/FULL, 100/HALF, 10/FULL, 10/HALF */ -static void phy_sanitize_settings(struct phy_device *phydev) +void phy_sanitize_settings(struct phy_device *phydev) { u32 features = phydev->supported; int idx; @@ -256,31 +230,7 @@ static void phy_sanitize_settings(struct phy_device *phydev) phydev->speed = settings[idx].speed; phydev->duplex = settings[idx].duplex; } - -/* phy_force_reduction - * - * description: Reduces the speed/duplex settings by - * one notch. The order is so: - * 1000/FULL, 1000/HALF, 100/FULL, 100/HALF, - * 10/FULL, 10/HALF. The function bottoms out at 10/HALF. - */ -static void phy_force_reduction(struct phy_device *phydev) -{ - int idx; - - idx = phy_find_setting(phydev->speed, phydev->duplex); - - idx++; - - idx = phy_find_valid(idx, phydev->supported); - - phydev->speed = settings[idx].speed; - phydev->duplex = settings[idx].duplex; - - pr_info("Trying %d/%s\n", phydev->speed, - DUPLEX_FULL == phydev->duplex ? - "FULL" : "HALF"); -} +EXPORT_SYMBOL(phy_sanitize_settings); /* phy_ethtool_sset: * A generic ethtool sset function. 
Handles all the details @@ -291,6 +241,11 @@ static void phy_force_reduction(struct phy_device *phydev) * - phy_start_aneg() will make sure forced settings are sane, and * choose the next best ones from the ones selected, so we don't * care if ethtool tries to give us bad values + * + * A note about the PHYCONTROL Layer. If you turn off + * CONFIG_PHYCONTROL, you will need to read the PHY status + * registers after this function completes, and update your + * controller manually. */ int phy_ethtool_sset(struct phy_device *phydev, struct ethtool_cmd *cmd) { @@ -406,6 +361,51 @@ int phy_mii_ioctl(struct phy_device *phydev, return 0; } +/* phy_start_aneg + * + * description: Sanitizes the settings (if we're not + * autonegotiating them), and then calls the driver's + * config_aneg function. If the PHYCONTROL Layer is operating, + * we change the state to reflect the beginning of + * Auto-negotiation or forcing. + */ +int phy_start_aneg(struct phy_device *phydev) +{ + int err; + + spin_lock(&phydev->lock); + + if (AUTONEG_DISABLE == phydev->autoneg) + phy_sanitize_settings(phydev); + + err = phydev->drv->config_aneg(phydev); + +#ifdef CONFIG_PHYCONTROL + if (err < 0) + goto out_unlock; + + if (phydev->state != PHY_HALTED) { + if (AUTONEG_ENABLE == phydev->autoneg) { + phydev->state = PHY_AN; + phydev->link_timeout = PHY_AN_TIMEOUT; + } else { + phydev->state = PHY_FORCING; + phydev->link_timeout = PHY_FORCE_TIMEOUT; + } + } + +out_unlock: +#endif + spin_unlock(&phydev->lock); + return err; +} +EXPORT_SYMBOL(phy_start_aneg); + + +#ifdef CONFIG_PHYCONTROL +static void phy_change(void *data); +static void phy_timer(unsigned long data); + /* phy_start_machine: * * description: The PHY infrastructure can run a state machine @@ -448,6 +448,32 @@ void phy_stop_machine(struct phy_device *phydev) phydev->adjust_state = NULL; } +/* phy_force_reduction + * + * description: Reduces the speed/duplex settings by + * one notch. 
The order is so: + * 1000/FULL, 1000/HALF, 100/FULL, 100/HALF, + * 10/FULL, 10/HALF. The function bottoms out at 10/HALF. + */ +static void phy_force_reduction(struct phy_device *phydev) +{ + int idx; + + idx = phy_find_setting(phydev->speed, phydev->duplex); + + idx++; + + idx = phy_find_valid(idx, phydev->supported); + + phydev->speed = settings[idx].speed; + phydev->duplex = settings[idx].duplex; + + pr_info("Trying %d/%s\n", phydev->speed, + DUPLEX_FULL == phydev->duplex ? + "FULL" : "HALF"); +} + + /* phy_error: * * Moves the PHY to the HALTED state in response to a read @@ -462,22 +488,44 @@ void phy_error(struct phy_device *phydev) spin_unlock(&phydev->lock); } -static int phy_stop_interrupts(struct phy_device *phydev) +/* phy_interrupt + * + * description: When a PHY interrupt occurs, the handler disables + * interrupts, and schedules a work task to clear the interrupt. + */ +static irqreturn_t phy_interrupt(int irq, void *phy_dat, struct pt_regs *regs) +{ + struct phy_device *phydev = phy_dat; + + /* The MDIO bus is not allowed to be written in interrupt + * context, so we need to disable the irq here. A work + * queue will write the PHY to disable and clear the + * interrupt, and then reenable the irq line. 
*/ + disable_irq_nosync(irq); + + schedule_work(&phydev->phy_queue); + + return IRQ_HANDLED; +} + +/* Enable the interrupts from the PHY side */ +int phy_enable_interrupts(struct phy_device *phydev) { int err; - err = phy_disable_interrupts(phydev); + err = phy_clear_interrupt(phydev); - if (err) - phy_error(phydev); + if (err < 0) + return err; - free_irq(phydev->irq, phydev); + err = phy_config_interrupt(phydev, PHY_INTERRUPT_ENABLED); return err; } +EXPORT_SYMBOL(phy_enable_interrupts); /* Disable the PHY interrupts from the PHY side */ -static int phy_disable_interrupts(struct phy_device *phydev) +int phy_disable_interrupts(struct phy_device *phydev) { int err; @@ -500,6 +548,138 @@ phy_err: return err; } +EXPORT_SYMBOL(phy_disable_interrupts); + +/* phy_start_interrupts + * + * description: Request the interrupt for the given PHY. If + * this fails, then we set irq to PHY_POLL. + * Otherwise, we enable the interrupts in the PHY. + * Returns 0 on success. + * This should only be called with a valid IRQ number. 
+ */ +int phy_start_interrupts(struct phy_device *phydev) +{ + int err = 0; + + INIT_WORK(&phydev->phy_queue, phy_change, phydev); + + if (request_irq(phydev->irq, phy_interrupt, + SA_SHIRQ, + "phy_interrupt", + phydev) < 0) { + printk(KERN_WARNING "%s: Can't get IRQ %d (PHY)\n", + phydev->bus->name, + phydev->irq); + phydev->irq = PHY_POLL; + return 0; + } + + err = phy_enable_interrupts(phydev); + + return err; +} +EXPORT_SYMBOL(phy_start_interrupts); + +int phy_stop_interrupts(struct phy_device *phydev) +{ + int err; + + err = phy_disable_interrupts(phydev); + + if (err) + phy_error(phydev); + + free_irq(phydev->irq, phydev); + + return err; +} +EXPORT_SYMBOL(phy_stop_interrupts); + + +/* Scheduled by the phy_interrupt/timer to handle PHY changes */ +static void phy_change(void *data) +{ + int err; + struct phy_device *phydev = data; + + err = phy_disable_interrupts(phydev); + + if (err) + goto phy_err; + + spin_lock(&phydev->lock); + if ((PHY_RUNNING == phydev->state) || (PHY_NOLINK == phydev->state)) + phydev->state = PHY_CHANGELINK; + spin_unlock(&phydev->lock); + + enable_irq(phydev->irq); + + /* Reenable interrupts */ + err = phy_config_interrupt(phydev, PHY_INTERRUPT_ENABLED); + + if (err) + goto irq_enable_err; + + return; + +irq_enable_err: + disable_irq(phydev->irq); +phy_err: + phy_error(phydev); +} + +/* Bring down the PHY link, and stop checking the status. */ +void phy_stop(struct phy_device *phydev) +{ + spin_lock(&phydev->lock); + + if (PHY_HALTED == phydev->state) + goto out_unlock; + + if (phydev->irq != PHY_POLL) { + /* Clear any pending interrupts */ + phy_clear_interrupt(phydev); + + /* Disable PHY Interrupts */ + phy_config_interrupt(phydev, PHY_INTERRUPT_DISABLED); + } + + phydev->state = PHY_HALTED; + +out_unlock: + spin_unlock(&phydev->lock); +} + + +/* phy_start + * + * description: Indicates the attached device's readiness to + * handle PHY-related work. 
Used during startup to start the + * PHY, and after a call to phy_stop() to resume operation. + * Also used to indicate the MDIO bus has cleared an error + * condition. + */ +void phy_start(struct phy_device *phydev) +{ + spin_lock(&phydev->lock); + + switch (phydev->state) { + case PHY_STARTING: + phydev->state = PHY_PENDING; + break; + case PHY_READY: + phydev->state = PHY_UP; + break; + case PHY_HALTED: + phydev->state = PHY_RESUMING; + default: + break; + } + spin_unlock(&phydev->lock); +} +EXPORT_SYMBOL(phy_stop); +EXPORT_SYMBOL(phy_start); /* PHY timer which handles the state machine */ static void phy_timer(unsigned long data) @@ -688,3 +868,4 @@ static void phy_timer(unsigned long data) mod_timer(&phydev->phy_timer, jiffies + PHY_STATE_TIME * HZ); } +#endif /* CONFIG_PHYCONTROL */ diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c index c44d54f6310a..33f7bdb5857c 100644 --- a/drivers/net/phy/phy_device.c +++ b/drivers/net/phy/phy_device.c @@ -39,18 +39,9 @@ #include #include -static int genphy_config_init(struct phy_device *phydev); - -static struct phy_driver genphy_driver = { - .phy_id = 0xffffffff, - .phy_id_mask = 0xffffffff, - .name = "Generic PHY", - .config_init = genphy_config_init, - .features = 0, - .config_aneg = genphy_config_aneg, - .read_status = genphy_read_status, - .driver = {.owner = THIS_MODULE, }, -}; +static struct phy_driver genphy_driver; +extern int mdio_bus_init(void); +extern void mdio_bus_exit(void); /* get_phy_device * @@ -110,6 +101,7 @@ struct phy_device * get_phy_device(struct mii_bus *bus, int addr) return dev; } +#ifdef CONFIG_PHYCONTROL /* phy_prepare_link: * * description: Tells the PHY infrastructure to handle the @@ -124,6 +116,132 @@ void phy_prepare_link(struct phy_device *phydev, phydev->adjust_link = handler; } +/* phy_connect: + * + * description: Convenience function for connecting ethernet + * devices to PHY devices. 
The default behavior is for + * the PHY infrastructure to handle everything, and only notify + * the connected driver when the link status changes. If you + * don't want, or can't use the provided functionality, you may + * choose to call only the subset of functions which provide + * the desired functionality. + */ +struct phy_device * phy_connect(struct net_device *dev, const char *phy_id, + void (*handler)(struct net_device *), u32 flags) +{ + struct phy_device *phydev; + + phydev = phy_attach(dev, phy_id, flags); + + if (IS_ERR(phydev)) + return phydev; + + phy_prepare_link(phydev, handler); + + phy_start_machine(phydev, NULL); + + if (phydev->irq > 0) + phy_start_interrupts(phydev); + + return phydev; +} +EXPORT_SYMBOL(phy_connect); + +void phy_disconnect(struct phy_device *phydev) +{ + if (phydev->irq > 0) + phy_stop_interrupts(phydev); + + phy_stop_machine(phydev); + + phydev->adjust_link = NULL; + + phy_detach(phydev); +} +EXPORT_SYMBOL(phy_disconnect); + +#endif /* CONFIG_PHYCONTROL */ + +/* phy_attach: + * + * description: Called by drivers to attach to a particular PHY + * device. The phy_device is found, and properly hooked up + * to the phy_driver. If no driver is attached, then the + * genphy_driver is used. The phy_device is given a ptr to + * the attaching device, and given a callback for link status + * change. The phy_device is returned to the attaching + * driver. + */ +static int phy_compare_id(struct device *dev, void *data) +{ + return strcmp((char *)data, dev->bus_id) ? 
0 : 1; +} + +struct phy_device *phy_attach(struct net_device *dev, + const char *phy_id, u32 flags) +{ + struct bus_type *bus = &mdio_bus_type; + struct phy_device *phydev; + struct device *d; + + /* Search the list of PHY devices on the mdio bus for the + * PHY with the requested name */ + d = bus_find_device(bus, NULL, (void *)phy_id, phy_compare_id); + + if (d) { + phydev = to_phy_device(d); + } else { + printk(KERN_ERR "%s not found\n", phy_id); + return ERR_PTR(-ENODEV); + } + + /* Assume that if there is no driver, that it doesn't + * exist, and we should use the genphy driver. */ + if (NULL == d->driver) { + int err; + down_write(&d->bus->subsys.rwsem); + d->driver = &genphy_driver.driver; + + err = d->driver->probe(d); + + if (err < 0) + return ERR_PTR(err); + + device_bind_driver(d); + up_write(&d->bus->subsys.rwsem); + } + + if (phydev->attached_dev) { + printk(KERN_ERR "%s: %s already attached\n", + dev->name, phy_id); + return ERR_PTR(-EBUSY); + } + + phydev->attached_dev = dev; + + phydev->dev_flags = flags; + + return phydev; +} +EXPORT_SYMBOL(phy_attach); + +void phy_detach(struct phy_device *phydev) +{ + phydev->attached_dev = NULL; + + /* If the device had no specific driver before (i.e. 
- it + * was using the generic driver), we unbind the device + * from the generic driver so that there's a chance a + * real driver could be loaded */ + if (phydev->dev.driver == &genphy_driver.driver) { + down_write(&phydev->dev.bus->subsys.rwsem); + device_release_driver(&phydev->dev); + up_write(&phydev->dev.bus->subsys.rwsem); + } +} +EXPORT_SYMBOL(phy_detach); + + /* Generic PHY support and helper functions */ /* genphy_config_advert @@ -132,7 +250,7 @@ void phy_prepare_link(struct phy_device *phydev, * after sanitizing the values to make sure we only advertise * what is supported */ -static int genphy_config_advert(struct phy_device *phydev) +int genphy_config_advert(struct phy_device *phydev) { u32 advertise; int adv; @@ -190,6 +308,7 @@ static int genphy_config_advert(struct phy_device *phydev) return adv; } +EXPORT_SYMBOL(genphy_config_advert); /* genphy_setup_forced * @@ -541,32 +660,37 @@ void phy_driver_unregister(struct phy_driver *drv) } EXPORT_SYMBOL(phy_driver_unregister); +static struct phy_driver genphy_driver = { + .phy_id = 0xffffffff, + .phy_id_mask = 0xffffffff, + .name = "Generic PHY", + .config_init = genphy_config_init, + .features = 0, + .config_aneg = genphy_config_aneg, + .read_status = genphy_read_status, + .driver = {.owner= THIS_MODULE, }, +}; static int __init phy_init(void) { int rc; - extern int mdio_bus_init(void); - - rc = phy_driver_register(&genphy_driver); - if (rc) - goto out; rc = mdio_bus_init(); if (rc) - goto out_unreg; + return rc; - return 0; + rc = phy_driver_register(&genphy_driver); + if (rc) + mdio_bus_exit(); -out_unreg: - phy_driver_unregister(&genphy_driver); -out: return rc; } static void __exit phy_exit(void) { phy_driver_unregister(&genphy_driver); + mdio_bus_exit(); } -module_init(phy_init); +subsys_initcall(phy_init); module_exit(phy_exit); diff --git a/include/linux/phy.h b/include/linux/phy.h index 4f2b5effc16b..72cb67b66e0c 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -334,11 +334,26 @@ 
int phy_write(struct phy_device *phydev, u16 regnum, u16 val); struct phy_device* get_phy_device(struct mii_bus *bus, int addr); int phy_clear_interrupt(struct phy_device *phydev); int phy_config_interrupt(struct phy_device *phydev, u32 interrupts); +struct phy_device * phy_attach(struct net_device *dev, + const char *phy_id, u32 flags); +struct phy_device * phy_connect(struct net_device *dev, const char *phy_id, + void (*handler)(struct net_device *), u32 flags); +void phy_disconnect(struct phy_device *phydev); +void phy_detach(struct phy_device *phydev); +void phy_start(struct phy_device *phydev); +void phy_stop(struct phy_device *phydev); +int phy_start_aneg(struct phy_device *phydev); + +int mdiobus_register(struct mii_bus *bus); +void mdiobus_unregister(struct mii_bus *bus); +void phy_sanitize_settings(struct phy_device *phydev); +int phy_stop_interrupts(struct phy_device *phydev); static inline int phy_read_status(struct phy_device *phydev) { return phydev->drv->read_status(phydev); } +int genphy_config_advert(struct phy_device *phydev); int genphy_setup_forced(struct phy_device *phydev); int genphy_restart_aneg(struct phy_device *phydev); int genphy_config_aneg(struct phy_device *phydev); @@ -355,6 +370,8 @@ int phy_ethtool_sset(struct phy_device *phydev, struct ethtool_cmd *cmd); int phy_ethtool_gset(struct phy_device *phydev, struct ethtool_cmd *cmd); int phy_mii_ioctl(struct phy_device *phydev, struct mii_ioctl_data *mii_data, int cmd); +int phy_start_interrupts(struct phy_device *phydev); +void phy_print_status(struct phy_device *phydev); extern struct bus_type mdio_bus_type; #endif /* __PHY_H */ From decd300b30e499fe6be1bbfc5650fc971de8c1fa Mon Sep 17 00:00:00 2001 From: Olaf Hering Date: Mon, 8 Aug 2005 13:24:38 +1000 Subject: [PATCH 216/584] [PATCH] ppc64: make arch/ppc64/boot standalone Make the bootheader for ppc64 independent from kernel and libc headers. 
* add -nostdinc -isystem $gccincludes to not include libc headers * declare all functions in header files, also the stuff from string.S * declare some functions static * use stddef.h to get size_t (hopefully ok) * remove ppc32-types.h, only elf.h used the __NN types With further modifications by Paul Mackerras and Stephen Rothwell. Signed-off-by: Olaf Hering Signed-off-by: Paul Mackerras --- arch/ppc64/boot/Makefile | 4 +- arch/ppc64/boot/addnote.c | 4 +- arch/ppc64/boot/crt0.S | 2 +- arch/ppc64/boot/div64.S | 2 +- arch/ppc64/boot/elf.h | 149 ++++++++++++++++++++++++++ arch/ppc64/boot/main.c | 51 ++++----- arch/ppc64/boot/page.h | 34 ++++++ arch/ppc64/boot/ppc32-types.h | 36 ------- arch/ppc64/boot/ppc_asm.h | 62 +++++++++++ arch/ppc64/boot/prom.c | 196 +++++----------------------------- arch/ppc64/boot/prom.h | 18 ++++ arch/ppc64/boot/stdio.h | 16 +++ arch/ppc64/boot/string.S | 2 +- arch/ppc64/boot/string.h | 16 +++ arch/ppc64/boot/zlib.c | 2 +- 15 files changed, 348 insertions(+), 246 deletions(-) create mode 100644 arch/ppc64/boot/elf.h create mode 100644 arch/ppc64/boot/page.h delete mode 100644 arch/ppc64/boot/ppc32-types.h create mode 100644 arch/ppc64/boot/ppc_asm.h create mode 100644 arch/ppc64/boot/prom.h create mode 100644 arch/ppc64/boot/stdio.h create mode 100644 arch/ppc64/boot/string.h diff --git a/arch/ppc64/boot/Makefile b/arch/ppc64/boot/Makefile index 683b2d43c15f..2c5f5e73d00c 100644 --- a/arch/ppc64/boot/Makefile +++ b/arch/ppc64/boot/Makefile @@ -22,8 +22,8 @@ HOSTCC := gcc -BOOTCFLAGS := $(HOSTCFLAGS) $(LINUXINCLUDE) -fno-builtin -BOOTAFLAGS := -D__ASSEMBLY__ $(BOOTCFLAGS) -traditional +BOOTCFLAGS := $(HOSTCFLAGS) -fno-builtin -nostdinc -isystem $(shell $(CROSS32CC) -print-file-name=include) +BOOTAFLAGS := -D__ASSEMBLY__ $(BOOTCFLAGS) -traditional -nostdinc BOOTLFLAGS := -Ttext 0x00400000 -e _start -T $(srctree)/$(src)/zImage.lds OBJCOPYFLAGS := contents,alloc,load,readonly,data diff --git a/arch/ppc64/boot/addnote.c 
b/arch/ppc64/boot/addnote.c index 719663a694bb..8041a9845ab7 100644 --- a/arch/ppc64/boot/addnote.c +++ b/arch/ppc64/boot/addnote.c @@ -157,7 +157,7 @@ main(int ac, char **av) PUT_32BE(ns, strlen(arch) + 1); PUT_32BE(ns + 4, N_DESCR * 4); PUT_32BE(ns + 8, 0x1275); - strcpy(&buf[ns + 12], arch); + strcpy((char *) &buf[ns + 12], arch); ns += 12 + strlen(arch) + 1; for (i = 0; i < N_DESCR; ++i, ns += 4) PUT_32BE(ns, descr[i]); @@ -172,7 +172,7 @@ main(int ac, char **av) PUT_32BE(ns, strlen(rpaname) + 1); PUT_32BE(ns + 4, sizeof(rpanote)); PUT_32BE(ns + 8, 0x12759999); - strcpy(&buf[ns + 12], rpaname); + strcpy((char *) &buf[ns + 12], rpaname); ns += 12 + ROUNDUP(strlen(rpaname) + 1); for (i = 0; i < N_RPA_DESCR; ++i, ns += 4) PUT_32BE(ns, rpanote[i]); diff --git a/arch/ppc64/boot/crt0.S b/arch/ppc64/boot/crt0.S index 04d3e74cd72f..3861e7f9cf19 100644 --- a/arch/ppc64/boot/crt0.S +++ b/arch/ppc64/boot/crt0.S @@ -9,7 +9,7 @@ * NOTE: this code runs in 32 bit mode and is packaged as ELF32. */ -#include +#include "ppc_asm.h" .text .globl _start diff --git a/arch/ppc64/boot/div64.S b/arch/ppc64/boot/div64.S index 38f7e466d7d6..722f360a32a9 100644 --- a/arch/ppc64/boot/div64.S +++ b/arch/ppc64/boot/div64.S @@ -13,7 +13,7 @@ * as published by the Free Software Foundation; either version * 2 of the License, or (at your option) any later version. */ -#include +#include "ppc_asm.h" .globl __div64_32 __div64_32: diff --git a/arch/ppc64/boot/elf.h b/arch/ppc64/boot/elf.h new file mode 100644 index 000000000000..d4828fcf1cb9 --- /dev/null +++ b/arch/ppc64/boot/elf.h @@ -0,0 +1,149 @@ +#ifndef _PPC_BOOT_ELF_H_ +#define _PPC_BOOT_ELF_H_ + +/* 32-bit ELF base types. */ +typedef unsigned int Elf32_Addr; +typedef unsigned short Elf32_Half; +typedef unsigned int Elf32_Off; +typedef signed int Elf32_Sword; +typedef unsigned int Elf32_Word; + +/* 64-bit ELF base types. 
*/ +typedef unsigned long long Elf64_Addr; +typedef unsigned short Elf64_Half; +typedef signed short Elf64_SHalf; +typedef unsigned long long Elf64_Off; +typedef signed int Elf64_Sword; +typedef unsigned int Elf64_Word; +typedef unsigned long long Elf64_Xword; +typedef signed long long Elf64_Sxword; + +/* These constants are for the segment types stored in the image headers */ +#define PT_NULL 0 +#define PT_LOAD 1 +#define PT_DYNAMIC 2 +#define PT_INTERP 3 +#define PT_NOTE 4 +#define PT_SHLIB 5 +#define PT_PHDR 6 +#define PT_TLS 7 /* Thread local storage segment */ +#define PT_LOOS 0x60000000 /* OS-specific */ +#define PT_HIOS 0x6fffffff /* OS-specific */ +#define PT_LOPROC 0x70000000 +#define PT_HIPROC 0x7fffffff +#define PT_GNU_EH_FRAME 0x6474e550 + +#define PT_GNU_STACK (PT_LOOS + 0x474e551) + +/* These constants define the different elf file types */ +#define ET_NONE 0 +#define ET_REL 1 +#define ET_EXEC 2 +#define ET_DYN 3 +#define ET_CORE 4 +#define ET_LOPROC 0xff00 +#define ET_HIPROC 0xffff + +/* These constants define the various ELF target machines */ +#define EM_NONE 0 +#define EM_PPC 20 /* PowerPC */ +#define EM_PPC64 21 /* PowerPC64 */ + +#define EI_NIDENT 16 + +typedef struct elf32_hdr { + unsigned char e_ident[EI_NIDENT]; + Elf32_Half e_type; + Elf32_Half e_machine; + Elf32_Word e_version; + Elf32_Addr e_entry; /* Entry point */ + Elf32_Off e_phoff; + Elf32_Off e_shoff; + Elf32_Word e_flags; + Elf32_Half e_ehsize; + Elf32_Half e_phentsize; + Elf32_Half e_phnum; + Elf32_Half e_shentsize; + Elf32_Half e_shnum; + Elf32_Half e_shstrndx; +} Elf32_Ehdr; + +typedef struct elf64_hdr { + unsigned char e_ident[16]; /* ELF "magic number" */ + Elf64_Half e_type; + Elf64_Half e_machine; + Elf64_Word e_version; + Elf64_Addr e_entry; /* Entry point virtual address */ + Elf64_Off e_phoff; /* Program header table file offset */ + Elf64_Off e_shoff; /* Section header table file offset */ + Elf64_Word e_flags; + Elf64_Half e_ehsize; + Elf64_Half e_phentsize; + Elf64_Half 
e_phnum; + Elf64_Half e_shentsize; + Elf64_Half e_shnum; + Elf64_Half e_shstrndx; +} Elf64_Ehdr; + +/* These constants define the permissions on sections in the program + header, p_flags. */ +#define PF_R 0x4 +#define PF_W 0x2 +#define PF_X 0x1 + +typedef struct elf32_phdr { + Elf32_Word p_type; + Elf32_Off p_offset; + Elf32_Addr p_vaddr; + Elf32_Addr p_paddr; + Elf32_Word p_filesz; + Elf32_Word p_memsz; + Elf32_Word p_flags; + Elf32_Word p_align; +} Elf32_Phdr; + +typedef struct elf64_phdr { + Elf64_Word p_type; + Elf64_Word p_flags; + Elf64_Off p_offset; /* Segment file offset */ + Elf64_Addr p_vaddr; /* Segment virtual address */ + Elf64_Addr p_paddr; /* Segment physical address */ + Elf64_Xword p_filesz; /* Segment size in file */ + Elf64_Xword p_memsz; /* Segment size in memory */ + Elf64_Xword p_align; /* Segment alignment, file & memory */ +} Elf64_Phdr; + +#define EI_MAG0 0 /* e_ident[] indexes */ +#define EI_MAG1 1 +#define EI_MAG2 2 +#define EI_MAG3 3 +#define EI_CLASS 4 +#define EI_DATA 5 +#define EI_VERSION 6 +#define EI_OSABI 7 +#define EI_PAD 8 + +#define ELFMAG0 0x7f /* EI_MAG */ +#define ELFMAG1 'E' +#define ELFMAG2 'L' +#define ELFMAG3 'F' +#define ELFMAG "\177ELF" +#define SELFMAG 4 + +#define ELFCLASSNONE 0 /* EI_CLASS */ +#define ELFCLASS32 1 +#define ELFCLASS64 2 +#define ELFCLASSNUM 3 + +#define ELFDATANONE 0 /* e_ident[EI_DATA] */ +#define ELFDATA2LSB 1 +#define ELFDATA2MSB 2 + +#define EV_NONE 0 /* e_version, EI_VERSION */ +#define EV_CURRENT 1 +#define EV_NUM 2 + +#define ELFOSABI_NONE 0 +#define ELFOSABI_LINUX 3 + +#endif /* _PPC_BOOT_ELF_H_ */ diff --git a/arch/ppc64/boot/main.c b/arch/ppc64/boot/main.c index 199d9804f61c..99e68cfbe688 100644 --- a/arch/ppc64/boot/main.c +++ b/arch/ppc64/boot/main.c @@ -8,36 +8,28 @@ * as published by the Free Software Foundation; either version * 2 of the License, or (at your option) any later version. 
*/ -#include "ppc32-types.h" +#include +#include +#include "elf.h" +#include "page.h" +#include "string.h" +#include "stdio.h" +#include "prom.h" #include "zlib.h" -#include -#include -#include -#include -extern void *finddevice(const char *); -extern int getprop(void *, const char *, void *, int); -extern void printf(const char *fmt, ...); -extern int sprintf(char *buf, const char *fmt, ...); -void gunzip(void *, int, unsigned char *, int *); -void *claim(unsigned int, unsigned int, unsigned int); -void flush_cache(void *, unsigned long); -void pause(void); -extern void exit(void); +static void gunzip(void *, int, unsigned char *, int *); +extern void flush_cache(void *, unsigned long); -unsigned long strlen(const char *s); -void *memmove(void *dest, const void *src, unsigned long n); -void *memcpy(void *dest, const void *src, unsigned long n); /* Value picked to match that used by yaboot */ #define PROG_START 0x01400000 #define RAM_END (256<<20) // Fixme: use OF */ -char *avail_ram; -char *begin_avail, *end_avail; -char *avail_high; -unsigned int heap_use; -unsigned int heap_max; +static char *avail_ram; +static char *begin_avail, *end_avail; +static char *avail_high; +static unsigned int heap_use; +static unsigned int heap_max; extern char _start[]; extern char _vmlinux_start[]; @@ -52,9 +44,9 @@ struct addr_range { unsigned long size; unsigned long memsize; }; -struct addr_range vmlinux = {0, 0, 0}; -struct addr_range vmlinuz = {0, 0, 0}; -struct addr_range initrd = {0, 0, 0}; +static struct addr_range vmlinux = {0, 0, 0}; +static struct addr_range vmlinuz = {0, 0, 0}; +static struct addr_range initrd = {0, 0, 0}; static char scratch[128<<10]; /* 128kB of scratch space for gunzip */ @@ -64,13 +56,6 @@ typedef void (*kernel_entry_t)( unsigned long, void *); -int (*prom)(void *); - -void *chosen_handle; -void *stdin; -void *stdout; -void *stderr; - #undef DEBUG static unsigned long claim_base = PROG_START; @@ -277,7 +262,7 @@ void zfree(void *x, void *addr, 
unsigned nb) #define DEFLATED 8 -void gunzip(void *dst, int dstlen, unsigned char *src, int *lenp) +static void gunzip(void *dst, int dstlen, unsigned char *src, int *lenp) { z_stream s; int r, i, flags; diff --git a/arch/ppc64/boot/page.h b/arch/ppc64/boot/page.h new file mode 100644 index 000000000000..14eca30fef64 --- /dev/null +++ b/arch/ppc64/boot/page.h @@ -0,0 +1,34 @@ +#ifndef _PPC_BOOT_PAGE_H +#define _PPC_BOOT_PAGE_H +/* + * Copyright (C) 2001 PPC64 Team, IBM Corp + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#ifdef __ASSEMBLY__ +#define ASM_CONST(x) x +#else +#define __ASM_CONST(x) x##UL +#define ASM_CONST(x) __ASM_CONST(x) +#endif + +/* PAGE_SHIFT determines the page size */ +#define PAGE_SHIFT 12 +#define PAGE_SIZE (ASM_CONST(1) << PAGE_SHIFT) +#define PAGE_MASK (~(PAGE_SIZE-1)) + +/* align addr on a size boundary - adjust address up/down if needed */ +#define _ALIGN_UP(addr,size) (((addr)+((size)-1))&(~((size)-1))) +#define _ALIGN_DOWN(addr,size) ((addr)&(~((size)-1))) + +/* align addr on a size boundary - adjust address up if needed */ +#define _ALIGN(addr,size) _ALIGN_UP(addr,size) + +/* to align the pointer to the (next) page boundary */ +#define PAGE_ALIGN(addr) _ALIGN(addr, PAGE_SIZE) + +#endif /* _PPC_BOOT_PAGE_H */ diff --git a/arch/ppc64/boot/ppc32-types.h b/arch/ppc64/boot/ppc32-types.h deleted file mode 100644 index f7b8884f8f70..000000000000 --- a/arch/ppc64/boot/ppc32-types.h +++ /dev/null @@ -1,36 +0,0 @@ -#ifndef _PPC64_TYPES_H -#define _PPC64_TYPES_H - -typedef __signed__ char __s8; -typedef unsigned char __u8; - -typedef __signed__ short __s16; -typedef unsigned short __u16; - -typedef __signed__ int __s32; -typedef unsigned int __u32; - -typedef __signed__ long long __s64; -typedef unsigned long long __u64; - 
-typedef signed char s8; -typedef unsigned char u8; - -typedef signed short s16; -typedef unsigned short u16; - -typedef signed int s32; -typedef unsigned int u32; - -typedef signed long long s64; -typedef unsigned long long u64; - -typedef struct { - __u32 u[4]; -} __attribute((aligned(16))) __vector128; - -#define BITS_PER_LONG 32 - -typedef __vector128 vector128; - -#endif /* _PPC64_TYPES_H */ diff --git a/arch/ppc64/boot/ppc_asm.h b/arch/ppc64/boot/ppc_asm.h new file mode 100644 index 000000000000..1c2c2817f9b7 --- /dev/null +++ b/arch/ppc64/boot/ppc_asm.h @@ -0,0 +1,62 @@ +#ifndef _PPC64_PPC_ASM_H +#define _PPC64_PPC_ASM_H +/* + * + * Definitions used by various bits of low-level assembly code on PowerPC. + * + * Copyright (C) 1995-1999 Gary Thomas, Paul Mackerras, Cort Dougan. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
+ */ + +/* Condition Register Bit Fields */ + +#define cr0 0 +#define cr1 1 +#define cr2 2 +#define cr3 3 +#define cr4 4 +#define cr5 5 +#define cr6 6 +#define cr7 7 + + +/* General Purpose Registers (GPRs) */ + +#define r0 0 +#define r1 1 +#define r2 2 +#define r3 3 +#define r4 4 +#define r5 5 +#define r6 6 +#define r7 7 +#define r8 8 +#define r9 9 +#define r10 10 +#define r11 11 +#define r12 12 +#define r13 13 +#define r14 14 +#define r15 15 +#define r16 16 +#define r17 17 +#define r18 18 +#define r19 19 +#define r20 20 +#define r21 21 +#define r22 22 +#define r23 23 +#define r24 24 +#define r25 25 +#define r26 26 +#define r27 27 +#define r28 28 +#define r29 29 +#define r30 30 +#define r31 31 + +#endif /* _PPC64_PPC_ASM_H */ diff --git a/arch/ppc64/boot/prom.c b/arch/ppc64/boot/prom.c index 5e48b80ff5a0..4bea2f4dcb06 100644 --- a/arch/ppc64/boot/prom.c +++ b/arch/ppc64/boot/prom.c @@ -7,43 +7,19 @@ * 2 of the License, or (at your option) any later version. */ #include -#include -#include -#include - -extern __u32 __div64_32(unsigned long long *dividend, __u32 divisor); - -/* The unnecessary pointer compare is there - * to check for type safety (n must be 64bit) - */ -# define do_div(n,base) ({ \ - __u32 __base = (base); \ - __u32 __rem; \ - (void)(((typeof((n)) *)0) == ((unsigned long long *)0)); \ - if (((n) >> 32) == 0) { \ - __rem = (__u32)(n) % __base; \ - (n) = (__u32)(n) / __base; \ - } else \ - __rem = __div64_32(&(n), __base); \ - __rem; \ - }) +#include +#include "string.h" +#include "stdio.h" +#include "prom.h" int (*prom)(void *); void *chosen_handle; + void *stdin; void *stdout; void *stderr; -void exit(void); -void *finddevice(const char *name); -int getprop(void *phandle, const char *name, void *buf, int buflen); -void chrpboot(int a1, int a2, void *prom); /* in main.c */ - -int printf(char *fmt, ...); - -/* there is no convenient header to get this from... 
-- paulus */ -extern unsigned long strlen(const char *); int write(void *handle, void *ptr, int nb) @@ -210,107 +186,6 @@ fputs(char *str, void *f) return write(f, str, n) == n? 0: -1; } -int -readchar(void) -{ - char ch; - - for (;;) { - switch (read(stdin, &ch, 1)) { - case 1: - return ch; - case -1: - printf("read(stdin) returned -1\r\n"); - return -1; - } - } -} - -static char line[256]; -static char *lineptr; -static int lineleft; - -int -getchar(void) -{ - int c; - - if (lineleft == 0) { - lineptr = line; - for (;;) { - c = readchar(); - if (c == -1 || c == 4) - break; - if (c == '\r' || c == '\n') { - *lineptr++ = '\n'; - putchar('\n'); - break; - } - switch (c) { - case 0177: - case '\b': - if (lineptr > line) { - putchar('\b'); - putchar(' '); - putchar('\b'); - --lineptr; - } - break; - case 'U' & 0x1F: - while (lineptr > line) { - putchar('\b'); - putchar(' '); - putchar('\b'); - --lineptr; - } - break; - default: - if (lineptr >= &line[sizeof(line) - 1]) - putchar('\a'); - else { - putchar(c); - *lineptr++ = c; - } - } - } - lineleft = lineptr - line; - lineptr = line; - } - if (lineleft == 0) - return -1; - --lineleft; - return *lineptr++; -} - - - -/* String functions lifted from lib/vsprintf.c and lib/ctype.c */ -unsigned char _ctype[] = { -_C,_C,_C,_C,_C,_C,_C,_C, /* 0-7 */ -_C,_C|_S,_C|_S,_C|_S,_C|_S,_C|_S,_C,_C, /* 8-15 */ -_C,_C,_C,_C,_C,_C,_C,_C, /* 16-23 */ -_C,_C,_C,_C,_C,_C,_C,_C, /* 24-31 */ -_S|_SP,_P,_P,_P,_P,_P,_P,_P, /* 32-39 */ -_P,_P,_P,_P,_P,_P,_P,_P, /* 40-47 */ -_D,_D,_D,_D,_D,_D,_D,_D, /* 48-55 */ -_D,_D,_P,_P,_P,_P,_P,_P, /* 56-63 */ -_P,_U|_X,_U|_X,_U|_X,_U|_X,_U|_X,_U|_X,_U, /* 64-71 */ -_U,_U,_U,_U,_U,_U,_U,_U, /* 72-79 */ -_U,_U,_U,_U,_U,_U,_U,_U, /* 80-87 */ -_U,_U,_U,_P,_P,_P,_P,_P, /* 88-95 */ -_P,_L|_X,_L|_X,_L|_X,_L|_X,_L|_X,_L|_X,_L, /* 96-103 */ -_L,_L,_L,_L,_L,_L,_L,_L, /* 104-111 */ -_L,_L,_L,_L,_L,_L,_L,_L, /* 112-119 */ -_L,_L,_L,_P,_P,_P,_P,_C, /* 120-127 */ -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 128-143 */ 
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 144-159 */ -_S|_SP,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P, /* 160-175 */ -_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P, /* 176-191 */ -_U,_U,_U,_U,_U,_U,_U,_U,_U,_U,_U,_U,_U,_U,_U,_U, /* 192-207 */ -_U,_U,_U,_U,_U,_U,_U,_P,_U,_U,_U,_U,_U,_U,_U,_L, /* 208-223 */ -_L,_L,_L,_L,_L,_L,_L,_L,_L,_L,_L,_L,_L,_L,_L,_L, /* 224-239 */ -_L,_L,_L,_L,_L,_L,_L,_P,_L,_L,_L,_L,_L,_L,_L,_L}; /* 240-255 */ - size_t strnlen(const char * s, size_t count) { const char *sc; @@ -320,44 +195,30 @@ size_t strnlen(const char * s, size_t count) return sc - s; } -unsigned long simple_strtoul(const char *cp,char **endp,unsigned int base) -{ - unsigned long result = 0,value; +extern unsigned int __div64_32(unsigned long long *dividend, + unsigned int divisor); - if (!base) { - base = 10; - if (*cp == '0') { - base = 8; - cp++; - if ((*cp == 'x') && isxdigit(cp[1])) { - cp++; - base = 16; - } - } - } - while (isxdigit(*cp) && - (value = isdigit(*cp) ? *cp-'0' : toupper(*cp)-'A'+10) < base) { - result = result*base + value; - cp++; - } - if (endp) - *endp = (char *)cp; - return result; -} - -long simple_strtol(const char *cp,char **endp,unsigned int base) -{ - if(*cp=='-') - return -simple_strtoul(cp+1,endp,base); - return simple_strtoul(cp,endp,base); -} +/* The unnecessary pointer compare is there + * to check for type safety (n must be 64bit) + */ +# define do_div(n,base) ({ \ + unsigned int __base = (base); \ + unsigned int __rem; \ + (void)(((typeof((n)) *)0) == ((unsigned long long *)0)); \ + if (((n) >> 32) == 0) { \ + __rem = (unsigned int)(n) % __base; \ + (n) = (unsigned int)(n) / __base; \ + } else \ + __rem = __div64_32(&(n), __base); \ + __rem; \ + }) static int skip_atoi(const char **s) { - int i=0; + int i, c; - while (isdigit(**s)) - i = i*10 + *((*s)++) - '0'; + for (i = 0; '0' <= (c = **s) && c <= '9'; ++*s) + i = i*10 + c - '0'; return i; } @@ -436,9 +297,6 @@ static char * number(char * str, unsigned long long num, int base, int 
size, int return str; } -/* Forward decl. needed for IP address printing stuff... */ -int sprintf(char * buf, const char *fmt, ...); - int vsprintf(char *buf, const char *fmt, va_list args) { int len; @@ -477,7 +335,7 @@ int vsprintf(char *buf, const char *fmt, va_list args) /* get field width */ field_width = -1; - if (isdigit(*fmt)) + if ('0' <= *fmt && *fmt <= '9') field_width = skip_atoi(&fmt); else if (*fmt == '*') { ++fmt; @@ -493,7 +351,7 @@ int vsprintf(char *buf, const char *fmt, va_list args) precision = -1; if (*fmt == '.') { ++fmt; - if (isdigit(*fmt)) + if ('0' <= *fmt && *fmt <= '9') precision = skip_atoi(&fmt); else if (*fmt == '*') { ++fmt; @@ -628,7 +486,7 @@ int sprintf(char * buf, const char *fmt, ...) static char sprint_buf[1024]; int -printf(char *fmt, ...) +printf(const char *fmt, ...) { va_list args; int n; diff --git a/arch/ppc64/boot/prom.h b/arch/ppc64/boot/prom.h new file mode 100644 index 000000000000..96ab5aec740c --- /dev/null +++ b/arch/ppc64/boot/prom.h @@ -0,0 +1,18 @@ +#ifndef _PPC_BOOT_PROM_H_ +#define _PPC_BOOT_PROM_H_ + +extern int (*prom) (void *); +extern void *chosen_handle; + +extern void *stdin; +extern void *stdout; +extern void *stderr; + +extern int write(void *handle, void *ptr, int nb); +extern int read(void *handle, void *ptr, int nb); +extern void exit(void); +extern void pause(void); +extern void *finddevice(const char *); +extern void *claim(unsigned long virt, unsigned long size, unsigned long align); +extern int getprop(void *phandle, const char *name, void *buf, int buflen); +#endif /* _PPC_BOOT_PROM_H_ */ diff --git a/arch/ppc64/boot/stdio.h b/arch/ppc64/boot/stdio.h new file mode 100644 index 000000000000..24bd3a8dee94 --- /dev/null +++ b/arch/ppc64/boot/stdio.h @@ -0,0 +1,16 @@ +#ifndef _PPC_BOOT_STDIO_H_ +#define _PPC_BOOT_STDIO_H_ + +extern int printf(const char *fmt, ...); + +extern int sprintf(char *buf, const char *fmt, ...); + +extern int vsprintf(char *buf, const char *fmt, va_list args); + +extern int 
putc(int c, void *f); +extern int putchar(int c); +extern int getchar(void); + +extern int fputs(char *str, void *f); + +#endif /* _PPC_BOOT_STDIO_H_ */ diff --git a/arch/ppc64/boot/string.S b/arch/ppc64/boot/string.S index ba5f2d21c9ea..7ade87ae7718 100644 --- a/arch/ppc64/boot/string.S +++ b/arch/ppc64/boot/string.S @@ -9,7 +9,7 @@ * NOTE: this code runs in 32 bit mode and is packaged as ELF32. */ -#include +#include "ppc_asm.h" .text .globl strcpy diff --git a/arch/ppc64/boot/string.h b/arch/ppc64/boot/string.h new file mode 100644 index 000000000000..9289258bcbd6 --- /dev/null +++ b/arch/ppc64/boot/string.h @@ -0,0 +1,16 @@ +#ifndef _PPC_BOOT_STRING_H_ +#define _PPC_BOOT_STRING_H_ + +extern char *strcpy(char *dest, const char *src); +extern char *strncpy(char *dest, const char *src, size_t n); +extern char *strcat(char *dest, const char *src); +extern int strcmp(const char *s1, const char *s2); +extern size_t strlen(const char *s); +extern size_t strnlen(const char *s, size_t count); + +extern void *memset(void *s, int c, size_t n); +extern void *memmove(void *dest, const void *src, unsigned long n); +extern void *memcpy(void *dest, const void *src, unsigned long n); +extern int memcmp(const void *s1, const void *s2, size_t n); + +#endif /* _PPC_BOOT_STRING_H_ */ diff --git a/arch/ppc64/boot/zlib.c b/arch/ppc64/boot/zlib.c index 78837e884b8b..0d910cd2079d 100644 --- a/arch/ppc64/boot/zlib.c +++ b/arch/ppc64/boot/zlib.c @@ -107,7 +107,7 @@ extern void *memcpy(void *, const void *, unsigned long); /* Diagnostic functions */ #ifdef DEBUG_ZLIB -# include +# include "stdio.h" # ifndef verbose # define verbose 0 # endif From e28f7faf05159f1cfd564596f5e6178edba6bd49 Mon Sep 17 00:00:00 2001 From: David Gibson Date: Fri, 5 Aug 2005 19:39:06 +1000 Subject: [PATCH 217/584] [PATCH] Four level pagetables for ppc64 Implement 4-level pagetables for ppc64 This patch implements full four-level page tables for ppc64, thereby extending the usable user address range to 44 bits 
(16T). The patch uses a full page for the tables at the bottom and top level, and a quarter page for the intermediate levels. It uses full 64-bit pointers at every level, thus also increasing the addressable range of physical memory. This patch also tweaks the VSID allocation to allow matching range for user addresses (this halves the number of available contexts) and adds some #if and BUILD_BUG sanity checks. Signed-off-by: David Gibson Signed-off-by: Paul Mackerras --- arch/ppc64/mm/hash_utils.c | 2 +- arch/ppc64/mm/hugetlbpage.c | 207 ++++++++++++---------------------- arch/ppc64/mm/imalloc.c | 2 +- arch/ppc64/mm/init.c | 62 ++++++---- arch/ppc64/mm/slb_low.S | 2 +- arch/ppc64/mm/tlb.c | 95 +++++++++------- include/asm-ppc64/imalloc.h | 2 +- include/asm-ppc64/mmu.h | 7 +- include/asm-ppc64/page.h | 26 +++-- include/asm-ppc64/pgalloc.h | 93 +++++++++------ include/asm-ppc64/pgtable.h | 92 ++++++++------- include/asm-ppc64/processor.h | 4 +- 12 files changed, 305 insertions(+), 289 deletions(-) diff --git a/arch/ppc64/mm/hash_utils.c b/arch/ppc64/mm/hash_utils.c index 623b5d130c31..65d6e8527948 100644 --- a/arch/ppc64/mm/hash_utils.c +++ b/arch/ppc64/mm/hash_utils.c @@ -302,7 +302,7 @@ int hash_page(unsigned long ea, unsigned long access, unsigned long trap) int local = 0; cpumask_t tmp; - if ((ea & ~REGION_MASK) > EADDR_MASK) + if ((ea & ~REGION_MASK) >= PGTABLE_RANGE) return 1; switch (REGION_ID(ea)) { diff --git a/arch/ppc64/mm/hugetlbpage.c b/arch/ppc64/mm/hugetlbpage.c index f9524602818d..a13e44230a6f 100644 --- a/arch/ppc64/mm/hugetlbpage.c +++ b/arch/ppc64/mm/hugetlbpage.c @@ -27,124 +27,91 @@ #include -#define HUGEPGDIR_SHIFT (HPAGE_SHIFT + PAGE_SHIFT - 3) -#define HUGEPGDIR_SIZE (1UL << HUGEPGDIR_SHIFT) -#define HUGEPGDIR_MASK (~(HUGEPGDIR_SIZE-1)) - -#define HUGEPTE_INDEX_SIZE 9 -#define HUGEPGD_INDEX_SIZE 10 - -#define PTRS_PER_HUGEPTE (1 << HUGEPTE_INDEX_SIZE) -#define PTRS_PER_HUGEPGD (1 << HUGEPGD_INDEX_SIZE) - -static inline int 
hugepgd_index(unsigned long addr) +/* Modelled after find_linux_pte() */ +pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr) { - return (addr & ~REGION_MASK) >> HUGEPGDIR_SHIFT; -} + pgd_t *pg; + pud_t *pu; + pmd_t *pm; + pte_t *pt; -static pud_t *hugepgd_offset(struct mm_struct *mm, unsigned long addr) -{ - int index; - - if (! mm->context.huge_pgdir) - return NULL; - - - index = hugepgd_index(addr); - BUG_ON(index >= PTRS_PER_HUGEPGD); - return (pud_t *)(mm->context.huge_pgdir + index); -} - -static inline pte_t *hugepte_offset(pud_t *dir, unsigned long addr) -{ - int index; - - if (pud_none(*dir)) - return NULL; - - index = (addr >> HPAGE_SHIFT) % PTRS_PER_HUGEPTE; - return (pte_t *)pud_page(*dir) + index; -} - -static pud_t *hugepgd_alloc(struct mm_struct *mm, unsigned long addr) -{ BUG_ON(! in_hugepage_area(mm->context, addr)); - if (! mm->context.huge_pgdir) { - pgd_t *new; - spin_unlock(&mm->page_table_lock); - /* Don't use pgd_alloc(), because we want __GFP_REPEAT */ - new = kmem_cache_alloc(zero_cache, GFP_KERNEL | __GFP_REPEAT); - BUG_ON(memcmp(new, empty_zero_page, PAGE_SIZE)); - spin_lock(&mm->page_table_lock); + addr &= HPAGE_MASK; - /* - * Because we dropped the lock, we should re-check the - * entry, as somebody else could have populated it.. - */ - if (mm->context.huge_pgdir) - pgd_free(new); - else - mm->context.huge_pgdir = new; - } - return hugepgd_offset(mm, addr); -} - -static pte_t *hugepte_alloc(struct mm_struct *mm, pud_t *dir, unsigned long addr) -{ - if (! pud_present(*dir)) { - pte_t *new; - - spin_unlock(&mm->page_table_lock); - new = kmem_cache_alloc(zero_cache, GFP_KERNEL | __GFP_REPEAT); - BUG_ON(memcmp(new, empty_zero_page, PAGE_SIZE)); - spin_lock(&mm->page_table_lock); - /* - * Because we dropped the lock, we should re-check the - * entry, as somebody else could have populated it.. - */ - if (pud_present(*dir)) { - if (new) - kmem_cache_free(zero_cache, new); - } else { - struct page *ptepage; - - if (! 
new) - return NULL; - ptepage = virt_to_page(new); - ptepage->mapping = (void *) mm; - ptepage->index = addr & HUGEPGDIR_MASK; - pud_populate(mm, dir, new); + pg = pgd_offset(mm, addr); + if (!pgd_none(*pg)) { + pu = pud_offset(pg, addr); + if (!pud_none(*pu)) { + pm = pmd_offset(pu, addr); + pt = (pte_t *)pm; + BUG_ON(!pmd_none(*pm) + && !(pte_present(*pt) && pte_huge(*pt))); + return pt; } } - return hugepte_offset(dir, addr); -} - -pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr) -{ - pud_t *pud; - - BUG_ON(! in_hugepage_area(mm->context, addr)); - - pud = hugepgd_offset(mm, addr); - if (! pud) - return NULL; - - return hugepte_offset(pud, addr); + return NULL; } pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr) { - pud_t *pud; + pgd_t *pg; + pud_t *pu; + pmd_t *pm; + pte_t *pt; BUG_ON(! in_hugepage_area(mm->context, addr)); - pud = hugepgd_alloc(mm, addr); - if (! pud) - return NULL; + addr &= HPAGE_MASK; - return hugepte_alloc(mm, pud, addr); + pg = pgd_offset(mm, addr); + pu = pud_alloc(mm, pg, addr); + + if (pu) { + pm = pmd_alloc(mm, pu, addr); + if (pm) { + pt = (pte_t *)pm; + BUG_ON(!pmd_none(*pm) + && !(pte_present(*pt) && pte_huge(*pt))); + return pt; + } + } + + return NULL; +} + +#define HUGEPTE_BATCH_SIZE (HPAGE_SIZE / PMD_SIZE) + +void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, + pte_t *ptep, pte_t pte) +{ + int i; + + if (pte_present(*ptep)) { + pte_clear(mm, addr, ptep); + flush_tlb_pending(); + } + + for (i = 0; i < HUGEPTE_BATCH_SIZE; i++) { + *ptep = __pte(pte_val(pte) & ~_PAGE_HPTEFLAGS); + ptep++; + } +} + +pte_t huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, + pte_t *ptep) +{ + unsigned long old = pte_update(ptep, ~0UL); + int i; + + if (old & _PAGE_HASHPTE) + hpte_update(mm, addr, old, 0); + + for (i = 1; i < HUGEPTE_BATCH_SIZE; i++) + ptep[i] = __pte(0); + + return __pte(old); } /* @@ -541,42 +508,6 @@ unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long 
addr, } } -void hugetlb_mm_free_pgd(struct mm_struct *mm) -{ - int i; - pgd_t *pgdir; - - spin_lock(&mm->page_table_lock); - - pgdir = mm->context.huge_pgdir; - if (! pgdir) - goto out; - - mm->context.huge_pgdir = NULL; - - /* cleanup any hugepte pages leftover */ - for (i = 0; i < PTRS_PER_HUGEPGD; i++) { - pud_t *pud = (pud_t *)(pgdir + i); - - if (! pud_none(*pud)) { - pte_t *pte = (pte_t *)pud_page(*pud); - struct page *ptepage = virt_to_page(pte); - - ptepage->mapping = NULL; - - BUG_ON(memcmp(pte, empty_zero_page, PAGE_SIZE)); - kmem_cache_free(zero_cache, pte); - } - pud_clear(pud); - } - - BUG_ON(memcmp(pgdir, empty_zero_page, PAGE_SIZE)); - kmem_cache_free(zero_cache, pgdir); - - out: - spin_unlock(&mm->page_table_lock); -} - int hash_huge_page(struct mm_struct *mm, unsigned long access, unsigned long ea, unsigned long vsid, int local) { diff --git a/arch/ppc64/mm/imalloc.c b/arch/ppc64/mm/imalloc.c index b6e75b891ac0..c65b87b92756 100644 --- a/arch/ppc64/mm/imalloc.c +++ b/arch/ppc64/mm/imalloc.c @@ -31,7 +31,7 @@ static int get_free_im_addr(unsigned long size, unsigned long *im_addr) break; if ((unsigned long)tmp->addr >= ioremap_bot) addr = tmp->size + (unsigned long) tmp->addr; - if (addr > IMALLOC_END-size) + if (addr >= IMALLOC_END-size) return 1; } *im_addr = addr; diff --git a/arch/ppc64/mm/init.c b/arch/ppc64/mm/init.c index e58a24d42879..87f256df8de5 100644 --- a/arch/ppc64/mm/init.c +++ b/arch/ppc64/mm/init.c @@ -66,6 +66,14 @@ #include #include +#if PGTABLE_RANGE > USER_VSID_RANGE +#warning Limited user VSID range means pagetable space is wasted +#endif + +#if (TASK_SIZE_USER64 < PGTABLE_RANGE) && (TASK_SIZE_USER64 < USER_VSID_RANGE) +#warning TASK_SIZE is smaller than it needs to be. 
+#endif + int mem_init_done; unsigned long ioremap_bot = IMALLOC_BASE; static unsigned long phbs_io_bot = PHBS_IO_BASE; @@ -226,7 +234,7 @@ void __iomem * __ioremap(unsigned long addr, unsigned long size, * Before that, we map using addresses going * up from ioremap_bot. imalloc will use * the addresses from ioremap_bot through - * IMALLOC_END (0xE000001fffffffff) + * IMALLOC_END * */ pa = addr & PAGE_MASK; @@ -417,12 +425,6 @@ int init_new_context(struct task_struct *tsk, struct mm_struct *mm) int index; int err; -#ifdef CONFIG_HUGETLB_PAGE - /* We leave htlb_segs as it was, but for a fork, we need to - * clear the huge_pgdir. */ - mm->context.huge_pgdir = NULL; -#endif - again: if (!idr_pre_get(&mmu_context_idr, GFP_KERNEL)) return -ENOMEM; @@ -453,8 +455,6 @@ void destroy_context(struct mm_struct *mm) spin_unlock(&mmu_context_lock); mm->context.id = NO_CONTEXT; - - hugetlb_mm_free_pgd(mm); } /* @@ -833,23 +833,43 @@ void __iomem * reserve_phb_iospace(unsigned long size) return virt_addr; } -kmem_cache_t *zero_cache; - -static void zero_ctor(void *pte, kmem_cache_t *cache, unsigned long flags) +static void zero_ctor(void *addr, kmem_cache_t *cache, unsigned long flags) { - memset(pte, 0, PAGE_SIZE); + memset(addr, 0, kmem_cache_size(cache)); } +static const int pgtable_cache_size[2] = { + PTE_TABLE_SIZE, PMD_TABLE_SIZE +}; +static const char *pgtable_cache_name[ARRAY_SIZE(pgtable_cache_size)] = { + "pgd_pte_cache", "pud_pmd_cache", +}; + +kmem_cache_t *pgtable_cache[ARRAY_SIZE(pgtable_cache_size)]; + void pgtable_cache_init(void) { - zero_cache = kmem_cache_create("zero", - PAGE_SIZE, - 0, - SLAB_HWCACHE_ALIGN | SLAB_MUST_HWCACHE_ALIGN, - zero_ctor, - NULL); - if (!zero_cache) - panic("pgtable_cache_init(): could not create zero_cache!\n"); + int i; + + BUILD_BUG_ON(PTE_TABLE_SIZE != pgtable_cache_size[PTE_CACHE_NUM]); + BUILD_BUG_ON(PMD_TABLE_SIZE != pgtable_cache_size[PMD_CACHE_NUM]); + BUILD_BUG_ON(PUD_TABLE_SIZE != pgtable_cache_size[PUD_CACHE_NUM]); + 
BUILD_BUG_ON(PGD_TABLE_SIZE != pgtable_cache_size[PGD_CACHE_NUM]); + + for (i = 0; i < ARRAY_SIZE(pgtable_cache_size); i++) { + int size = pgtable_cache_size[i]; + const char *name = pgtable_cache_name[i]; + + pgtable_cache[i] = kmem_cache_create(name, + size, size, + SLAB_HWCACHE_ALIGN + | SLAB_MUST_HWCACHE_ALIGN, + zero_ctor, + NULL); + if (! pgtable_cache[i]) + panic("pgtable_cache_init(): could not create %s!\n", + name); + } } pgprot_t phys_mem_access_prot(struct file *file, unsigned long addr, diff --git a/arch/ppc64/mm/slb_low.S b/arch/ppc64/mm/slb_low.S index 8379d678f70f..f20fc52483a7 100644 --- a/arch/ppc64/mm/slb_low.S +++ b/arch/ppc64/mm/slb_low.S @@ -91,7 +91,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_16M_PAGE) 0: /* user address: proto-VSID = context<<15 | ESID */ li r11,SLB_VSID_USER - srdi. r9,r3,13 + srdi. r9,r3,USER_ESID_BITS bne- 8f /* invalid ea bits set */ #ifdef CONFIG_HUGETLB_PAGE diff --git a/arch/ppc64/mm/tlb.c b/arch/ppc64/mm/tlb.c index 26f0172c4527..d8a6593a13f0 100644 --- a/arch/ppc64/mm/tlb.c +++ b/arch/ppc64/mm/tlb.c @@ -41,7 +41,58 @@ DEFINE_PER_CPU(struct mmu_gather, mmu_gathers); DEFINE_PER_CPU(struct pte_freelist_batch *, pte_freelist_cur); unsigned long pte_freelist_forced_free; -void __pte_free_tlb(struct mmu_gather *tlb, struct page *ptepage) +struct pte_freelist_batch +{ + struct rcu_head rcu; + unsigned int index; + pgtable_free_t tables[0]; +}; + +DEFINE_PER_CPU(struct pte_freelist_batch *, pte_freelist_cur); +unsigned long pte_freelist_forced_free; + +#define PTE_FREELIST_SIZE \ + ((PAGE_SIZE - sizeof(struct pte_freelist_batch)) \ + / sizeof(pgtable_free_t)) + +#ifdef CONFIG_SMP +static void pte_free_smp_sync(void *arg) +{ + /* Do nothing, just ensure we sync with all CPUs */ +} +#endif + +/* This is only called when we are critically out of memory + * (and fail to get a page in pte_free_tlb). 
+ */ +static void pgtable_free_now(pgtable_free_t pgf) +{ + pte_freelist_forced_free++; + + smp_call_function(pte_free_smp_sync, NULL, 0, 1); + + pgtable_free(pgf); +} + +static void pte_free_rcu_callback(struct rcu_head *head) +{ + struct pte_freelist_batch *batch = + container_of(head, struct pte_freelist_batch, rcu); + unsigned int i; + + for (i = 0; i < batch->index; i++) + pgtable_free(batch->tables[i]); + + free_page((unsigned long)batch); +} + +static void pte_free_submit(struct pte_freelist_batch *batch) +{ + INIT_RCU_HEAD(&batch->rcu); + call_rcu(&batch->rcu, pte_free_rcu_callback); +} + +void pgtable_free_tlb(struct mmu_gather *tlb, pgtable_free_t pgf) { /* This is safe as we are holding page_table_lock */ cpumask_t local_cpumask = cpumask_of_cpu(smp_processor_id()); @@ -49,19 +100,19 @@ void __pte_free_tlb(struct mmu_gather *tlb, struct page *ptepage) if (atomic_read(&tlb->mm->mm_users) < 2 || cpus_equal(tlb->mm->cpu_vm_mask, local_cpumask)) { - pte_free(ptepage); + pgtable_free(pgf); return; } if (*batchp == NULL) { *batchp = (struct pte_freelist_batch *)__get_free_page(GFP_ATOMIC); if (*batchp == NULL) { - pte_free_now(ptepage); + pgtable_free_now(pgf); return; } (*batchp)->index = 0; } - (*batchp)->pages[(*batchp)->index++] = ptepage; + (*batchp)->tables[(*batchp)->index++] = pgf; if ((*batchp)->index == PTE_FREELIST_SIZE) { pte_free_submit(*batchp); *batchp = NULL; @@ -132,42 +183,6 @@ void __flush_tlb_pending(struct ppc64_tlb_batch *batch) put_cpu(); } -#ifdef CONFIG_SMP -static void pte_free_smp_sync(void *arg) -{ - /* Do nothing, just ensure we sync with all CPUs */ -} -#endif - -/* This is only called when we are critically out of memory - * (and fail to get a page in pte_free_tlb). 
- */ -void pte_free_now(struct page *ptepage) -{ - pte_freelist_forced_free++; - - smp_call_function(pte_free_smp_sync, NULL, 0, 1); - - pte_free(ptepage); -} - -static void pte_free_rcu_callback(struct rcu_head *head) -{ - struct pte_freelist_batch *batch = - container_of(head, struct pte_freelist_batch, rcu); - unsigned int i; - - for (i = 0; i < batch->index; i++) - pte_free(batch->pages[i]); - free_page((unsigned long)batch); -} - -void pte_free_submit(struct pte_freelist_batch *batch) -{ - INIT_RCU_HEAD(&batch->rcu); - call_rcu(&batch->rcu, pte_free_rcu_callback); -} - void pte_free_finish(void) { /* This is safe as we are holding page_table_lock */ diff --git a/include/asm-ppc64/imalloc.h b/include/asm-ppc64/imalloc.h index e46ff68a6e41..42adf7033a81 100644 --- a/include/asm-ppc64/imalloc.h +++ b/include/asm-ppc64/imalloc.h @@ -6,7 +6,7 @@ */ #define PHBS_IO_BASE VMALLOC_END #define IMALLOC_BASE (PHBS_IO_BASE + 0x80000000ul) /* Reserve 2 gigs for PHBs */ -#define IMALLOC_END (VMALLOC_START + EADDR_MASK) +#define IMALLOC_END (VMALLOC_START + PGTABLE_RANGE) /* imalloc region types */ diff --git a/include/asm-ppc64/mmu.h b/include/asm-ppc64/mmu.h index 70348a851313..959a4bfdcd6a 100644 --- a/include/asm-ppc64/mmu.h +++ b/include/asm-ppc64/mmu.h @@ -259,8 +259,10 @@ extern void stabs_alloc(void); #define VSID_BITS 36 #define VSID_MODULUS ((1UL<context.htlb_segs) @@ -125,36 +126,42 @@ extern void copy_user_page(void *to, void *from, unsigned long vaddr, struct pag * Entries in the pte table are 64b, while entries in the pgd & pmd are 32b. 
*/ typedef struct { unsigned long pte; } pte_t; -typedef struct { unsigned int pmd; } pmd_t; -typedef struct { unsigned int pgd; } pgd_t; +typedef struct { unsigned long pmd; } pmd_t; +typedef struct { unsigned long pud; } pud_t; +typedef struct { unsigned long pgd; } pgd_t; typedef struct { unsigned long pgprot; } pgprot_t; #define pte_val(x) ((x).pte) #define pmd_val(x) ((x).pmd) +#define pud_val(x) ((x).pud) #define pgd_val(x) ((x).pgd) #define pgprot_val(x) ((x).pgprot) -#define __pte(x) ((pte_t) { (x) } ) -#define __pmd(x) ((pmd_t) { (x) } ) -#define __pgd(x) ((pgd_t) { (x) } ) -#define __pgprot(x) ((pgprot_t) { (x) } ) +#define __pte(x) ((pte_t) { (x) }) +#define __pmd(x) ((pmd_t) { (x) }) +#define __pud(x) ((pud_t) { (x) }) +#define __pgd(x) ((pgd_t) { (x) }) +#define __pgprot(x) ((pgprot_t) { (x) }) #else /* * .. while these make it easier on the compiler */ typedef unsigned long pte_t; -typedef unsigned int pmd_t; -typedef unsigned int pgd_t; +typedef unsigned long pmd_t; +typedef unsigned long pud_t; +typedef unsigned long pgd_t; typedef unsigned long pgprot_t; #define pte_val(x) (x) #define pmd_val(x) (x) +#define pud_val(x) (x) #define pgd_val(x) (x) #define pgprot_val(x) (x) #define __pte(x) (x) #define __pmd(x) (x) +#define __pud(x) (x) #define __pgd(x) (x) #define __pgprot(x) (x) @@ -208,9 +215,6 @@ extern u64 ppc64_pft_size; /* Log 2 of page table size */ #define USER_REGION_ID (0UL) #define REGION_ID(ea) (((unsigned long)(ea)) >> REGION_SHIFT) -#define __bpn_to_ba(x) ((((unsigned long)(x)) << PAGE_SHIFT) + KERNELBASE) -#define __ba_to_bpn(x) ((((unsigned long)(x)) & ~REGION_MASK) >> PAGE_SHIFT) - #define __va(x) ((void *)((unsigned long)(x) + KERNELBASE)) #ifdef CONFIG_DISCONTIGMEM diff --git a/include/asm-ppc64/pgalloc.h b/include/asm-ppc64/pgalloc.h index 4fc4b739b380..26bc49c1108d 100644 --- a/include/asm-ppc64/pgalloc.h +++ b/include/asm-ppc64/pgalloc.h @@ -6,7 +6,12 @@ #include #include -extern kmem_cache_t *zero_cache; +extern kmem_cache_t 
*pgtable_cache[]; + +#define PTE_CACHE_NUM 0 +#define PMD_CACHE_NUM 1 +#define PUD_CACHE_NUM 1 +#define PGD_CACHE_NUM 0 /* * This program is free software; you can redistribute it and/or @@ -15,30 +20,40 @@ extern kmem_cache_t *zero_cache; * 2 of the License, or (at your option) any later version. */ -static inline pgd_t * -pgd_alloc(struct mm_struct *mm) +static inline pgd_t *pgd_alloc(struct mm_struct *mm) { - return kmem_cache_alloc(zero_cache, GFP_KERNEL); + return kmem_cache_alloc(pgtable_cache[PGD_CACHE_NUM], GFP_KERNEL); } -static inline void -pgd_free(pgd_t *pgd) +static inline void pgd_free(pgd_t *pgd) { - kmem_cache_free(zero_cache, pgd); + kmem_cache_free(pgtable_cache[PGD_CACHE_NUM], pgd); +} + +#define pgd_populate(MM, PGD, PUD) pgd_set(PGD, PUD) + +static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr) +{ + return kmem_cache_alloc(pgtable_cache[PUD_CACHE_NUM], + GFP_KERNEL|__GFP_REPEAT); +} + +static inline void pud_free(pud_t *pud) +{ + kmem_cache_free(pgtable_cache[PUD_CACHE_NUM], pud); } #define pud_populate(MM, PUD, PMD) pud_set(PUD, PMD) -static inline pmd_t * -pmd_alloc_one(struct mm_struct *mm, unsigned long addr) +static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long addr) { - return kmem_cache_alloc(zero_cache, GFP_KERNEL|__GFP_REPEAT); + return kmem_cache_alloc(pgtable_cache[PMD_CACHE_NUM], + GFP_KERNEL|__GFP_REPEAT); } -static inline void -pmd_free(pmd_t *pmd) +static inline void pmd_free(pmd_t *pmd) { - kmem_cache_free(zero_cache, pmd); + kmem_cache_free(pgtable_cache[PMD_CACHE_NUM], pmd); } #define pmd_populate_kernel(mm, pmd, pte) pmd_set(pmd, pte) @@ -47,44 +62,58 @@ pmd_free(pmd_t *pmd) static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address) { - return kmem_cache_alloc(zero_cache, GFP_KERNEL|__GFP_REPEAT); + return kmem_cache_alloc(pgtable_cache[PTE_CACHE_NUM], + GFP_KERNEL|__GFP_REPEAT); } static inline struct page *pte_alloc_one(struct mm_struct *mm, unsigned long 
address) { - pte_t *pte = kmem_cache_alloc(zero_cache, GFP_KERNEL|__GFP_REPEAT); - if (pte) - return virt_to_page(pte); - return NULL; + return virt_to_page(pte_alloc_one_kernel(mm, address)); } static inline void pte_free_kernel(pte_t *pte) { - kmem_cache_free(zero_cache, pte); + kmem_cache_free(pgtable_cache[PTE_CACHE_NUM], pte); } static inline void pte_free(struct page *ptepage) { - kmem_cache_free(zero_cache, page_address(ptepage)); + pte_free_kernel(page_address(ptepage)); } -struct pte_freelist_batch +#define PGF_CACHENUM_MASK 0xf + +typedef struct pgtable_free { + unsigned long val; +} pgtable_free_t; + +static inline pgtable_free_t pgtable_free_cache(void *p, int cachenum, + unsigned long mask) { - struct rcu_head rcu; - unsigned int index; - struct page * pages[0]; -}; + BUG_ON(cachenum > PGF_CACHENUM_MASK); -#define PTE_FREELIST_SIZE ((PAGE_SIZE - sizeof(struct pte_freelist_batch)) / \ - sizeof(struct page *)) + return (pgtable_free_t){.val = ((unsigned long) p & ~mask) | cachenum}; +} -extern void pte_free_now(struct page *ptepage); -extern void pte_free_submit(struct pte_freelist_batch *batch); +static inline void pgtable_free(pgtable_free_t pgf) +{ + void *p = (void *)(pgf.val & ~PGF_CACHENUM_MASK); + int cachenum = pgf.val & PGF_CACHENUM_MASK; -DECLARE_PER_CPU(struct pte_freelist_batch *, pte_freelist_cur); + kmem_cache_free(pgtable_cache[cachenum], p); +} -void __pte_free_tlb(struct mmu_gather *tlb, struct page *ptepage); -#define __pmd_free_tlb(tlb, pmd) __pte_free_tlb(tlb, virt_to_page(pmd)) +void pgtable_free_tlb(struct mmu_gather *tlb, pgtable_free_t pgf); + +#define __pte_free_tlb(tlb, ptepage) \ + pgtable_free_tlb(tlb, pgtable_free_cache(page_address(ptepage), \ + PTE_CACHE_NUM, PTE_TABLE_SIZE-1)) +#define __pmd_free_tlb(tlb, pmd) \ + pgtable_free_tlb(tlb, pgtable_free_cache(pmd, \ + PMD_CACHE_NUM, PMD_TABLE_SIZE-1)) +#define __pud_free_tlb(tlb, pud) \ + pgtable_free_tlb(tlb, pgtable_free_cache(pud, \ + PUD_CACHE_NUM, PUD_TABLE_SIZE-1))
#define check_pgt_cache() do { } while (0) diff --git a/include/asm-ppc64/pgtable.h b/include/asm-ppc64/pgtable.h index 46cf61c2ff69..5ea952ad7164 100644 --- a/include/asm-ppc64/pgtable.h +++ b/include/asm-ppc64/pgtable.h @@ -15,19 +15,24 @@ #include #endif /* __ASSEMBLY__ */ -#include - /* * Entries per page directory level. The PTE level must use a 64b record * for each page table entry. The PMD and PGD level use a 32b record for * each entry by assuming that each entry is page aligned. */ #define PTE_INDEX_SIZE 9 -#define PMD_INDEX_SIZE 10 -#define PGD_INDEX_SIZE 10 +#define PMD_INDEX_SIZE 7 +#define PUD_INDEX_SIZE 7 +#define PGD_INDEX_SIZE 9 + +#define PTE_TABLE_SIZE (sizeof(pte_t) << PTE_INDEX_SIZE) +#define PMD_TABLE_SIZE (sizeof(pmd_t) << PMD_INDEX_SIZE) +#define PUD_TABLE_SIZE (sizeof(pud_t) << PUD_INDEX_SIZE) +#define PGD_TABLE_SIZE (sizeof(pgd_t) << PGD_INDEX_SIZE) #define PTRS_PER_PTE (1 << PTE_INDEX_SIZE) #define PTRS_PER_PMD (1 << PMD_INDEX_SIZE) +#define PTRS_PER_PUD (1 << PUD_INDEX_SIZE) #define PTRS_PER_PGD (1 << PGD_INDEX_SIZE) /* PMD_SHIFT determines what a second-level page table entry can map */ @@ -35,8 +40,13 @@ #define PMD_SIZE (1UL << PMD_SHIFT) #define PMD_MASK (~(PMD_SIZE-1)) -/* PGDIR_SHIFT determines what a third-level page table entry can map */ -#define PGDIR_SHIFT (PMD_SHIFT + PMD_INDEX_SIZE) +/* PUD_SHIFT determines what a third-level page table entry can map */ +#define PUD_SHIFT (PMD_SHIFT + PMD_INDEX_SIZE) +#define PUD_SIZE (1UL << PUD_SHIFT) +#define PUD_MASK (~(PUD_SIZE-1)) + +/* PGDIR_SHIFT determines what a fourth-level page table entry can map */ +#define PGDIR_SHIFT (PUD_SHIFT + PUD_INDEX_SIZE) #define PGDIR_SIZE (1UL << PGDIR_SHIFT) #define PGDIR_MASK (~(PGDIR_SIZE-1)) @@ -45,15 +55,23 @@ /* * Size of EA range mapped by our pagetables.
*/ -#define EADDR_SIZE (PTE_INDEX_SIZE + PMD_INDEX_SIZE + \ - PGD_INDEX_SIZE + PAGE_SHIFT) -#define EADDR_MASK ((1UL << EADDR_SIZE) - 1) +#define PGTABLE_EADDR_SIZE (PTE_INDEX_SIZE + PMD_INDEX_SIZE + \ + PUD_INDEX_SIZE + PGD_INDEX_SIZE + PAGE_SHIFT) +#define PGTABLE_RANGE (1UL << PGTABLE_EADDR_SIZE) + +#if TASK_SIZE_USER64 > PGTABLE_RANGE +#error TASK_SIZE_USER64 exceeds pagetable range +#endif + +#if TASK_SIZE_USER64 > (1UL << (USER_ESID_BITS + SID_SHIFT)) +#error TASK_SIZE_USER64 exceeds user VSID range +#endif /* * Define the address range of the vmalloc VM area. */ #define VMALLOC_START (0xD000000000000000ul) -#define VMALLOC_SIZE (0x10000000000UL) +#define VMALLOC_SIZE (0x80000000000UL) #define VMALLOC_END (VMALLOC_START + VMALLOC_SIZE) /* @@ -154,8 +172,6 @@ extern unsigned long empty_zero_page[PAGE_SIZE/sizeof(unsigned long)]; #ifndef __ASSEMBLY__ int hash_huge_page(struct mm_struct *mm, unsigned long access, unsigned long ea, unsigned long vsid, int local); - -void hugetlb_mm_free_pgd(struct mm_struct *mm); #endif /* __ASSEMBLY__ */ #define HAVE_ARCH_UNMAPPED_AREA @@ -163,7 +179,6 @@ void hugetlb_mm_free_pgd(struct mm_struct *mm); #else #define hash_huge_page(mm,a,ea,vsid,local) -1 -#define hugetlb_mm_free_pgd(mm) do {} while (0) #endif @@ -197,39 +212,45 @@ static inline pte_t pfn_pte(unsigned long pfn, pgprot_t pgprot) #define pte_pfn(x) ((unsigned long)((pte_val(x) >> PTE_SHIFT))) #define pte_page(x) pfn_to_page(pte_pfn(x)) -#define pmd_set(pmdp, ptep) \ - (pmd_val(*(pmdp)) = __ba_to_bpn(ptep)) +#define pmd_set(pmdp, ptep) ({BUG_ON((u64)ptep < KERNELBASE); pmd_val(*(pmdp)) = (unsigned long)(ptep);}) #define pmd_none(pmd) (!pmd_val(pmd)) #define pmd_bad(pmd) (pmd_val(pmd) == 0) #define pmd_present(pmd) (pmd_val(pmd) != 0) #define pmd_clear(pmdp) (pmd_val(*(pmdp)) = 0) -#define pmd_page_kernel(pmd) (__bpn_to_ba(pmd_val(pmd))) +#define pmd_page_kernel(pmd) (pmd_val(pmd)) #define pmd_page(pmd) virt_to_page(pmd_page_kernel(pmd)) -#define pud_set(pudp, pmdp) 
(pud_val(*(pudp)) = (__ba_to_bpn(pmdp))) +#define pud_set(pudp, pmdp) (pud_val(*(pudp)) = (unsigned long)(pmdp)) #define pud_none(pud) (!pud_val(pud)) -#define pud_bad(pud) ((pud_val(pud)) == 0UL) -#define pud_present(pud) (pud_val(pud) != 0UL) -#define pud_clear(pudp) (pud_val(*(pudp)) = 0UL) -#define pud_page(pud) (__bpn_to_ba(pud_val(pud))) +#define pud_bad(pud) ((pud_val(pud)) == 0) +#define pud_present(pud) (pud_val(pud) != 0) +#define pud_clear(pudp) (pud_val(*(pudp)) = 0) +#define pud_page(pud) (pud_val(pud)) + +#define pgd_set(pgdp, pudp) ({pgd_val(*(pgdp)) = (unsigned long)(pudp);}) +#define pgd_none(pgd) (!pgd_val(pgd)) +#define pgd_bad(pgd) (pgd_val(pgd) == 0) +#define pgd_present(pgd) (pgd_val(pgd) != 0) +#define pgd_clear(pgdp) (pgd_val(*(pgdp)) = 0) +#define pgd_page(pgd) (pgd_val(pgd)) /* * Find an entry in a page-table-directory. We combine the address region * (the high order N bits) and the pgd portion of the address. */ /* to avoid overflow in free_pgtables we don't use PTRS_PER_PGD here */ -#define pgd_index(address) (((address) >> (PGDIR_SHIFT)) & 0x7ff) +#define pgd_index(address) (((address) >> (PGDIR_SHIFT)) & 0x1ff) #define pgd_offset(mm, address) ((mm)->pgd + pgd_index(address)) -/* Find an entry in the second-level page table.. */ -#define pmd_offset(pudp,addr) \ - ((pmd_t *) pud_page(*(pudp)) + (((addr) >> PMD_SHIFT) & (PTRS_PER_PMD - 1))) +#define pud_offset(pgdp, addr) \ + (((pud_t *) pgd_page(*(pgdp))) + (((addr) >> PUD_SHIFT) & (PTRS_PER_PUD - 1))) + +#define pmd_offset(pudp,addr) \ + (((pmd_t *) pud_page(*(pudp))) + (((addr) >> PMD_SHIFT) & (PTRS_PER_PMD - 1))) -/* Find an entry in the third-level page table.. 
*/ #define pte_offset_kernel(dir,addr) \ - ((pte_t *) pmd_page_kernel(*(dir)) \ - + (((addr) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1))) + (((pte_t *) pmd_page_kernel(*(dir))) + (((addr) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1))) #define pte_offset_map(dir,addr) pte_offset_kernel((dir), (addr)) #define pte_offset_map_nested(dir,addr) pte_offset_kernel((dir), (addr)) @@ -458,23 +479,18 @@ extern pgprot_t phys_mem_access_prot(struct file *file, unsigned long addr, #define pte_same(A,B) (((pte_val(A) ^ pte_val(B)) & ~_PAGE_HPTEFLAGS) == 0) #define pmd_ERROR(e) \ - printk("%s:%d: bad pmd %08x.\n", __FILE__, __LINE__, pmd_val(e)) + printk("%s:%d: bad pmd %08lx.\n", __FILE__, __LINE__, pmd_val(e)) +#define pud_ERROR(e) \ + printk("%s:%d: bad pud %08lx.\n", __FILE__, __LINE__, pud_val(e)) #define pgd_ERROR(e) \ - printk("%s:%d: bad pgd %08x.\n", __FILE__, __LINE__, pgd_val(e)) + printk("%s:%d: bad pgd %08lx.\n", __FILE__, __LINE__, pgd_val(e)) extern pgd_t swapper_pg_dir[]; extern void paging_init(void); -/* - * Because the huge pgtables are only 2 level, they can take - * at most around 4M, much less than one hugepage which the - * process is presumably entitled to use. So we don't bother - * freeing up the pagetables on unmap, and wait until - * destroy_context() to clean up the lot.
- */ #define hugetlb_free_pgd_range(tlb, addr, end, floor, ceiling) \ - do { } while (0) + free_pgd_range(tlb, addr, end, floor, ceiling) /* * This gets called at the end of handling a page fault, when diff --git a/include/asm-ppc64/processor.h b/include/asm-ppc64/processor.h index 352306cfb579..50b14c0ddb87 100644 --- a/include/asm-ppc64/processor.h +++ b/include/asm-ppc64/processor.h @@ -382,8 +382,8 @@ extern long kernel_thread(int (*fn)(void *), void *arg, unsigned long flags); extern struct task_struct *last_task_used_math; extern struct task_struct *last_task_used_altivec; -/* 64-bit user address space is 41-bits (2TBs user VM) */ -#define TASK_SIZE_USER64 (0x0000020000000000UL) +/* 64-bit user address space is 44-bits (16TB user VM) */ +#define TASK_SIZE_USER64 (0x0000100000000000UL) /* * 32-bit user address space is 4GB - 1 page From 34153fa3af45d84f3221d9b67ba2ab7e8a220d28 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Tue, 9 Aug 2005 10:36:34 +0200 Subject: [PATCH 218/584] [PATCH] flattened device tree changes This patch updates the format of the flattened device-tree passed between the boot trampoline and the kernel to support a more compact representation, for use by embedded systems mostly. 
Signed-off-by: Benjamin Herrenschmidt Signed-off-by: Paul Mackerras --- arch/ppc64/kernel/prom.c | 175 ++++++++++++++++++++++++++++------ arch/ppc64/kernel/prom_init.c | 88 ++++++++++------- include/asm-ppc64/prom.h | 14 ++- 3 files changed, 209 insertions(+), 68 deletions(-) diff --git a/arch/ppc64/kernel/prom.c b/arch/ppc64/kernel/prom.c index 5aca01ddd81f..255c39ae1b48 100644 --- a/arch/ppc64/kernel/prom.c +++ b/arch/ppc64/kernel/prom.c @@ -625,8 +625,8 @@ void __init finish_device_tree(void) static inline char *find_flat_dt_string(u32 offset) { - return ((char *)initial_boot_params) + initial_boot_params->off_dt_strings - + offset; + return ((char *)initial_boot_params) + + initial_boot_params->off_dt_strings + offset; } /** @@ -635,26 +635,33 @@ static inline char *find_flat_dt_string(u32 offset) * unflatten the tree */ static int __init scan_flat_dt(int (*it)(unsigned long node, - const char *full_path, void *data), + const char *uname, int depth, + void *data), void *data) { unsigned long p = ((unsigned long)initial_boot_params) + initial_boot_params->off_dt_struct; int rc = 0; + int depth = -1; do { u32 tag = *((u32 *)p); char *pathp; p += 4; - if (tag == OF_DT_END_NODE) + if (tag == OF_DT_END_NODE) { + depth --; + continue; + } + if (tag == OF_DT_NOP) continue; if (tag == OF_DT_END) break; if (tag == OF_DT_PROP) { u32 sz = *((u32 *)p); p += 8; - p = _ALIGN(p, sz >= 8 ? 8 : 4); + if (initial_boot_params->version < 0x10) + p = _ALIGN(p, sz >= 8 ? 
8 : 4); p += sz; p = _ALIGN(p, 4); continue; @@ -664,9 +671,18 @@ static int __init scan_flat_dt(int (*it)(unsigned long node, " device tree !\n", tag); return -EINVAL; } + depth++; pathp = (char *)p; p = _ALIGN(p + strlen(pathp) + 1, 4); - rc = it(p, pathp, data); + if ((*pathp) == '/') { + char *lp, *np; + for (lp = NULL, np = pathp; *np; np++) + if ((*np) == '/') + lp = np+1; + if (lp != NULL) + pathp = lp; + } + rc = it(p, pathp, depth, data); if (rc != 0) break; } while(1); @@ -689,17 +705,21 @@ static void* __init get_flat_dt_prop(unsigned long node, const char *name, const char *nstr; p += 4; + if (tag == OF_DT_NOP) + continue; if (tag != OF_DT_PROP) return NULL; sz = *((u32 *)p); noff = *((u32 *)(p + 4)); p += 8; - p = _ALIGN(p, sz >= 8 ? 8 : 4); + if (initial_boot_params->version < 0x10) + p = _ALIGN(p, sz >= 8 ? 8 : 4); nstr = find_flat_dt_string(noff); if (nstr == NULL) { - printk(KERN_WARNING "Can't find property index name !\n"); + printk(KERN_WARNING "Can't find property index" + " name !\n"); return NULL; } if (strcmp(name, nstr) == 0) { @@ -713,7 +733,7 @@ static void* __init get_flat_dt_prop(unsigned long node, const char *name, } static void *__init unflatten_dt_alloc(unsigned long *mem, unsigned long size, - unsigned long align) + unsigned long align) { void *res; @@ -727,13 +747,16 @@ static void *__init unflatten_dt_alloc(unsigned long *mem, unsigned long size, static unsigned long __init unflatten_dt_node(unsigned long mem, unsigned long *p, struct device_node *dad, - struct device_node ***allnextpp) + struct device_node ***allnextpp, + unsigned long fpsize) { struct device_node *np; struct property *pp, **prev_pp = NULL; char *pathp; u32 tag; - unsigned int l; + unsigned int l, allocl; + int has_name = 0; + int new_format = 0; tag = *((u32 *)(*p)); if (tag != OF_DT_BEGIN_NODE) { @@ -742,21 +765,62 @@ static unsigned long __init unflatten_dt_node(unsigned long mem, } *p += 4; pathp = (char *)*p; - l = strlen(pathp) + 1; + l = allocl = 
strlen(pathp) + 1; *p = _ALIGN(*p + l, 4); - np = unflatten_dt_alloc(&mem, sizeof(struct device_node) + l, + /* version 0x10 has a more compact unit name here instead of the full + * path. we accumulate the full path size using "fpsize", we'll rebuild + * it later. We detect this because the first character of the name is + * not '/'. + */ + if ((*pathp) != '/') { + new_format = 1; + if (fpsize == 0) { + /* root node: special case. fpsize accounts for path + * plus terminating zero. root node only has '/', so + * fpsize should be 2, but we want to avoid the first + * level nodes to have two '/' so we use fpsize 1 here + */ + fpsize = 1; + allocl = 2; + } else { + /* account for '/' and path size minus terminal 0 + * already in 'l' + */ + fpsize += l; + allocl = fpsize; + } + } + + + np = unflatten_dt_alloc(&mem, sizeof(struct device_node) + allocl, __alignof__(struct device_node)); if (allnextpp) { memset(np, 0, sizeof(*np)); np->full_name = ((char*)np) + sizeof(struct device_node); - memcpy(np->full_name, pathp, l); + if (new_format) { + char *p = np->full_name; + /* rebuild full path for new format */ + if (dad && dad->parent) { + strcpy(p, dad->full_name); +#ifdef DEBUG + if ((strlen(p) + l + 1) != allocl) { + DBG("%s: p: %d, l: %d, a: %d\n", + pathp, strlen(p), l, allocl); + } +#endif + p += strlen(p); + } + *(p++) = '/'; + memcpy(p, pathp, l); + } else + memcpy(np->full_name, pathp, l); prev_pp = &np->properties; **allnextpp = np; *allnextpp = &np->allnext; if (dad != NULL) { np->parent = dad; - /* we temporarily use the `next' field as `last_child'. */ + /* we temporarily use the next field as `last_child'*/ if (dad->next == 0) dad->child = np; else @@ -770,18 +834,26 @@ static unsigned long __init unflatten_dt_node(unsigned long mem, char *pname; tag = *((u32 *)(*p)); + if (tag == OF_DT_NOP) { + *p += 4; + continue; + } if (tag != OF_DT_PROP) break; *p += 4; sz = *((u32 *)(*p)); noff = *((u32 *)((*p) + 4)); - *p = _ALIGN((*p) + 8, sz >= 8 ? 
8 : 4); + *p += 8; + if (initial_boot_params->version < 0x10) + *p = _ALIGN(*p, sz >= 8 ? 8 : 4); pname = find_flat_dt_string(noff); if (pname == NULL) { printk("Can't find property name in list !\n"); break; } + if (strcmp(pname, "name") == 0) + has_name = 1; l = strlen(pname) + 1; pp = unflatten_dt_alloc(&mem, sizeof(struct property), __alignof__(struct property)); @@ -801,6 +873,36 @@ static unsigned long __init unflatten_dt_node(unsigned long mem, } *p = _ALIGN((*p) + sz, 4); } + /* with version 0x10 we may not have the name property, recreate + * it here from the unit name if absent + */ + if (!has_name) { + char *p = pathp, *ps = pathp, *pa = NULL; + int sz; + + while (*p) { + if ((*p) == '@') + pa = p; + if ((*p) == '/') + ps = p + 1; + p++; + } + if (pa < ps) + pa = p; + sz = (pa - ps) + 1; + pp = unflatten_dt_alloc(&mem, sizeof(struct property) + sz, + __alignof__(struct property)); + if (allnextpp) { + pp->name = "name"; + pp->length = sz; + pp->value = (unsigned char *)(pp + 1); + *prev_pp = pp; + prev_pp = &pp->next; + memcpy(pp->value, ps, sz - 1); + ((char *)pp->value)[sz - 1] = 0; + DBG("fixed up name for %s -> %s\n", pathp, pp->value); + } + } if (allnextpp) { *prev_pp = NULL; np->name = get_property(np, "name", NULL); @@ -812,7 +914,7 @@ static unsigned long __init unflatten_dt_node(unsigned long mem, np->type = ""; } while (tag == OF_DT_BEGIN_NODE) { - mem = unflatten_dt_node(mem, p, np, allnextpp); + mem = unflatten_dt_node(mem, p, np, allnextpp, fpsize); tag = *((u32 *)(*p)); } if (tag != OF_DT_END_NODE) { @@ -842,21 +944,27 @@ void __init unflatten_device_tree(void) /* First pass, scan for size */ start = ((unsigned long)initial_boot_params) + initial_boot_params->off_dt_struct; - size = unflatten_dt_node(0, &start, NULL, NULL); + size = unflatten_dt_node(0, &start, NULL, NULL, 0); + size = (size | 3) + 1; DBG(" size is %lx, allocating...\n", size); /* Allocate memory for the expanded device tree */ - mem = (unsigned 
long)abs_to_virt(lmb_alloc(size, + mem = (unsigned long)abs_to_virt(lmb_alloc(size + 4, __alignof__(struct device_node))); + ((u32 *)mem)[size / 4] = 0xdeadbeef; + DBG(" unflattening...\n", mem); /* Second pass, do actual unflattening */ start = ((unsigned long)initial_boot_params) + initial_boot_params->off_dt_struct; - unflatten_dt_node(mem, &start, NULL, &allnextp); + unflatten_dt_node(mem, &start, NULL, &allnextp, 0); if (*((u32 *)start) != OF_DT_END) - printk(KERN_WARNING "Weird tag at end of tree: %x\n", *((u32 *)start)); + printk(KERN_WARNING "Weird tag at end of tree: %08x\n", *((u32 *)start)); + if (((u32 *)mem)[size / 4] != 0xdeadbeef) + printk(KERN_WARNING "End of tree marker overwritten: %08x\n", + ((u32 *)mem)[size / 4] ); *allnextp = NULL; /* Get pointer to OF "/chosen" node for use everywhere */ @@ -880,7 +988,7 @@ void __init unflatten_device_tree(void) static int __init early_init_dt_scan_cpus(unsigned long node, - const char *full_path, void *data) + const char *uname, int depth, void *data) { char *type = get_flat_dt_prop(node, "device_type", NULL); u32 *prop; @@ -947,13 +1055,15 @@ static int __init early_init_dt_scan_cpus(unsigned long node, } static int __init early_init_dt_scan_chosen(unsigned long node, - const char *full_path, void *data) + const char *uname, int depth, void *data) { u32 *prop; u64 *prop64; extern unsigned long memory_limit, tce_alloc_start, tce_alloc_end; - if (strcmp(full_path, "/chosen") != 0) + DBG("search \"chosen\", depth: %d, uname: %s\n", depth, uname); + + if (depth != 1 || strcmp(uname, "chosen") != 0) return 0; /* get platform type */ @@ -1003,18 +1113,20 @@ static int __init early_init_dt_scan_chosen(unsigned long node, } static int __init early_init_dt_scan_root(unsigned long node, - const char *full_path, void *data) + const char *uname, int depth, void *data) { u32 *prop; - if (strcmp(full_path, "/") != 0) + if (depth != 0) return 0; prop = (u32 *)get_flat_dt_prop(node, "#size-cells", NULL); 
dt_root_size_cells = (prop == NULL) ? 1 : *prop; - + DBG("dt_root_size_cells = %x\n", dt_root_size_cells); + prop = (u32 *)get_flat_dt_prop(node, "#address-cells", NULL); dt_root_addr_cells = (prop == NULL) ? 2 : *prop; + DBG("dt_root_addr_cells = %x\n", dt_root_addr_cells); /* break now */ return 1; @@ -1042,7 +1154,7 @@ static unsigned long __init dt_mem_next_cell(int s, cell_t **cellp) static int __init early_init_dt_scan_memory(unsigned long node, - const char *full_path, void *data) + const char *uname, int depth, void *data) { char *type = get_flat_dt_prop(node, "device_type", NULL); cell_t *reg, *endp; @@ -1058,7 +1170,9 @@ static int __init early_init_dt_scan_memory(unsigned long node, endp = reg + (l / sizeof(cell_t)); - DBG("memory scan node %s ...\n", full_path); + DBG("memory scan node %s ..., reg size %ld, data: %x %x %x %x, ...\n", + uname, l, reg[0], reg[1], reg[2], reg[3]); + while ((endp - reg) >= (dt_root_addr_cells + dt_root_size_cells)) { unsigned long base, size; @@ -1469,10 +1583,11 @@ struct device_node *of_find_node_by_path(const char *path) struct device_node *np = allnodes; read_lock(&devtree_lock); - for (; np != 0; np = np->allnext) + for (; np != 0; np = np->allnext) { if (np->full_name != 0 && strcasecmp(np->full_name, path) == 0 && of_node_get(np)) break; + } read_unlock(&devtree_lock); return np; } diff --git a/arch/ppc64/kernel/prom_init.c b/arch/ppc64/kernel/prom_init.c index dbbe6c79d8da..adcf972711fc 100644 --- a/arch/ppc64/kernel/prom_init.c +++ b/arch/ppc64/kernel/prom_init.c @@ -1534,7 +1534,8 @@ static unsigned long __init dt_find_string(char *str) */ #define MAX_PROPERTY_NAME 64 -static void __init scan_dt_build_strings(phandle node, unsigned long *mem_start, +static void __init scan_dt_build_strings(phandle node, + unsigned long *mem_start, unsigned long *mem_end) { unsigned long offset = reloc_offset(); @@ -1547,16 +1548,21 @@ static void __init scan_dt_build_strings(phandle node, unsigned long *mem_start, /* get and store 
all property names */ prev_name = RELOC(""); for (;;) { - int rc; - /* 64 is max len of name including nul. */ namep = make_room(mem_start, mem_end, MAX_PROPERTY_NAME, 1); - rc = call_prom("nextprop", 3, 1, node, prev_name, namep); - if (rc != 1) { + if (call_prom("nextprop", 3, 1, node, prev_name, namep) != 1) { /* No more nodes: unwind alloc */ *mem_start = (unsigned long)namep; break; } + + /* skip "name" */ + if (strcmp(namep, RELOC("name")) == 0) { + *mem_start = (unsigned long)namep; + prev_name = RELOC("name"); + continue; + } + /* get/create string entry */ soff = dt_find_string(namep); if (soff != 0) { *mem_start = (unsigned long)namep; @@ -1571,7 +1577,7 @@ static void __init scan_dt_build_strings(phandle node, unsigned long *mem_start, /* do all our children */ child = call_prom("child", 1, 1, node); - while (child != (phandle)0) { + while (child != 0) { scan_dt_build_strings(child, mem_start, mem_end); child = call_prom("peer", 1, 1, child); } @@ -1580,16 +1586,13 @@ static void __init scan_dt_build_strings(phandle node, unsigned long *mem_start, static void __init scan_dt_build_struct(phandle node, unsigned long *mem_start, unsigned long *mem_end) { - int l, align; phandle child; - char *namep, *prev_name, *sstart, *p, *ep; + char *namep, *prev_name, *sstart, *p, *ep, *lp, *path; unsigned long soff; unsigned char *valp; unsigned long offset = reloc_offset(); - char pname[MAX_PROPERTY_NAME]; - char *path; - - path = RELOC(prom_scratch); + static char pname[MAX_PROPERTY_NAME]; + int l; dt_push_token(OF_DT_BEGIN_NODE, mem_start, mem_end); @@ -1599,23 +1602,33 @@ static void __init scan_dt_build_struct(phandle node, unsigned long *mem_start, namep, *mem_end - *mem_start); if (l >= 0) { /* Didn't fit? Get more room. 
*/ - if (l+1 > *mem_end - *mem_start) { + if ((l+1) > (*mem_end - *mem_start)) { namep = make_room(mem_start, mem_end, l+1, 1); call_prom("package-to-path", 3, 1, node, namep, l); } namep[l] = '\0'; + /* Fixup an Apple bug where they have bogus \0 chars in the * middle of the path in some properties */ for (p = namep, ep = namep + l; p < ep; p++) if (*p == '\0') { memmove(p, p+1, ep - p); - ep--; l--; + ep--; l--; p--; } - *mem_start = _ALIGN(((unsigned long) namep) + strlen(namep) + 1, 4); + + /* now try to extract the unit name in that mess */ + for (p = namep, lp = NULL; *p; p++) + if (*p == '/') + lp = p + 1; + if (lp != NULL) + memmove(namep, lp, strlen(lp) + 1); + *mem_start = _ALIGN(((unsigned long) namep) + + strlen(namep) + 1, 4); } /* get it again for debugging */ + path = RELOC(prom_scratch); memset(path, 0, PROM_SCRATCH_SIZE); call_prom("package-to-path", 3, 1, node, path, PROM_SCRATCH_SIZE-1); @@ -1623,23 +1636,27 @@ static void __init scan_dt_build_struct(phandle node, unsigned long *mem_start, prev_name = RELOC(""); sstart = (char *)RELOC(dt_string_start); for (;;) { - int rc; - - rc = call_prom("nextprop", 3, 1, node, prev_name, pname); - if (rc != 1) + if (call_prom("nextprop", 3, 1, node, prev_name, + RELOC(pname)) != 1) break; + /* skip "name" */ + if (strcmp(RELOC(pname), RELOC("name")) == 0) { + prev_name = RELOC("name"); + continue; + } + /* find string offset */ - soff = dt_find_string(pname); + soff = dt_find_string(RELOC(pname)); if (soff == 0) { - prom_printf("WARNING: Can't find string index for <%s>, node %s\n", - pname, path); + prom_printf("WARNING: Can't find string index for" + " <%s>, node %s\n", RELOC(pname), path); break; } prev_name = sstart + soff; /* get length */ - l = call_prom("getproplen", 2, 1, node, pname); + l = call_prom("getproplen", 2, 1, node, RELOC(pname)); /* sanity checks */ if (l == PROM_ERROR) @@ -1648,7 +1665,7 @@ static void __init scan_dt_build_struct(phandle node, unsigned long *mem_start, 
prom_printf("WARNING: ignoring large property "); /* It seems OF doesn't null-terminate the path :-( */ prom_printf("[%s] ", path); - prom_printf("%s length 0x%x\n", pname, l); + prom_printf("%s length 0x%x\n", RELOC(pname), l); continue; } @@ -1658,17 +1675,16 @@ static void __init scan_dt_build_struct(phandle node, unsigned long *mem_start, dt_push_token(soff, mem_start, mem_end); /* push property content */ - align = (l >= 8) ? 8 : 4; - valp = make_room(mem_start, mem_end, l, align); - call_prom("getprop", 4, 1, node, pname, valp, l); + valp = make_room(mem_start, mem_end, l, 4); + call_prom("getprop", 4, 1, node, RELOC(pname), valp, l); *mem_start = _ALIGN(*mem_start, 4); } /* Add a "linux,phandle" property. */ soff = dt_find_string(RELOC("linux,phandle")); if (soff == 0) - prom_printf("WARNING: Can't find string index for " - " node %s\n", path); + prom_printf("WARNING: Can't find string index for" + " node %s\n", path); else { dt_push_token(OF_DT_PROP, mem_start, mem_end); dt_push_token(4, mem_start, mem_end); @@ -1679,7 +1695,7 @@ static void __init scan_dt_build_struct(phandle node, unsigned long *mem_start, /* do all our children */ child = call_prom("child", 1, 1, node); - while (child != (phandle)0) { + while (child != 0) { scan_dt_build_struct(child, mem_start, mem_end); child = call_prom("peer", 1, 1, child); } @@ -1718,7 +1734,8 @@ static void __init flatten_device_tree(void) /* Build header and make room for mem rsv map */ mem_start = _ALIGN(mem_start, 4); - hdr = make_room(&mem_start, &mem_end, sizeof(struct boot_param_header), 4); + hdr = make_room(&mem_start, &mem_end, + sizeof(struct boot_param_header), 4); RELOC(dt_header_start) = (unsigned long)hdr; rsvmap = make_room(&mem_start, &mem_end, sizeof(mem_reserve_map), 8); @@ -1731,11 +1748,11 @@ static void __init flatten_device_tree(void) namep = make_room(&mem_start, &mem_end, 16, 1); strcpy(namep, RELOC("linux,phandle")); mem_start = (unsigned long)namep + strlen(namep) + 1; - 
RELOC(dt_string_end) = mem_start; /* Build string array */ prom_printf("Building dt strings...\n"); scan_dt_build_strings(root, &mem_start, &mem_end); + RELOC(dt_string_end) = mem_start; /* Build structure */ mem_start = PAGE_ALIGN(mem_start); @@ -1750,9 +1767,11 @@ static void __init flatten_device_tree(void) hdr->totalsize = RELOC(dt_struct_end) - RELOC(dt_header_start); hdr->off_dt_struct = RELOC(dt_struct_start) - RELOC(dt_header_start); hdr->off_dt_strings = RELOC(dt_string_start) - RELOC(dt_header_start); + hdr->dt_strings_size = RELOC(dt_string_end) - RELOC(dt_string_start); hdr->off_mem_rsvmap = ((unsigned long)rsvmap) - RELOC(dt_header_start); hdr->version = OF_DT_VERSION; - hdr->last_comp_version = 1; + /* Version 16 is not backward compatible */ + hdr->last_comp_version = 0x10; /* Reserve the whole thing and copy the reserve map in, we * also bump mem_reserve_cnt to cause further reservations to @@ -1808,6 +1827,9 @@ static void __init fixup_device_tree(void) /* does it need fixup ? 
*/ if (prom_getproplen(i2c, "interrupts") > 0) return; + + prom_printf("fixing up bogus interrupts for u3 i2c...\n"); + /* interrupt on this revision of u3 is number 0 and level */ interrupts[0] = 0; interrupts[1] = 1; diff --git a/include/asm-ppc64/prom.h b/include/asm-ppc64/prom.h index 04b1a84f7ca3..dc5330b39509 100644 --- a/include/asm-ppc64/prom.h +++ b/include/asm-ppc64/prom.h @@ -22,13 +22,15 @@ #define RELOC(x) (*PTRRELOC(&(x))) /* Definitions used by the flattened device tree */ -#define OF_DT_HEADER 0xd00dfeed /* 4: version, 4: total size */ -#define OF_DT_BEGIN_NODE 0x1 /* Start node: full name */ +#define OF_DT_HEADER 0xd00dfeed /* marker */ +#define OF_DT_BEGIN_NODE 0x1 /* Start of node, full name */ #define OF_DT_END_NODE 0x2 /* End node */ -#define OF_DT_PROP 0x3 /* Property: name off, size, content */ +#define OF_DT_PROP 0x3 /* Property: name off, size, + * content */ +#define OF_DT_NOP 0x4 /* nop */ #define OF_DT_END 0x9 -#define OF_DT_VERSION 1 +#define OF_DT_VERSION 0x10 /* * This is what gets passed to the kernel by prom_init or kexec @@ -54,7 +56,9 @@ struct boot_param_header u32 version; /* format version */ u32 last_comp_version; /* last compatible version */ /* version 2 fields below */ - u32 boot_cpuid_phys; /* Which physical CPU id we're booting on */ + u32 boot_cpuid_phys; /* Physical CPU id we're booting on */ + /* version 3 fields below */ + u32 dt_strings_size; /* size of the DT strings block */ }; From 293da76b3d4c2f362f906bce8c5d2e053bdf8d44 Mon Sep 17 00:00:00 2001 From: Jake Moilanen Date: Thu, 9 Jun 2005 09:31:12 -0500 Subject: [PATCH 219/584] [PATCH] ppc64: PCI device-node failure detection OpenFirmware marks devices as failed in the device-tree when a hardware problem is detected. The kernel needs to fail config reads/writes to prevent a kernel crash when incorrect data is read. This patch validates that the device-node is not marked "fail" when config space reads/writes are attempted. 
Signed-off-by: Jake Moilanen Signed-off-by: Paul Mackerras --- arch/ppc64/kernel/rtas_pci.c | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/arch/ppc64/kernel/rtas_pci.c b/arch/ppc64/kernel/rtas_pci.c index 1048817befb8..1dccadaddd1d 100644 --- a/arch/ppc64/kernel/rtas_pci.c +++ b/arch/ppc64/kernel/rtas_pci.c @@ -58,6 +58,21 @@ static int config_access_valid(struct device_node *dn, int where) return 0; } +static int of_device_available(struct device_node * dn) +{ + char * status; + + status = get_property(dn, "status", NULL); + + if (!status) + return 1; + + if (!strcmp(status, "okay")) + return 1; + + return 0; +} + static int rtas_read_config(struct device_node *dn, int where, int size, u32 *val) { int returnval = -1; @@ -103,7 +118,7 @@ static int rtas_pci_read_config(struct pci_bus *bus, /* Search only direct children of the bus */ for (dn = busdn->child; dn; dn = dn->sibling) - if (dn->devfn == devfn) + if (dn->devfn == devfn && of_device_available(dn)) return rtas_read_config(dn, where, size, val); return PCIBIOS_DEVICE_NOT_FOUND; } @@ -146,7 +161,7 @@ static int rtas_pci_write_config(struct pci_bus *bus, /* Search only direct children of the bus */ for (dn = busdn->child; dn; dn = dn->sibling) - if (dn->devfn == devfn) + if (dn->devfn == devfn && of_device_available(dn)) return rtas_write_config(dn, where, size, val); return PCIBIOS_DEVICE_NOT_FOUND; } From 6020164499ff3a61cd8bebceb9e294a155079f71 Mon Sep 17 00:00:00 2001 From: Frank Rowand Date: Tue, 28 Jun 2005 16:48:04 -0700 Subject: [PATCH 220/584] [PATCH] ppc64: change duplicate Kconfig menu "General setup" to "Bus Options" arch/ppc64/Kconfig defines a "General setup" menu, but also sources init/Kconfig which also defines a "General setup" menu. Both of these menus appear at the top level of make menuconfig. Having two menus with the same name is confusing. 
This patch renames the ppc64/Kconfig menu to be "Bus Options" and moves options in this menu which are not bus related to the end of the "Platform support" menu. There are many variations among architectures on the exact naming of the "Bus Options" menu. I chose to use the simplest one, which is also used in arch/ppc/Kconfig. Signed-off-by: Frank Rowand Signed-off-by: Paul Mackerras --- arch/ppc64/Kconfig | 68 +++++++++++++++++++++++----------------------- 1 file changed, 34 insertions(+), 34 deletions(-) diff --git a/arch/ppc64/Kconfig b/arch/ppc64/Kconfig index 2ce87836c671..4d4f81c65012 100644 --- a/arch/ppc64/Kconfig +++ b/arch/ppc64/Kconfig @@ -350,13 +350,46 @@ config SECCOMP If unsure, say Y. Only embedded should say N here. +source "fs/Kconfig.binfmt" + +config HOTPLUG_CPU + bool "Support for hot-pluggable CPUs" + depends on SMP && EXPERIMENTAL && (PPC_PSERIES || PPC_PMAC) + select HOTPLUG + ---help--- + Say Y here to be able to turn CPUs off and on. + + Say N if you are unsure. + +config PROC_DEVICETREE + bool "Support for Open Firmware device tree in /proc" + depends on !PPC_ISERIES + help + This option adds a device-tree directory under /proc which contains + an image of the device tree that the kernel copies from Open + Firmware. If unsure, say Y here. + +config CMDLINE_BOOL + bool "Default bootloader kernel arguments" + depends on !PPC_ISERIES + +config CMDLINE + string "Initial kernel command string" + depends on CMDLINE_BOOL + default "console=ttyS0,9600 console=tty0 root=/dev/sda2" + help + On some platforms, there is currently no way for the boot loader to + pass arguments to the kernel. For these platforms, you can supply + some command-line options at build time by entering them here. In + most cases you will need to specify the root device here. 
+ endmenu config ISA_DMA_API bool default y -menu "General setup" +menu "Bus Options" config ISA bool @@ -389,45 +422,12 @@ config PCI_DOMAINS bool default PCI -source "fs/Kconfig.binfmt" - source "drivers/pci/Kconfig" -config HOTPLUG_CPU - bool "Support for hot-pluggable CPUs" - depends on SMP && EXPERIMENTAL && (PPC_PSERIES || PPC_PMAC) - select HOTPLUG - ---help--- - Say Y here to be able to turn CPUs off and on. - - Say N if you are unsure. - source "drivers/pcmcia/Kconfig" source "drivers/pci/hotplug/Kconfig" -config PROC_DEVICETREE - bool "Support for Open Firmware device tree in /proc" - depends on !PPC_ISERIES - help - This option adds a device-tree directory under /proc which contains - an image of the device tree that the kernel copies from Open - Firmware. If unsure, say Y here. - -config CMDLINE_BOOL - bool "Default bootloader kernel arguments" - depends on !PPC_ISERIES - -config CMDLINE - string "Initial kernel command string" - depends on CMDLINE_BOOL - default "console=ttyS0,9600 console=tty0 root=/dev/sda2" - help - On some platforms, there is currently no way for the boot loader to - pass arguments to the kernel. For these platforms, you can supply - some command-line options at build time by entering them here. In - most cases you will need to specify the root device here. - endmenu source "net/Kconfig" From 3e494c80481653bbc810b4e67651097595ea0294 Mon Sep 17 00:00:00 2001 From: Stephen Rothwell Date: Tue, 12 Jul 2005 17:40:17 +1000 Subject: [PATCH 221/584] [PATCH] ppc64: split iSeries specific parts out of vio.c This patch splits the iSeries specific parts out of vio.c. 
Signed-off-by: Stephen Rothwell Signed-off-by: Paul Mackerras --- arch/ppc64/kernel/Makefile | 4 +- arch/ppc64/kernel/iSeries_vio.c | 133 ++++++++++++++++++++++++++++ arch/ppc64/kernel/vio.c | 149 +++++--------------------------- include/asm-ppc64/vio.h | 7 ++ 4 files changed, 167 insertions(+), 126 deletions(-) create mode 100644 arch/ppc64/kernel/iSeries_vio.c diff --git a/arch/ppc64/kernel/Makefile b/arch/ppc64/kernel/Makefile index 2ecccb6b4f8c..a22c94f6b2db 100644 --- a/arch/ppc64/kernel/Makefile +++ b/arch/ppc64/kernel/Makefile @@ -50,7 +50,9 @@ obj-$(CONFIG_LPARCFG) += lparcfg.o obj-$(CONFIG_HVC_CONSOLE) += hvconsole.o obj-$(CONFIG_BOOTX_TEXT) += btext.o obj-$(CONFIG_HVCS) += hvcserver.o -obj-$(CONFIG_IBMVIO) += vio.o + +vio-obj-$(CONFIG_PPC_ISERIES) += iSeries_vio.o +obj-$(CONFIG_IBMVIO) += vio.o $(vio-obj-y) obj-$(CONFIG_XICS) += xics.o obj-$(CONFIG_MPIC) += mpic.o diff --git a/arch/ppc64/kernel/iSeries_vio.c b/arch/ppc64/kernel/iSeries_vio.c new file mode 100644 index 000000000000..e876b4380278 --- /dev/null +++ b/arch/ppc64/kernel/iSeries_vio.c @@ -0,0 +1,133 @@ +/* + * IBM PowerPC iSeries Virtual I/O Infrastructure Support. + * + * Copyright (c) 2005 Stephen Rothwell, IBM Corp. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
+ */ +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +struct device *iSeries_vio_dev = &vio_bus_device.dev; +EXPORT_SYMBOL(iSeries_vio_dev); + +static struct iommu_table veth_iommu_table; +static struct iommu_table vio_iommu_table; + +void __init iommu_vio_init(void) +{ + struct iommu_table *t; + struct iommu_table_cb cb; + unsigned long cbp; + unsigned long itc_entries; + + cb.itc_busno = 255; /* Bus 255 is the virtual bus */ + cb.itc_virtbus = 0xff; /* Ask for virtual bus */ + + cbp = virt_to_abs(&cb); + HvCallXm_getTceTableParms(cbp); + + itc_entries = cb.itc_size * PAGE_SIZE / sizeof(union tce_entry); + veth_iommu_table.it_size = itc_entries / 2; + veth_iommu_table.it_busno = cb.itc_busno; + veth_iommu_table.it_offset = cb.itc_offset; + veth_iommu_table.it_index = cb.itc_index; + veth_iommu_table.it_type = TCE_VB; + veth_iommu_table.it_blocksize = 1; + + t = iommu_init_table(&veth_iommu_table); + + if (!t) + printk("Virtual Bus VETH TCE table failed.\n"); + + vio_iommu_table.it_size = itc_entries - veth_iommu_table.it_size; + vio_iommu_table.it_busno = cb.itc_busno; + vio_iommu_table.it_offset = cb.itc_offset + + veth_iommu_table.it_size; + vio_iommu_table.it_index = cb.itc_index; + vio_iommu_table.it_type = TCE_VB; + vio_iommu_table.it_blocksize = 1; + + t = iommu_init_table(&vio_iommu_table); + + if (!t) + printk("Virtual Bus VIO TCE table failed.\n"); +} + +/** + * vio_register_device: - Register a new vio device. + * @voidev: The device to register. 
+ */ +static struct vio_dev *__init vio_register_device_iseries(char *type, + uint32_t unit_num) +{ + struct vio_dev *viodev; + + /* allocate a vio_dev for this node */ + viodev = kmalloc(sizeof(struct vio_dev), GFP_KERNEL); + if (!viodev) + return NULL; + memset(viodev, 0, sizeof(struct vio_dev)); + + snprintf(viodev->dev.bus_id, BUS_ID_SIZE, "%s%d", type, unit_num); + + return vio_register_device_common(viodev, viodev->dev.bus_id, type, + unit_num, &vio_iommu_table); +} + +void __init probe_bus_iseries(void) +{ + HvLpIndexMap vlan_map; + struct vio_dev *viodev; + int i; + + /* there is only one of each of these */ + vio_register_device_iseries("viocons", 0); + vio_register_device_iseries("vscsi", 0); + + vlan_map = HvLpConfig_getVirtualLanIndexMap(); + for (i = 0; i < HVMAXARCHITECTEDVIRTUALLANS; i++) { + if ((vlan_map & (0x8000 >> i)) == 0) + continue; + viodev = vio_register_device_iseries("vlan", i); + /* veth is special and has it own iommu_table */ + viodev->iommu_table = &veth_iommu_table; + } + for (i = 0; i < HVMAXARCHITECTEDVIRTUALDISKS; i++) + vio_register_device_iseries("viodasd", i); + for (i = 0; i < HVMAXARCHITECTEDVIRTUALCDROMS; i++) + vio_register_device_iseries("viocd", i); + for (i = 0; i < HVMAXARCHITECTEDVIRTUALTAPES; i++) + vio_register_device_iseries("viotape", i); +} + +/** + * vio_bus_init_iseries: - Initialize the iSeries virtual IO bus + */ +static int __init vio_bus_init_iseries(void) +{ + int err; + + err = vio_bus_init(); + if (err == 0) { + vio_bus_device.iommu_table = &vio_iommu_table; + iSeries_vio_dev = &vio_bus_device.dev; + probe_bus_iseries(); + } + return err; +} + +__initcall(vio_bus_init_iseries); diff --git a/arch/ppc64/kernel/vio.c b/arch/ppc64/kernel/vio.c index 0c0ba71ac0e8..4b9e3712e384 100644 --- a/arch/ppc64/kernel/vio.c +++ b/arch/ppc64/kernel/vio.c @@ -25,10 +25,6 @@ #include #include #include -#include -#include -#include -#include #define DBGENTER() pr_debug("%s entered\n", __FUNCTION__) @@ -41,26 +37,14 @@ static 
const struct vio_device_id *vio_match_device( static struct iommu_table *vio_build_iommu_table(struct vio_dev *); static int vio_num_address_cells; #endif -#ifdef CONFIG_PPC_ISERIES -static struct iommu_table veth_iommu_table; -static struct iommu_table vio_iommu_table; -#endif -static struct vio_dev vio_bus_device = { /* fake "parent" device */ +struct vio_dev vio_bus_device = { /* fake "parent" device */ .name = vio_bus_device.dev.bus_id, .type = "", -#ifdef CONFIG_PPC_ISERIES - .iommu_table = &vio_iommu_table, -#endif .dev.bus_id = "vio", .dev.bus = &vio_bus_type, }; #ifdef CONFIG_PPC_ISERIES -static struct vio_dev *__init vio_register_device_iseries(char *type, - uint32_t unit_num); - -struct device *iSeries_vio_dev = &vio_bus_device.dev; -EXPORT_SYMBOL(iSeries_vio_dev); #define device_is_compatible(a, b) 1 @@ -157,48 +141,6 @@ static const struct vio_device_id * vio_match_device(const struct vio_device_id return NULL; } -#ifdef CONFIG_PPC_ISERIES -void __init iommu_vio_init(void) -{ - struct iommu_table *t; - struct iommu_table_cb cb; - unsigned long cbp; - unsigned long itc_entries; - - cb.itc_busno = 255; /* Bus 255 is the virtual bus */ - cb.itc_virtbus = 0xff; /* Ask for virtual bus */ - - cbp = virt_to_abs(&cb); - HvCallXm_getTceTableParms(cbp); - - itc_entries = cb.itc_size * PAGE_SIZE / sizeof(union tce_entry); - veth_iommu_table.it_size = itc_entries / 2; - veth_iommu_table.it_busno = cb.itc_busno; - veth_iommu_table.it_offset = cb.itc_offset; - veth_iommu_table.it_index = cb.itc_index; - veth_iommu_table.it_type = TCE_VB; - veth_iommu_table.it_blocksize = 1; - - t = iommu_init_table(&veth_iommu_table); - - if (!t) - printk("Virtual Bus VETH TCE table failed.\n"); - - vio_iommu_table.it_size = itc_entries - veth_iommu_table.it_size; - vio_iommu_table.it_busno = cb.itc_busno; - vio_iommu_table.it_offset = cb.itc_offset + - veth_iommu_table.it_size; - vio_iommu_table.it_index = cb.itc_index; - vio_iommu_table.it_type = TCE_VB; - 
vio_iommu_table.it_blocksize = 1; - - t = iommu_init_table(&vio_iommu_table); - - if (!t) - printk("Virtual Bus VIO TCE table failed.\n"); -} -#endif - #ifdef CONFIG_PPC_PSERIES static void probe_bus_pseries(void) { @@ -223,38 +165,10 @@ static void probe_bus_pseries(void) } #endif -#ifdef CONFIG_PPC_ISERIES -static void probe_bus_iseries(void) -{ - HvLpIndexMap vlan_map = HvLpConfig_getVirtualLanIndexMap(); - struct vio_dev *viodev; - int i; - - /* there is only one of each of these */ - vio_register_device_iseries("viocons", 0); - vio_register_device_iseries("vscsi", 0); - - vlan_map = HvLpConfig_getVirtualLanIndexMap(); - for (i = 0; i < HVMAXARCHITECTEDVIRTUALLANS; i++) { - if ((vlan_map & (0x8000 >> i)) == 0) - continue; - viodev = vio_register_device_iseries("vlan", i); - /* veth is special and has it own iommu_table */ - viodev->iommu_table = &veth_iommu_table; - } - for (i = 0; i < HVMAXARCHITECTEDVIRTUALDISKS; i++) - vio_register_device_iseries("viodasd", i); - for (i = 0; i < HVMAXARCHITECTEDVIRTUALCDROMS; i++) - vio_register_device_iseries("viocd", i); - for (i = 0; i < HVMAXARCHITECTEDVIRTUALTAPES; i++) - vio_register_device_iseries("viotape", i); -} -#endif - /** * vio_bus_init: - Initialize the virtual IO bus */ -static int __init vio_bus_init(void) +int __init vio_bus_init(void) { int err; @@ -264,25 +178,35 @@ static int __init vio_bus_init(void) return err; } - /* the fake parent of all vio devices, just to give us a nice directory */ + /* the fake parent of all vio devices, just to give us + * a nice directory + */ err = device_register(&vio_bus_device.dev); if (err) { - printk(KERN_WARNING "%s: device_register returned %i\n", __FUNCTION__, - err); + printk(KERN_WARNING "%s: device_register returned %i\n", + __FUNCTION__, err); return err; } -#ifdef CONFIG_PPC_PSERIES - probe_bus_pseries(); -#endif -#ifdef CONFIG_PPC_ISERIES - probe_bus_iseries(); -#endif - return 0; } -__initcall(vio_bus_init); +#ifdef CONFIG_PPC_PSERIES +/** + * 
vio_bus_init_pseries: - Initialize the pSeries virtual IO bus + */ +static int __init vio_bus_init_pseries(void) +{ + int err; + + err = vio_bus_init(); + if (err == 0) + probe_bus_pseries(); + return err; +} + +__initcall(vio_bus_init_pseries); +#endif /* vio_dev refcount hit 0 */ static void __devinit vio_dev_release(struct device *dev) @@ -312,7 +236,7 @@ static ssize_t viodev_show_name(struct device *dev, struct device_attribute *att } DEVICE_ATTR(name, S_IRUSR | S_IRGRP | S_IROTH, viodev_show_name, NULL); -static struct vio_dev * __devinit vio_register_device_common( +struct vio_dev * __devinit vio_register_device_common( struct vio_dev *viodev, char *name, char *type, uint32_t unit_address, struct iommu_table *iommu_table) { @@ -408,31 +332,6 @@ struct vio_dev * __devinit vio_register_device_node(struct device_node *of_node) EXPORT_SYMBOL(vio_register_device_node); #endif -#ifdef CONFIG_PPC_ISERIES -/** - * vio_register_device: - Register a new vio device. - * @voidev: The device to register. 
- */ -static struct vio_dev *__init vio_register_device_iseries(char *type, - uint32_t unit_num) -{ - struct vio_dev *viodev; - - DBGENTER(); - - /* allocate a vio_dev for this node */ - viodev = kmalloc(sizeof(struct vio_dev), GFP_KERNEL); - if (!viodev) - return NULL; - memset(viodev, 0, sizeof(struct vio_dev)); - - snprintf(viodev->dev.bus_id, BUS_ID_SIZE, "%s%d", type, unit_num); - - return vio_register_device_common(viodev, viodev->dev.bus_id, type, - unit_num, &vio_iommu_table); -} -#endif - void __devinit vio_unregister_device(struct vio_dev *viodev) { DBGENTER(); diff --git a/include/asm-ppc64/vio.h b/include/asm-ppc64/vio.h index 20cd98ee6337..1e6d4c4b83a0 100644 --- a/include/asm-ppc64/vio.h +++ b/include/asm-ppc64/vio.h @@ -56,6 +56,9 @@ const void * vio_get_attribute(struct vio_dev *vdev, void* which, int* length); int vio_get_irq(struct vio_dev *dev); int vio_enable_interrupts(struct vio_dev *dev); int vio_disable_interrupts(struct vio_dev *dev); +extern struct vio_dev * __devinit vio_register_device_common( + struct vio_dev *viodev, char *name, char *type, + uint32_t unit_address, struct iommu_table *iommu_table); extern struct dma_mapping_ops vio_dma_ops; @@ -95,9 +98,13 @@ struct vio_dev { struct device dev; }; +extern struct vio_dev vio_bus_device; + static inline struct vio_dev *to_vio_dev(struct device *dev) { return container_of(dev, struct vio_dev, dev); } +extern int vio_bus_init(void); + #endif /* _ASM_VIO_H */ From 8c65b5c955b8598d9c63b4e97392377269873a54 Mon Sep 17 00:00:00 2001 From: Stephen Rothwell Date: Tue, 12 Jul 2005 17:42:49 +1000 Subject: [PATCH 222/584] [PATCH] ppc64: move iSeries vio iommu init Since the iSeries vio iommu tables cannot be used until after the vio bus has been initialised, move the initialisation of the tables to there. 
Signed-off-by: Stephen Rothwell Signed-off-by: Paul Mackerras --- arch/ppc64/kernel/iSeries_vio.c | 3 ++- arch/ppc64/mm/init.c | 3 --- include/asm-ppc64/iommu.h | 3 --- 3 files changed, 2 insertions(+), 7 deletions(-) diff --git a/arch/ppc64/kernel/iSeries_vio.c b/arch/ppc64/kernel/iSeries_vio.c index e876b4380278..48f0ebf4405d 100644 --- a/arch/ppc64/kernel/iSeries_vio.c +++ b/arch/ppc64/kernel/iSeries_vio.c @@ -27,7 +27,7 @@ EXPORT_SYMBOL(iSeries_vio_dev); static struct iommu_table veth_iommu_table; static struct iommu_table vio_iommu_table; -void __init iommu_vio_init(void) +static void __init iommu_vio_init(void) { struct iommu_table *t; struct iommu_table_cb cb; @@ -123,6 +123,7 @@ static int __init vio_bus_init_iseries(void) err = vio_bus_init(); if (err == 0) { + iommu_vio_init(); vio_bus_device.iommu_table = &vio_iommu_table; iSeries_vio_dev = &vio_bus_device.dev; probe_bus_iseries(); diff --git a/arch/ppc64/mm/init.c b/arch/ppc64/mm/init.c index 87f256df8de5..9edfe267123e 100644 --- a/arch/ppc64/mm/init.c +++ b/arch/ppc64/mm/init.c @@ -686,9 +686,6 @@ void __init mem_init(void) mem_init_done = 1; -#ifdef CONFIG_PPC_ISERIES - iommu_vio_init(); -#endif /* Initialize the vDSO */ vdso_init(); } diff --git a/include/asm-ppc64/iommu.h b/include/asm-ppc64/iommu.h index 729de5cc21d9..72dcf8116b04 100644 --- a/include/asm-ppc64/iommu.h +++ b/include/asm-ppc64/iommu.h @@ -104,9 +104,6 @@ extern void iommu_devnode_init_pSeries(struct device_node *dn); #ifdef CONFIG_PPC_ISERIES -/* Initializes tables for bio buses */ -extern void __init iommu_vio_init(void); - struct iSeries_Device_Node; /* Creates table for an individual device node */ extern void iommu_devnode_init_iSeries(struct iSeries_Device_Node *dn); From 6312236fe82bbd3b0e1dee60b3eb3b270a2f6aeb Mon Sep 17 00:00:00 2001 From: Stephen Rothwell Date: Tue, 12 Jul 2005 17:45:27 +1000 Subject: [PATCH 223/584] [PATCH] ppc64: make the bus matching function platform specific This patch allows us to have a different bus 
if matching function for each platform. Signed-off-by: Stephen Rothwell Signed-off-by: Paul Mackerras --- arch/ppc64/kernel/iSeries_vio.c | 12 +++++++++++- arch/ppc64/kernel/vio.c | 28 +++++++++++++++++++--------- include/asm-ppc64/vio.h | 3 ++- 3 files changed, 32 insertions(+), 11 deletions(-) diff --git a/arch/ppc64/kernel/iSeries_vio.c b/arch/ppc64/kernel/iSeries_vio.c index 48f0ebf4405d..2656b1ca834d 100644 --- a/arch/ppc64/kernel/iSeries_vio.c +++ b/arch/ppc64/kernel/iSeries_vio.c @@ -114,6 +114,16 @@ void __init probe_bus_iseries(void) vio_register_device_iseries("viotape", i); } +/** + * vio_match_device_iseries: - Tell if a iSeries VIO device matches a + * vio_device_id + */ +static int vio_match_device_iseries(const struct vio_device_id *id, + const struct vio_dev *dev) +{ + return strncmp(dev->type, id->type, strlen(id->type)) == 0; +} + /** * vio_bus_init_iseries: - Initialize the iSeries virtual IO bus */ @@ -121,7 +131,7 @@ static int __init vio_bus_init_iseries(void) { int err; - err = vio_bus_init(); + err = vio_bus_init(vio_match_device_iseries); if (err == 0) { iommu_vio_init(); vio_bus_device.iommu_table = &vio_iommu_table; diff --git a/arch/ppc64/kernel/vio.c b/arch/ppc64/kernel/vio.c index 4b9e3712e384..8a243cad0f8b 100644 --- a/arch/ppc64/kernel/vio.c +++ b/arch/ppc64/kernel/vio.c @@ -44,11 +44,8 @@ struct vio_dev vio_bus_device = { /* fake "parent" device */ .dev.bus = &vio_bus_type, }; -#ifdef CONFIG_PPC_ISERIES - -#define device_is_compatible(a, b) 1 - -#endif +static int (*is_match)(const struct vio_device_id *id, + const struct vio_dev *dev); /* convert from struct device to struct vio_dev and pass to driver. 
* dev->driver has already been set by generic code because vio_bus_match @@ -133,8 +130,7 @@ static const struct vio_device_id * vio_match_device(const struct vio_device_id DBGENTER(); while (ids->type) { - if ((strncmp(dev->type, ids->type, strlen(ids->type)) == 0) && - device_is_compatible(dev->dev.platform_data, ids->compat)) + if (is_match(ids, dev)) return ids; ids++; } @@ -168,10 +164,13 @@ static void probe_bus_pseries(void) /** * vio_bus_init: - Initialize the virtual IO bus */ -int __init vio_bus_init(void) +int __init vio_bus_init(int (*match_func)(const struct vio_device_id *id, + const struct vio_dev *dev)) { int err; + is_match = match_func; + err = bus_register(&vio_bus_type); if (err) { printk(KERN_ERR "failed to register VIO bus\n"); @@ -192,6 +191,17 @@ int __init vio_bus_init(void) } #ifdef CONFIG_PPC_PSERIES +/** + * vio_match_device_pseries: - Tell if a pSeries VIO device matches a + * vio_device_id + */ +static int vio_match_device_pseries(const struct vio_device_id *id, + const struct vio_dev *dev) +{ + return (strncmp(dev->type, id->type, strlen(id->type)) == 0) && + device_is_compatible(dev->dev.platform_data, id->compat); +} + /** * vio_bus_init_pseries: - Initialize the pSeries virtual IO bus */ @@ -199,7 +209,7 @@ static int __init vio_bus_init_pseries(void) { int err; - err = vio_bus_init(); + err = vio_bus_init(vio_match_device_pseries); if (err == 0) probe_bus_pseries(); return err; diff --git a/include/asm-ppc64/vio.h b/include/asm-ppc64/vio.h index 1e6d4c4b83a0..70644a232210 100644 --- a/include/asm-ppc64/vio.h +++ b/include/asm-ppc64/vio.h @@ -105,6 +105,7 @@ static inline struct vio_dev *to_vio_dev(struct device *dev) return container_of(dev, struct vio_dev, dev); } -extern int vio_bus_init(void); +extern int vio_bus_init(int (*is_match)(const struct vio_device_id *id, + const struct vio_dev *dev)); #endif /* _ASM_VIO_H */ From 19dbd0f6a74f7529d6d49dd50ad6b31adbe0598d Mon Sep 17 00:00:00 2001 From: Stephen Rothwell Date: Tue, 12 
Jul 2005 17:50:26 +1000 Subject: [PATCH 224/584] [PATCH] ppc64: split pSeries specific parts out of vio.c This patch just splits out the pSeries specific parts of vio.c. Signed-off-by: Stephen Rothwell Signed-off-by: Paul Mackerras --- arch/ppc64/kernel/Makefile | 1 + arch/ppc64/kernel/iSeries_vio.c | 2 +- arch/ppc64/kernel/pSeries_vio.c | 266 +++++++++++++++++++++++++++++ arch/ppc64/kernel/vio.c | 290 ++------------------------------ include/asm-ppc64/vio.h | 4 +- 5 files changed, 284 insertions(+), 279 deletions(-) create mode 100644 arch/ppc64/kernel/pSeries_vio.c diff --git a/arch/ppc64/kernel/Makefile b/arch/ppc64/kernel/Makefile index a22c94f6b2db..cbf87dcac92a 100644 --- a/arch/ppc64/kernel/Makefile +++ b/arch/ppc64/kernel/Makefile @@ -51,6 +51,7 @@ obj-$(CONFIG_HVC_CONSOLE) += hvconsole.o obj-$(CONFIG_BOOTX_TEXT) += btext.o obj-$(CONFIG_HVCS) += hvcserver.o +vio-obj-$(CONFIG_PPC_PSERIES) += pSeries_vio.o vio-obj-$(CONFIG_PPC_ISERIES) += iSeries_vio.o obj-$(CONFIG_IBMVIO) += vio.o $(vio-obj-y) obj-$(CONFIG_XICS) += xics.o diff --git a/arch/ppc64/kernel/iSeries_vio.c b/arch/ppc64/kernel/iSeries_vio.c index 2656b1ca834d..b4268cc4ba48 100644 --- a/arch/ppc64/kernel/iSeries_vio.c +++ b/arch/ppc64/kernel/iSeries_vio.c @@ -131,7 +131,7 @@ static int __init vio_bus_init_iseries(void) { int err; - err = vio_bus_init(vio_match_device_iseries); + err = vio_bus_init(vio_match_device_iseries, NULL, NULL); if (err == 0) { iommu_vio_init(); vio_bus_device.iommu_table = &vio_iommu_table; diff --git a/arch/ppc64/kernel/pSeries_vio.c b/arch/ppc64/kernel/pSeries_vio.c new file mode 100644 index 000000000000..338f9e1bdc09 --- /dev/null +++ b/arch/ppc64/kernel/pSeries_vio.c @@ -0,0 +1,266 @@ +/* + * IBM PowerPC pSeries Virtual I/O Infrastructure Support. + * + * Copyright (c) 2003-2005 IBM Corp. 
+ * Dave Engebretsen engebret@us.ibm.com + * Santiago Leon santil@us.ibm.com + * Hollis Blanchard + * Stephen Rothwell + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +extern struct subsystem devices_subsys; /* needed for vio_find_name() */ + +static void probe_bus_pseries(void) +{ + struct device_node *node_vroot, *of_node; + + node_vroot = find_devices("vdevice"); + if ((node_vroot == NULL) || (node_vroot->child == NULL)) + /* this machine doesn't do virtual IO, and that's ok */ + return; + + /* + * Create struct vio_devices for each virtual device in the device tree. + * Drivers will associate with them later. + */ + for (of_node = node_vroot->child; of_node != NULL; + of_node = of_node->sibling) { + printk(KERN_DEBUG "%s: processing %p\n", __FUNCTION__, of_node); + vio_register_device_node(of_node); + } +} + +/** + * vio_match_device_pseries: - Tell if a pSeries VIO device matches a + * vio_device_id + */ +static int vio_match_device_pseries(const struct vio_device_id *id, + const struct vio_dev *dev) +{ + return (strncmp(dev->type, id->type, strlen(id->type)) == 0) && + device_is_compatible(dev->dev.platform_data, id->compat); +} + +static void vio_release_device_pseries(struct device *dev) +{ + /* XXX free TCE table */ + of_node_put(dev->platform_data); +} + +static ssize_t viodev_show_devspec(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct device_node *of_node = dev->platform_data; + + return sprintf(buf, "%s\n", of_node->full_name); +} +DEVICE_ATTR(devspec, S_IRUSR | S_IRGRP | S_IROTH, viodev_show_devspec, NULL); + +static void vio_unregister_device_pseries(struct vio_dev *viodev) +{ + device_remove_file(&viodev->dev, 
&dev_attr_devspec); +} + +/** + * vio_bus_init_pseries: - Initialize the pSeries virtual IO bus + */ +static int __init vio_bus_init_pseries(void) +{ + int err; + + err = vio_bus_init(vio_match_device_pseries, + vio_unregister_device_pseries, + vio_release_device_pseries); + if (err == 0) + probe_bus_pseries(); + return err; +} + +__initcall(vio_bus_init_pseries); + +/** + * vio_build_iommu_table: - gets the dma information from OF and + * builds the TCE tree. + * @dev: the virtual device. + * + * Returns a pointer to the built tce tree, or NULL if it can't + * find property. +*/ +static struct iommu_table *vio_build_iommu_table(struct vio_dev *dev) +{ + unsigned int *dma_window; + struct iommu_table *newTceTable; + unsigned long offset; + int dma_window_property_size; + + dma_window = (unsigned int *) get_property(dev->dev.platform_data, "ibm,my-dma-window", &dma_window_property_size); + if(!dma_window) { + return NULL; + } + + newTceTable = (struct iommu_table *) kmalloc(sizeof(struct iommu_table), GFP_KERNEL); + + /* There should be some code to extract the phys-encoded offset + using prom_n_addr_cells(). However, according to a comment + on earlier versions, it's always zero, so we don't bother */ + offset = dma_window[1] >> PAGE_SHIFT; + + /* TCE table size - measured in tce entries */ + newTceTable->it_size = dma_window[4] >> PAGE_SHIFT; + /* offset for VIO should always be 0 */ + newTceTable->it_offset = offset; + newTceTable->it_busno = 0; + newTceTable->it_index = (unsigned long)dma_window[0]; + newTceTable->it_type = TCE_VB; + + return iommu_init_table(newTceTable); +} + +/** + * vio_register_device_node: - Register a new vio device. + * @of_node: The OF node for this device. + * + * Creates and initializes a vio_dev structure from the data in + * of_node (dev.platform_data) and adds it to the list of virtual devices. + * Returns a pointer to the created vio_dev or NULL if node has + * NULL device_type or compatible fields. 
+ */ +struct vio_dev * __devinit vio_register_device_node(struct device_node *of_node) +{ + struct vio_dev *viodev; + unsigned int *unit_address; + unsigned int *irq_p; + + /* we need the 'device_type' property, in order to match with drivers */ + if ((NULL == of_node->type)) { + printk(KERN_WARNING + "%s: node %s missing 'device_type'\n", __FUNCTION__, + of_node->name ? of_node->name : ""); + return NULL; + } + + unit_address = (unsigned int *)get_property(of_node, "reg", NULL); + if (!unit_address) { + printk(KERN_WARNING "%s: node %s missing 'reg'\n", __FUNCTION__, + of_node->name ? of_node->name : ""); + return NULL; + } + + /* allocate a vio_dev for this node */ + viodev = kmalloc(sizeof(struct vio_dev), GFP_KERNEL); + if (!viodev) { + return NULL; + } + memset(viodev, 0, sizeof(struct vio_dev)); + + viodev->dev.platform_data = of_node_get(of_node); + + viodev->irq = NO_IRQ; + irq_p = (unsigned int *)get_property(of_node, "interrupts", NULL); + if (irq_p) { + int virq = virt_irq_create_mapping(*irq_p); + if (virq == NO_IRQ) { + printk(KERN_ERR "Unable to allocate interrupt " + "number for %s\n", of_node->full_name); + } else + viodev->irq = irq_offset_up(virq); + } + + snprintf(viodev->dev.bus_id, BUS_ID_SIZE, "%x", *unit_address); + + /* register with generic device framework */ + if (vio_register_device_common(viodev, of_node->name, of_node->type, + *unit_address, vio_build_iommu_table(viodev)) + == NULL) { + /* XXX free TCE table */ + kfree(viodev); + return NULL; + } + device_create_file(&viodev->dev, &dev_attr_devspec); + + return viodev; +} +EXPORT_SYMBOL(vio_register_device_node); + +/** + * vio_get_attribute: - get attribute for virtual device + * @vdev: The vio device to get property. + * @which: The property/attribute to be extracted. + * @length: Pointer to length of returned data size (unused if NULL). 
+ * + * Calls prom.c's get_property() to return the value of the + * attribute specified by the preprocessor constant @which +*/ +const void * vio_get_attribute(struct vio_dev *vdev, void* which, int* length) +{ + return get_property(vdev->dev.platform_data, (char*)which, length); +} +EXPORT_SYMBOL(vio_get_attribute); + +/* vio_find_name() - internal because only vio.c knows how we formatted the + * kobject name + * XXX once vio_bus_type.devices is actually used as a kset in + * drivers/base/bus.c, this function should be removed in favor of + * "device_find(kobj_name, &vio_bus_type)" + */ +static struct vio_dev *vio_find_name(const char *kobj_name) +{ + struct kobject *found; + + found = kset_find_obj(&devices_subsys.kset, kobj_name); + if (!found) + return NULL; + + return to_vio_dev(container_of(found, struct device, kobj)); +} + +/** + * vio_find_node - find an already-registered vio_dev + * @vnode: device_node of the virtual device we're looking for + */ +struct vio_dev *vio_find_node(struct device_node *vnode) +{ + uint32_t *unit_address; + char kobj_name[BUS_ID_SIZE]; + + /* construct the kobject name from the device node */ + unit_address = (uint32_t *)get_property(vnode, "reg", NULL); + if (!unit_address) + return NULL; + snprintf(kobj_name, BUS_ID_SIZE, "%x", *unit_address); + + return vio_find_name(kobj_name); +} +EXPORT_SYMBOL(vio_find_node); + +int vio_enable_interrupts(struct vio_dev *dev) +{ + int rc = h_vio_signal(dev->unit_address, VIO_IRQ_ENABLE); + if (rc != H_Success) + printk(KERN_ERR "vio: Error 0x%x enabling interrupts\n", rc); + return rc; +} +EXPORT_SYMBOL(vio_enable_interrupts); + +int vio_disable_interrupts(struct vio_dev *dev) +{ + int rc = h_vio_signal(dev->unit_address, VIO_IRQ_DISABLE); + if (rc != H_Success) + printk(KERN_ERR "vio: Error 0x%x disabling interrupts\n", rc); + return rc; +} +EXPORT_SYMBOL(vio_disable_interrupts); diff --git a/arch/ppc64/kernel/vio.c b/arch/ppc64/kernel/vio.c index 8a243cad0f8b..3b790bafcaad 100644 --- 
a/arch/ppc64/kernel/vio.c +++ b/arch/ppc64/kernel/vio.c @@ -1,10 +1,11 @@ /* * IBM PowerPC Virtual I/O Infrastructure Support. * - * Copyright (c) 2003 IBM Corp. + * Copyright (c) 2003-2005 IBM Corp. * Dave Engebretsen engebret@us.ibm.com * Santiago Leon santil@us.ibm.com * Hollis Blanchard + * Stephen Rothwell * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License @@ -14,29 +15,16 @@ #include #include -#include #include -#include #include #include -#include #include #include -#include #include -#include - -#define DBGENTER() pr_debug("%s entered\n", __FUNCTION__) - -extern struct subsystem devices_subsys; /* needed for vio_find_name() */ static const struct vio_device_id *vio_match_device( const struct vio_device_id *, const struct vio_dev *); -#ifdef CONFIG_PPC_PSERIES -static struct iommu_table *vio_build_iommu_table(struct vio_dev *); -static int vio_num_address_cells; -#endif struct vio_dev vio_bus_device = { /* fake "parent" device */ .name = vio_bus_device.dev.bus_id, .type = "", @@ -46,6 +34,8 @@ struct vio_dev vio_bus_device = { /* fake "parent" device */ static int (*is_match)(const struct vio_device_id *id, const struct vio_dev *dev); +static void (*unregister_device_callback)(struct vio_dev *dev); +static void (*release_device_callback)(struct device *dev); /* convert from struct device to struct vio_dev and pass to driver. 
* dev->driver has already been set by generic code because vio_bus_match @@ -57,8 +47,6 @@ static int vio_bus_probe(struct device *dev) const struct vio_device_id *id; int error = -ENODEV; - DBGENTER(); - if (!viodrv->probe) return error; @@ -76,8 +64,6 @@ static int vio_bus_remove(struct device *dev) struct vio_dev *viodev = to_vio_dev(dev); struct vio_driver *viodrv = to_vio_driver(dev->driver); - DBGENTER(); - if (viodrv->remove) { return viodrv->remove(viodev); } @@ -127,8 +113,6 @@ EXPORT_SYMBOL(vio_unregister_driver); static const struct vio_device_id * vio_match_device(const struct vio_device_id *ids, const struct vio_dev *dev) { - DBGENTER(); - while (ids->type) { if (is_match(ids, dev)) return ids; @@ -137,39 +121,19 @@ static const struct vio_device_id * vio_match_device(const struct vio_device_id return NULL; } -#ifdef CONFIG_PPC_PSERIES -static void probe_bus_pseries(void) -{ - struct device_node *node_vroot, *of_node; - - node_vroot = find_devices("vdevice"); - if ((node_vroot == NULL) || (node_vroot->child == NULL)) - /* this machine doesn't do virtual IO, and that's ok */ - return; - - vio_num_address_cells = prom_n_addr_cells(node_vroot->child); - - /* - * Create struct vio_devices for each virtual device in the device tree. - * Drivers will associate with them later. 
- */ - for (of_node = node_vroot->child; of_node != NULL; - of_node = of_node->sibling) { - printk(KERN_DEBUG "%s: processing %p\n", __FUNCTION__, of_node); - vio_register_device_node(of_node); - } -} -#endif - /** * vio_bus_init: - Initialize the virtual IO bus */ int __init vio_bus_init(int (*match_func)(const struct vio_device_id *id, - const struct vio_dev *dev)) + const struct vio_dev *dev), + void (*unregister_dev)(struct vio_dev *), + void (*release_dev)(struct device *)) { int err; is_match = match_func; + unregister_device_callback = unregister_dev; + release_device_callback = release_dev; err = bus_register(&vio_bus_type); if (err) { @@ -190,56 +154,14 @@ int __init vio_bus_init(int (*match_func)(const struct vio_device_id *id, return 0; } -#ifdef CONFIG_PPC_PSERIES -/** - * vio_match_device_pseries: - Tell if a pSeries VIO device matches a - * vio_device_id - */ -static int vio_match_device_pseries(const struct vio_device_id *id, - const struct vio_dev *dev) -{ - return (strncmp(dev->type, id->type, strlen(id->type)) == 0) && - device_is_compatible(dev->dev.platform_data, id->compat); -} - -/** - * vio_bus_init_pseries: - Initialize the pSeries virtual IO bus - */ -static int __init vio_bus_init_pseries(void) -{ - int err; - - err = vio_bus_init(vio_match_device_pseries); - if (err == 0) - probe_bus_pseries(); - return err; -} - -__initcall(vio_bus_init_pseries); -#endif - /* vio_dev refcount hit 0 */ static void __devinit vio_dev_release(struct device *dev) { - DBGENTER(); - -#ifdef CONFIG_PPC_PSERIES - /* XXX free TCE table */ - of_node_put(dev->platform_data); -#endif + if (release_device_callback) + release_device_callback(dev); kfree(to_vio_dev(dev)); } -#ifdef CONFIG_PPC_PSERIES -static ssize_t viodev_show_devspec(struct device *dev, struct device_attribute *attr, char *buf) -{ - struct device_node *of_node = dev->platform_data; - - return sprintf(buf, "%s\n", of_node->full_name); -} -DEVICE_ATTR(devspec, S_IRUSR | S_IRGRP | S_IROTH, 
viodev_show_devspec, NULL); -#endif - static ssize_t viodev_show_name(struct device *dev, struct device_attribute *attr, char *buf) { return sprintf(buf, "%s\n", to_vio_dev(dev)->name); @@ -250,8 +172,6 @@ struct vio_dev * __devinit vio_register_device_common( struct vio_dev *viodev, char *name, char *type, uint32_t unit_address, struct iommu_table *iommu_table) { - DBGENTER(); - viodev->name = name; viodev->type = type; viodev->unit_address = unit_address; @@ -272,197 +192,15 @@ struct vio_dev * __devinit vio_register_device_common( return viodev; } -#ifdef CONFIG_PPC_PSERIES -/** - * vio_register_device_node: - Register a new vio device. - * @of_node: The OF node for this device. - * - * Creates and initializes a vio_dev structure from the data in - * of_node (dev.platform_data) and adds it to the list of virtual devices. - * Returns a pointer to the created vio_dev or NULL if node has - * NULL device_type or compatible fields. - */ -struct vio_dev * __devinit vio_register_device_node(struct device_node *of_node) -{ - struct vio_dev *viodev; - unsigned int *unit_address; - unsigned int *irq_p; - - DBGENTER(); - - /* we need the 'device_type' property, in order to match with drivers */ - if ((NULL == of_node->type)) { - printk(KERN_WARNING - "%s: node %s missing 'device_type'\n", __FUNCTION__, - of_node->name ? of_node->name : ""); - return NULL; - } - - unit_address = (unsigned int *)get_property(of_node, "reg", NULL); - if (!unit_address) { - printk(KERN_WARNING "%s: node %s missing 'reg'\n", __FUNCTION__, - of_node->name ? 
of_node->name : ""); - return NULL; - } - - /* allocate a vio_dev for this node */ - viodev = kmalloc(sizeof(struct vio_dev), GFP_KERNEL); - if (!viodev) { - return NULL; - } - memset(viodev, 0, sizeof(struct vio_dev)); - - viodev->dev.platform_data = of_node_get(of_node); - - viodev->irq = NO_IRQ; - irq_p = (unsigned int *)get_property(of_node, "interrupts", NULL); - if (irq_p) { - int virq = virt_irq_create_mapping(*irq_p); - if (virq == NO_IRQ) { - printk(KERN_ERR "Unable to allocate interrupt " - "number for %s\n", of_node->full_name); - } else - viodev->irq = irq_offset_up(virq); - } - - snprintf(viodev->dev.bus_id, BUS_ID_SIZE, "%x", *unit_address); - - /* register with generic device framework */ - if (vio_register_device_common(viodev, of_node->name, of_node->type, - *unit_address, vio_build_iommu_table(viodev)) - == NULL) { - /* XXX free TCE table */ - kfree(viodev); - return NULL; - } - device_create_file(&viodev->dev, &dev_attr_devspec); - - return viodev; -} -EXPORT_SYMBOL(vio_register_device_node); -#endif - void __devinit vio_unregister_device(struct vio_dev *viodev) { - DBGENTER(); -#ifdef CONFIG_PPC_PSERIES - device_remove_file(&viodev->dev, &dev_attr_devspec); -#endif + if (unregister_device_callback) + unregister_device_callback(viodev); device_remove_file(&viodev->dev, &dev_attr_name); device_unregister(&viodev->dev); } EXPORT_SYMBOL(vio_unregister_device); -#ifdef CONFIG_PPC_PSERIES -/** - * vio_get_attribute: - get attribute for virtual device - * @vdev: The vio device to get property. - * @which: The property/attribute to be extracted. - * @length: Pointer to length of returned data size (unused if NULL). 
- * - * Calls prom.c's get_property() to return the value of the - * attribute specified by the preprocessor constant @which -*/ -const void * vio_get_attribute(struct vio_dev *vdev, void* which, int* length) -{ - return get_property(vdev->dev.platform_data, (char*)which, length); -} -EXPORT_SYMBOL(vio_get_attribute); - -/* vio_find_name() - internal because only vio.c knows how we formatted the - * kobject name - * XXX once vio_bus_type.devices is actually used as a kset in - * drivers/base/bus.c, this function should be removed in favor of - * "device_find(kobj_name, &vio_bus_type)" - */ -static struct vio_dev *vio_find_name(const char *kobj_name) -{ - struct kobject *found; - - found = kset_find_obj(&devices_subsys.kset, kobj_name); - if (!found) - return NULL; - - return to_vio_dev(container_of(found, struct device, kobj)); -} - -/** - * vio_find_node - find an already-registered vio_dev - * @vnode: device_node of the virtual device we're looking for - */ -struct vio_dev *vio_find_node(struct device_node *vnode) -{ - uint32_t *unit_address; - char kobj_name[BUS_ID_SIZE]; - - /* construct the kobject name from the device node */ - unit_address = (uint32_t *)get_property(vnode, "reg", NULL); - if (!unit_address) - return NULL; - snprintf(kobj_name, BUS_ID_SIZE, "%x", *unit_address); - - return vio_find_name(kobj_name); -} -EXPORT_SYMBOL(vio_find_node); - -/** - * vio_build_iommu_table: - gets the dma information from OF and builds the TCE tree. - * @dev: the virtual device. - * - * Returns a pointer to the built tce tree, or NULL if it can't - * find property. 
-*/ -static struct iommu_table * vio_build_iommu_table(struct vio_dev *dev) -{ - unsigned int *dma_window; - struct iommu_table *newTceTable; - unsigned long offset; - int dma_window_property_size; - - dma_window = (unsigned int *) get_property(dev->dev.platform_data, "ibm,my-dma-window", &dma_window_property_size); - if(!dma_window) { - return NULL; - } - - newTceTable = (struct iommu_table *) kmalloc(sizeof(struct iommu_table), GFP_KERNEL); - - /* There should be some code to extract the phys-encoded offset - using prom_n_addr_cells(). However, according to a comment - on earlier versions, it's always zero, so we don't bother */ - offset = dma_window[1] >> PAGE_SHIFT; - - /* TCE table size - measured in tce entries */ - newTceTable->it_size = dma_window[4] >> PAGE_SHIFT; - /* offset for VIO should always be 0 */ - newTceTable->it_offset = offset; - newTceTable->it_busno = 0; - newTceTable->it_index = (unsigned long)dma_window[0]; - newTceTable->it_type = TCE_VB; - - return iommu_init_table(newTceTable); -} - -int vio_enable_interrupts(struct vio_dev *dev) -{ - int rc = h_vio_signal(dev->unit_address, VIO_IRQ_ENABLE); - if (rc != H_Success) { - printk(KERN_ERR "vio: Error 0x%x enabling interrupts\n", rc); - } - return rc; -} -EXPORT_SYMBOL(vio_enable_interrupts); - -int vio_disable_interrupts(struct vio_dev *dev) -{ - int rc = h_vio_signal(dev->unit_address, VIO_IRQ_DISABLE); - if (rc != H_Success) { - printk(KERN_ERR "vio: Error 0x%x disabling interrupts\n", rc); - } - return rc; -} -EXPORT_SYMBOL(vio_disable_interrupts); -#endif - static dma_addr_t vio_map_single(struct device *dev, void *vaddr, size_t size, enum dma_data_direction direction) { @@ -526,8 +264,6 @@ static int vio_bus_match(struct device *dev, struct device_driver *drv) const struct vio_device_id *ids = vio_drv->id_table; const struct vio_device_id *found_id; - DBGENTER(); - if (!ids) return 0; diff --git a/include/asm-ppc64/vio.h b/include/asm-ppc64/vio.h index 70644a232210..a82e87c1c5fa 100644 
--- a/include/asm-ppc64/vio.h +++ b/include/asm-ppc64/vio.h @@ -106,6 +106,8 @@ static inline struct vio_dev *to_vio_dev(struct device *dev) } extern int vio_bus_init(int (*is_match)(const struct vio_device_id *id, - const struct vio_dev *dev)); + const struct vio_dev *dev), + void (*)(struct vio_dev *), + void (*)(struct device *)); #endif /* _ASM_VIO_H */ From 2e2446ea0758cd57dd065962d9544e3f4d44ea2b Mon Sep 17 00:00:00 2001 From: David Gibson Date: Fri, 19 Aug 2005 14:52:31 +1000 Subject: [PATCH 225/584] [PATCH] Remove NACA fixed address constraint Comments in head.S suggest that the iSeries naca has a fixed address, because tools expect to find it there. The only tool which appears to access the naca is addRamDisk, but both the in-kernel version and the version used in RHEL and SuSE in fact locate the NACA the same way as the hypervisor does, by following the pointer in the hvReleaseData structure. Since the requirement for a fixed address seems to be obsolete, this patch removes the naca from head.S and replaces it with a normal C initializer. For good measure, it removes an old version of addRamDisk.c which was sitting, unused, in the ppc32 tree. 
Signed-off-by: David Gibson Signed-off-by: Paul Mackerras --- arch/ppc/boot/utils/addRamDisk.c | 203 ------------------------------- arch/ppc64/kernel/LparData.c | 11 ++ arch/ppc64/kernel/head.S | 17 +-- include/asm-ppc64/naca.h | 7 -- 4 files changed, 12 insertions(+), 226 deletions(-) delete mode 100644 arch/ppc/boot/utils/addRamDisk.c diff --git a/arch/ppc/boot/utils/addRamDisk.c b/arch/ppc/boot/utils/addRamDisk.c deleted file mode 100644 index 93400dfcce7f..000000000000 --- a/arch/ppc/boot/utils/addRamDisk.c +++ /dev/null @@ -1,203 +0,0 @@ -#include -#include -#include -#include -#include -#include -#include - -#define ElfHeaderSize (64 * 1024) -#define ElfPages (ElfHeaderSize / 4096) -#define KERNELBASE (0xc0000000) - -void get4k(FILE *file, char *buf ) -{ - unsigned j; - unsigned num = fread(buf, 1, 4096, file); - for ( j=num; j<4096; ++j ) - buf[j] = 0; -} - -void put4k(FILE *file, char *buf ) -{ - fwrite(buf, 1, 4096, file); -} - -void death(const char *msg, FILE *fdesc, const char *fname) -{ - printf(msg); - fclose(fdesc); - unlink(fname); - exit(1); -} - -int main(int argc, char **argv) -{ - char inbuf[4096]; - FILE *ramDisk = NULL; - FILE *inputVmlinux = NULL; - FILE *outputVmlinux = NULL; - unsigned i = 0; - u_int32_t ramFileLen = 0; - u_int32_t ramLen = 0; - u_int32_t roundR = 0; - u_int32_t kernelLen = 0; - u_int32_t actualKernelLen = 0; - u_int32_t round = 0; - u_int32_t roundedKernelLen = 0; - u_int32_t ramStartOffs = 0; - u_int32_t ramPages = 0; - u_int32_t roundedKernelPages = 0; - u_int32_t hvReleaseData = 0; - u_int32_t eyeCatcher = 0xc8a5d9c4; - u_int32_t naca = 0; - u_int32_t xRamDisk = 0; - u_int32_t xRamDiskSize = 0; - if ( argc < 2 ) { - printf("Name of RAM disk file missing.\n"); - exit(1); - } - - if ( argc < 3 ) { - printf("Name of vmlinux file missing.\n"); - exit(1); - } - - if ( argc < 4 ) { - printf("Name of vmlinux output file missing.\n"); - exit(1); - } - - ramDisk = fopen(argv[1], "r"); - if ( ! 
ramDisk ) { - printf("RAM disk file \"%s\" failed to open.\n", argv[1]); - exit(1); - } - inputVmlinux = fopen(argv[2], "r"); - if ( ! inputVmlinux ) { - printf("vmlinux file \"%s\" failed to open.\n", argv[2]); - exit(1); - } - outputVmlinux = fopen(argv[3], "w+"); - if ( ! outputVmlinux ) { - printf("output vmlinux file \"%s\" failed to open.\n", argv[3]); - exit(1); - } - fseek(ramDisk, 0, SEEK_END); - ramFileLen = ftell(ramDisk); - fseek(ramDisk, 0, SEEK_SET); - printf("%s file size = %d\n", argv[1], ramFileLen); - - ramLen = ramFileLen; - - roundR = 4096 - (ramLen % 4096); - if ( roundR ) { - printf("Rounding RAM disk file up to a multiple of 4096, adding %d\n", roundR); - ramLen += roundR; - } - - printf("Rounded RAM disk size is %d\n", ramLen); - fseek(inputVmlinux, 0, SEEK_END); - kernelLen = ftell(inputVmlinux); - fseek(inputVmlinux, 0, SEEK_SET); - printf("kernel file size = %d\n", kernelLen); - if ( kernelLen == 0 ) { - printf("You must have a linux kernel specified as argv[2]\n"); - exit(1); - } - - actualKernelLen = kernelLen - ElfHeaderSize; - - printf("actual kernel length (minus ELF header) = %d\n", actualKernelLen); - - round = actualKernelLen % 4096; - roundedKernelLen = actualKernelLen; - if ( round ) - roundedKernelLen += (4096 - round); - - printf("actual kernel length rounded up to a 4k multiple = %d\n", roundedKernelLen); - - ramStartOffs = roundedKernelLen; - ramPages = ramLen / 4096; - - printf("RAM disk pages to copy = %d\n", ramPages); - - // Copy 64K ELF header - for (i=0; i<(ElfPages); ++i) { - get4k( inputVmlinux, inbuf ); - put4k( outputVmlinux, inbuf ); - } - - roundedKernelPages = roundedKernelLen / 4096; - - fseek(inputVmlinux, ElfHeaderSize, SEEK_SET); - - for ( i=0; i #include #include -#include #include #include #include @@ -511,24 +510,10 @@ _GLOBAL(do_stab_bolted_pSeries) mfspr r12,SPRG2 EXCEPTION_PROLOG_PSERIES(PACA_EXSLB, .do_stab_bolted) - - /* Space for the naca. 
Architected to be located at real address - * NACA_PHYS_ADDR. Various tools rely on this location being fixed. - * The first dword of the naca is required by iSeries LPAR to - * point to itVpdAreas. On pSeries native, this value is not used. - */ - . = NACA_PHYS_ADDR - .globl __end_interrupts -__end_interrupts: -#ifdef CONFIG_PPC_ISERIES - .globl naca -naca: - .llong itVpdAreas - .llong 0 /* xRamDisk */ - .llong 0 /* xRamDiskSize */ . = 0x6100 +#ifdef CONFIG_PPC_ISERIES /*** ISeries-LPAR interrupt handlers ***/ STD_EXCEPTION_ISERIES(0x200, machine_check, PACA_EXMC) diff --git a/include/asm-ppc64/naca.h b/include/asm-ppc64/naca.h index bfb7caa32eaf..d2afe6447597 100644 --- a/include/asm-ppc64/naca.h +++ b/include/asm-ppc64/naca.h @@ -12,8 +12,6 @@ #include -#ifndef __ASSEMBLY__ - struct naca_struct { /* Kernel only data - undefined for user space */ void *xItVpdAreas; /* VPD Data 0x00 */ @@ -23,9 +21,4 @@ struct naca_struct { extern struct naca_struct naca; -#endif /* __ASSEMBLY__ */ - -#define NACA_PAGE 0x4 -#define NACA_PHYS_ADDR (NACA_PAGE< Date: Fri, 19 Aug 2005 14:52:31 +1000 Subject: [PATCH 226/584] [PATCH] Move iSeries and common vectors into unused space in head.S In the ppc64 kernel head.S there is currently quite a lot of unused space between the naca (at fixed address 0x4000) and the fwnmi data area (at fixed address 0x7000). This patch moves various exception vectors and support code into this region to use the wasted space. The functions load_up_fpu and load_up_altivec are moved down as well, since they are essentially continuations of the fp_unavailable_common and altivec_unavailable_common vectors, respectively. Likewise, the fwnmi vectors themselves are moved down into this area, because while the location of the fwnmi data area is fixed by the RPA, the vectors themselves can be anywhere sufficiently low. 
Signed-off-by: David Gibson Signed-off-by: Paul Mackerras --- arch/ppc64/kernel/head.S | 355 +++++++++++++++++++-------------------- 1 file changed, 175 insertions(+), 180 deletions(-) diff --git a/arch/ppc64/kernel/head.S b/arch/ppc64/kernel/head.S index 13c03648a602..eb54f0548b01 100644 --- a/arch/ppc64/kernel/head.S +++ b/arch/ppc64/kernel/head.S @@ -52,9 +52,8 @@ * We layout physical memory as follows: * 0x0000 - 0x00ff : Secondary processor spin code * 0x0100 - 0x2fff : pSeries Interrupt prologs - * 0x3000 - 0x3fff : Interrupt support - * 0x4000 - 0x4fff : NACA - * 0x6000 : iSeries and common interrupt prologs + * 0x3000 - 0x6fff : interrupt support, iSeries and common interrupt prologs + * 0x7000 - 0x7fff : FWNMI data area * 0x9000 - 0x9fff : Initial segment table */ @@ -501,17 +500,35 @@ system_call_pSeries: STD_EXCEPTION_PSERIES(0x1300, instruction_breakpoint) STD_EXCEPTION_PSERIES(0x1700, altivec_assist) - /* moved from 0xf00 */ - STD_EXCEPTION_PSERIES(0x3000, performance_monitor) + . = 0x3000 - . = 0x3100 +/*** pSeries interrupt support ***/ + + /* moved from 0xf00 */ + STD_EXCEPTION_PSERIES(., performance_monitor) + + .align 7 _GLOBAL(do_stab_bolted_pSeries) mtcrf 0x80,r12 mfspr r12,SPRG2 EXCEPTION_PROLOG_PSERIES(PACA_EXSLB, .do_stab_bolted) +/* + * Vectors for the FWNMI option. Share common code. + */ + .globl system_reset_fwnmi +system_reset_fwnmi: + HMT_MEDIUM + mtspr SPRG1,r13 /* save r13 */ + RUNLATCH_ON(r13) + EXCEPTION_PROLOG_PSERIES(PACA_EXGEN, system_reset_common) - . = 0x6100 + .globl machine_check_fwnmi +machine_check_fwnmi: + HMT_MEDIUM + mtspr SPRG1,r13 /* save r13 */ + RUNLATCH_ON(r13) + EXCEPTION_PROLOG_PSERIES(PACA_EXMC, machine_check_common) #ifdef CONFIG_PPC_ISERIES /*** ISeries-LPAR interrupt handlers ***/ @@ -656,51 +673,8 @@ hardware_interrupt_iSeries_masked: ld r13,PACA_EXGEN+EX_R13(r13) rfid b . /* prevent speculative execution */ -#endif - -/* - * Data area reserved for FWNMI option. 
- */ - .= 0x7000 - .globl fwnmi_data_area -fwnmi_data_area: - -#ifdef CONFIG_PPC_ISERIES - . = LPARMAP_PHYS -#include "lparmap.s" #endif /* CONFIG_PPC_ISERIES */ -/* - * Vectors for the FWNMI option. Share common code. - */ - . = 0x8000 - .globl system_reset_fwnmi -system_reset_fwnmi: - HMT_MEDIUM - mtspr SPRG1,r13 /* save r13 */ - RUNLATCH_ON(r13) - EXCEPTION_PROLOG_PSERIES(PACA_EXGEN, system_reset_common) - .globl machine_check_fwnmi -machine_check_fwnmi: - HMT_MEDIUM - mtspr SPRG1,r13 /* save r13 */ - RUNLATCH_ON(r13) - EXCEPTION_PROLOG_PSERIES(PACA_EXMC, machine_check_common) - - /* - * Space for the initial segment table - * For LPAR, the hypervisor must fill in at least one entry - * before we get control (with relocate on) - */ - . = STAB0_PHYS_ADDR - .globl __start_stab -__start_stab: - - . = (STAB0_PHYS_ADDR + PAGE_SIZE) - .globl __end_stab -__end_stab: - - /*** Common interrupt handlers ***/ STD_EXCEPTION_COMMON(0x100, system_reset, .system_reset_exception) @@ -891,6 +865,62 @@ fp_unavailable_common: bl .kernel_fp_unavailable_exception BUG_OPCODE +/* + * load_up_fpu(unused, unused, tsk) + * Disable FP for the task which had the FPU previously, + * and save its floating-point registers in its thread_struct. + * Enables the FPU for use in the kernel on return. + * On SMP we know the fpu is free, since we give it up every + * switch (ie, no lazy save of the FP registers). + * On entry: r13 == 'current' && last_task_used_math != 'current' + */ +_STATIC(load_up_fpu) + mfmsr r5 /* grab the current MSR */ + ori r5,r5,MSR_FP + mtmsrd r5 /* enable use of fpu now */ + isync +/* + * For SMP, we don't do lazy FPU switching because it just gets too + * horrendously complex, especially when a task switches from one CPU + * to another. Instead we call giveup_fpu in switch_to. 
+ * + */ +#ifndef CONFIG_SMP + ld r3,last_task_used_math@got(r2) + ld r4,0(r3) + cmpdi 0,r4,0 + beq 1f + /* Save FP state to last_task_used_math's THREAD struct */ + addi r4,r4,THREAD + SAVE_32FPRS(0, r4) + mffs fr0 + stfd fr0,THREAD_FPSCR(r4) + /* Disable FP for last_task_used_math */ + ld r5,PT_REGS(r4) + ld r4,_MSR-STACK_FRAME_OVERHEAD(r5) + li r6,MSR_FP|MSR_FE0|MSR_FE1 + andc r4,r4,r6 + std r4,_MSR-STACK_FRAME_OVERHEAD(r5) +1: +#endif /* CONFIG_SMP */ + /* enable use of FP after return */ + ld r4,PACACURRENT(r13) + addi r5,r4,THREAD /* Get THREAD */ + ld r4,THREAD_FPEXC_MODE(r5) + ori r12,r12,MSR_FP + or r12,r12,r4 + std r12,_MSR(r1) + lfd fr0,THREAD_FPSCR(r5) + mtfsf 0xff,fr0 + REST_32FPRS(0, r5) +#ifndef CONFIG_SMP + /* Update last_task_used_math to 'current' */ + subi r4,r5,THREAD /* Back to 'current' */ + std r4,0(r3) +#endif /* CONFIG_SMP */ + /* restore registers and return */ + b fast_exception_return + .align 7 .globl altivec_unavailable_common altivec_unavailable_common: @@ -906,6 +936,80 @@ END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) bl .altivec_unavailable_exception b .ret_from_except +#ifdef CONFIG_ALTIVEC +/* + * load_up_altivec(unused, unused, tsk) + * Disable VMX for the task which had it previously, + * and save its vector registers in its thread_struct. + * Enables the VMX for use in the kernel on return. + * On SMP we know the VMX is free, since we give it up every + * switch (ie, no lazy save of the vector registers). + * On entry: r13 == 'current' && last_task_used_altivec != 'current' + */ +_STATIC(load_up_altivec) + mfmsr r5 /* grab the current MSR */ + oris r5,r5,MSR_VEC@h + mtmsrd r5 /* enable use of VMX now */ + isync + +/* + * For SMP, we don't do lazy VMX switching because it just gets too + * horrendously complex, especially when a task switches from one CPU + * to another. Instead we call giveup_altivec in switch_to. + * VRSAVE isn't dealt with here, that is done in the normal context + * switch code.
Note that we could rely on vrsave value to eventually + * avoid saving all of the VREGs here... + */ +#ifndef CONFIG_SMP + ld r3,last_task_used_altivec@got(r2) + ld r4,0(r3) + cmpdi 0,r4,0 + beq 1f + /* Save VMX state to last_task_used_altivec's THREAD struct */ + addi r4,r4,THREAD + SAVE_32VRS(0,r5,r4) + mfvscr vr0 + li r10,THREAD_VSCR + stvx vr0,r10,r4 + /* Disable VMX for last_task_used_altivec */ + ld r5,PT_REGS(r4) + ld r4,_MSR-STACK_FRAME_OVERHEAD(r5) + lis r6,MSR_VEC@h + andc r4,r4,r6 + std r4,_MSR-STACK_FRAME_OVERHEAD(r5) +1: +#endif /* CONFIG_SMP */ + /* Hack: if we get an altivec unavailable trap with VRSAVE + * set to all zeros, we assume this is a broken application + * that fails to set it properly, and thus we switch it to + * all 1's + */ + mfspr r4,SPRN_VRSAVE + cmpdi 0,r4,0 + bne+ 1f + li r4,-1 + mtspr SPRN_VRSAVE,r4 +1: + /* enable use of VMX after return */ + ld r4,PACACURRENT(r13) + addi r5,r4,THREAD /* Get THREAD */ + oris r12,r12,MSR_VEC@h + std r12,_MSR(r1) + li r4,1 + li r10,THREAD_VSCR + stw r4,THREAD_USED_VR(r5) + lvx vr0,r10,r5 + mtvscr vr0 + REST_32VRS(0,r4,r5) +#ifndef CONFIG_SMP + /* Update last_task_used_altivec to 'current' */ + subi r4,r5,THREAD /* Back to 'current' */ + std r4,0(r3) +#endif /* CONFIG_SMP */ + /* restore registers and return */ + b fast_exception_return +#endif /* CONFIG_ALTIVEC */ + /* * Hash table stuff */ @@ -1152,6 +1256,27 @@ unrecov_slb: bl .unrecoverable_exception b 1b +/* + * Data area reserved for FWNMI option. + * This address (0x7000) is fixed by the RPA. + */ + .= 0x7000 + .globl fwnmi_data_area +fwnmi_data_area: + .space PAGE_SIZE + + /* + * Space for the initial segment table + * For LPAR, the hypervisor must fill in at least one entry + * before we get control (with relocate on) + */ + . = STAB0_PHYS_ADDR + .globl __start_stab +__start_stab: + + . = (STAB0_PHYS_ADDR + PAGE_SIZE) + .globl __end_stab +__end_stab: /* * On pSeries, secondary processors spin in the following code.
@@ -1415,62 +1540,6 @@ _GLOBAL(copy_and_flush) .align 8 copy_to_here: -/* - * load_up_fpu(unused, unused, tsk) - * Disable FP for the task which had the FPU previously, - * and save its floating-point registers in its thread_struct. - * Enables the FPU for use in the kernel on return. - * On SMP we know the fpu is free, since we give it up every - * switch (ie, no lazy save of the FP registers). - * On entry: r13 == 'current' && last_task_used_math != 'current' - */ -_STATIC(load_up_fpu) - mfmsr r5 /* grab the current MSR */ - ori r5,r5,MSR_FP - mtmsrd r5 /* enable use of fpu now */ - isync -/* - * For SMP, we don't do lazy FPU switching because it just gets too - * horrendously complex, especially when a task switches from one CPU - * to another. Instead we call giveup_fpu in switch_to. - * - */ -#ifndef CONFIG_SMP - ld r3,last_task_used_math@got(r2) - ld r4,0(r3) - cmpdi 0,r4,0 - beq 1f - /* Save FP state to last_task_used_math's THREAD struct */ - addi r4,r4,THREAD - SAVE_32FPRS(0, r4) - mffs fr0 - stfd fr0,THREAD_FPSCR(r4) - /* Disable FP for last_task_used_math */ - ld r5,PT_REGS(r4) - ld r4,_MSR-STACK_FRAME_OVERHEAD(r5) - li r6,MSR_FP|MSR_FE0|MSR_FE1 - andc r4,r4,r6 - std r4,_MSR-STACK_FRAME_OVERHEAD(r5) -1: -#endif /* CONFIG_SMP */ - /* enable use of FP after return */ - ld r4,PACACURRENT(r13) - addi r5,r4,THREAD /* Get THREAD */ - ld r4,THREAD_FPEXC_MODE(r5) - ori r12,r12,MSR_FP - or r12,r12,r4 - std r12,_MSR(r1) - lfd fr0,THREAD_FPSCR(r5) - mtfsf 0xff,fr0 - REST_32FPRS(0, r5) -#ifndef CONFIG_SMP - /* Update last_task_used_math to 'current' */ - subi r4,r5,THREAD /* Back to 'current' */ - std r4,0(r3) -#endif /* CONFIG_SMP */ - /* restore registers and return */ - b fast_exception_return - /* * disable_kernel_fp() * Disable the FPU. 
@@ -1515,81 +1584,7 @@ _GLOBAL(giveup_fpu) #endif /* CONFIG_SMP */ blr - #ifdef CONFIG_ALTIVEC - -/* - * load_up_altivec(unused, unused, tsk) - * Disable VMX for the task which had it previously, - * and save its vector registers in its thread_struct. - * Enables the VMX for use in the kernel on return. - * On SMP we know the VMX is free, since we give it up every - * switch (ie, no lazy save of the vector registers). - * On entry: r13 == 'current' && last_task_used_altivec != 'current' - */ -_STATIC(load_up_altivec) - mfmsr r5 /* grab the current MSR */ - oris r5,r5,MSR_VEC@h - mtmsrd r5 /* enable use of VMX now */ - isync - -/* - * For SMP, we don't do lazy VMX switching because it just gets too - * horrendously complex, especially when a task switches from one CPU - * to another. Instead we call giveup_altvec in switch_to. - * VRSAVE isn't dealt with here, that is done in the normal context - * switch code. Note that we could rely on vrsave value to eventually - * avoid saving all of the VREGs here... 
- */ -#ifndef CONFIG_SMP - ld r3,last_task_used_altivec@got(r2) - ld r4,0(r3) - cmpdi 0,r4,0 - beq 1f - /* Save VMX state to last_task_used_altivec's THREAD struct */ - addi r4,r4,THREAD - SAVE_32VRS(0,r5,r4) - mfvscr vr0 - li r10,THREAD_VSCR - stvx vr0,r10,r4 - /* Disable VMX for last_task_used_altivec */ - ld r5,PT_REGS(r4) - ld r4,_MSR-STACK_FRAME_OVERHEAD(r5) - lis r6,MSR_VEC@h - andc r4,r4,r6 - std r4,_MSR-STACK_FRAME_OVERHEAD(r5) -1: -#endif /* CONFIG_SMP */ - /* Hack: if we get an altivec unavailable trap with VRSAVE - * set to all zeros, we assume this is a broken application - * that fails to set it properly, and thus we switch it to - * all 1's - */ - mfspr r4,SPRN_VRSAVE - cmpdi 0,r4,0 - bne+ 1f - li r4,-1 - mtspr SPRN_VRSAVE,r4 -1: - /* enable use of VMX after return */ - ld r4,PACACURRENT(r13) - addi r5,r4,THREAD /* Get THREAD */ - oris r12,r12,MSR_VEC@h - std r12,_MSR(r1) - li r4,1 - li r10,THREAD_VSCR - stw r4,THREAD_USED_VR(r5) - lvx vr0,r10,r5 - mtvscr vr0 - REST_32VRS(0,r4,r5) -#ifndef CONFIG_SMP - /* Update last_task_used_math to 'current' */ - subi r4,r5,THREAD /* Back to 'current' */ - std r4,0(r3) -#endif /* CONFIG_SMP */ - /* restore registers and return */ - b fast_exception_return - /* * disable_kernel_altivec() * Disable the VMX. From c59c464a3e29830bcfae5eea1777cad9e00087f3 Mon Sep 17 00:00:00 2001 From: David Gibson Date: Fri, 19 Aug 2005 14:52:31 +1000 Subject: [PATCH 227/584] [PATCH] Change address of ppc64 initial segment table On ppc64 machines with segment tables, CPU0's segment table is at a fixed address, currently 0x9000. This patch moves it to the free space at 0x6000, just below the fwnmi data area. This saves 8k of space in vmlinux and the runtime kernel image. 
Signed-off-by: David Gibson Signed-off-by: Paul Mackerras --- arch/ppc64/kernel/head.S | 32 +++++++++++++++++--------------- arch/ppc64/kernel/pacaData.c | 4 ++-- include/asm-ppc64/mmu.h | 7 +++++-- 3 files changed, 24 insertions(+), 19 deletions(-) diff --git a/arch/ppc64/kernel/head.S b/arch/ppc64/kernel/head.S index eb54f0548b01..7de38ebbe973 100644 --- a/arch/ppc64/kernel/head.S +++ b/arch/ppc64/kernel/head.S @@ -52,9 +52,10 @@ * We layout physical memory as follows: * 0x0000 - 0x00ff : Secondary processor spin code * 0x0100 - 0x2fff : pSeries Interrupt prologs - * 0x3000 - 0x6fff : interrupt support, iSeries and common interrupt prologs + * 0x3000 - 0x5fff : interrupt support, iSeries and common interrupt prologs + * 0x6000 - 0x6fff : Initial (CPU0) segment table * 0x7000 - 0x7fff : FWNMI data area - * 0x9000 - 0x9fff : Initial segment table + * 0x8000 - : Early init and support code */ /* @@ -1256,6 +1257,20 @@ unrecov_slb: bl .unrecoverable_exception b 1b +/* + * Space for CPU0's segment table. + * + * On iSeries, the hypervisor must fill in at least one entry before + * we get control (with relocate on). The address is give to the hv + * as a page number (see xLparMap in LparData.c), so this must be at a + * fixed address (the linker can't compute (u64)&initial_stab >> + * PAGE_SHIFT). + */ + . = STAB0_PHYS_ADDR /* 0x6000 */ + .globl initial_stab +initial_stab: + .space 4096 + /* * Data area reserved for FWNMI option. * This address (0x7000) is fixed by the RPA. @@ -1265,19 +1280,6 @@ unrecov_slb: fwnmi_data_area: .space PAGE_SIZE - /* - * Space for the initial segment table - * For LPAR, the hypervisor must fill in at least one entry - * before we get control (with relocate on) - */ - . = STAB0_PHYS_ADDR - .globl __start_stab -__start_stab: - - . = (STAB0_PHYS_ADDR + PAGE_SIZE) - .globl __end_stab -__end_stab: - /* * On pSeries, secondary processors spin in the following code. 
* At entry, r3 = this processor's number (physical cpu id) diff --git a/arch/ppc64/kernel/pacaData.c b/arch/ppc64/kernel/pacaData.c index 6316188737b6..6182a2cd90a5 100644 --- a/arch/ppc64/kernel/pacaData.c +++ b/arch/ppc64/kernel/pacaData.c @@ -78,7 +78,7 @@ extern unsigned long __toc_start; #define BOOTCPU_PACA_INIT(number) \ { \ - PACA_INIT_COMMON(number, 1, 0, STAB0_VIRT_ADDR) \ + PACA_INIT_COMMON(number, 1, 0, (u64)&initial_stab) \ PACA_INIT_ISERIES(number) \ } @@ -90,7 +90,7 @@ extern unsigned long __toc_start; #define BOOTCPU_PACA_INIT(number) \ { \ - PACA_INIT_COMMON(number, 1, STAB0_PHYS_ADDR, STAB0_VIRT_ADDR) \ + PACA_INIT_COMMON(number, 1, STAB0_PHYS_ADDR, (u64)&initial_stab) \ } #endif diff --git a/include/asm-ppc64/mmu.h b/include/asm-ppc64/mmu.h index 959a4bfdcd6a..789c2693483c 100644 --- a/include/asm-ppc64/mmu.h +++ b/include/asm-ppc64/mmu.h @@ -28,9 +28,12 @@ #define STE_VSID_SHIFT 12 /* Location of cpu0's segment table */ -#define STAB0_PAGE 0x9 +#define STAB0_PAGE 0x6 #define STAB0_PHYS_ADDR (STAB0_PAGE< Date: Fri, 19 Aug 2005 14:52:31 +1000 Subject: [PATCH 228/584] [PATCH] Remove general use functions from head.S As well as the interrupt vectors and initialization code, head.S contains several asm functions which are used during runtime. This patch moves these to misc.S, a more sensible location for random asm support code. 
The functions moved are: disable_kernel_fp giveup_fpu disable_kernel_altivec giveup_altivec __setup_cpu_power3 (empty function) Signed-off-by: David Gibson Signed-off-by: Paul Mackerras --- arch/ppc64/kernel/head.S | 95 -------------------------------------- arch/ppc64/kernel/misc.S | 98 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 98 insertions(+), 95 deletions(-) diff --git a/arch/ppc64/kernel/head.S b/arch/ppc64/kernel/head.S index 7de38ebbe973..70e10403b697 100644 --- a/arch/ppc64/kernel/head.S +++ b/arch/ppc64/kernel/head.S @@ -1542,98 +1542,6 @@ _GLOBAL(copy_and_flush) .align 8 copy_to_here: -/* - * disable_kernel_fp() - * Disable the FPU. - */ -_GLOBAL(disable_kernel_fp) - mfmsr r3 - rldicl r0,r3,(63-MSR_FP_LG),1 - rldicl r3,r0,(MSR_FP_LG+1),0 - mtmsrd r3 /* disable use of fpu now */ - isync - blr - -/* - * giveup_fpu(tsk) - * Disable FP for the task given as the argument, - * and save the floating-point registers in its thread_struct. - * Enables the FPU for use in the kernel on return. - */ -_GLOBAL(giveup_fpu) - mfmsr r5 - ori r5,r5,MSR_FP - mtmsrd r5 /* enable use of fpu now */ - isync - cmpdi 0,r3,0 - beqlr- /* if no previous owner, done */ - addi r3,r3,THREAD /* want THREAD of task */ - ld r5,PT_REGS(r3) - cmpdi 0,r5,0 - SAVE_32FPRS(0, r3) - mffs fr0 - stfd fr0,THREAD_FPSCR(r3) - beq 1f - ld r4,_MSR-STACK_FRAME_OVERHEAD(r5) - li r3,MSR_FP|MSR_FE0|MSR_FE1 - andc r4,r4,r3 /* disable FP for previous task */ - std r4,_MSR-STACK_FRAME_OVERHEAD(r5) -1: -#ifndef CONFIG_SMP - li r5,0 - ld r4,last_task_used_math@got(r2) - std r5,0(r4) -#endif /* CONFIG_SMP */ - blr - -#ifdef CONFIG_ALTIVEC -/* - * disable_kernel_altivec() - * Disable the VMX.
- */ -_GLOBAL(disable_kernel_altivec) - mfmsr r3 - rldicl r0,r3,(63-MSR_VEC_LG),1 - rldicl r3,r0,(MSR_VEC_LG+1),0 - mtmsrd r3 /* disable use of VMX now */ - isync - blr - -/* - * giveup_altivec(tsk) - * Disable VMX for the task given as the argument, - * and save the vector registers in its thread_struct. - * Enables the VMX for use in the kernel on return. - */ -_GLOBAL(giveup_altivec) - mfmsr r5 - oris r5,r5,MSR_VEC@h - mtmsrd r5 /* enable use of VMX now */ - isync - cmpdi 0,r3,0 - beqlr- /* if no previous owner, done */ - addi r3,r3,THREAD /* want THREAD of task */ - ld r5,PT_REGS(r3) - cmpdi 0,r5,0 - SAVE_32VRS(0,r4,r3) - mfvscr vr0 - li r4,THREAD_VSCR - stvx vr0,r4,r3 - beq 1f - ld r4,_MSR-STACK_FRAME_OVERHEAD(r5) - lis r3,MSR_VEC@h - andc r4,r4,r3 /* disable FP for previous task */ - std r4,_MSR-STACK_FRAME_OVERHEAD(r5) -1: -#ifndef CONFIG_SMP - li r5,0 - ld r4,last_task_used_altivec@got(r2) - std r5,0(r4) -#endif /* CONFIG_SMP */ - blr - -#endif /* CONFIG_ALTIVEC */ - #ifdef CONFIG_SMP #ifdef CONFIG_PPC_PMAC /* @@ -1984,9 +1892,6 @@ _STATIC(start_here_common) bl .start_kernel -_GLOBAL(__setup_cpu_power3) - blr - _GLOBAL(hmt_init) #ifdef CONFIG_HMT LOADADDR(r5, hmt_thread_data) diff --git a/arch/ppc64/kernel/misc.S b/arch/ppc64/kernel/misc.S index a05b50b738e9..474df0a862bf 100644 --- a/arch/ppc64/kernel/misc.S +++ b/arch/ppc64/kernel/misc.S @@ -680,6 +680,104 @@ _GLOBAL(kernel_thread) ld r30,-16(r1) blr +/* + * disable_kernel_fp() + * Disable the FPU. + */ +_GLOBAL(disable_kernel_fp) + mfmsr r3 + rldicl r0,r3,(63-MSR_FP_LG),1 + rldicl r3,r0,(MSR_FP_LG+1),0 + mtmsrd r3 /* disable use of fpu now */ + isync + blr + +/* + * giveup_fpu(tsk) + * Disable FP for the task given as the argument, + * and save the floating-point registers in its thread_struct. + * Enables the FPU for use in the kernel on return. 
+ */ +_GLOBAL(giveup_fpu) + mfmsr r5 + ori r5,r5,MSR_FP + mtmsrd r5 /* enable use of fpu now */ + isync + cmpdi 0,r3,0 + beqlr- /* if no previous owner, done */ + addi r3,r3,THREAD /* want THREAD of task */ + ld r5,PT_REGS(r3) + cmpdi 0,r5,0 + SAVE_32FPRS(0, r3) + mffs fr0 + stfd fr0,THREAD_FPSCR(r3) + beq 1f + ld r4,_MSR-STACK_FRAME_OVERHEAD(r5) + li r3,MSR_FP|MSR_FE0|MSR_FE1 + andc r4,r4,r3 /* disable FP for previous task */ + std r4,_MSR-STACK_FRAME_OVERHEAD(r5) +1: +#ifndef CONFIG_SMP + li r5,0 + ld r4,last_task_used_math@got(r2) + std r5,0(r4) +#endif /* CONFIG_SMP */ + blr + +#ifdef CONFIG_ALTIVEC + +#if 0 /* this has no callers for now */ +/* + * disable_kernel_altivec() + * Disable the VMX. + */ +_GLOBAL(disable_kernel_altivec) + mfmsr r3 + rldicl r0,r3,(63-MSR_VEC_LG),1 + rldicl r3,r0,(MSR_VEC_LG+1),0 + mtmsrd r3 /* disable use of VMX now */ + isync + blr +#endif /* 0 */ + +/* + * giveup_altivec(tsk) + * Disable VMX for the task given as the argument, + * and save the vector registers in its thread_struct. + * Enables the VMX for use in the kernel on return. 
+ */ +_GLOBAL(giveup_altivec) + mfmsr r5 + oris r5,r5,MSR_VEC@h + mtmsrd r5 /* enable use of VMX now */ + isync + cmpdi 0,r3,0 + beqlr- /* if no previous owner, done */ + addi r3,r3,THREAD /* want THREAD of task */ + ld r5,PT_REGS(r3) + cmpdi 0,r5,0 + SAVE_32VRS(0,r4,r3) + mfvscr vr0 + li r4,THREAD_VSCR + stvx vr0,r4,r3 + beq 1f + ld r4,_MSR-STACK_FRAME_OVERHEAD(r5) + lis r3,MSR_VEC@h + andc r4,r4,r3 /* disable FP for previous task */ + std r4,_MSR-STACK_FRAME_OVERHEAD(r5) +1: +#ifndef CONFIG_SMP + li r5,0 + ld r4,last_task_used_altivec@got(r2) + std r5,0(r4) +#endif /* CONFIG_SMP */ + blr + +#endif /* CONFIG_ALTIVEC */ + +_GLOBAL(__setup_cpu_power3) + blr + /* kexec_wait(phys_cpu) * * wait for the flag to change, indicating this kernel is going away but From 60ba44945714d9b7dae8b85ab0926f6f13809c73 Mon Sep 17 00:00:00 2001 From: David Gibson Date: Fri, 19 Aug 2005 14:52:32 +1000 Subject: [PATCH 229/584] [PATCH] Fix apparent code overlap in ppc64 head.S An #if/#else construct near the top of ppc64's head.S appears to create overlapping sections of code for iSeries and pSeries (i.e. one thing on iSeries and something different in the same place on pSeries). In fact, checking the various absolute offsets, it doesn't. This patch unravels the #ifdefs to make it more obvious what's going on. This accomplishes another microstep towards a single kernel image which can boot both iSeries and pSeries. Signed-off-by: David Gibson Signed-off-by: Paul Mackerras --- arch/ppc64/kernel/head.S | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/ppc64/kernel/head.S b/arch/ppc64/kernel/head.S index 70e10403b697..1cc9f203c22f 100644 --- a/arch/ppc64/kernel/head.S +++ b/arch/ppc64/kernel/head.S @@ -93,6 +93,7 @@ END_FTR_SECTION(0, 1) /* Catch branch to 0 in real mode */ trap + #ifdef CONFIG_PPC_ISERIES /* * At offset 0x20, there is a pointer to iSeries LPAR data. 
@@ -119,7 +120,7 @@ embedded_sysmap_start: embedded_sysmap_end: .llong 0 -#else /* CONFIG_PPC_ISERIES */ +#endif /* CONFIG_PPC_ISERIES */ /* Secondary processors spin on this value until it goes to 1. */ .globl __secondary_hold_spinloop @@ -169,7 +170,6 @@ _GLOBAL(__secondary_hold) BUG_OPCODE #endif #endif -#endif /* This value is used to mark exception frames on the stack. */ .section ".toc","aw" From 1d086e6bd605ac44154e019fe96ae3568e8b2ba2 Mon Sep 17 00:00:00 2001 From: David Gibson Date: Fri, 19 Aug 2005 14:52:32 +1000 Subject: [PATCH 230/584] [PATCH] Remove unneeded #defines in head.S arch/ppc64/kernel/head.S #defines SECONDARY_PROCESSORS and then has some #ifdefs based on it. Whatever purpose this had is long lost, so this patch removes it. Likewise, head.S defines H_SET_ASR, which is now defined, along with the other hypervisor call numbers, in hvcall.h. This patch deletes it from head.S as well. Signed-off-by: David Gibson Signed-off-by: Paul Mackerras --- arch/ppc64/kernel/head.S | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/arch/ppc64/kernel/head.S b/arch/ppc64/kernel/head.S index 1cc9f203c22f..bf2345eb133c 100644 --- a/arch/ppc64/kernel/head.S +++ b/arch/ppc64/kernel/head.S @@ -23,8 +23,6 @@ * 2 of the License, or (at your option) any later version.
*/ -#define SECONDARY_PROCESSORS - #include #include #include @@ -43,11 +41,6 @@ #define DO_SOFT_DISABLE #endif -/* - * hcall interface to pSeries LPAR - */ -#define H_SET_ASR 0x30 - /* * We layout physical memory as follows: * 0x0000 - 0x00ff : Secondary processor spin code @@ -629,9 +622,7 @@ system_reset_iSeries: cmpwi 0,r23,0 beq iSeries_secondary_smp_loop /* Loop until told to go */ -#ifdef SECONDARY_PROCESSORS bne .__secondary_start /* Loop until told to go */ -#endif iSeries_secondary_smp_loop: /* Let the Hypervisor know we are alive */ /* 8002 is a call to HvCallCfg::getLps, a harmless Hypervisor function */ @@ -1325,9 +1316,7 @@ _GLOBAL(pSeries_secondary_smp_init) cmpwi 0,r23,0 #ifdef CONFIG_SMP -#ifdef SECONDARY_PROCESSORS bne .__secondary_start -#endif #endif b 3b /* Loop until told to go */ From 91a57fc6723d778e12686b5106a38583072fd767 Mon Sep 17 00:00:00 2001 From: David Gibson Date: Fri, 19 Aug 2005 14:52:32 +1000 Subject: [PATCH 231/584] [PATCH] Tweak comments in ppc64 head.S This patch adjusts some comments in head.S for accuracy, clarity, and spelling. Signed-off-by: David Gibson Signed-off-by: Paul Mackerras --- arch/ppc64/kernel/head.S | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/ppc64/kernel/head.S b/arch/ppc64/kernel/head.S index bf2345eb133c..c7462fae7662 100644 --- a/arch/ppc64/kernel/head.S +++ b/arch/ppc64/kernel/head.S @@ -148,7 +148,7 @@ _GLOBAL(__secondary_hold) std r24,__secondary_hold_acknowledge@l(0) sync - /* All secondary cpu's wait here until told to start. */ + /* All secondary cpus wait here until told to start. */ 100: ld r4,__secondary_hold_spinloop@l(0) cmpdi 0,r4,1 bne 100b @@ -703,8 +703,8 @@ machine_check_common: * R9 contains the saved CR, r13 points to the paca, * r10 contains the (bad) kernel stack pointer, * r11 and r12 contain the saved SRR0 and SRR1.
- * We switch to using the paca guard page as an emergency stack, - * save the registers there, and call kernel_bad_stack(), which panics. + * We switch to using an emergency stack, save the registers there, + * and call kernel_bad_stack(), which panics. */ bad_stack: ld r1,PACAEMERGSP(r13) @@ -1303,7 +1303,7 @@ _GLOBAL(pSeries_secondary_smp_init) b .kexec_wait /* next kernel might do better */ 2: mtspr SPRG3,r13 /* Save vaddr of paca in SPRG3 */ - /* From now on, r24 is expected to be logica cpuid */ + /* From now on, r24 is expected to be logical cpuid */ mr r24,r5 3: HMT_LOW lbz r23,PACAPROCSTART(r13) /* Test if this processor should */ From 6fbb49d56d228b666cb4534bbc3c2dfe833c8053 Mon Sep 17 00:00:00 2001 From: David Gibson Date: Fri, 19 Aug 2005 14:52:32 +1000 Subject: [PATCH 232/584] [PATCH] Move variables in ppc64 head.S from .data to .bss The ppc64 head.S defines several zero-initialized structures, such as the empty_zero_page and the kernel top-level pagetable. Currently they are defined to be in the data section. However, they're not used until after the bss is cleared, so this patch moves them to the bss, saving two and a half pages from the vmlinux. Signed-off-by: David Gibson Signed-off-by: Paul Mackerras --- arch/ppc64/kernel/head.S | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/arch/ppc64/kernel/head.S b/arch/ppc64/kernel/head.S index c7462fae7662..a0ff707d6fea 100644 --- a/arch/ppc64/kernel/head.S +++ b/arch/ppc64/kernel/head.S @@ -1971,20 +1971,19 @@ _GLOBAL(smp_release_cpus) /* * We put a few things here that have to be page-aligned. - * This stuff goes at the beginning of the data segment, - * which is page-aligned. + * This stuff goes at the beginning of the bss, which is page-aligned. 
*/ - .data + .section ".bss" + .align 12 - .globl sdata -sdata: + .globl empty_zero_page empty_zero_page: - .space 4096 + .space PAGE_SIZE .globl swapper_pg_dir swapper_pg_dir: - .space 4096 + .space PAGE_SIZE /* * This space gets a copy of optional info passed to us by the bootstrap From 7a6af5e38054d8e658a4b1b703902331a845de1a Mon Sep 17 00:00:00 2001 From: Stephen Rothwell Date: Wed, 3 Aug 2005 14:32:30 +1000 Subject: [PATCH 233/584] [PATCH] ppc64: remove firmware features from cpu_spec The firmware_features field of struct cpu_spec should really be a separate variable as the firmware features do not depend on the chip and the bitmask is constructed independently. By removing it, we save 112 bytes from the cpu_specs array and we access the bitmask directly instead of via the cur_cpu_spec pointer. Signed-off-by: Stephen Rothwell Signed-off-by: Paul Mackerras --- arch/ppc64/kernel/cputable.c | 16 +--------------- arch/ppc64/kernel/lparcfg.c | 4 ++-- arch/ppc64/kernel/pSeries_iommu.c | 2 +- arch/ppc64/kernel/pSeries_setup.c | 12 ++++++------ arch/ppc64/kernel/pSeries_smp.c | 2 +- arch/ppc64/kernel/process.c | 2 +- arch/ppc64/kernel/sysfs.c | 2 +- arch/ppc64/kernel/time.c | 2 +- include/asm-ppc64/cputable.h | 10 +++++----- 9 files changed, 19 insertions(+), 33 deletions(-) diff --git a/arch/ppc64/kernel/cputable.c b/arch/ppc64/kernel/cputable.c index 77cec42f9525..84fdd27498a5 100644 --- a/arch/ppc64/kernel/cputable.c +++ b/arch/ppc64/kernel/cputable.c @@ -23,6 +23,7 @@ struct cpu_spec* cur_cpu_spec = NULL; EXPORT_SYMBOL(cur_cpu_spec); +unsigned long ppc64_firmware_features; /* NOTE: * Unlike ppc32, ppc64 will only call this once for the boot CPU, it's @@ -60,7 +61,6 @@ struct cpu_spec cpu_specs[] = { .icache_bsize = 128, .dcache_bsize = 128, .cpu_setup = __setup_cpu_power3, - .firmware_features = COMMON_PPC64_FW, }, { /* Power3+ */ .pvr_mask = 0xffff0000, @@ -73,7 +73,6 @@ struct cpu_spec cpu_specs[] = { .icache_bsize = 128, .dcache_bsize = 128, .cpu_setup = 
__setup_cpu_power3, - .firmware_features = COMMON_PPC64_FW, }, { /* Northstar */ .pvr_mask = 0xffff0000, @@ -86,7 +85,6 @@ struct cpu_spec cpu_specs[] = { .icache_bsize = 128, .dcache_bsize = 128, .cpu_setup = __setup_cpu_power3, - .firmware_features = COMMON_PPC64_FW, }, { /* Pulsar */ .pvr_mask = 0xffff0000, @@ -99,7 +97,6 @@ struct cpu_spec cpu_specs[] = { .icache_bsize = 128, .dcache_bsize = 128, .cpu_setup = __setup_cpu_power3, - .firmware_features = COMMON_PPC64_FW, }, { /* I-star */ .pvr_mask = 0xffff0000, @@ -112,7 +109,6 @@ struct cpu_spec cpu_specs[] = { .icache_bsize = 128, .dcache_bsize = 128, .cpu_setup = __setup_cpu_power3, - .firmware_features = COMMON_PPC64_FW, }, { /* S-star */ .pvr_mask = 0xffff0000, @@ -125,7 +121,6 @@ struct cpu_spec cpu_specs[] = { .icache_bsize = 128, .dcache_bsize = 128, .cpu_setup = __setup_cpu_power3, - .firmware_features = COMMON_PPC64_FW, }, { /* Power4 */ .pvr_mask = 0xffff0000, @@ -138,7 +133,6 @@ struct cpu_spec cpu_specs[] = { .icache_bsize = 128, .dcache_bsize = 128, .cpu_setup = __setup_cpu_power4, - .firmware_features = COMMON_PPC64_FW, }, { /* Power4+ */ .pvr_mask = 0xffff0000, @@ -151,7 +145,6 @@ struct cpu_spec cpu_specs[] = { .icache_bsize = 128, .dcache_bsize = 128, .cpu_setup = __setup_cpu_power4, - .firmware_features = COMMON_PPC64_FW, }, { /* PPC970 */ .pvr_mask = 0xffff0000, @@ -166,7 +159,6 @@ struct cpu_spec cpu_specs[] = { .icache_bsize = 128, .dcache_bsize = 128, .cpu_setup = __setup_cpu_ppc970, - .firmware_features = COMMON_PPC64_FW, }, { /* PPC970FX */ .pvr_mask = 0xffff0000, @@ -181,7 +173,6 @@ struct cpu_spec cpu_specs[] = { .icache_bsize = 128, .dcache_bsize = 128, .cpu_setup = __setup_cpu_ppc970, - .firmware_features = COMMON_PPC64_FW, }, { /* PPC970MP */ .pvr_mask = 0xffff0000, @@ -196,7 +187,6 @@ struct cpu_spec cpu_specs[] = { .icache_bsize = 128, .dcache_bsize = 128, .cpu_setup = __setup_cpu_ppc970, - .firmware_features = COMMON_PPC64_FW, }, { /* Power5 */ .pvr_mask = 0xffff0000, @@ -211,7 
+201,6 @@ struct cpu_spec cpu_specs[] = { .icache_bsize = 128, .dcache_bsize = 128, .cpu_setup = __setup_cpu_power4, - .firmware_features = COMMON_PPC64_FW, }, { /* Power5 */ .pvr_mask = 0xffff0000, @@ -226,7 +215,6 @@ struct cpu_spec cpu_specs[] = { .icache_bsize = 128, .dcache_bsize = 128, .cpu_setup = __setup_cpu_power4, - .firmware_features = COMMON_PPC64_FW, }, { /* BE DD1.x */ .pvr_mask = 0xffff0000, @@ -241,7 +229,6 @@ struct cpu_spec cpu_specs[] = { .icache_bsize = 128, .dcache_bsize = 128, .cpu_setup = __setup_cpu_be, - .firmware_features = COMMON_PPC64_FW, }, { /* default match */ .pvr_mask = 0x00000000, @@ -254,7 +241,6 @@ struct cpu_spec cpu_specs[] = { .icache_bsize = 128, .dcache_bsize = 128, .cpu_setup = __setup_cpu_power4, - .firmware_features = COMMON_PPC64_FW, } }; diff --git a/arch/ppc64/kernel/lparcfg.c b/arch/ppc64/kernel/lparcfg.c index 02e96627fa66..938353848cd0 100644 --- a/arch/ppc64/kernel/lparcfg.c +++ b/arch/ppc64/kernel/lparcfg.c @@ -377,7 +377,7 @@ static int lparcfg_data(struct seq_file *m, void *v) partition_active_processors = lparcfg_count_active_processors(); - if (cur_cpu_spec->firmware_features & FW_FEATURE_SPLPAR) { + if (ppc64_firmware_features & FW_FEATURE_SPLPAR) { unsigned long h_entitled, h_unallocated; unsigned long h_aggregation, h_resource; unsigned long pool_idle_time, pool_procs; @@ -571,7 +571,7 @@ int __init lparcfg_init(void) mode_t mode = S_IRUSR; /* Allow writing if we have FW_FEATURE_SPLPAR */ - if (cur_cpu_spec->firmware_features & FW_FEATURE_SPLPAR) { + if (ppc64_firmware_features & FW_FEATURE_SPLPAR) { lparcfg_fops.write = lparcfg_write; mode |= S_IWUSR; } diff --git a/arch/ppc64/kernel/pSeries_iommu.c b/arch/ppc64/kernel/pSeries_iommu.c index 69130522a87e..a5786be9c654 100644 --- a/arch/ppc64/kernel/pSeries_iommu.c +++ b/arch/ppc64/kernel/pSeries_iommu.c @@ -546,7 +546,7 @@ void iommu_init_early_pSeries(void) } if (systemcfg->platform & PLATFORM_LPAR) { - if (cur_cpu_spec->firmware_features & 
FW_FEATURE_MULTITCE) { + if (ppc64_firmware_features & FW_FEATURE_MULTITCE) { ppc_md.tce_build = tce_buildmulti_pSeriesLP; ppc_md.tce_free = tce_freemulti_pSeriesLP; } else { diff --git a/arch/ppc64/kernel/pSeries_setup.c b/arch/ppc64/kernel/pSeries_setup.c index 5bec956e44a0..d3975ac71cfb 100644 --- a/arch/ppc64/kernel/pSeries_setup.c +++ b/arch/ppc64/kernel/pSeries_setup.c @@ -231,11 +231,11 @@ static void __init pSeries_setup_arch(void) pSeries_nvram_init(); - if (cur_cpu_spec->firmware_features & FW_FEATURE_SPLPAR) + if (ppc64_firmware_features & FW_FEATURE_SPLPAR) vpa_init(boot_cpuid); /* Choose an idle loop */ - if (cur_cpu_spec->firmware_features & FW_FEATURE_SPLPAR) { + if (ppc64_firmware_features & FW_FEATURE_SPLPAR) { if (get_paca()->lppaca.shared_proc) { printk(KERN_INFO "Using shared processor idle loop\n"); ppc_md.idle_loop = pseries_shared_idle; @@ -260,7 +260,7 @@ static int __init pSeries_init_panel(void) arch_initcall(pSeries_init_panel); -/* Build up the firmware_features bitmask field +/* Build up the ppc64_firmware_features bitmask field * using contents of device-tree/ibm,hypertas-functions. * Ultimately this functionality may be moved into prom.c prom_init(). */ @@ -272,7 +272,7 @@ void __init fw_feature_init(void) DBG(" -> fw_feature_init()\n"); - cur_cpu_spec->firmware_features = 0; + ppc64_firmware_features = 0; dn = of_find_node_by_path("/rtas"); if (dn == NULL) { printk(KERN_ERR "WARNING ! 
Cannot find RTAS in device-tree !\n"); @@ -288,7 +288,7 @@ void __init fw_feature_init(void) if ((firmware_features_table[i].name) && (strcmp(firmware_features_table[i].name,hypertas))==0) { /* we have a match */ - cur_cpu_spec->firmware_features |= + ppc64_firmware_features |= (firmware_features_table[i].val); break; } @@ -302,7 +302,7 @@ void __init fw_feature_init(void) of_node_put(dn); no_rtas: printk(KERN_INFO "firmware_features = 0x%lx\n", - cur_cpu_spec->firmware_features); + ppc64_firmware_features); DBG(" <- fw_feature_init()\n"); } diff --git a/arch/ppc64/kernel/pSeries_smp.c b/arch/ppc64/kernel/pSeries_smp.c index 62c55a123560..8312d324aaae 100644 --- a/arch/ppc64/kernel/pSeries_smp.c +++ b/arch/ppc64/kernel/pSeries_smp.c @@ -326,7 +326,7 @@ static void __devinit smp_xics_setup_cpu(int cpu) if (cpu != boot_cpuid) xics_setup_cpu(); - if (cur_cpu_spec->firmware_features & FW_FEATURE_SPLPAR) + if (ppc64_firmware_features & FW_FEATURE_SPLPAR) vpa_init(cpu); cpu_clear(cpu, of_spin_map); diff --git a/arch/ppc64/kernel/process.c b/arch/ppc64/kernel/process.c index f7cae05e40fb..390f434283af 100644 --- a/arch/ppc64/kernel/process.c +++ b/arch/ppc64/kernel/process.c @@ -206,7 +206,7 @@ struct task_struct *__switch_to(struct task_struct *prev, /* purr is nothing but processor time base */ #if defined(CONFIG_PPC_PSERIES) - if (cur_cpu_spec->firmware_features & FW_FEATURE_SPLPAR) { + if (ppc64_firmware_features & FW_FEATURE_SPLPAR) { struct cpu_usage *cu = &__get_cpu_var(cpu_usage_array); long unsigned start_tb, current_tb; start_tb = old_thread->start_tb; diff --git a/arch/ppc64/kernel/sysfs.c b/arch/ppc64/kernel/sysfs.c index 02b8ac4e0168..90b653c724fa 100644 --- a/arch/ppc64/kernel/sysfs.c +++ b/arch/ppc64/kernel/sysfs.c @@ -154,7 +154,7 @@ void ppc64_enable_pmcs(void) #ifdef CONFIG_PPC_PSERIES /* instruct hypervisor to maintain PMCs */ - if (cur_cpu_spec->firmware_features & FW_FEATURE_SPLPAR) + if (ppc64_firmware_features & FW_FEATURE_SPLPAR) 
get_paca()->lppaca.pmcregs_in_use = 1; #endif /* CONFIG_PPC_PSERIES */ } diff --git a/arch/ppc64/kernel/time.c b/arch/ppc64/kernel/time.c index 909462e1adea..1c05cee05315 100644 --- a/arch/ppc64/kernel/time.c +++ b/arch/ppc64/kernel/time.c @@ -372,7 +372,7 @@ int timer_interrupt(struct pt_regs * regs) /* collect purr register values often, for accurate calculations */ #if defined(CONFIG_PPC_PSERIES) - if (cur_cpu_spec->firmware_features & FW_FEATURE_SPLPAR) { + if (ppc64_firmware_features & FW_FEATURE_SPLPAR) { struct cpu_usage *cu = &__get_cpu_var(cpu_usage_array); cu->current_tb = mfspr(SPRN_PURR); } diff --git a/include/asm-ppc64/cputable.h b/include/asm-ppc64/cputable.h index d67fa9e26079..d55698a60e75 100644 --- a/include/asm-ppc64/cputable.h +++ b/include/asm-ppc64/cputable.h @@ -56,11 +56,6 @@ struct cpu_spec { * BHT, SPD, etc... from head.S before branching to identify_machine */ cpu_setup_t cpu_setup; - - /* This is used to identify firmware features which are available - * to the kernel. - */ - unsigned long firmware_features; }; extern struct cpu_spec cpu_specs[]; @@ -72,6 +67,11 @@ static inline unsigned long cpu_has_feature(unsigned long feature) } +/* This is used to identify firmware features which are available + * to the kernel. + */ +extern unsigned long ppc64_firmware_features; + /* firmware feature bitmask values */ #define FIRMWARE_MAX_FEATURES 63 From 1ababe11480d59d75be806804c71fa55d203a5a6 Mon Sep 17 00:00:00 2001 From: Stephen Rothwell Date: Wed, 3 Aug 2005 14:35:25 +1000 Subject: [PATCH 234/584] [PATCH] ppc64: create firmware_has_feature() Create the firmware_has_feature() inline and move the firmware feature stuff into its own header file. 
Signed-off-by: Stephen Rothwell Signed-off-by: Paul Mackerras --- arch/ppc64/kernel/cputable.c | 1 + arch/ppc64/kernel/lparcfg.c | 6 +-- arch/ppc64/kernel/pSeries_iommu.c | 3 +- arch/ppc64/kernel/pSeries_setup.c | 6 +-- arch/ppc64/kernel/pSeries_smp.c | 3 +- arch/ppc64/kernel/process.c | 3 +- arch/ppc64/kernel/sysfs.c | 3 +- arch/ppc64/kernel/time.c | 3 +- include/asm-ppc64/cputable.h | 47 ++-------------------- include/asm-ppc64/firmware.h | 67 +++++++++++++++++++++++++++++++ 10 files changed, 87 insertions(+), 55 deletions(-) create mode 100644 include/asm-ppc64/firmware.h diff --git a/arch/ppc64/kernel/cputable.c b/arch/ppc64/kernel/cputable.c index 84fdd27498a5..6294fc7bd442 100644 --- a/arch/ppc64/kernel/cputable.c +++ b/arch/ppc64/kernel/cputable.c @@ -20,6 +20,7 @@ #include #include +#include struct cpu_spec* cur_cpu_spec = NULL; EXPORT_SYMBOL(cur_cpu_spec); diff --git a/arch/ppc64/kernel/lparcfg.c b/arch/ppc64/kernel/lparcfg.c index 938353848cd0..9d034ff062b1 100644 --- a/arch/ppc64/kernel/lparcfg.c +++ b/arch/ppc64/kernel/lparcfg.c @@ -29,7 +29,7 @@ #include #include #include -#include +#include #include #include #include @@ -377,7 +377,7 @@ static int lparcfg_data(struct seq_file *m, void *v) partition_active_processors = lparcfg_count_active_processors(); - if (ppc64_firmware_features & FW_FEATURE_SPLPAR) { + if (firmware_has_feature(FW_FEATURE_SPLPAR)) { unsigned long h_entitled, h_unallocated; unsigned long h_aggregation, h_resource; unsigned long pool_idle_time, pool_procs; @@ -571,7 +571,7 @@ int __init lparcfg_init(void) mode_t mode = S_IRUSR; /* Allow writing if we have FW_FEATURE_SPLPAR */ - if (ppc64_firmware_features & FW_FEATURE_SPLPAR) { + if (firmware_has_feature(FW_FEATURE_SPLPAR)) { lparcfg_fops.write = lparcfg_write; mode |= S_IWUSR; } diff --git a/arch/ppc64/kernel/pSeries_iommu.c b/arch/ppc64/kernel/pSeries_iommu.c index a5786be9c654..9d5e1e7fc389 100644 --- a/arch/ppc64/kernel/pSeries_iommu.c +++ b/arch/ppc64/kernel/pSeries_iommu.c @@ 
-45,6 +45,7 @@ #include #include #include +#include #include "pci.h" #define DBG(fmt...) @@ -546,7 +547,7 @@ void iommu_init_early_pSeries(void) } if (systemcfg->platform & PLATFORM_LPAR) { - if (ppc64_firmware_features & FW_FEATURE_MULTITCE) { + if (firmware_has_feature(FW_FEATURE_MULTITCE)) { ppc_md.tce_build = tce_buildmulti_pSeriesLP; ppc_md.tce_free = tce_freemulti_pSeriesLP; } else { diff --git a/arch/ppc64/kernel/pSeries_setup.c b/arch/ppc64/kernel/pSeries_setup.c index d3975ac71cfb..0058f32a3d89 100644 --- a/arch/ppc64/kernel/pSeries_setup.c +++ b/arch/ppc64/kernel/pSeries_setup.c @@ -60,7 +60,7 @@ #include #include #include -#include +#include #include "i8259.h" #include "mpic.h" @@ -231,11 +231,11 @@ static void __init pSeries_setup_arch(void) pSeries_nvram_init(); - if (ppc64_firmware_features & FW_FEATURE_SPLPAR) + if (firmware_has_feature(FW_FEATURE_SPLPAR)) vpa_init(boot_cpuid); /* Choose an idle loop */ - if (ppc64_firmware_features & FW_FEATURE_SPLPAR) { + if (firmware_has_feature(FW_FEATURE_SPLPAR)) { if (get_paca()->lppaca.shared_proc) { printk(KERN_INFO "Using shared processor idle loop\n"); ppc_md.idle_loop = pseries_shared_idle; diff --git a/arch/ppc64/kernel/pSeries_smp.c b/arch/ppc64/kernel/pSeries_smp.c index 8312d324aaae..79c7f3223665 100644 --- a/arch/ppc64/kernel/pSeries_smp.c +++ b/arch/ppc64/kernel/pSeries_smp.c @@ -41,6 +41,7 @@ #include #include #include +#include #include #include #include @@ -326,7 +327,7 @@ static void __devinit smp_xics_setup_cpu(int cpu) if (cpu != boot_cpuid) xics_setup_cpu(); - if (ppc64_firmware_features & FW_FEATURE_SPLPAR) + if (firmware_has_feature(FW_FEATURE_SPLPAR)) vpa_init(cpu); cpu_clear(cpu, of_spin_map); diff --git a/arch/ppc64/kernel/process.c b/arch/ppc64/kernel/process.c index 390f434283af..9bad983333b1 100644 --- a/arch/ppc64/kernel/process.c +++ b/arch/ppc64/kernel/process.c @@ -50,6 +50,7 @@ #include #include #include +#include #include #include #include @@ -206,7 +207,7 @@ struct task_struct 
*__switch_to(struct task_struct *prev, /* purr is nothing but processor time base */ #if defined(CONFIG_PPC_PSERIES) - if (ppc64_firmware_features & FW_FEATURE_SPLPAR) { + if (firmware_has_feature(FW_FEATURE_SPLPAR)) { struct cpu_usage *cu = &__get_cpu_var(cpu_usage_array); long unsigned start_tb, current_tb; start_tb = old_thread->start_tb; diff --git a/arch/ppc64/kernel/sysfs.c b/arch/ppc64/kernel/sysfs.c index 90b653c724fa..e399963521c0 100644 --- a/arch/ppc64/kernel/sysfs.c +++ b/arch/ppc64/kernel/sysfs.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include @@ -154,7 +155,7 @@ void ppc64_enable_pmcs(void) #ifdef CONFIG_PPC_PSERIES /* instruct hypervisor to maintain PMCs */ - if (ppc64_firmware_features & FW_FEATURE_SPLPAR) + if (firmware_has_feature(FW_FEATURE_SPLPAR)) get_paca()->lppaca.pmcregs_in_use = 1; #endif /* CONFIG_PPC_PSERIES */ } diff --git a/arch/ppc64/kernel/time.c b/arch/ppc64/kernel/time.c index 1c05cee05315..d523056fd660 100644 --- a/arch/ppc64/kernel/time.c +++ b/arch/ppc64/kernel/time.c @@ -67,6 +67,7 @@ #include #include #include +#include u64 jiffies_64 __cacheline_aligned_in_smp = INITIAL_JIFFIES; @@ -372,7 +373,7 @@ int timer_interrupt(struct pt_regs * regs) /* collect purr register values often, for accurate calculations */ #if defined(CONFIG_PPC_PSERIES) - if (ppc64_firmware_features & FW_FEATURE_SPLPAR) { + if (firmware_has_feature(FW_FEATURE_SPLPAR)) { struct cpu_usage *cu = &__get_cpu_var(cpu_usage_array); cu->current_tb = mfspr(SPRN_PURR); } diff --git a/include/asm-ppc64/cputable.h b/include/asm-ppc64/cputable.h index d55698a60e75..ae6cf3830108 100644 --- a/include/asm-ppc64/cputable.h +++ b/include/asm-ppc64/cputable.h @@ -66,44 +66,6 @@ static inline unsigned long cpu_has_feature(unsigned long feature) return cur_cpu_spec->cpu_features & feature; } - -/* This is used to identify firmware features which are available - * to the kernel. 
- */ -extern unsigned long ppc64_firmware_features; - -/* firmware feature bitmask values */ -#define FIRMWARE_MAX_FEATURES 63 - -#define FW_FEATURE_PFT (1UL<<0) -#define FW_FEATURE_TCE (1UL<<1) -#define FW_FEATURE_SPRG0 (1UL<<2) -#define FW_FEATURE_DABR (1UL<<3) -#define FW_FEATURE_COPY (1UL<<4) -#define FW_FEATURE_ASR (1UL<<5) -#define FW_FEATURE_DEBUG (1UL<<6) -#define FW_FEATURE_TERM (1UL<<7) -#define FW_FEATURE_PERF (1UL<<8) -#define FW_FEATURE_DUMP (1UL<<9) -#define FW_FEATURE_INTERRUPT (1UL<<10) -#define FW_FEATURE_MIGRATE (1UL<<11) -#define FW_FEATURE_PERFMON (1UL<<12) -#define FW_FEATURE_CRQ (1UL<<13) -#define FW_FEATURE_VIO (1UL<<14) -#define FW_FEATURE_RDMA (1UL<<15) -#define FW_FEATURE_LLAN (1UL<<16) -#define FW_FEATURE_BULK (1UL<<17) -#define FW_FEATURE_XDABR (1UL<<18) -#define FW_FEATURE_MULTITCE (1UL<<19) -#define FW_FEATURE_SPLPAR (1UL<<20) - -typedef struct { - unsigned long val; - char * name; -} firmware_feature_t; - -extern firmware_feature_t firmware_features_table[]; - #endif /* __ASSEMBLY__ */ /* CPU kernel features */ @@ -140,10 +102,8 @@ extern firmware_feature_t firmware_features_table[]; #define CPU_FTR_MMCRA_SIHV ASM_CONST(0x0000080000000000) #define CPU_FTR_CTRL ASM_CONST(0x0000100000000000) -/* Platform firmware features */ -#define FW_FTR_ ASM_CONST(0x0000000000000001) - #ifndef __ASSEMBLY__ + #define COMMON_USER_PPC64 (PPC_FEATURE_32 | PPC_FEATURE_64 | \ PPC_FEATURE_HAS_FPU | PPC_FEATURE_HAS_MMU) @@ -156,10 +116,9 @@ extern firmware_feature_t firmware_features_table[]; #define CPU_FTR_PPCAS_ARCH_V2 (CPU_FTR_PPCAS_ARCH_V2_BASE) #else #define CPU_FTR_PPCAS_ARCH_V2 (CPU_FTR_PPCAS_ARCH_V2_BASE | CPU_FTR_16M_PAGE) -#endif +#endif /* CONFIG_PPC_ISERIES */ -#define COMMON_PPC64_FW (0) -#endif +#endif /* __ASSEMBLY */ #ifdef __ASSEMBLY__ diff --git a/include/asm-ppc64/firmware.h b/include/asm-ppc64/firmware.h new file mode 100644 index 000000000000..5bb5bf46bb4a --- /dev/null +++ b/include/asm-ppc64/firmware.h @@ -0,0 +1,67 @@ +/* + * 
include/asm-ppc64/firmware.h + * + * Extracted from include/asm-ppc64/cputable.h + * + * Copyright (C) 2001 Ben. Herrenschmidt (benh@kernel.crashing.org) + * + * Modifications for ppc64: + * Copyright (C) 2003 Dave Engebretsen + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ +#ifndef __ASM_PPC_FIRMWARE_H +#define __ASM_PPC_FIRMWARE_H + +#ifdef __KERNEL__ + +#ifndef __ASSEMBLY__ + +/* firmware feature bitmask values */ +#define FIRMWARE_MAX_FEATURES 63 + +#define FW_FEATURE_PFT (1UL<<0) +#define FW_FEATURE_TCE (1UL<<1) +#define FW_FEATURE_SPRG0 (1UL<<2) +#define FW_FEATURE_DABR (1UL<<3) +#define FW_FEATURE_COPY (1UL<<4) +#define FW_FEATURE_ASR (1UL<<5) +#define FW_FEATURE_DEBUG (1UL<<6) +#define FW_FEATURE_TERM (1UL<<7) +#define FW_FEATURE_PERF (1UL<<8) +#define FW_FEATURE_DUMP (1UL<<9) +#define FW_FEATURE_INTERRUPT (1UL<<10) +#define FW_FEATURE_MIGRATE (1UL<<11) +#define FW_FEATURE_PERFMON (1UL<<12) +#define FW_FEATURE_CRQ (1UL<<13) +#define FW_FEATURE_VIO (1UL<<14) +#define FW_FEATURE_RDMA (1UL<<15) +#define FW_FEATURE_LLAN (1UL<<16) +#define FW_FEATURE_BULK (1UL<<17) +#define FW_FEATURE_XDABR (1UL<<18) +#define FW_FEATURE_MULTITCE (1UL<<19) +#define FW_FEATURE_SPLPAR (1UL<<20) + +/* This is used to identify firmware features which are available + * to the kernel. 
+ */ +extern unsigned long ppc64_firmware_features; + +static inline unsigned long firmware_has_feature(unsigned long feature) +{ + return ppc64_firmware_features & feature; +} + +typedef struct { + unsigned long val; + char * name; +} firmware_feature_t; + +extern firmware_feature_t firmware_features_table[]; + +#endif /* __ASSEMBLY__ */ +#endif /* __KERNEL__ */ +#endif /* __ASM_PPC_FIRMWARE_H */ From 8d15a3e55f49678b0900dcf5c1cddb322a129325 Mon Sep 17 00:00:00 2001 From: Stephen Rothwell Date: Wed, 3 Aug 2005 14:40:16 +1000 Subject: [PATCH 235/584] [PATCH] ppc64: make firmware_has_feature() stronger Make firmware_has_feature() evaluate at compile time for the non pSeries case and tidy up code where possible. Signed-off-by: Stephen Rothwell Signed-off-by: Paul Mackerras --- arch/ppc64/kernel/Makefile | 2 +- arch/ppc64/kernel/cputable.c | 27 +----------------- arch/ppc64/kernel/firmware.c | 47 +++++++++++++++++++++++++++++++ arch/ppc64/kernel/pSeries_setup.c | 4 +-- arch/ppc64/kernel/process.c | 9 ++---- arch/ppc64/kernel/sysfs.c | 2 -- arch/ppc64/kernel/time.c | 4 +-- include/asm-ppc64/firmware.h | 20 ++++++++++++- 8 files changed, 73 insertions(+), 42 deletions(-) create mode 100644 arch/ppc64/kernel/firmware.c diff --git a/arch/ppc64/kernel/Makefile b/arch/ppc64/kernel/Makefile index cbf87dcac92a..f4b3bfcc109d 100644 --- a/arch/ppc64/kernel/Makefile +++ b/arch/ppc64/kernel/Makefile @@ -11,7 +11,7 @@ obj-y := setup.o entry.o traps.o irq.o idle.o dma.o \ udbg.o binfmt_elf32.o sys_ppc32.o ioctl32.o \ ptrace32.o signal32.o rtc.o init_task.o \ lmb.o cputable.o cpu_setup_power4.o idle_power4.o \ - iommu.o sysfs.o vdso.o pmc.o + iommu.o sysfs.o vdso.o pmc.o firmware.o obj-y += vdso32/ vdso64/ obj-$(CONFIG_PPC_OF) += of_device.o diff --git a/arch/ppc64/kernel/cputable.c b/arch/ppc64/kernel/cputable.c index 6294fc7bd442..4847f2ac8c9f 100644 --- a/arch/ppc64/kernel/cputable.c +++ b/arch/ppc64/kernel/cputable.c @@ -5,7 +5,7 @@ * * Modifications for ppc64: * Copyright (C) 
2003 Dave Engebretsen - * + * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation; either version @@ -20,11 +20,9 @@ #include #include -#include struct cpu_spec* cur_cpu_spec = NULL; EXPORT_SYMBOL(cur_cpu_spec); -unsigned long ppc64_firmware_features; /* NOTE: * Unlike ppc32, ppc64 will only call this once for the boot CPU, it's @@ -244,26 +242,3 @@ struct cpu_spec cpu_specs[] = { .cpu_setup = __setup_cpu_power4, } }; - -firmware_feature_t firmware_features_table[FIRMWARE_MAX_FEATURES] = { - {FW_FEATURE_PFT, "hcall-pft"}, - {FW_FEATURE_TCE, "hcall-tce"}, - {FW_FEATURE_SPRG0, "hcall-sprg0"}, - {FW_FEATURE_DABR, "hcall-dabr"}, - {FW_FEATURE_COPY, "hcall-copy"}, - {FW_FEATURE_ASR, "hcall-asr"}, - {FW_FEATURE_DEBUG, "hcall-debug"}, - {FW_FEATURE_PERF, "hcall-perf"}, - {FW_FEATURE_DUMP, "hcall-dump"}, - {FW_FEATURE_INTERRUPT, "hcall-interrupt"}, - {FW_FEATURE_MIGRATE, "hcall-migrate"}, - {FW_FEATURE_PERFMON, "hcall-perfmon"}, - {FW_FEATURE_CRQ, "hcall-crq"}, - {FW_FEATURE_VIO, "hcall-vio"}, - {FW_FEATURE_RDMA, "hcall-rdma"}, - {FW_FEATURE_LLAN, "hcall-lLAN"}, - {FW_FEATURE_BULK, "hcall-bulk"}, - {FW_FEATURE_XDABR, "hcall-xdabr"}, - {FW_FEATURE_MULTITCE, "hcall-multi-tce"}, - {FW_FEATURE_SPLPAR, "hcall-splpar"}, -}; diff --git a/arch/ppc64/kernel/firmware.c b/arch/ppc64/kernel/firmware.c new file mode 100644 index 000000000000..d8432c0fb27d --- /dev/null +++ b/arch/ppc64/kernel/firmware.c @@ -0,0 +1,47 @@ +/* + * arch/ppc64/kernel/firmware.c + * + * Extracted from cputable.c + * + * Copyright (C) 2001 Ben. 
Herrenschmidt (benh@kernel.crashing.org) + * + * Modifications for ppc64: + * Copyright (C) 2003 Dave Engebretsen + * Copyright (C) 2005 Stephen Rothwell, IBM Corporation + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include + +#include + +unsigned long ppc64_firmware_features; + +#ifdef CONFIG_PPC_PSERIES +firmware_feature_t firmware_features_table[FIRMWARE_MAX_FEATURES] = { + {FW_FEATURE_PFT, "hcall-pft"}, + {FW_FEATURE_TCE, "hcall-tce"}, + {FW_FEATURE_SPRG0, "hcall-sprg0"}, + {FW_FEATURE_DABR, "hcall-dabr"}, + {FW_FEATURE_COPY, "hcall-copy"}, + {FW_FEATURE_ASR, "hcall-asr"}, + {FW_FEATURE_DEBUG, "hcall-debug"}, + {FW_FEATURE_PERF, "hcall-perf"}, + {FW_FEATURE_DUMP, "hcall-dump"}, + {FW_FEATURE_INTERRUPT, "hcall-interrupt"}, + {FW_FEATURE_MIGRATE, "hcall-migrate"}, + {FW_FEATURE_PERFMON, "hcall-perfmon"}, + {FW_FEATURE_CRQ, "hcall-crq"}, + {FW_FEATURE_VIO, "hcall-vio"}, + {FW_FEATURE_RDMA, "hcall-rdma"}, + {FW_FEATURE_LLAN, "hcall-lLAN"}, + {FW_FEATURE_BULK, "hcall-bulk"}, + {FW_FEATURE_XDABR, "hcall-xdabr"}, + {FW_FEATURE_MULTITCE, "hcall-multi-tce"}, + {FW_FEATURE_SPLPAR, "hcall-splpar"}, +}; +#endif diff --git a/arch/ppc64/kernel/pSeries_setup.c b/arch/ppc64/kernel/pSeries_setup.c index 0058f32a3d89..7ae7a2ca1085 100644 --- a/arch/ppc64/kernel/pSeries_setup.c +++ b/arch/ppc64/kernel/pSeries_setup.c @@ -231,11 +231,9 @@ static void __init pSeries_setup_arch(void) pSeries_nvram_init(); - if (firmware_has_feature(FW_FEATURE_SPLPAR)) - vpa_init(boot_cpuid); - /* Choose an idle loop */ if (firmware_has_feature(FW_FEATURE_SPLPAR)) { + vpa_init(boot_cpuid); if (get_paca()->lppaca.shared_proc) { printk(KERN_INFO "Using shared processor idle loop\n"); ppc_md.idle_loop = pseries_shared_idle; diff --git a/arch/ppc64/kernel/process.c 
b/arch/ppc64/kernel/process.c index 9bad983333b1..7a7e027653ad 100644 --- a/arch/ppc64/kernel/process.c +++ b/arch/ppc64/kernel/process.c @@ -203,10 +203,9 @@ struct task_struct *__switch_to(struct task_struct *prev, new_thread = &new->thread; old_thread = &current->thread; -/* Collect purr utilization data per process and per processor wise */ -/* purr is nothing but processor time base */ - -#if defined(CONFIG_PPC_PSERIES) + /* Collect purr utilization data per process and per processor + * wise purr is nothing but processor time base + */ if (firmware_has_feature(FW_FEATURE_SPLPAR)) { struct cpu_usage *cu = &__get_cpu_var(cpu_usage_array); long unsigned start_tb, current_tb; @@ -215,8 +214,6 @@ struct task_struct *__switch_to(struct task_struct *prev, old_thread->accum_tb += (current_tb - start_tb); new_thread->start_tb = current_tb; } -#endif - local_irq_save(flags); last = _switch(old_thread, new_thread); diff --git a/arch/ppc64/kernel/sysfs.c b/arch/ppc64/kernel/sysfs.c index e399963521c0..eca15d25e026 100644 --- a/arch/ppc64/kernel/sysfs.c +++ b/arch/ppc64/kernel/sysfs.c @@ -153,11 +153,9 @@ void ppc64_enable_pmcs(void) break; } -#ifdef CONFIG_PPC_PSERIES /* instruct hypervisor to maintain PMCs */ if (firmware_has_feature(FW_FEATURE_SPLPAR)) get_paca()->lppaca.pmcregs_in_use = 1; -#endif /* CONFIG_PPC_PSERIES */ } #else diff --git a/arch/ppc64/kernel/time.c b/arch/ppc64/kernel/time.c index d523056fd660..1696e1b05bb9 100644 --- a/arch/ppc64/kernel/time.c +++ b/arch/ppc64/kernel/time.c @@ -371,13 +371,11 @@ int timer_interrupt(struct pt_regs * regs) process_hvlpevents(regs); #endif -/* collect purr register values often, for accurate calculations */ -#if defined(CONFIG_PPC_PSERIES) + /* collect purr register values often, for accurate calculations */ if (firmware_has_feature(FW_FEATURE_SPLPAR)) { struct cpu_usage *cu = &__get_cpu_var(cpu_usage_array); cu->current_tb = mfspr(SPRN_PURR); } -#endif irq_exit(); diff --git a/include/asm-ppc64/firmware.h
b/include/asm-ppc64/firmware.h index 5bb5bf46bb4a..e3725f3c8ea6 100644 --- a/include/asm-ppc64/firmware.h +++ b/include/asm-ppc64/firmware.h @@ -45,6 +45,22 @@ #define FW_FEATURE_MULTITCE (1UL<<19) #define FW_FEATURE_SPLPAR (1UL<<20) +enum { + FW_FEATURE_PSERIES = FW_FEATURE_PFT | FW_FEATURE_TCE | + FW_FEATURE_SPRG0 | FW_FEATURE_DABR | FW_FEATURE_COPY | + FW_FEATURE_ASR | FW_FEATURE_DEBUG | FW_FEATURE_TERM | + FW_FEATURE_PERF | FW_FEATURE_DUMP | FW_FEATURE_INTERRUPT | + FW_FEATURE_MIGRATE | FW_FEATURE_PERFMON | FW_FEATURE_CRQ | + FW_FEATURE_VIO | FW_FEATURE_RDMA | FW_FEATURE_LLAN | + FW_FEATURE_BULK | FW_FEATURE_XDABR | FW_FEATURE_MULTITCE | + FW_FEATURE_SPLPAR, + FW_FEATURE_POSSIBLE = +#ifdef CONFIG_PPC_PSERIES + FW_FEATURE_PSERIES | +#endif + 0, +}; + /* This is used to identify firmware features which are available * to the kernel. */ @@ -52,15 +68,17 @@ extern unsigned long ppc64_firmware_features; static inline unsigned long firmware_has_feature(unsigned long feature) { - return ppc64_firmware_features & feature; + return ppc64_firmware_features & feature & FW_FEATURE_POSSIBLE; } +#ifdef CONFIG_PPC_PSERIES typedef struct { unsigned long val; char * name; } firmware_feature_t; extern firmware_feature_t firmware_features_table[]; +#endif #endif /* __ASSEMBLY__ */ #endif /* __KERNEL__ */ From aed31351941aa990fb0865c186565a589c56d3fe Mon Sep 17 00:00:00 2001 From: Stephen Rothwell Date: Wed, 3 Aug 2005 14:43:21 +1000 Subject: [PATCH 236/584] [PATCH] ppc64: introduce FW_FEATURE_ISERIES Signed-off-by: Stephen Rothwell Signed-off-by: Paul Mackerras --- arch/ppc64/kernel/iSeries_setup.c | 3 +++ arch/ppc64/kernel/pSeries_lpar.c | 1 - arch/ppc64/kernel/pSeries_setup.c | 2 +- include/asm-ppc64/firmware.h | 22 +++++++++++++++++++--- 4 files changed, 23 insertions(+), 5 deletions(-) diff --git a/arch/ppc64/kernel/iSeries_setup.c b/arch/ppc64/kernel/iSeries_setup.c index a649edbb23b6..460e7df681a1 100644 --- a/arch/ppc64/kernel/iSeries_setup.c +++ 
b/arch/ppc64/kernel/iSeries_setup.c @@ -39,6 +39,7 @@ #include #include #include +#include #include #include "iSeries_setup.h" @@ -314,6 +315,8 @@ static void __init iSeries_init_early(void) DBG(" -> iSeries_init_early()\n"); + ppc64_firmware_features = FW_FEATURE_ISERIES; + ppcdbg_initialize(); #if defined(CONFIG_BLK_DEV_INITRD) diff --git a/arch/ppc64/kernel/pSeries_lpar.c b/arch/ppc64/kernel/pSeries_lpar.c index 74dd144dcce8..56845543c891 100644 --- a/arch/ppc64/kernel/pSeries_lpar.c +++ b/arch/ppc64/kernel/pSeries_lpar.c @@ -52,7 +52,6 @@ EXPORT_SYMBOL(plpar_hcall_4out); EXPORT_SYMBOL(plpar_hcall_norets); EXPORT_SYMBOL(plpar_hcall_8arg_2ret); -extern void fw_feature_init(void); extern void pSeries_find_serial_port(void); diff --git a/arch/ppc64/kernel/pSeries_setup.c b/arch/ppc64/kernel/pSeries_setup.c index 7ae7a2ca1085..54e0651ba3fd 100644 --- a/arch/ppc64/kernel/pSeries_setup.c +++ b/arch/ppc64/kernel/pSeries_setup.c @@ -262,7 +262,7 @@ arch_initcall(pSeries_init_panel); * using contents of device-tree/ibm,hypertas-functions. * Ultimately this functionality may be moved into prom.c prom_init(). 
*/ -void __init fw_feature_init(void) +static void __init fw_feature_init(void) { struct device_node * dn; char * hypertas; diff --git a/include/asm-ppc64/firmware.h b/include/asm-ppc64/firmware.h index e3725f3c8ea6..22bb85cf60af 100644 --- a/include/asm-ppc64/firmware.h +++ b/include/asm-ppc64/firmware.h @@ -44,9 +44,10 @@ #define FW_FEATURE_XDABR (1UL<<18) #define FW_FEATURE_MULTITCE (1UL<<19) #define FW_FEATURE_SPLPAR (1UL<<20) +#define FW_FEATURE_ISERIES (1UL<<21) enum { - FW_FEATURE_PSERIES = FW_FEATURE_PFT | FW_FEATURE_TCE | + FW_FEATURE_PSERIES_POSSIBLE = FW_FEATURE_PFT | FW_FEATURE_TCE | FW_FEATURE_SPRG0 | FW_FEATURE_DABR | FW_FEATURE_COPY | FW_FEATURE_ASR | FW_FEATURE_DEBUG | FW_FEATURE_TERM | FW_FEATURE_PERF | FW_FEATURE_DUMP | FW_FEATURE_INTERRUPT | @@ -54,11 +55,25 @@ enum { FW_FEATURE_VIO | FW_FEATURE_RDMA | FW_FEATURE_LLAN | FW_FEATURE_BULK | FW_FEATURE_XDABR | FW_FEATURE_MULTITCE | FW_FEATURE_SPLPAR, + FW_FEATURE_PSERIES_ALWAYS = 0, + FW_FEATURE_ISERIES_POSSIBLE = FW_FEATURE_ISERIES, + FW_FEATURE_ISERIES_ALWAYS = FW_FEATURE_ISERIES, FW_FEATURE_POSSIBLE = #ifdef CONFIG_PPC_PSERIES - FW_FEATURE_PSERIES | + FW_FEATURE_PSERIES_POSSIBLE | +#endif +#ifdef CONFIG_PPC_ISERIES + FW_FEATURE_ISERIES_POSSIBLE | #endif 0, + FW_FEATURE_ALWAYS = +#ifdef CONFIG_PPC_PSERIES + FW_FEATURE_PSERIES_ALWAYS & +#endif +#ifdef CONFIG_PPC_ISERIES + FW_FEATURE_ISERIES_ALWAYS & +#endif + FW_FEATURE_POSSIBLE, }; /* This is used to identify firmware features which are available @@ -68,7 +83,8 @@ extern unsigned long ppc64_firmware_features; static inline unsigned long firmware_has_feature(unsigned long feature) { - return ppc64_firmware_features & feature & FW_FEATURE_POSSIBLE; + return (FW_FEATURE_ALWAYS & feature) || + (FW_FEATURE_POSSIBLE & ppc64_firmware_features & feature); } #ifdef CONFIG_PPC_PSERIES From 38e85dc18036804ada8698951cfad4e6114fec1b Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Wed, 3 Aug 2005 20:21:23 +1000 Subject: [PATCH 237/584] [PATCH] ppc64: 
Remove PTRRELOC() from msChunks code The msChunks code was written to work on pSeries, but now it's only used on iSeries. This means there's no need to do PTRRELOC anymore, so remove it all. A few places were getting "extern reloc_offset()" from abs_addr.h, move it into system.h instead. Signed-off-by: Michael Ellerman Signed-off-by: Paul Mackerras --- arch/ppc64/kernel/LparData.c | 10 +--------- include/asm-ppc64/abs_addr.h | 36 ++++++++++-------------------------- include/asm-ppc64/system.h | 2 ++ 3 files changed, 13 insertions(+), 35 deletions(-) diff --git a/arch/ppc64/kernel/LparData.c b/arch/ppc64/kernel/LparData.c index 3b9a2600fec2..0ed77b2f7d5f 100644 --- a/arch/ppc64/kernel/LparData.c +++ b/arch/ppc64/kernel/LparData.c @@ -229,24 +229,16 @@ struct ItVpdAreas itVpdAreas = { struct msChunks msChunks; EXPORT_SYMBOL(msChunks); -/* Depending on whether this is called from iSeries or pSeries setup - * code, the location of the msChunks struct may or may not have - * to be reloc'd, so we force the caller to do that for us by passing - * in a pointer to the structure. 
- */ unsigned long msChunks_alloc(unsigned long mem, unsigned long num_chunks, unsigned long chunk_size) { - unsigned long offset = reloc_offset(); - struct msChunks *_msChunks = PTRRELOC(&msChunks); - _msChunks->num_chunks = num_chunks; _msChunks->chunk_size = chunk_size; _msChunks->chunk_shift = __ilog2(chunk_size); _msChunks->chunk_mask = (1UL<<_msChunks->chunk_shift)-1; mem = _ALIGN(mem, sizeof(msChunks_entry)); - _msChunks->abs = (msChunks_entry *)(mem + offset); + _msChunks->abs = (msChunks_entry *)mem; mem += num_chunks * sizeof(msChunks_entry); return mem; diff --git a/include/asm-ppc64/abs_addr.h b/include/asm-ppc64/abs_addr.h index 6d4e8e787058..93dc63ed4f2f 100644 --- a/include/asm-ppc64/abs_addr.h +++ b/include/asm-ppc64/abs_addr.h @@ -29,46 +29,30 @@ struct msChunks { extern struct msChunks msChunks; extern unsigned long msChunks_alloc(unsigned long, unsigned long, unsigned long); -extern unsigned long reloc_offset(void); #ifdef CONFIG_MSCHUNKS -static inline unsigned long -chunk_to_addr(unsigned long chunk) +static inline unsigned long chunk_to_addr(unsigned long chunk) { - unsigned long offset = reloc_offset(); - struct msChunks *_msChunks = PTRRELOC(&msChunks); - - return chunk << _msChunks->chunk_shift; + return chunk << msChunks.chunk_shift; } -static inline unsigned long -addr_to_chunk(unsigned long addr) +static inline unsigned long addr_to_chunk(unsigned long addr) { - unsigned long offset = reloc_offset(); - struct msChunks *_msChunks = PTRRELOC(&msChunks); - - return addr >> _msChunks->chunk_shift; + return addr >> msChunks.chunk_shift; } -static inline unsigned long -chunk_offset(unsigned long addr) +static inline unsigned long chunk_offset(unsigned long addr) { - unsigned long offset = reloc_offset(); - struct msChunks *_msChunks = PTRRELOC(&msChunks); - - return addr & _msChunks->chunk_mask; + return addr & msChunks.chunk_mask; } -static inline unsigned long -abs_chunk(unsigned long pchunk) +static inline unsigned long abs_chunk(unsigned 
long pchunk) { - unsigned long offset = reloc_offset(); - struct msChunks *_msChunks = PTRRELOC(&msChunks); - if ( pchunk >= _msChunks->num_chunks ) { + if (pchunk >= msChunks.num_chunks) return pchunk; - } - return PTRRELOC(_msChunks->abs)[pchunk]; + + return msChunks.abs[pchunk]; } /* A macro so it can take pointers or unsigned long. */ diff --git a/include/asm-ppc64/system.h b/include/asm-ppc64/system.h index 98d120ca8a91..4104a5dedbaa 100644 --- a/include/asm-ppc64/system.h +++ b/include/asm-ppc64/system.h @@ -302,5 +302,7 @@ __cmpxchg(volatile void *ptr, unsigned long old, unsigned long new, int size) #define arch_align_stack(x) (x) +extern unsigned long reloc_offset(void); + #endif /* __KERNEL__ */ #endif From 34c8f6961fc601294a38c5bd5ca12131b2e52674 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Wed, 3 Aug 2005 20:21:23 +1000 Subject: [PATCH 238/584] [PATCH] ppc64: msChunks cleanups Chunks are 256KB, so use constants for the size/shift/mask, rather than getting them from the msChunks struct. The iSeries debugger (??) might still need access to the values in the msChunks struct, so we keep them around for now, but set them from the constant values. Replace msChunks_entry typedef with regular u32. Simplify msChunks_alloc() to manipulate klimit directly, rather than via a parameter. Move msChunks_alloc() and msChunks into iSeries_setup.c, as that's where they're used. 
Signed-off-by: Michael Ellerman Signed-off-by: Paul Mackerras --- arch/ppc64/kernel/LparData.c | 18 ------------------ arch/ppc64/kernel/iSeries_setup.c | 20 ++++++++++++++++++-- include/asm-ppc64/abs_addr.h | 15 +++++++++------ 3 files changed, 27 insertions(+), 26 deletions(-) diff --git a/arch/ppc64/kernel/LparData.c b/arch/ppc64/kernel/LparData.c index 0ed77b2f7d5f..0a9c23ca2f0c 100644 --- a/arch/ppc64/kernel/LparData.c +++ b/arch/ppc64/kernel/LparData.c @@ -225,21 +225,3 @@ struct ItVpdAreas itVpdAreas = { 0,0 } }; - -struct msChunks msChunks; -EXPORT_SYMBOL(msChunks); - -unsigned long -msChunks_alloc(unsigned long mem, unsigned long num_chunks, unsigned long chunk_size) -{ - _msChunks->num_chunks = num_chunks; - _msChunks->chunk_size = chunk_size; - _msChunks->chunk_shift = __ilog2(chunk_size); - _msChunks->chunk_mask = (1UL<<_msChunks->chunk_shift)-1; - - mem = _ALIGN(mem, sizeof(msChunks_entry)); - _msChunks->abs = (msChunks_entry *)mem; - mem += num_chunks * sizeof(msChunks_entry); - - return mem; -} diff --git a/arch/ppc64/kernel/iSeries_setup.c b/arch/ppc64/kernel/iSeries_setup.c index 460e7df681a1..e47984ba7c7c 100644 --- a/arch/ppc64/kernel/iSeries_setup.c +++ b/arch/ppc64/kernel/iSeries_setup.c @@ -415,6 +415,22 @@ static void __init iSeries_init_early(void) DBG(" <- iSeries_init_early()\n"); } +struct msChunks msChunks = { + /* XXX We don't use these, but Piranha might need them. 
*/ + .chunk_size = MSCHUNKS_CHUNK_SIZE, + .chunk_shift = MSCHUNKS_CHUNK_SHIFT, + .chunk_mask = MSCHUNKS_OFFSET_MASK, +}; +EXPORT_SYMBOL(msChunks); + +void msChunks_alloc(unsigned long num_chunks) +{ + klimit = _ALIGN(klimit, sizeof(u32)); + msChunks.abs = (u32 *)klimit; + klimit += num_chunks * sizeof(u32); + msChunks.num_chunks = num_chunks; +} + /* * The iSeries may have very large memories ( > 128 GB ) and a partition * may get memory in "chunks" that may be anywhere in the 2**52 real @@ -452,7 +468,7 @@ static void __init build_iSeries_Memory_Map(void) /* Chunk size on iSeries is 256K bytes */ totalChunks = (u32)HvLpConfig_getMsChunks(); - klimit = msChunks_alloc(klimit, totalChunks, 1UL << 18); + msChunks_alloc(totalChunks); /* * Get absolute address of our load area @@ -498,7 +514,7 @@ static void __init build_iSeries_Memory_Map(void) */ hptFirstChunk = (u32)addr_to_chunk(HvCallHpt_getHptAddress()); hptSizePages = (u32)HvCallHpt_getHptPages(); - hptSizeChunks = hptSizePages >> (msChunks.chunk_shift - PAGE_SHIFT); + hptSizeChunks = hptSizePages >> (MSCHUNKS_CHUNK_SHIFT - PAGE_SHIFT); hptLastChunk = hptFirstChunk + hptSizeChunks - 1; printk("HPT absolute addr = %016lx, size = %dK\n", diff --git a/include/asm-ppc64/abs_addr.h b/include/asm-ppc64/abs_addr.h index 93dc63ed4f2f..2276567f133a 100644 --- a/include/asm-ppc64/abs_addr.h +++ b/include/asm-ppc64/abs_addr.h @@ -17,34 +17,37 @@ #include #include -typedef u32 msChunks_entry; struct msChunks { unsigned long num_chunks; unsigned long chunk_size; unsigned long chunk_shift; unsigned long chunk_mask; - msChunks_entry *abs; + u32 *abs; }; extern struct msChunks msChunks; -extern unsigned long msChunks_alloc(unsigned long, unsigned long, unsigned long); #ifdef CONFIG_MSCHUNKS +/* Chunks are 256 KB */ +#define MSCHUNKS_CHUNK_SHIFT (18) +#define MSCHUNKS_CHUNK_SIZE (1UL << MSCHUNKS_CHUNK_SHIFT) +#define MSCHUNKS_OFFSET_MASK (MSCHUNKS_CHUNK_SIZE - 1) + static inline unsigned long chunk_to_addr(unsigned long chunk) { 
- return chunk << msChunks.chunk_shift; + return chunk << MSCHUNKS_CHUNK_SHIFT; } static inline unsigned long addr_to_chunk(unsigned long addr) { - return addr >> msChunks.chunk_shift; + return addr >> MSCHUNKS_CHUNK_SHIFT; } static inline unsigned long chunk_offset(unsigned long addr) { - return addr & msChunks.chunk_mask; + return addr & MSCHUNKS_OFFSET_MASK; } static inline unsigned long abs_chunk(unsigned long pchunk) From 56e97b71bf55edb69dc8e9715553972ce50b1564 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Wed, 3 Aug 2005 20:21:23 +1000 Subject: [PATCH 239/584] [PATCH] ppc64: Rename msChunks structure Rename the msChunks struct to get rid of the StUdlY caps and make it a bit clearer what it's for. Signed-off-by: Michael Ellerman Signed-off-by: Paul Mackerras --- arch/ppc64/kernel/head.S | 4 ++-- arch/ppc64/kernel/iSeries_setup.c | 17 +++++++++-------- include/asm-ppc64/abs_addr.h | 15 +++++++-------- 3 files changed, 18 insertions(+), 18 deletions(-) diff --git a/arch/ppc64/kernel/head.S b/arch/ppc64/kernel/head.S index a0ff707d6fea..cccec4902646 100644 --- a/arch/ppc64/kernel/head.S +++ b/arch/ppc64/kernel/head.S @@ -96,12 +96,12 @@ END_FTR_SECTION(0, 1) .llong hvReleaseData-KERNELBASE /* - * At offset 0x28 and 0x30 are offsets to the msChunks + * At offset 0x28 and 0x30 are offsets to the mschunks_map * array (used by the iSeries LPAR debugger to do translation * between physical addresses and absolute addresses) and * to the pidhash table (also used by the debugger) */ - .llong msChunks-KERNELBASE + .llong mschunks_map-KERNELBASE .llong 0 /* pidhash-KERNELBASE SFRXXX */ /* Offset 0x38 - Pointer to start of embedded System.map */ diff --git a/arch/ppc64/kernel/iSeries_setup.c b/arch/ppc64/kernel/iSeries_setup.c index e47984ba7c7c..b384a6ad0a57 100644 --- a/arch/ppc64/kernel/iSeries_setup.c +++ b/arch/ppc64/kernel/iSeries_setup.c @@ -415,20 +415,20 @@ static void __init iSeries_init_early(void) DBG(" <- iSeries_init_early()\n"); } -struct msChunks 
msChunks = { +struct mschunks_map mschunks_map = { /* XXX We don't use these, but Piranha might need them. */ .chunk_size = MSCHUNKS_CHUNK_SIZE, .chunk_shift = MSCHUNKS_CHUNK_SHIFT, .chunk_mask = MSCHUNKS_OFFSET_MASK, }; -EXPORT_SYMBOL(msChunks); +EXPORT_SYMBOL(mschunks_map); -void msChunks_alloc(unsigned long num_chunks) +void mschunks_alloc(unsigned long num_chunks) { klimit = _ALIGN(klimit, sizeof(u32)); - msChunks.abs = (u32 *)klimit; + mschunks_map.mapping = (u32 *)klimit; klimit += num_chunks * sizeof(u32); - msChunks.num_chunks = num_chunks; + mschunks_map.num_chunks = num_chunks; } /* @@ -468,7 +468,7 @@ static void __init build_iSeries_Memory_Map(void) /* Chunk size on iSeries is 256K bytes */ totalChunks = (u32)HvLpConfig_getMsChunks(); - msChunks_alloc(totalChunks); + mschunks_alloc(totalChunks); /* * Get absolute address of our load area @@ -505,7 +505,7 @@ static void __init build_iSeries_Memory_Map(void) printk("Load area size %dK\n", loadAreaSize * 256); for (nextPhysChunk = 0; nextPhysChunk < loadAreaSize; ++nextPhysChunk) - msChunks.abs[nextPhysChunk] = + mschunks_map.mapping[nextPhysChunk] = loadAreaFirstChunk + nextPhysChunk; /* @@ -571,7 +571,8 @@ static void __init build_iSeries_Memory_Map(void) (absChunk > hptLastChunk)) && ((absChunk < loadAreaFirstChunk) || (absChunk > loadAreaLastChunk))) { - msChunks.abs[nextPhysChunk] = absChunk; + mschunks_map.mapping[nextPhysChunk] = + absChunk; ++nextPhysChunk; } } diff --git a/include/asm-ppc64/abs_addr.h b/include/asm-ppc64/abs_addr.h index 2276567f133a..05414a9bfdd1 100644 --- a/include/asm-ppc64/abs_addr.h +++ b/include/asm-ppc64/abs_addr.h @@ -17,18 +17,17 @@ #include #include -struct msChunks { +#ifdef CONFIG_MSCHUNKS + +struct mschunks_map { unsigned long num_chunks; unsigned long chunk_size; unsigned long chunk_shift; unsigned long chunk_mask; - u32 *abs; + u32 *mapping; }; -extern struct msChunks msChunks; - - -#ifdef CONFIG_MSCHUNKS +extern struct mschunks_map mschunks_map; /* Chunks are 256 
KB */ #define MSCHUNKS_CHUNK_SHIFT (18) @@ -52,10 +51,10 @@ static inline unsigned long chunk_offset(unsigned long addr) static inline unsigned long abs_chunk(unsigned long pchunk) { - if (pchunk >= msChunks.num_chunks) + if (pchunk >= mschunks_map.num_chunks) return pchunk; - return msChunks.abs[pchunk]; + return mschunks_map.mapping[pchunk]; } /* A macro so it can take pointers or unsigned long. */ From ce21795275ab469b97384faa36462350af17eca0 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Wed, 3 Aug 2005 20:21:23 +1000 Subject: [PATCH 240/584] [PATCH] ppc64: Consolidate some macros The only caller of chunk_offset() and abs_chunk() is phys_to_abs(), so fold the former two into the latter. Signed-off-by: Michael Ellerman Signed-off-by: Paul Mackerras --- include/asm-ppc64/abs_addr.h | 25 +++++++++---------------- 1 file changed, 9 insertions(+), 16 deletions(-) diff --git a/include/asm-ppc64/abs_addr.h b/include/asm-ppc64/abs_addr.h index 05414a9bfdd1..025527742fef 100644 --- a/include/asm-ppc64/abs_addr.h +++ b/include/asm-ppc64/abs_addr.h @@ -44,25 +44,18 @@ static inline unsigned long addr_to_chunk(unsigned long addr) return addr >> MSCHUNKS_CHUNK_SHIFT; } -static inline unsigned long chunk_offset(unsigned long addr) +static inline unsigned long phys_to_abs(unsigned long pa) { - return addr & MSCHUNKS_OFFSET_MASK; + unsigned long chunk; + + chunk = addr_to_chunk(pa); + + if (chunk < mschunks_map.num_chunks) + chunk = mschunks_map.mapping[chunk]; + + return chunk_to_addr(chunk) + (pa & MSCHUNKS_OFFSET_MASK); } -static inline unsigned long abs_chunk(unsigned long pchunk) -{ - if (pchunk >= mschunks_map.num_chunks) - return pchunk; - - return mschunks_map.mapping[pchunk]; -} - -/* A macro so it can take pointers or unsigned long. 
*/ -#define phys_to_abs(pa) \ - ({ unsigned long _pa = (unsigned long)(pa); \ - chunk_to_addr(abs_chunk(addr_to_chunk(_pa))) + chunk_offset(_pa); \ - }) - static inline unsigned long physRpn_to_absRpn(unsigned long rpn) { From aefd16b0c5a594b5feaba23954ad74061f45c8a5 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Wed, 3 Aug 2005 20:21:24 +1000 Subject: [PATCH 241/584] [PATCH] ppc64: Remove redundant uses of physRpn_to_absRpn physRpn_to_absRpn is a no-op on non-iSeries platforms, remove the two redundant calls. There's only one caller on iSeries so fold the logic in there so we can get rid of it completely. Signed-off-by: Michael Ellerman Signed-off-by: Paul Mackerras --- arch/ppc64/kernel/iSeries_htab.c | 5 ++++- arch/ppc64/kernel/pSeries_lpar.c | 3 +-- arch/ppc64/mm/hash_native.c | 3 +-- include/asm-ppc64/abs_addr.h | 8 -------- 4 files changed, 6 insertions(+), 13 deletions(-) diff --git a/arch/ppc64/kernel/iSeries_htab.c b/arch/ppc64/kernel/iSeries_htab.c index b0250ae4a72a..2192055a90a0 100644 --- a/arch/ppc64/kernel/iSeries_htab.c +++ b/arch/ppc64/kernel/iSeries_htab.c @@ -41,6 +41,7 @@ static long iSeries_hpte_insert(unsigned long hpte_group, unsigned long va, unsigned long prpn, unsigned long vflags, unsigned long rflags) { + unsigned long arpn; long slot; hpte_t lhpte; int secondary = 0; @@ -70,8 +71,10 @@ static long iSeries_hpte_insert(unsigned long hpte_group, unsigned long va, slot &= 0x7fffffffffffffff; } + arpn = phys_to_abs(prpn << PAGE_SHIFT) >> PAGE_SHIFT; + lhpte.v = (va >> 23) << HPTE_V_AVPN_SHIFT | vflags | HPTE_V_VALID; - lhpte.r = (physRpn_to_absRpn(prpn) << HPTE_R_RPN_SHIFT) | rflags; + lhpte.r = (arpn << HPTE_R_RPN_SHIFT) | rflags; /* Now fill in the actual HPTE */ HvCallHpt_addValidate(slot, secondary, &lhpte); diff --git a/arch/ppc64/kernel/pSeries_lpar.c b/arch/ppc64/kernel/pSeries_lpar.c index 56845543c891..0a3ddc9227c5 100644 --- a/arch/ppc64/kernel/pSeries_lpar.c +++ b/arch/ppc64/kernel/pSeries_lpar.c @@ -278,7 +278,6 @@ long 
pSeries_lpar_hpte_insert(unsigned long hpte_group, unsigned long va, unsigned long prpn, unsigned long vflags, unsigned long rflags) { - unsigned long arpn = physRpn_to_absRpn(prpn); unsigned long lpar_rc; unsigned long flags; unsigned long slot; @@ -289,7 +288,7 @@ long pSeries_lpar_hpte_insert(unsigned long hpte_group, if (vflags & HPTE_V_LARGE) hpte_v &= ~(1UL << HPTE_V_AVPN_SHIFT); - hpte_r = (arpn << HPTE_R_RPN_SHIFT) | rflags; + hpte_r = (prpn << HPTE_R_RPN_SHIFT) | rflags; /* Now fill in the actual HPTE */ /* Set CEC cookie to 0 */ diff --git a/arch/ppc64/mm/hash_native.c b/arch/ppc64/mm/hash_native.c index a6abd3a979bf..7626bb59954d 100644 --- a/arch/ppc64/mm/hash_native.c +++ b/arch/ppc64/mm/hash_native.c @@ -51,7 +51,6 @@ long native_hpte_insert(unsigned long hpte_group, unsigned long va, unsigned long prpn, unsigned long vflags, unsigned long rflags) { - unsigned long arpn = physRpn_to_absRpn(prpn); hpte_t *hptep = htab_address + hpte_group; unsigned long hpte_v, hpte_r; int i; @@ -74,7 +73,7 @@ long native_hpte_insert(unsigned long hpte_group, unsigned long va, hpte_v = (va >> 23) << HPTE_V_AVPN_SHIFT | vflags | HPTE_V_VALID; if (vflags & HPTE_V_LARGE) va &= ~(1UL << HPTE_V_AVPN_SHIFT); - hpte_r = (arpn << HPTE_R_RPN_SHIFT) | rflags; + hpte_r = (prpn << HPTE_R_RPN_SHIFT) | rflags; hptep->r = hpte_r; /* Guarantee the second dword is visible before the valid bit */ diff --git a/include/asm-ppc64/abs_addr.h b/include/asm-ppc64/abs_addr.h index 025527742fef..ab4320c1cf5b 100644 --- a/include/asm-ppc64/abs_addr.h +++ b/include/asm-ppc64/abs_addr.h @@ -56,14 +56,6 @@ static inline unsigned long phys_to_abs(unsigned long pa) return chunk_to_addr(chunk) + (pa & MSCHUNKS_OFFSET_MASK); } -static inline unsigned long -physRpn_to_absRpn(unsigned long rpn) -{ - unsigned long pa = rpn << PAGE_SHIFT; - unsigned long aa = phys_to_abs(pa); - return (aa >> PAGE_SHIFT); -} - /* A macro so it can take pointers or unsigned long. 
*/ #define abs_to_phys(aa) lmb_abs_to_phys((unsigned long)(aa)) From a4a0f97020444f83bf22bb9c8c20d8af2b4e6317 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Wed, 3 Aug 2005 20:21:24 +1000 Subject: [PATCH 242/584] [PATCH] ppc64: Remove redundant use of pointers in lmb code The lmb code is all written to use a pointer to an lmb struct. But it's always the same lmb struct, called "lmb". So we take the address of lmb, call it _lmb and then start using _lmb->foo everywhere, which is silly. This patch removes the _lmb pointers and replaces them with direct references to the one "lmb" struct. We do the same for some _mem and _rsv pointers which point to lmb.memory and lmb.reserved respectively. This patch looks quite busy, but it's basically just: s/_lmb->/lmb./g s/_mem->/lmb.memory./g s/_rsv->/lmb.reserved./g s/_rsv/&lmb.reserved/g s/mem->/lmb.memory./g Signed-off-by: Michael Ellerman Signed-off-by: Paul Mackerras --- arch/ppc64/kernel/lmb.c | 100 +++++++++++++++++----------------------- 1 file changed, 43 insertions(+), 57 deletions(-) diff --git a/arch/ppc64/kernel/lmb.c b/arch/ppc64/kernel/lmb.c index d6c6bd03d2a4..6cb275615fc4 100644 --- a/arch/ppc64/kernel/lmb.c +++ b/arch/ppc64/kernel/lmb.c @@ -28,33 +28,32 @@ void lmb_dump_all(void) { #ifdef DEBUG unsigned long i; - struct lmb *_lmb = &lmb; udbg_printf("lmb_dump_all:\n"); udbg_printf(" memory.cnt = 0x%lx\n", - _lmb->memory.cnt); + lmb.memory.cnt); udbg_printf(" memory.size = 0x%lx\n", - _lmb->memory.size); - for (i=0; i < _lmb->memory.cnt ;i++) { + lmb.memory.size); + for (i=0; i < lmb.memory.cnt ;i++) { udbg_printf(" memory.region[0x%x].base = 0x%lx\n", - i, _lmb->memory.region[i].base); + i, lmb.memory.region[i].base); udbg_printf(" .physbase = 0x%lx\n", - _lmb->memory.region[i].physbase); + lmb.memory.region[i].physbase); udbg_printf(" .size = 0x%lx\n", - _lmb->memory.region[i].size); + lmb.memory.region[i].size); } udbg_printf("\n reserved.cnt = 0x%lx\n", - _lmb->reserved.cnt); + lmb.reserved.cnt); 
udbg_printf(" reserved.size = 0x%lx\n", - _lmb->reserved.size); - for (i=0; i < _lmb->reserved.cnt ;i++) { + lmb.reserved.size); + for (i=0; i < lmb.reserved.cnt ;i++) { udbg_printf(" reserved.region[0x%x].base = 0x%lx\n", - i, _lmb->reserved.region[i].base); + i, lmb.reserved.region[i].base); udbg_printf(" .physbase = 0x%lx\n", - _lmb->reserved.region[i].physbase); + lmb.reserved.region[i].physbase); udbg_printf(" .size = 0x%lx\n", - _lmb->reserved.region[i].size); + lmb.reserved.region[i].size); } #endif /* DEBUG */ } @@ -108,19 +107,17 @@ lmb_coalesce_regions(struct lmb_region *rgn, unsigned long r1, unsigned long r2) void __init lmb_init(void) { - struct lmb *_lmb = &lmb; - /* Create a dummy zero size LMB which will get coalesced away later. * This simplifies the lmb_add() code below... */ - _lmb->memory.region[0].base = 0; - _lmb->memory.region[0].size = 0; - _lmb->memory.cnt = 1; + lmb.memory.region[0].base = 0; + lmb.memory.region[0].size = 0; + lmb.memory.cnt = 1; /* Ditto. */ - _lmb->reserved.region[0].base = 0; - _lmb->reserved.region[0].size = 0; - _lmb->reserved.cnt = 1; + lmb.reserved.region[0].base = 0; + lmb.reserved.region[0].size = 0; + lmb.reserved.cnt = 1; } /* This routine called with relocation disabled. 
*/ @@ -130,27 +127,26 @@ lmb_analyze(void) unsigned long i; unsigned long mem_size = 0; unsigned long size_mask = 0; - struct lmb *_lmb = &lmb; #ifdef CONFIG_MSCHUNKS unsigned long physbase = 0; #endif - for (i=0; i < _lmb->memory.cnt; i++) { + for (i=0; i < lmb.memory.cnt; i++) { unsigned long lmb_size; - lmb_size = _lmb->memory.region[i].size; + lmb_size = lmb.memory.region[i].size; #ifdef CONFIG_MSCHUNKS - _lmb->memory.region[i].physbase = physbase; + lmb.memory.region[i].physbase = physbase; physbase += lmb_size; #else - _lmb->memory.region[i].physbase = _lmb->memory.region[i].base; + lmb.memory.region[i].physbase = lmb.memory.region[i].base; #endif mem_size += lmb_size; size_mask |= lmb_size; } - _lmb->memory.size = mem_size; + lmb.memory.size = mem_size; } /* This routine called with relocation disabled. */ @@ -213,12 +209,11 @@ lmb_add_region(struct lmb_region *rgn, unsigned long base, unsigned long size) long __init lmb_add(unsigned long base, unsigned long size) { - struct lmb *_lmb = &lmb; - struct lmb_region *_rgn = &(_lmb->memory); + struct lmb_region *_rgn = &(lmb.memory); /* On pSeries LPAR systems, the first LMB is our RMO region. 
*/ if ( base == 0 ) - _lmb->rmo_size = size; + lmb.rmo_size = size; return lmb_add_region(_rgn, base, size); @@ -227,8 +222,7 @@ lmb_add(unsigned long base, unsigned long size) long __init lmb_reserve(unsigned long base, unsigned long size) { - struct lmb *_lmb = &lmb; - struct lmb_region *_rgn = &(_lmb->reserved); + struct lmb_region *_rgn = &(lmb.reserved); return lmb_add_region(_rgn, base, size); } @@ -260,13 +254,10 @@ lmb_alloc_base(unsigned long size, unsigned long align, unsigned long max_addr) { long i, j; unsigned long base = 0; - struct lmb *_lmb = &lmb; - struct lmb_region *_mem = &(_lmb->memory); - struct lmb_region *_rsv = &(_lmb->reserved); - for (i=_mem->cnt-1; i >= 0; i--) { - unsigned long lmbbase = _mem->region[i].base; - unsigned long lmbsize = _mem->region[i].size; + for (i=lmb.memory.cnt-1; i >= 0; i--) { + unsigned long lmbbase = lmb.memory.region[i].base; + unsigned long lmbsize = lmb.memory.region[i].size; if ( max_addr == LMB_ALLOC_ANYWHERE ) base = _ALIGN_DOWN(lmbbase+lmbsize-size, align); @@ -276,8 +267,8 @@ lmb_alloc_base(unsigned long size, unsigned long align, unsigned long max_addr) continue; while ( (lmbbase <= base) && - ((j = lmb_overlaps_region(_rsv,base,size)) >= 0) ) { - base = _ALIGN_DOWN(_rsv->region[j].base-size, align); + ((j = lmb_overlaps_region(&lmb.reserved,base,size)) >= 0) ) { + base = _ALIGN_DOWN(lmb.reserved.region[j].base-size, align); } if ( (base != 0) && (lmbbase <= base) ) @@ -287,7 +278,7 @@ lmb_alloc_base(unsigned long size, unsigned long align, unsigned long max_addr) if ( i < 0 ) return 0; - lmb_add_region(_rsv, base, size); + lmb_add_region(&lmb.reserved, base, size); return base; } @@ -295,17 +286,15 @@ lmb_alloc_base(unsigned long size, unsigned long align, unsigned long max_addr) unsigned long __init lmb_phys_mem_size(void) { - struct lmb *_lmb = &lmb; #ifdef CONFIG_MSCHUNKS - return _lmb->memory.size; + return lmb.memory.size; #else - struct lmb_region *_mem = &(_lmb->memory); unsigned long total = 0; 
int i; /* add all physical memory to the bootmem map */ - for (i=0; i < _mem->cnt; i++) - total += _mem->region[i].size; + for (i=0; i < lmb.memory.cnt; i++) + total += lmb.memory.region[i].size; return total; #endif /* CONFIG_MSCHUNKS */ } @@ -313,14 +302,12 @@ lmb_phys_mem_size(void) unsigned long __init lmb_end_of_DRAM(void) { - struct lmb *_lmb = &lmb; - struct lmb_region *_mem = &(_lmb->memory); - int idx = _mem->cnt - 1; + int idx = lmb.memory.cnt - 1; #ifdef CONFIG_MSCHUNKS - return (_mem->region[idx].physbase + _mem->region[idx].size); + return (lmb.memory.region[idx].physbase + lmb.memory.region[idx].size); #else - return (_mem->region[idx].base + _mem->region[idx].size); + return (lmb.memory.region[idx].base + lmb.memory.region[idx].size); #endif /* CONFIG_MSCHUNKS */ return 0; @@ -353,20 +340,19 @@ void __init lmb_enforce_memory_limit(void) { extern unsigned long memory_limit; unsigned long i, limit; - struct lmb_region *mem = &(lmb.memory); if (! memory_limit) return; limit = memory_limit; - for (i = 0; i < mem->cnt; i++) { - if (limit > mem->region[i].size) { - limit -= mem->region[i].size; + for (i = 0; i < lmb.memory.cnt; i++) { + if (limit > lmb.memory.region[i].size) { + limit -= lmb.memory.region[i].size; continue; } - mem->region[i].size = limit; - mem->cnt = i + 1; + lmb.memory.region[i].size = limit; + lmb.memory.cnt = i + 1; break; } } From e88bcd1b29f63738b702e57d831758706162347e Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Wed, 3 Aug 2005 20:21:25 +1000 Subject: [PATCH 243/584] [PATCH] ppc64: Remove redundant abs_to_phys() macro abs_to_phys() is a macro that turns out to do nothing, and also has the unfortunate property that it's not the inverse of phys_to_abs() on iSeries. The following is for my benefit as much as everyone else. With CONFIG_MSCHUNKS enabled, the lmb code is changed such that it keeps a physbase variable for each lmb region. 
This is used to take the possibly discontiguous lmb regions and present them as a contiguous address space beginning from zero. In this context each lmb region's base address is its "absolute" base address, and its physbase is its "physical" address (from Linux's point of view). The abs_to_phys() macro does the mapping from "absolute" to "physical". Note: This is not related to the iSeries mapping of physical to absolute (ie. Hypervisor) addresses which is maintained with the msChunks structure. And the msChunks structure is not controlled via CONFIG_MSCHUNKS. Once upon a time you could compile for non-iSeries with CONFIG_MSCHUNKS enabled. But these days CONFIG_MSCHUNKS depends on CONFIG_PPC_ISERIES, so for non-iSeries code abs_to_phys() is a no-op. On iSeries we always have one lmb region which spans from 0 to systemcfg->physicalMemorySize (arch/ppc64/kernel/iSeries_setup.c line 383). This region has a base (ie. absolute) address of 0, and a physbase address of 0 (as calculated in lmb_analyze() (arch/ppc64/kernel/lmb.c line 144)). On iSeries, abs_to_phys(aa) is defined as lmb_abs_to_phys(aa), which finds the lmb region containing aa (and there's only one, ie. 0), and then does: return lmb.memory.region[0].physbase + (aa - lmb.memory.region[0].base) physbase == base == 0, so you're left with "return aa". So remove abs_to_phys(), and lmb_abs_to_phys() which is the implementation of abs_to_phys() for iSeries.
Signed-off-by: Michael Ellerman Signed-off-by: Paul Mackerras --- arch/ppc64/kernel/lmb.c | 19 ------------------- arch/ppc64/mm/init.c | 4 +--- include/asm-ppc64/abs_addr.h | 6 +----- 3 files changed, 2 insertions(+), 27 deletions(-) diff --git a/arch/ppc64/kernel/lmb.c b/arch/ppc64/kernel/lmb.c index 6cb275615fc4..111da736652b 100644 --- a/arch/ppc64/kernel/lmb.c +++ b/arch/ppc64/kernel/lmb.c @@ -313,25 +313,6 @@ lmb_end_of_DRAM(void) return 0; } -unsigned long __init -lmb_abs_to_phys(unsigned long aa) -{ - unsigned long i, pa = aa; - struct lmb *_lmb = &lmb; - struct lmb_region *_mem = &(_lmb->memory); - - for (i=0; i < _mem->cnt; i++) { - unsigned long lmbbase = _mem->region[i].base; - unsigned long lmbsize = _mem->region[i].size; - if ( lmb_addrs_overlap(aa,1,lmbbase,lmbsize) ) { - pa = _mem->region[i].physbase + (aa - lmbbase); - break; - } - } - - return pa; -} - /* * Truncate the lmb list to memory_limit if it's set * You must call lmb_analyze() after this. diff --git a/arch/ppc64/mm/init.c b/arch/ppc64/mm/init.c index 9edfe267123e..a16cf12c586b 100644 --- a/arch/ppc64/mm/init.c +++ b/arch/ppc64/mm/init.c @@ -42,7 +42,6 @@ #include #include -#include #include #include #include @@ -167,7 +166,6 @@ static int map_io_page(unsigned long ea, unsigned long pa, int flags) ptep = pte_alloc_kernel(&init_mm, pmdp, ea); if (!ptep) return -ENOMEM; - pa = abs_to_phys(pa); set_pte_at(&init_mm, ea, ptep, pfn_pte(pa >> PAGE_SHIFT, __pgprot(flags))); spin_unlock(&init_mm.page_table_lock); @@ -547,7 +545,7 @@ void __init do_init_bootmem(void) */ bootmap_pages = bootmem_bootmap_pages(total_pages); - start = abs_to_phys(lmb_alloc(bootmap_pages<> PAGE_SHIFT, total_pages); diff --git a/include/asm-ppc64/abs_addr.h b/include/asm-ppc64/abs_addr.h index ab4320c1cf5b..200db1c45f29 100644 --- a/include/asm-ppc64/abs_addr.h +++ b/include/asm-ppc64/abs_addr.h @@ -56,9 +56,6 @@ static inline unsigned long phys_to_abs(unsigned long pa) return chunk_to_addr(chunk) + (pa & 
MSCHUNKS_OFFSET_MASK); } -/* A macro so it can take pointers or unsigned long. */ -#define abs_to_phys(aa) lmb_abs_to_phys((unsigned long)(aa)) - #else /* !CONFIG_MSCHUNKS */ #define chunk_to_addr(chunk) ((unsigned long)(chunk)) @@ -68,12 +65,11 @@ static inline unsigned long phys_to_abs(unsigned long pa) #define phys_to_abs(pa) (pa) #define physRpn_to_absRpn(rpn) (rpn) -#define abs_to_phys(aa) (aa) #endif /* !CONFIG_MSCHUNKS */ /* Convenience macros */ #define virt_to_abs(va) phys_to_abs(__pa(va)) -#define abs_to_virt(aa) __va(abs_to_phys(aa)) +#define abs_to_virt(aa) __va(aa) #endif /* _ABS_ADDR_H */ From 180379dcefb39e8bd05d562b0685e9084dffcc0a Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Wed, 3 Aug 2005 20:21:26 +1000 Subject: [PATCH 244/584] [PATCH] ppc64: Remove physbase from the lmb_property struct We no longer need the lmb code to know about abs and phys addresses, so remove the physbase variable from the lmb_property struct. Signed-off-by: Michael Ellerman Signed-off-by: Paul Mackerras --- arch/ppc64/kernel/lmb.c | 23 ----------------------- arch/ppc64/mm/hash_utils.c | 2 +- arch/ppc64/mm/init.c | 27 ++++++++++++--------------- arch/ppc64/mm/numa.c | 2 +- include/asm-ppc64/lmb.h | 1 - 5 files changed, 14 insertions(+), 41 deletions(-) diff --git a/arch/ppc64/kernel/lmb.c b/arch/ppc64/kernel/lmb.c index 111da736652b..6ed6312d848f 100644 --- a/arch/ppc64/kernel/lmb.c +++ b/arch/ppc64/kernel/lmb.c @@ -37,8 +37,6 @@ void lmb_dump_all(void) for (i=0; i < lmb.memory.cnt ;i++) { udbg_printf(" memory.region[0x%x].base = 0x%lx\n", i, lmb.memory.region[i].base); - udbg_printf(" .physbase = 0x%lx\n", - lmb.memory.region[i].physbase); udbg_printf(" .size = 0x%lx\n", lmb.memory.region[i].size); } @@ -50,8 +48,6 @@ void lmb_dump_all(void) for (i=0; i < lmb.reserved.cnt ;i++) { udbg_printf(" reserved.region[0x%x].base = 0x%lx\n", i, lmb.reserved.region[i].base); - udbg_printf(" .physbase = 0x%lx\n", - lmb.reserved.region[i].physbase); udbg_printf(" .size = 
0x%lx\n", lmb.reserved.region[i].size); } @@ -97,7 +93,6 @@ lmb_coalesce_regions(struct lmb_region *rgn, unsigned long r1, unsigned long r2) rgn->region[r1].size += rgn->region[r2].size; for (i=r2; i < rgn->cnt-1; i++) { rgn->region[i].base = rgn->region[i+1].base; - rgn->region[i].physbase = rgn->region[i+1].physbase; rgn->region[i].size = rgn->region[i+1].size; } rgn->cnt--; @@ -127,21 +122,12 @@ lmb_analyze(void) unsigned long i; unsigned long mem_size = 0; unsigned long size_mask = 0; -#ifdef CONFIG_MSCHUNKS - unsigned long physbase = 0; -#endif for (i=0; i < lmb.memory.cnt; i++) { unsigned long lmb_size; lmb_size = lmb.memory.region[i].size; -#ifdef CONFIG_MSCHUNKS - lmb.memory.region[i].physbase = physbase; - physbase += lmb_size; -#else - lmb.memory.region[i].physbase = lmb.memory.region[i].base; -#endif mem_size += lmb_size; size_mask |= lmb_size; } @@ -164,7 +150,6 @@ lmb_add_region(struct lmb_region *rgn, unsigned long base, unsigned long size) adjacent = lmb_addrs_adjacent(base,size,rgnbase,rgnsize); if ( adjacent > 0 ) { rgn->region[i].base -= size; - rgn->region[i].physbase -= size; rgn->region[i].size += size; coalesced++; break; @@ -191,11 +176,9 @@ lmb_add_region(struct lmb_region *rgn, unsigned long base, unsigned long size) for (i=rgn->cnt-1; i >= 0; i--) { if (base < rgn->region[i].base) { rgn->region[i+1].base = rgn->region[i].base; - rgn->region[i+1].physbase = rgn->region[i].physbase; rgn->region[i+1].size = rgn->region[i].size; } else { rgn->region[i+1].base = base; - rgn->region[i+1].physbase = lmb_abs_to_phys(base); rgn->region[i+1].size = size; break; } @@ -304,13 +287,7 @@ lmb_end_of_DRAM(void) { int idx = lmb.memory.cnt - 1; -#ifdef CONFIG_MSCHUNKS - return (lmb.memory.region[idx].physbase + lmb.memory.region[idx].size); -#else return (lmb.memory.region[idx].base + lmb.memory.region[idx].size); -#endif /* CONFIG_MSCHUNKS */ - - return 0; } /* diff --git a/arch/ppc64/mm/hash_utils.c b/arch/ppc64/mm/hash_utils.c index 
65d6e8527948..09475c8edf7c 100644 --- a/arch/ppc64/mm/hash_utils.c +++ b/arch/ppc64/mm/hash_utils.c @@ -210,7 +210,7 @@ void __init htab_initialize(void) /* create bolted the linear mapping in the hash table */ for (i=0; i < lmb.memory.cnt; i++) { - base = lmb.memory.region[i].physbase + KERNELBASE; + base = lmb.memory.region[i].base + KERNELBASE; size = lmb.memory.region[i].size; DBG("creating mapping for region: %lx : %lx\n", base, size); diff --git a/arch/ppc64/mm/init.c b/arch/ppc64/mm/init.c index a16cf12c586b..c02dc9809ca5 100644 --- a/arch/ppc64/mm/init.c +++ b/arch/ppc64/mm/init.c @@ -482,9 +482,9 @@ void __init mm_init_ppc64(void) for (i = 1; i < lmb.memory.cnt; i++) { unsigned long base, prevbase, prevsize; - prevbase = lmb.memory.region[i-1].physbase; + prevbase = lmb.memory.region[i-1].base; prevsize = lmb.memory.region[i-1].size; - base = lmb.memory.region[i].physbase; + base = lmb.memory.region[i].base; if (base > (prevbase + prevsize)) { io_hole_start = prevbase + prevsize; io_hole_size = base - (prevbase + prevsize); @@ -511,11 +511,8 @@ int page_is_ram(unsigned long pfn) for (i=0; i < lmb.memory.cnt; i++) { unsigned long base; -#ifdef CONFIG_MSCHUNKS - base = lmb.memory.region[i].physbase; -#else base = lmb.memory.region[i].base; -#endif + if ((paddr >= base) && (paddr < (base + lmb.memory.region[i].size))) { return 1; @@ -556,25 +553,25 @@ void __init do_init_bootmem(void) * present. 
*/ for (i=0; i < lmb.memory.cnt; i++) { - unsigned long physbase, size; + unsigned long base, size; unsigned long start_pfn, end_pfn; - physbase = lmb.memory.region[i].physbase; + base = lmb.memory.region[i].base; size = lmb.memory.region[i].size; - start_pfn = physbase >> PAGE_SHIFT; + start_pfn = base >> PAGE_SHIFT; end_pfn = start_pfn + (size >> PAGE_SHIFT); memory_present(0, start_pfn, end_pfn); - free_bootmem(physbase, size); + free_bootmem(base, size); } /* reserve the sections we're already using */ for (i=0; i < lmb.reserved.cnt; i++) { - unsigned long physbase = lmb.reserved.region[i].physbase; + unsigned long base = lmb.reserved.region[i].base; unsigned long size = lmb.reserved.region[i].size; - reserve_bootmem(physbase, size); + reserve_bootmem(base, size); } } @@ -613,10 +610,10 @@ static int __init setup_kcore(void) int i; for (i=0; i < lmb.memory.cnt; i++) { - unsigned long physbase, size; + unsigned long base, size; struct kcore_list *kcore_mem; - physbase = lmb.memory.region[i].physbase; + base = lmb.memory.region[i].base; size = lmb.memory.region[i].size; /* GFP_ATOMIC to avoid might_sleep warnings during boot */ @@ -624,7 +621,7 @@ static int __init setup_kcore(void) if (!kcore_mem) panic("mem_init: kmalloc failed\n"); - kclist_add(kcore_mem, __va(physbase), size); + kclist_add(kcore_mem, __va(base), size); } kclist_add(&kcore_vmem, (void *)VMALLOC_START, VMALLOC_END-VMALLOC_START); diff --git a/arch/ppc64/mm/numa.c b/arch/ppc64/mm/numa.c index 0b191f2de016..c3116f0d788c 100644 --- a/arch/ppc64/mm/numa.c +++ b/arch/ppc64/mm/numa.c @@ -671,7 +671,7 @@ new_range: * Mark reserved regions on this node */ for (i = 0; i < lmb.reserved.cnt; i++) { - unsigned long physbase = lmb.reserved.region[i].physbase; + unsigned long physbase = lmb.reserved.region[i].base; unsigned long size = lmb.reserved.region[i].size; if (pa_to_nid(physbase) != nid && diff --git a/include/asm-ppc64/lmb.h b/include/asm-ppc64/lmb.h index a6cbca21ac1d..cb368bf0f264 100644 --- 
a/include/asm-ppc64/lmb.h +++ b/include/asm-ppc64/lmb.h @@ -22,7 +22,6 @@ struct lmb_property { unsigned long base; - unsigned long physbase; unsigned long size; }; From 71e1f55ad4bc4c8bcfe696400a950a34263a750e Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Wed, 3 Aug 2005 20:21:26 +1000 Subject: [PATCH 245/584] [PATCH] ppc64: Simplify some lmb functions lmb_phys_mem_size() can always return lmb.memory.size, as long as it's called after lmb_analyze(), which it is. There's no need to recalculate the size on every call. lmb_analyze() was calculating a few things we then threw away, so just don't calculate them to start with. Signed-off-by: Michael Ellerman Signed-off-by: Paul Mackerras --- arch/ppc64/kernel/lmb.c | 27 +++++---------------------- 1 file changed, 5 insertions(+), 22 deletions(-) diff --git a/arch/ppc64/kernel/lmb.c b/arch/ppc64/kernel/lmb.c index 6ed6312d848f..5adaca2ddc9d 100644 --- a/arch/ppc64/kernel/lmb.c +++ b/arch/ppc64/kernel/lmb.c @@ -119,20 +119,12 @@ lmb_init(void) void __init lmb_analyze(void) { - unsigned long i; - unsigned long mem_size = 0; - unsigned long size_mask = 0; + int i; - for (i=0; i < lmb.memory.cnt; i++) { - unsigned long lmb_size; + lmb.memory.size = 0; - lmb_size = lmb.memory.region[i].size; - - mem_size += lmb_size; - size_mask |= lmb_size; - } - - lmb.memory.size = mem_size; + for (i = 0; i < lmb.memory.cnt; i++) + lmb.memory.size += lmb.memory.region[i].size; } /* This routine called with relocation disabled. */ @@ -266,20 +258,11 @@ lmb_alloc_base(unsigned long size, unsigned long align, unsigned long max_addr) return base; } +/* You must call lmb_analyze() before this. 
*/ unsigned long __init lmb_phys_mem_size(void) { -#ifdef CONFIG_MSCHUNKS return lmb.memory.size; -#else - unsigned long total = 0; - int i; - - /* add all physical memory to the bootmem map */ - for (i=0; i < lmb.memory.cnt; i++) - total += lmb.memory.region[i].size; - return total; -#endif /* CONFIG_MSCHUNKS */ } unsigned long __init From bef5686229810709091fb6e505071f4aa41e3760 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Wed, 3 Aug 2005 20:21:26 +1000 Subject: [PATCH 246/584] [PATCH] ppc64: Remove CONFIG_MSCHUNKS We can now remove CONFIG_MSCHUNKS as it doesn't do anything interesting anymore. The only macro in abs_addr.h which is called by non-iSeries code is phys_to_abs(), so remove the other dummy implementations, and we add a firmware feature check to phys_to_abs(). Signed-off-by: Michael Ellerman Signed-off-by: Paul Mackerras --- arch/ppc64/Kconfig | 6 ------ arch/ppc64/configs/iSeries_defconfig | 1 - include/asm-ppc64/abs_addr.h | 19 +++++-------------- 3 files changed, 5 insertions(+), 21 deletions(-) diff --git a/arch/ppc64/Kconfig b/arch/ppc64/Kconfig index 4d4f81c65012..13b262f10216 100644 --- a/arch/ppc64/Kconfig +++ b/arch/ppc64/Kconfig @@ -302,12 +302,6 @@ config GENERIC_HARDIRQS bool default y -config MSCHUNKS - bool - depends on PPC_ISERIES - default y - - config PPC_RTAS bool depends on PPC_PSERIES || PPC_BPA diff --git a/arch/ppc64/configs/iSeries_defconfig b/arch/ppc64/configs/iSeries_defconfig index 394ba18b58c7..219c6677abcc 100644 --- a/arch/ppc64/configs/iSeries_defconfig +++ b/arch/ppc64/configs/iSeries_defconfig @@ -99,7 +99,6 @@ CONFIG_HZ_100=y # CONFIG_HZ_1000 is not set CONFIG_HZ=100 CONFIG_GENERIC_HARDIRQS=y -CONFIG_MSCHUNKS=y CONFIG_LPARCFG=y CONFIG_SECCOMP=y CONFIG_ISA_DMA_API=y diff --git a/include/asm-ppc64/abs_addr.h b/include/asm-ppc64/abs_addr.h index 200db1c45f29..84c24d4cdb71 100644 --- a/include/asm-ppc64/abs_addr.h +++ b/include/asm-ppc64/abs_addr.h @@ -16,8 +16,7 @@ #include #include #include - -#ifdef 
CONFIG_MSCHUNKS +#include struct mschunks_map { unsigned long num_chunks; @@ -48,6 +47,10 @@ static inline unsigned long phys_to_abs(unsigned long pa) { unsigned long chunk; + /* This is a no-op on non-iSeries */ + if (!firmware_has_feature(FW_FEATURE_ISERIES)) + return pa; + chunk = addr_to_chunk(pa); if (chunk < mschunks_map.num_chunks) @@ -56,18 +59,6 @@ static inline unsigned long phys_to_abs(unsigned long pa) return chunk_to_addr(chunk) + (pa & MSCHUNKS_OFFSET_MASK); } -#else /* !CONFIG_MSCHUNKS */ - -#define chunk_to_addr(chunk) ((unsigned long)(chunk)) -#define addr_to_chunk(addr) (addr) -#define chunk_offset(addr) (0) -#define abs_chunk(pchunk) (pchunk) - -#define phys_to_abs(pa) (pa) -#define physRpn_to_absRpn(rpn) (rpn) - -#endif /* !CONFIG_MSCHUNKS */ - /* Convenience macros */ #define virt_to_abs(va) phys_to_abs(__pa(va)) #define abs_to_virt(aa) __va(aa) From b13cfd173f73c3f6f9a307b7b6e64d45fbd756b2 Mon Sep 17 00:00:00 2001 From: Olaf Hering Date: Thu, 4 Aug 2005 19:26:42 +0200 Subject: [PATCH 247/584] [PATCH] ppc64: allow xmon=off If both CONFIG_XMON and CONFIG_XMON_DEFAULT is enabled in the .config, there is no way to disable xmon again. setup_system calls first xmon_init, later parse_early_param. So a new 'xmon=off' cmdline option will do the right thing. 
Signed-off-by: Olaf Hering Signed-off-by: Paul Mackerras --- arch/ppc64/kernel/setup.c | 8 +++++--- arch/ppc64/xmon/start.c | 2 +- arch/ppc64/xmon/xmon.c | 26 ++++++++++++++++++-------- include/asm-ppc64/system.h | 2 +- 4 files changed, 25 insertions(+), 13 deletions(-) diff --git a/arch/ppc64/kernel/setup.c b/arch/ppc64/kernel/setup.c index e9c24d2dbd91..b3ef8df12982 100644 --- a/arch/ppc64/kernel/setup.c +++ b/arch/ppc64/kernel/setup.c @@ -627,7 +627,7 @@ void __init setup_system(void) * Initialize xmon */ #ifdef CONFIG_XMON_DEFAULT - xmon_init(); + xmon_init(1); #endif /* * Register early console @@ -1343,11 +1343,13 @@ static int __init early_xmon(char *p) /* ensure xmon is enabled */ if (p) { if (strncmp(p, "on", 2) == 0) - xmon_init(); + xmon_init(1); + if (strncmp(p, "off", 3) == 0) + xmon_init(0); if (strncmp(p, "early", 5) != 0) return 0; } - xmon_init(); + xmon_init(1); debugger(NULL); return 0; diff --git a/arch/ppc64/xmon/start.c b/arch/ppc64/xmon/start.c index a9265bcc79b2..f86b584acd76 100644 --- a/arch/ppc64/xmon/start.c +++ b/arch/ppc64/xmon/start.c @@ -27,7 +27,7 @@ static void sysrq_handle_xmon(int key, struct pt_regs *pt_regs, struct tty_struct *tty) { /* ensure xmon is enabled */ - xmon_init(); + xmon_init(1); debugger(pt_regs); } diff --git a/arch/ppc64/xmon/xmon.c b/arch/ppc64/xmon/xmon.c index 05539439e6bc..45908b10acd3 100644 --- a/arch/ppc64/xmon/xmon.c +++ b/arch/ppc64/xmon/xmon.c @@ -2496,15 +2496,25 @@ static void dump_stab(void) } } -void xmon_init(void) +void xmon_init(int enable) { - __debugger = xmon; - __debugger_ipi = xmon_ipi; - __debugger_bpt = xmon_bpt; - __debugger_sstep = xmon_sstep; - __debugger_iabr_match = xmon_iabr_match; - __debugger_dabr_match = xmon_dabr_match; - __debugger_fault_handler = xmon_fault_handler; + if (enable) { + __debugger = xmon; + __debugger_ipi = xmon_ipi; + __debugger_bpt = xmon_bpt; + __debugger_sstep = xmon_sstep; + __debugger_iabr_match = xmon_iabr_match; + __debugger_dabr_match = xmon_dabr_match; 
+ __debugger_fault_handler = xmon_fault_handler; + } else { + __debugger = NULL; + __debugger_ipi = NULL; + __debugger_bpt = NULL; + __debugger_sstep = NULL; + __debugger_iabr_match = NULL; + __debugger_dabr_match = NULL; + __debugger_fault_handler = NULL; + } } void dump_segments(void) diff --git a/include/asm-ppc64/system.h b/include/asm-ppc64/system.h index 4104a5dedbaa..b9e1835351e9 100644 --- a/include/asm-ppc64/system.h +++ b/include/asm-ppc64/system.h @@ -88,7 +88,7 @@ DEBUGGER_BOILERPLATE(debugger_dabr_match) DEBUGGER_BOILERPLATE(debugger_fault_handler) #ifdef CONFIG_XMON -extern void xmon_init(void); +extern void xmon_init(int enable); #endif #else From 180a33627d958d5d9d3602dde6ac74b315e136f0 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Tue, 9 Aug 2005 11:13:36 +1000 Subject: [PATCH 248/584] [PATCH] ppc64: Move ppc64_enable_pmcs() logic into a ppc_md function This patch moves power4_enable_pmcs() to arch/ppc64/kernel/pmc.c. I've tested it on P5 LPAR and P4. It does what it used to. 
Signed-off-by: Michael Ellerman Signed-off-by: Paul Mackerras --- arch/ppc64/kernel/iSeries_setup.c | 2 ++ arch/ppc64/kernel/pSeries_setup.c | 21 ++++++++++++ arch/ppc64/kernel/pmac_setup.c | 2 ++ arch/ppc64/kernel/pmc.c | 21 ++++++++++++ arch/ppc64/kernel/sysfs.c | 54 +++---------------------------- include/asm-ppc64/machdep.h | 3 ++ include/asm-ppc64/pmc.h | 2 ++ 7 files changed, 55 insertions(+), 50 deletions(-) diff --git a/arch/ppc64/kernel/iSeries_setup.c b/arch/ppc64/kernel/iSeries_setup.c index b384a6ad0a57..3ffefbbc6623 100644 --- a/arch/ppc64/kernel/iSeries_setup.c +++ b/arch/ppc64/kernel/iSeries_setup.c @@ -964,6 +964,8 @@ void __init iSeries_early_setup(void) ppc_md.calibrate_decr = iSeries_calibrate_decr; ppc_md.progress = iSeries_progress; + /* XXX Implement enable_pmcs for iSeries */ + if (get_paca()->lppaca.shared_proc) { ppc_md.idle_loop = iseries_shared_idle; printk(KERN_INFO "Using shared processor idle loop\n"); diff --git a/arch/ppc64/kernel/pSeries_setup.c b/arch/ppc64/kernel/pSeries_setup.c index 54e0651ba3fd..f0f0630cf07c 100644 --- a/arch/ppc64/kernel/pSeries_setup.c +++ b/arch/ppc64/kernel/pSeries_setup.c @@ -61,6 +61,7 @@ #include #include #include +#include #include "i8259.h" #include "mpic.h" @@ -187,6 +188,21 @@ static void __init pSeries_setup_mpic(void) " MPIC "); } +static void pseries_lpar_enable_pmcs(void) +{ + unsigned long set, reset; + + power4_enable_pmcs(); + + set = 1UL << 63; + reset = 0; + plpar_hcall_norets(H_PERFMON, set, reset); + + /* instruct hypervisor to maintain PMCs */ + if (firmware_has_feature(FW_FEATURE_SPLPAR)) + get_paca()->lppaca.pmcregs_in_use = 1; +} + static void __init pSeries_setup_arch(void) { /* Fixup ppc_md depending on the type of interrupt controller */ @@ -245,6 +261,11 @@ static void __init pSeries_setup_arch(void) printk(KERN_INFO "Using default idle loop\n"); ppc_md.idle_loop = default_idle; } + + if (systemcfg->platform & PLATFORM_LPAR) + ppc_md.enable_pmcs = pseries_lpar_enable_pmcs; + else + 
ppc_md.enable_pmcs = power4_enable_pmcs; } static int __init pSeries_init_panel(void) diff --git a/arch/ppc64/kernel/pmac_setup.c b/arch/ppc64/kernel/pmac_setup.c index e40877fa67cd..8ff86a766cdf 100644 --- a/arch/ppc64/kernel/pmac_setup.c +++ b/arch/ppc64/kernel/pmac_setup.c @@ -71,6 +71,7 @@ #include #include #include +#include #include "pmac.h" #include "mpic.h" @@ -511,4 +512,5 @@ struct machdep_calls __initdata pmac_md = { .progress = pmac_progress, .check_legacy_ioport = pmac_check_legacy_ioport, .idle_loop = native_idle, + .enable_pmcs = power4_enable_pmcs, }; diff --git a/arch/ppc64/kernel/pmc.c b/arch/ppc64/kernel/pmc.c index 67be773f9c00..cdfec7438d01 100644 --- a/arch/ppc64/kernel/pmc.c +++ b/arch/ppc64/kernel/pmc.c @@ -65,3 +65,24 @@ void release_pmc_hardware(void) spin_unlock(&pmc_owner_lock); } EXPORT_SYMBOL_GPL(release_pmc_hardware); + +void power4_enable_pmcs(void) +{ + unsigned long hid0; + + hid0 = mfspr(HID0); + hid0 |= 1UL << (63 - 20); + + /* POWER4 requires the following sequence */ + asm volatile( + "sync\n" + "mtspr %1, %0\n" + "mfspr %0, %1\n" + "mfspr %0, %1\n" + "mfspr %0, %1\n" + "mfspr %0, %1\n" + "mfspr %0, %1\n" + "mfspr %0, %1\n" + "isync" : "=&r" (hid0) : "i" (HID0), "0" (hid0): + "memory"); +} diff --git a/arch/ppc64/kernel/sysfs.c b/arch/ppc64/kernel/sysfs.c index eca15d25e026..f311ee7c0070 100644 --- a/arch/ppc64/kernel/sysfs.c +++ b/arch/ppc64/kernel/sysfs.c @@ -101,6 +101,8 @@ static int __init setup_smt_snooze_delay(char *str) } __setup("smt-snooze-delay=", setup_smt_snooze_delay); +#endif /* CONFIG_PPC_MULTIPLATFORM */ + /* * Enabling PMCs will slow partition context switch times so we only do * it the first time we write to the PMCs. 
@@ -110,63 +112,15 @@ static DEFINE_PER_CPU(char, pmcs_enabled); void ppc64_enable_pmcs(void) { - unsigned long hid0; -#ifdef CONFIG_PPC_PSERIES - unsigned long set, reset; -#endif /* CONFIG_PPC_PSERIES */ - /* Only need to enable them once */ if (__get_cpu_var(pmcs_enabled)) return; __get_cpu_var(pmcs_enabled) = 1; - switch (systemcfg->platform) { - case PLATFORM_PSERIES: - case PLATFORM_POWERMAC: - hid0 = mfspr(HID0); - hid0 |= 1UL << (63 - 20); - - /* POWER4 requires the following sequence */ - asm volatile( - "sync\n" - "mtspr %1, %0\n" - "mfspr %0, %1\n" - "mfspr %0, %1\n" - "mfspr %0, %1\n" - "mfspr %0, %1\n" - "mfspr %0, %1\n" - "mfspr %0, %1\n" - "isync" : "=&r" (hid0) : "i" (HID0), "0" (hid0): - "memory"); - break; - -#ifdef CONFIG_PPC_PSERIES - case PLATFORM_PSERIES_LPAR: - set = 1UL << 63; - reset = 0; - plpar_hcall_norets(H_PERFMON, set, reset); - break; -#endif /* CONFIG_PPC_PSERIES */ - - default: - break; - } - - /* instruct hypervisor to maintain PMCs */ - if (firmware_has_feature(FW_FEATURE_SPLPAR)) - get_paca()->lppaca.pmcregs_in_use = 1; + if (ppc_md.enable_pmcs) + ppc_md.enable_pmcs(); } - -#else - -/* PMC stuff */ -void ppc64_enable_pmcs(void) -{ - /* XXX Implement for iseries */ -} -#endif /* CONFIG_PPC_MULTIPLATFORM */ - EXPORT_SYMBOL(ppc64_enable_pmcs); /* XXX convert to rusty's on_one_cpu */ diff --git a/include/asm-ppc64/machdep.h b/include/asm-ppc64/machdep.h index f0ef06375947..ff2c9287d3b6 100644 --- a/include/asm-ppc64/machdep.h +++ b/include/asm-ppc64/machdep.h @@ -140,6 +140,9 @@ struct machdep_calls { /* Idle loop for this platform, leave empty for default idle loop */ int (*idle_loop)(void); + + /* Function to enable pmcs for this platform, called once per cpu. 
*/ + void (*enable_pmcs)(void); }; extern int default_idle(void); diff --git a/include/asm-ppc64/pmc.h b/include/asm-ppc64/pmc.h index c924748c0bea..d1d297dbccfe 100644 --- a/include/asm-ppc64/pmc.h +++ b/include/asm-ppc64/pmc.h @@ -26,4 +26,6 @@ typedef void (*perf_irq_t)(struct pt_regs *); int reserve_pmc_hardware(perf_irq_t new_perf_irq); void release_pmc_hardware(void); +void power4_enable_pmcs(void); + #endif /* _PPC64_PMC_H */ From 145ec7d51ae507c7cc8889ad05e24af05bbd9147 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Tue, 9 Aug 2005 15:20:18 +1000 Subject: [PATCH 249/584] [PATCH] ppc64: Fix a misleading printk in unflatten_dt_node() When unflatten_dt_node() fails to find an OF_DT_END_NODE tag it prints "Weird tag at start of node", this should be "Weird tag at end of node". Signed-off-by: Michael Ellerman arch/ppc64/kernel/prom.c | 2 +- 1 files changed, 1 insertion(+), 1 deletion(-) Signed-off-by: Paul Mackerras --- arch/ppc64/kernel/prom.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/ppc64/kernel/prom.c b/arch/ppc64/kernel/prom.c index 255c39ae1b48..04b852d446a3 100644 --- a/arch/ppc64/kernel/prom.c +++ b/arch/ppc64/kernel/prom.c @@ -918,7 +918,7 @@ static unsigned long __init unflatten_dt_node(unsigned long mem, tag = *((u32 *)(*p)); } if (tag != OF_DT_END_NODE) { - printk("Weird tag at start of node: %x\n", tag); + printk("Weird tag at end of node: %x\n", tag); return mem; } *p += 4; From 95920324f51b3a12603cf6d9bacbd831f34c5b60 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Tue, 9 Aug 2005 15:20:19 +1000 Subject: [PATCH 250/584] [PATCH] ppc64: unflatten_device_tree() should check if lmb_alloc() fails unflatten_device_tree() doesn't check if lmb_alloc() succeeds or not, it should. All it can do is panic, but at least there's an error message (assuming you have some sort of console at that point). 
Signed-off-by: Michael Ellerman arch/ppc64/kernel/prom.c | 9 +++++++-- 1 files changed, 7 insertions(+), 2 deletions(-) Signed-off-by: Paul Mackerras --- arch/ppc64/kernel/prom.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/arch/ppc64/kernel/prom.c b/arch/ppc64/kernel/prom.c index 04b852d446a3..b21848826791 100644 --- a/arch/ppc64/kernel/prom.c +++ b/arch/ppc64/kernel/prom.c @@ -950,8 +950,13 @@ void __init unflatten_device_tree(void) DBG(" size is %lx, allocating...\n", size); /* Allocate memory for the expanded device tree */ - mem = (unsigned long)abs_to_virt(lmb_alloc(size + 4, - __alignof__(struct device_node))); + mem = lmb_alloc(size + 4, __alignof__(struct device_node)); + if (!mem) { + DBG("Couldn't allocate memory with lmb_alloc()!\n"); + panic("Couldn't allocate memory with lmb_alloc()!\n"); + } + mem = (unsigned long)abs_to_virt(mem); + ((u32 *)mem)[size / 4] = 0xdeadbeef; DBG(" unflattening...\n", mem); From 9a5573e378c5c8976c6000a7643b52e2a0481688 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Tue, 9 Aug 2005 15:20:20 +1000 Subject: [PATCH 251/584] [PATCH] ppc64: Check of_chosen in check_for_initrd() You can't call get_property() on a NULL node, so check if of_chosen is set in check_for_initrd(). 
Signed-off-by: Michael Ellerman arch/ppc64/kernel/setup.c | 20 ++++++++++++-------- 1 files changed, 12 insertions(+), 8 deletions(-) Signed-off-by: Paul Mackerras --- arch/ppc64/kernel/setup.c | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/arch/ppc64/kernel/setup.c b/arch/ppc64/kernel/setup.c index b3ef8df12982..ee3b20de2e7a 100644 --- a/arch/ppc64/kernel/setup.c +++ b/arch/ppc64/kernel/setup.c @@ -536,15 +536,19 @@ static void __init check_for_initrd(void) DBG(" -> check_for_initrd()\n"); - prop = (u64 *)get_property(of_chosen, "linux,initrd-start", NULL); - if (prop != NULL) { - initrd_start = (unsigned long)__va(*prop); - prop = (u64 *)get_property(of_chosen, "linux,initrd-end", NULL); + if (of_chosen) { + prop = (u64 *)get_property(of_chosen, + "linux,initrd-start", NULL); if (prop != NULL) { - initrd_end = (unsigned long)__va(*prop); - initrd_below_start_ok = 1; - } else - initrd_start = 0; + initrd_start = (unsigned long)__va(*prop); + prop = (u64 *)get_property(of_chosen, + "linux,initrd-end", NULL); + if (prop != NULL) { + initrd_end = (unsigned long)__va(*prop); + initrd_below_start_ok = 1; + } else + initrd_start = 0; + } } /* If we were passed an initrd, set the ROOT_DEV properly if the values From c594adad5653491813959277fb87a2fef54c4e05 Mon Sep 17 00:00:00 2001 From: David Gibson Date: Thu, 11 Aug 2005 16:55:21 +1000 Subject: [PATCH 252/584] [PATCH] Dynamic hugepage addresses for ppc64 Paulus, I think this is now a reasonable candidate for the post-2.6.13 queue. Relax address restrictions for hugepages on ppc64 Presently, 64-bit applications on ppc64 may only use hugepages in the address region from 1-1.5T. Furthermore, if hugepages are enabled in the kernel config, they may only use hugepages and never normal pages in this area. This patch relaxes this restriction, allowing any address to be used with hugepages, but with a 1TB granularity. 
That is, if you map a hugepage anywhere in the region 1TB-2TB, that entire area will be reserved exclusively for hugepages for the remainder of the process's lifetime. This works analogously to hugepages in 32-bit applications, where hugepages can be mapped anywhere, but with 256MB (mmu segment) granularity. This patch applies on top of the four level pagetable patch (http://patchwork.ozlabs.org/linuxppc64/patch?id=1936). Signed-off-by: David Gibson Signed-off-by: Paul Mackerras --- arch/ppc64/kernel/asm-offsets.c | 3 +- arch/ppc64/mm/hugetlbpage.c | 211 ++++++++++++++++++++++++-------- arch/ppc64/mm/slb_low.S | 25 ++-- include/asm-ppc64/mmu.h | 2 +- include/asm-ppc64/page.h | 29 +++-- 5 files changed, 191 insertions(+), 79 deletions(-) diff --git a/arch/ppc64/kernel/asm-offsets.c b/arch/ppc64/kernel/asm-offsets.c index abb9e5b5da03..17e35d0fed09 100644 --- a/arch/ppc64/kernel/asm-offsets.c +++ b/arch/ppc64/kernel/asm-offsets.c @@ -94,7 +94,8 @@ int main(void) DEFINE(PACASLBCACHEPTR, offsetof(struct paca_struct, slb_cache_ptr)); DEFINE(PACACONTEXTID, offsetof(struct paca_struct, context.id)); #ifdef CONFIG_HUGETLB_PAGE - DEFINE(PACAHTLBSEGS, offsetof(struct paca_struct, context.htlb_segs)); + DEFINE(PACALOWHTLBAREAS, offsetof(struct paca_struct, context.low_htlb_areas)); + DEFINE(PACAHIGHHTLBAREAS, offsetof(struct paca_struct, context.high_htlb_areas)); #endif /* CONFIG_HUGETLB_PAGE */ DEFINE(PACADEFAULTDECR, offsetof(struct paca_struct, default_decr)); DEFINE(PACA_EXGEN, offsetof(struct paca_struct, exgen)); diff --git a/arch/ppc64/mm/hugetlbpage.c b/arch/ppc64/mm/hugetlbpage.c index a13e44230a6f..e7833c80eb68 100644 --- a/arch/ppc64/mm/hugetlbpage.c +++ b/arch/ppc64/mm/hugetlbpage.c @@ -27,6 +27,9 @@ #include +#define NUM_LOW_AREAS (0x100000000UL >> SID_SHIFT) +#define NUM_HIGH_AREAS (PGTABLE_RANGE >> HTLB_AREA_SHIFT) + /* Modelled after find_linux_pte() */ pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr) { @@ -129,15 +132,17 @@ int 
is_aligned_hugepage_range(unsigned long addr, unsigned long len) return 0; } -static void flush_segments(void *parm) +static void flush_low_segments(void *parm) { - u16 segs = (unsigned long) parm; + u16 areas = (unsigned long) parm; unsigned long i; asm volatile("isync" : : : "memory"); - for (i = 0; i < 16; i++) { - if (! (segs & (1U << i))) + BUILD_BUG_ON((sizeof(areas)*8) != NUM_LOW_AREAS); + + for (i = 0; i < NUM_LOW_AREAS; i++) { + if (! (areas & (1U << i))) continue; asm volatile("slbie %0" : : "r" (i << SID_SHIFT)); } @@ -145,13 +150,33 @@ static void flush_segments(void *parm) asm volatile("isync" : : : "memory"); } -static int prepare_low_seg_for_htlb(struct mm_struct *mm, unsigned long seg) +static void flush_high_segments(void *parm) { - unsigned long start = seg << SID_SHIFT; - unsigned long end = (seg+1) << SID_SHIFT; + u16 areas = (unsigned long) parm; + unsigned long i, j; + + asm volatile("isync" : : : "memory"); + + BUILD_BUG_ON((sizeof(areas)*8) != NUM_HIGH_AREAS); + + for (i = 0; i < NUM_HIGH_AREAS; i++) { + if (! 
(areas & (1U << i))) + continue; + for (j = 0; j < (1UL << (HTLB_AREA_SHIFT-SID_SHIFT)); j++) + asm volatile("slbie %0" + :: "r" ((i << HTLB_AREA_SHIFT) + (j << SID_SHIFT))); + } + + asm volatile("isync" : : : "memory"); +} + +static int prepare_low_area_for_htlb(struct mm_struct *mm, unsigned long area) +{ + unsigned long start = area << SID_SHIFT; + unsigned long end = (area+1) << SID_SHIFT; struct vm_area_struct *vma; - BUG_ON(seg >= 16); + BUG_ON(area >= NUM_LOW_AREAS); /* Check no VMAs are in the region */ vma = find_vma(mm, start); @@ -161,20 +186,39 @@ static int prepare_low_seg_for_htlb(struct mm_struct *mm, unsigned long seg) return 0; } -static int open_low_hpage_segs(struct mm_struct *mm, u16 newsegs) +static int prepare_high_area_for_htlb(struct mm_struct *mm, unsigned long area) +{ + unsigned long start = area << HTLB_AREA_SHIFT; + unsigned long end = (area+1) << HTLB_AREA_SHIFT; + struct vm_area_struct *vma; + + BUG_ON(area >= NUM_HIGH_AREAS); + + /* Check no VMAs are in the region */ + vma = find_vma(mm, start); + if (vma && (vma->vm_start < end)) + return -EBUSY; + + return 0; +} + +static int open_low_hpage_areas(struct mm_struct *mm, u16 newareas) { unsigned long i; - newsegs &= ~(mm->context.htlb_segs); - if (! newsegs) + BUILD_BUG_ON((sizeof(newareas)*8) != NUM_LOW_AREAS); + BUILD_BUG_ON((sizeof(mm->context.low_htlb_areas)*8) != NUM_LOW_AREAS); + + newareas &= ~(mm->context.low_htlb_areas); + if (! 
newareas) return 0; /* The segments we want are already open */ - for (i = 0; i < 16; i++) - if ((1 << i) & newsegs) - if (prepare_low_seg_for_htlb(mm, i) != 0) + for (i = 0; i < NUM_LOW_AREAS; i++) + if ((1 << i) & newareas) + if (prepare_low_area_for_htlb(mm, i) != 0) return -EBUSY; - mm->context.htlb_segs |= newsegs; + mm->context.low_htlb_areas |= newareas; /* update the paca copy of the context struct */ get_paca()->context = mm->context; @@ -182,29 +226,63 @@ static int open_low_hpage_segs(struct mm_struct *mm, u16 newsegs) /* the context change must make it to memory before the flush, * so that further SLB misses do the right thing. */ mb(); - on_each_cpu(flush_segments, (void *)(unsigned long)newsegs, 0, 1); + on_each_cpu(flush_low_segments, (void *)(unsigned long)newareas, 0, 1); + + return 0; +} + +static int open_high_hpage_areas(struct mm_struct *mm, u16 newareas) +{ + unsigned long i; + + BUILD_BUG_ON((sizeof(newareas)*8) != NUM_HIGH_AREAS); + BUILD_BUG_ON((sizeof(mm->context.high_htlb_areas)*8) + != NUM_HIGH_AREAS); + + newareas &= ~(mm->context.high_htlb_areas); + if (! newareas) + return 0; /* The areas we want are already open */ + + for (i = 0; i < NUM_HIGH_AREAS; i++) + if ((1 << i) & newareas) + if (prepare_high_area_for_htlb(mm, i) != 0) + return -EBUSY; + + mm->context.high_htlb_areas |= newareas; + + /* update the paca copy of the context struct */ + get_paca()->context = mm->context; + + /* the context change must make it to memory before the flush, + * so that further SLB misses do the right thing. 
*/ + mb(); + on_each_cpu(flush_high_segments, (void *)(unsigned long)newareas, 0, 1); return 0; } int prepare_hugepage_range(unsigned long addr, unsigned long len) { - if (within_hugepage_high_range(addr, len)) - return 0; - else if ((addr < 0x100000000UL) && ((addr+len) < 0x100000000UL)) { - int err; - /* Yes, we need both tests, in case addr+len overflows - * 64-bit arithmetic */ - err = open_low_hpage_segs(current->mm, + int err; + + if ( (addr+len) < addr ) + return -EINVAL; + + if ((addr + len) < 0x100000000UL) + err = open_low_hpage_areas(current->mm, LOW_ESID_MASK(addr, len)); - if (err) - printk(KERN_DEBUG "prepare_hugepage_range(%lx, %lx)" - " failed (segs: 0x%04hx)\n", addr, len, - LOW_ESID_MASK(addr, len)); + else + err = open_high_hpage_areas(current->mm, + HTLB_AREA_MASK(addr, len)); + if (err) { + printk(KERN_DEBUG "prepare_hugepage_range(%lx, %lx)" + " failed (lowmask: 0x%04hx, highmask: 0x%04hx)\n", + addr, len, + LOW_ESID_MASK(addr, len), HTLB_AREA_MASK(addr, len)); return err; } - return -EINVAL; + return 0; } struct page * @@ -276,8 +354,8 @@ full_search: vma = find_vma(mm, addr); continue; } - if (touches_hugepage_high_range(addr, len)) { - addr = TASK_HPAGE_END; + if (touches_hugepage_high_range(mm, addr, len)) { + addr = ALIGN(addr+1, 1UL<mm, addr); - for (vma = find_vma(current->mm, addr); - addr + len <= TASK_HPAGE_END; - vma = vma->vm_next) { + while (addr + len <= TASK_SIZE_USER64) { BUG_ON(vma && (addr >= vma->vm_end)); /* invariant */ - BUG_ON(! within_hugepage_high_range(addr, len)); + + if (! 
__within_hugepage_high_range(addr, len, areamask)) { + addr = ALIGN(addr+1, 1UL<<HTLB_AREA_SHIFT); + vma = find_vma(current->mm, addr); + continue; + } if (!vma || (addr + len) <= vma->vm_start) return addr; addr = ALIGN(vma->vm_end, HPAGE_SIZE); - /* Because we're in a hugepage region, this alignment - * should not skip us over any VMAs */ + /* Depending on segmask this might not be a confirmed + * hugepage region, so the ALIGN could have skipped + * some VMAs */ + vma = find_vma(current->mm, addr); } return -ENOMEM; @@ -474,6 +558,9 @@ unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr, unsigned long len, unsigned long pgoff, unsigned long flags) { + int lastshift; + u16 areamask, curareas; + if (len & ~HPAGE_MASK) return -EINVAL; @@ -481,31 +568,49 @@ unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr, return -EINVAL; if (test_thread_flag(TIF_32BIT)) { - int lastshift = 0; - u16 segmask, cursegs = current->mm->context.htlb_segs; + curareas = current->mm->context.low_htlb_areas; /* First see if we can do the mapping in the existing - * low hpage segments */ - addr = htlb_get_low_area(len, cursegs); + * low areas */ + addr = htlb_get_low_area(len, curareas); if (addr != -ENOMEM) return addr; - for (segmask = LOW_ESID_MASK(0x100000000UL-len, len); - ! lastshift; segmask >>=1) { - if (segmask & 1) + lastshift = 0; + for (areamask = LOW_ESID_MASK(0x100000000UL-len, len); + ! 
lastshift; areamask >>=1) { + if (areamask & 1) lastshift = 1; - addr = htlb_get_low_area(len, cursegs | segmask); + addr = htlb_get_low_area(len, curareas | areamask); if ((addr != -ENOMEM) - && open_low_hpage_segs(current->mm, segmask) == 0) + && open_low_hpage_areas(current->mm, areamask) == 0) return addr; } - printk(KERN_DEBUG "hugetlb_get_unmapped_area() unable to open" - " enough segments\n"); - return -ENOMEM; } else { - return htlb_get_high_area(len); + curareas = current->mm->context.high_htlb_areas; + + /* First see if we can do the mapping in the existing + * high areas */ + addr = htlb_get_high_area(len, curareas); + if (addr != -ENOMEM) + return addr; + + lastshift = 0; + for (areamask = HTLB_AREA_MASK(TASK_SIZE_USER64-len, len); + ! lastshift; areamask >>=1) { + if (areamask & 1) + lastshift = 1; + + addr = htlb_get_high_area(len, curareas | areamask); + if ((addr != -ENOMEM) + && open_high_hpage_areas(current->mm, areamask) == 0) + return addr; + } } + printk(KERN_DEBUG "hugetlb_get_unmapped_area() unable to open" + " enough areas\n"); + return -ENOMEM; } int hash_huge_page(struct mm_struct *mm, unsigned long access, diff --git a/arch/ppc64/mm/slb_low.S b/arch/ppc64/mm/slb_low.S index f20fc52483a7..bab255889c58 100644 --- a/arch/ppc64/mm/slb_low.S +++ b/arch/ppc64/mm/slb_low.S @@ -89,28 +89,29 @@ END_FTR_SECTION_IFSET(CPU_FTR_16M_PAGE) b 9f 0: /* user address: proto-VSID = context<<15 | ESID */ - li r11,SLB_VSID_USER - srdi. r9,r3,USER_ESID_BITS bne- 8f /* invalid ea bits set */ #ifdef CONFIG_HUGETLB_PAGE BEGIN_FTR_SECTION - /* check against the hugepage ranges */ - cmpldi r3,(TASK_HPAGE_END>>SID_SHIFT) - bge 6f /* >= TASK_HPAGE_END */ - cmpldi r3,(TASK_HPAGE_BASE>>SID_SHIFT) - bge 5f /* TASK_HPAGE_BASE..TASK_HPAGE_END */ - cmpldi r3,16 - bge 6f /* 4GB..TASK_HPAGE_BASE */ + lhz r9,PACAHIGHHTLBAREAS(r13) + srdi r11,r3,(HTLB_AREA_SHIFT-SID_SHIFT) + srd r9,r9,r11 + andi. 
r9,r9,1 + bne 5f - lhz r9,PACAHTLBSEGS(r13) + li r11,SLB_VSID_USER + + cmpldi r3,16 + bge 6f + + lhz r9,PACALOWHTLBAREAS(r13) srd r9,r9,r3 andi. r9,r9,1 + beq 6f -5: /* this is a hugepage user address */ - li r11,(SLB_VSID_USER|SLB_VSID_L) +5: li r11,SLB_VSID_USER|SLB_VSID_L END_FTR_SECTION_IFSET(CPU_FTR_16M_PAGE) #endif /* CONFIG_HUGETLB_PAGE */ diff --git a/include/asm-ppc64/mmu.h b/include/asm-ppc64/mmu.h index 789c2693483c..ad36bb28de29 100644 --- a/include/asm-ppc64/mmu.h +++ b/include/asm-ppc64/mmu.h @@ -307,7 +307,7 @@ typedef unsigned long mm_context_id_t; typedef struct { mm_context_id_t id; #ifdef CONFIG_HUGETLB_PAGE - u16 htlb_segs; /* bitmask */ + u16 low_htlb_areas, high_htlb_areas; #endif } mm_context_t; diff --git a/include/asm-ppc64/page.h b/include/asm-ppc64/page.h index 7e7b18ea986e..a79a08df62bd 100644 --- a/include/asm-ppc64/page.h +++ b/include/asm-ppc64/page.h @@ -37,40 +37,45 @@ #define HUGETLB_PAGE_ORDER (HPAGE_SHIFT - PAGE_SHIFT) -/* For 64-bit processes the hugepage range is 1T-1.5T */ -#define TASK_HPAGE_BASE ASM_CONST(0x0000010000000000) -#define TASK_HPAGE_END ASM_CONST(0x0000018000000000) +#define HTLB_AREA_SHIFT 40 +#define HTLB_AREA_SIZE (1UL << HTLB_AREA_SHIFT) +#define GET_HTLB_AREA(x) ((x) >> HTLB_AREA_SHIFT) #define LOW_ESID_MASK(addr, len) (((1U << (GET_ESID(addr+len-1)+1)) \ - (1U << GET_ESID(addr))) & 0xffff) +#define HTLB_AREA_MASK(addr, len) (((1U << (GET_HTLB_AREA(addr+len-1)+1)) \ + - (1U << GET_HTLB_AREA(addr))) & 0xffff) #define ARCH_HAS_HUGEPAGE_ONLY_RANGE #define ARCH_HAS_PREPARE_HUGEPAGE_RANGE #define ARCH_HAS_SETCLEAR_HUGE_PTE #define touches_hugepage_low_range(mm, addr, len) \ - (LOW_ESID_MASK((addr), (len)) & mm->context.htlb_segs) -#define touches_hugepage_high_range(addr, len) \ - (((addr) > (TASK_HPAGE_BASE-(len))) && ((addr) < TASK_HPAGE_END)) + (LOW_ESID_MASK((addr), (len)) & (mm)->context.low_htlb_areas) +#define touches_hugepage_high_range(mm, addr, len) \ + (HTLB_AREA_MASK((addr), (len)) & 
(mm)->context.high_htlb_areas) #define __within_hugepage_low_range(addr, len, segmask) \ ((LOW_ESID_MASK((addr), (len)) | (segmask)) == (segmask)) #define within_hugepage_low_range(addr, len) \ __within_hugepage_low_range((addr), (len), \ - current->mm->context.htlb_segs) -#define within_hugepage_high_range(addr, len) (((addr) >= TASK_HPAGE_BASE) \ - && ((addr)+(len) <= TASK_HPAGE_END) && ((addr)+(len) >= (addr))) + current->mm->context.low_htlb_areas) +#define __within_hugepage_high_range(addr, len, zonemask) \ + ((HTLB_AREA_MASK((addr), (len)) | (zonemask)) == (zonemask)) +#define within_hugepage_high_range(addr, len) \ + __within_hugepage_high_range((addr), (len), \ + current->mm->context.high_htlb_areas) #define is_hugepage_only_range(mm, addr, len) \ - (touches_hugepage_high_range((addr), (len)) || \ + (touches_hugepage_high_range((mm), (addr), (len)) || \ touches_hugepage_low_range((mm), (addr), (len))) #define HAVE_ARCH_HUGETLB_UNMAPPED_AREA #define in_hugepage_area(context, addr) \ (cpu_has_feature(CPU_FTR_16M_PAGE) && \ - ( (((addr) >= TASK_HPAGE_BASE) && ((addr) < TASK_HPAGE_END)) || \ + ( ((1 << GET_HTLB_AREA(addr)) & (context).high_htlb_areas) || \ ( ((addr) < 0x100000000L) && \ - ((1 << GET_ESID(addr)) & (context).htlb_segs) ) ) ) + ((1 << GET_ESID(addr)) & (context).low_htlb_areas) ) ) ) #else /* !CONFIG_HUGETLB_PAGE */ From 2cba582a49f1535c1a12a687cfb3dab713c22cc4 Mon Sep 17 00:00:00 2001 From: Jeff Garzik Date: Mon, 29 Aug 2005 05:12:30 -0400 Subject: [PATCH 253/584] [libata sata_promise] Do not attempt to use SATA phy on PATA controllers --- drivers/scsi/sata_promise.c | 50 +++++++++++++++++++++++++++++++------ 1 file changed, 43 insertions(+), 7 deletions(-) diff --git a/drivers/scsi/sata_promise.c b/drivers/scsi/sata_promise.c index 919fb314ad10..ad31b8afec6f 100644 --- a/drivers/scsi/sata_promise.c +++ b/drivers/scsi/sata_promise.c @@ -79,7 +79,8 @@ static irqreturn_t pdc_interrupt (int irq, void *dev_instance, struct pt_regs *r static void 
pdc_eng_timeout(struct ata_port *ap); static int pdc_port_start(struct ata_port *ap); static void pdc_port_stop(struct ata_port *ap); -static void pdc_phy_reset(struct ata_port *ap); +static void pdc_pata_phy_reset(struct ata_port *ap); +static void pdc_sata_phy_reset(struct ata_port *ap); static void pdc_qc_prep(struct ata_queued_cmd *qc); static void pdc_tf_load_mmio(struct ata_port *ap, struct ata_taskfile *tf); static void pdc_exec_command_mmio(struct ata_port *ap, struct ata_taskfile *tf); @@ -106,19 +107,22 @@ static Scsi_Host_Template pdc_ata_sht = { .ordered_flush = 1, }; -static struct ata_port_operations pdc_ata_ops = { +static struct ata_port_operations pdc_sata_ops = { .port_disable = ata_port_disable, .tf_load = pdc_tf_load_mmio, .tf_read = ata_tf_read, .check_status = ata_check_status, .exec_command = pdc_exec_command_mmio, .dev_select = ata_std_dev_select, - .phy_reset = pdc_phy_reset, + + .phy_reset = pdc_sata_phy_reset, + .qc_prep = pdc_qc_prep, .qc_issue = pdc_qc_issue_prot, .eng_timeout = pdc_eng_timeout, .irq_handler = pdc_interrupt, .irq_clear = pdc_irq_clear, + .scr_read = pdc_sata_scr_read, .scr_write = pdc_sata_scr_write, .port_start = pdc_port_start, @@ -126,6 +130,27 @@ static struct ata_port_operations pdc_ata_ops = { .host_stop = ata_host_stop, }; +static struct ata_port_operations pdc_pata_ops = { + .port_disable = ata_port_disable, + .tf_load = pdc_tf_load_mmio, + .tf_read = ata_tf_read, + .check_status = ata_check_status, + .exec_command = pdc_exec_command_mmio, + .dev_select = ata_std_dev_select, + + .phy_reset = pdc_pata_phy_reset, + + .qc_prep = pdc_qc_prep, + .qc_issue = pdc_qc_issue_prot, + .eng_timeout = pdc_eng_timeout, + .irq_handler = pdc_interrupt, + .irq_clear = pdc_irq_clear, + + .port_start = pdc_port_start, + .port_stop = pdc_port_stop, + .host_stop = ata_host_stop, +}; + static struct ata_port_info pdc_port_info[] = { /* board_2037x */ { @@ -135,7 +160,7 @@ static struct ata_port_info pdc_port_info[] = { .pio_mask = 
0x1f, /* pio0-4 */ .mwdma_mask = 0x07, /* mwdma0-2 */ .udma_mask = 0x7f, /* udma0-6 ; FIXME */ - .port_ops = &pdc_ata_ops, + .port_ops = &pdc_sata_ops, }, /* board_20319 */ @@ -146,7 +171,7 @@ static struct ata_port_info pdc_port_info[] = { .pio_mask = 0x1f, /* pio0-4 */ .mwdma_mask = 0x07, /* mwdma0-2 */ .udma_mask = 0x7f, /* udma0-6 ; FIXME */ - .port_ops = &pdc_ata_ops, + .port_ops = &pdc_sata_ops, }, /* board_20619 */ @@ -157,7 +182,7 @@ static struct ata_port_info pdc_port_info[] = { .pio_mask = 0x1f, /* pio0-4 */ .mwdma_mask = 0x07, /* mwdma0-2 */ .udma_mask = 0x7f, /* udma0-6 ; FIXME */ - .port_ops = &pdc_ata_ops, + .port_ops = &pdc_pata_ops, }, }; @@ -268,12 +293,23 @@ static void pdc_reset_port(struct ata_port *ap) readl(mmio); /* flush */ } -static void pdc_phy_reset(struct ata_port *ap) +static void pdc_sata_phy_reset(struct ata_port *ap) { pdc_reset_port(ap); sata_phy_reset(ap); } +static void pdc_pata_phy_reset(struct ata_port *ap) +{ + /* FIXME: add cable detect. Don't assume 40-pin cable */ + ap->cbl = ATA_CBL_PATA40; + ap->udma_mask &= ATA_UDMA_MASK_40C; + + pdc_reset_port(ap); + ata_port_probe(ap); + ata_bus_reset(ap); +} + static u32 pdc_sata_scr_read (struct ata_port *ap, unsigned int sc_reg) { if (sc_reg > SCR_CONTROL) From 69be8f189653cd81aae5a74e26615b12871bb72e Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 29 Aug 2005 11:44:09 -0400 Subject: [PATCH 254/584] [PATCH] convert signal handling of NODEFER to act like other Unix boxes. It has been reported that the way Linux handles NODEFER for signals is not consistent with the way other Unix boxes handle it. I've written a program to test the behavior of how this flag affects signals and had several reports from people who ran this on various Unix boxes, confirming that Linux seems to be unique on the way this is handled. The way NODEFER affects signals on other Unix boxes is as follows: 1) If NODEFER is set, other signals in sa_mask are still blocked. 
2) If NODEFER is set and the signal is in sa_mask, then the signal is still blocked. (Note: this is the behavior of all tested but Linux _and_ NetBSD 2.0 *). The way NODEFER affects signals on Linux: 1) If NODEFER is set, other signals are _not_ blocked regardless of sa_mask (Even NetBSD doesn't do this). 2) If NODEFER is set and the signal is in sa_mask, then the signal being handled is not blocked. The patch converts signal handling in all current Linux architectures to the way most Unix boxes work. Unix boxes that were tested: DU4, AIX 5.2, Irix 6.5, NetBSD 2.0, SFU 3.5 on WinXP, AIX 5.3, Mac OSX, and of course Linux 2.6.13-rcX. * NetBSD was the only other Unix to behave like Linux on point #2. The main concern was brought up by point #1 which even NetBSD isn't like Linux. So with this patch, we leave NetBSD as the lonely one that behaves differently here with #2. Signed-off-by: Linus Torvalds --- arch/alpha/kernel/signal.c | 11 +++++------ arch/arm/kernel/signal.c | 5 +++-- arch/arm26/kernel/signal.c | 13 ++++++------- arch/cris/arch-v10/kernel/signal.c | 11 +++++------ arch/cris/arch-v32/kernel/signal.c | 11 +++++------ arch/frv/kernel/signal.c | 11 +++++------ arch/h8300/kernel/signal.c | 11 +++++------ arch/i386/kernel/signal.c | 5 +++-- arch/ia64/kernel/signal.c | 15 ++++++--------- arch/m32r/kernel/signal.c | 11 +++++------ arch/m68knommu/kernel/signal.c | 11 +++++------ arch/mips/kernel/irixsig.c | 11 +++++------ arch/mips/kernel/signal.c | 11 +++++------ arch/mips/kernel/signal32.c | 11 +++++------ arch/parisc/kernel/signal.c | 11 +++++------ arch/ppc/kernel/signal.c | 11 +++++------ arch/ppc64/kernel/signal.c | 5 +++-- arch/ppc64/kernel/signal32.c | 5 +++-- arch/s390/kernel/compat_signal.c | 11 +++++------ arch/s390/kernel/signal.c | 11 +++++------ arch/sh/kernel/signal.c | 11 +++++------ arch/sh64/kernel/signal.c | 11 +++++------ arch/sparc/kernel/signal.c | 11 +++++------ arch/sparc64/kernel/signal.c | 11 +++++------ arch/sparc64/kernel/signal32.c | 
11 +++++------ arch/um/kernel/signal_kern.c | 6 +++--- arch/v850/kernel/signal.c | 11 +++++------ arch/x86_64/kernel/signal.c | 5 +++-- arch/xtensa/kernel/signal.c | 11 +++++------ 29 files changed, 135 insertions(+), 155 deletions(-) diff --git a/arch/alpha/kernel/signal.c b/arch/alpha/kernel/signal.c index 08fe8071a7f8..2e45e8604e32 100644 --- a/arch/alpha/kernel/signal.c +++ b/arch/alpha/kernel/signal.c @@ -566,13 +566,12 @@ handle_signal(int sig, struct k_sigaction *ka, siginfo_t *info, if (ka->sa.sa_flags & SA_RESETHAND) ka->sa.sa_handler = SIG_DFL; - if (!(ka->sa.sa_flags & SA_NODEFER)) { - spin_lock_irq(&current->sighand->siglock); - sigorsets(&current->blocked,&current->blocked,&ka->sa.sa_mask); + spin_lock_irq(&current->sighand->siglock); + sigorsets(&current->blocked,&current->blocked,&ka->sa.sa_mask); + if (!(ka->sa.sa_flags & SA_NODEFER)) sigaddset(&current->blocked,sig); - recalc_sigpending(); - spin_unlock_irq(&current->sighand->siglock); - } + recalc_sigpending(); + spin_unlock_irq(&current->sighand->siglock); } static inline void diff --git a/arch/arm/kernel/signal.c b/arch/arm/kernel/signal.c index 5e435e42dacd..a94d75fef598 100644 --- a/arch/arm/kernel/signal.c +++ b/arch/arm/kernel/signal.c @@ -658,11 +658,12 @@ handle_signal(unsigned long sig, struct k_sigaction *ka, /* * Block the signal if we were unsuccessful. 
*/ - if (ret != 0 || !(ka->sa.sa_flags & SA_NODEFER)) { + if (ret != 0) { spin_lock_irq(&tsk->sighand->siglock); sigorsets(&tsk->blocked, &tsk->blocked, &ka->sa.sa_mask); - sigaddset(&tsk->blocked, sig); + if (!(ka->sa.sa_flags & SA_NODEFER)) + sigaddset(&tsk->blocked, sig); recalc_sigpending(); spin_unlock_irq(&tsk->sighand->siglock); } diff --git a/arch/arm26/kernel/signal.c b/arch/arm26/kernel/signal.c index 356d9809cc0b..ce2055bdc9ee 100644 --- a/arch/arm26/kernel/signal.c +++ b/arch/arm26/kernel/signal.c @@ -454,14 +454,13 @@ handle_signal(unsigned long sig, siginfo_t *info, sigset_t *oldset, if (ka->sa.sa_flags & SA_ONESHOT) ka->sa.sa_handler = SIG_DFL; - if (!(ka->sa.sa_flags & SA_NODEFER)) { - spin_lock_irq(&tsk->sighand->siglock); - sigorsets(&tsk->blocked, &tsk->blocked, - &ka->sa.sa_mask); + spin_lock_irq(&tsk->sighand->siglock); + sigorsets(&tsk->blocked, &tsk->blocked, + &ka->sa.sa_mask); + if (!(ka->sa.sa_flags & SA_NODEFER)) sigaddset(&tsk->blocked, sig); - recalc_sigpending(); - spin_unlock_irq(&tsk->sighand->siglock); - } + recalc_sigpending(); + spin_unlock_irq(&tsk->sighand->siglock); return; } diff --git a/arch/cris/arch-v10/kernel/signal.c b/arch/cris/arch-v10/kernel/signal.c index 85e0032e664f..693771961f85 100644 --- a/arch/cris/arch-v10/kernel/signal.c +++ b/arch/cris/arch-v10/kernel/signal.c @@ -517,13 +517,12 @@ handle_signal(int canrestart, unsigned long sig, if (ka->sa.sa_flags & SA_ONESHOT) ka->sa.sa_handler = SIG_DFL; - if (!(ka->sa.sa_flags & SA_NODEFER)) { - spin_lock_irq(&current->sighand->siglock); - sigorsets(&current->blocked,&current->blocked,&ka->sa.sa_mask); + spin_lock_irq(&current->sighand->siglock); + sigorsets(&current->blocked,&current->blocked,&ka->sa.sa_mask); + if (!(ka->sa.sa_flags & SA_NODEFER)) sigaddset(&current->blocked,sig); - recalc_sigpending(); - spin_unlock_irq(&current->sighand->siglock); - } + recalc_sigpending(); + spin_unlock_irq(&current->sighand->siglock); } /* diff --git a/arch/cris/arch-v32/kernel/signal.c b/arch/cris/arch-v32/kernel/signal.c index
fb4c79d5b76b..0a3614dab887 100644 --- a/arch/cris/arch-v32/kernel/signal.c +++ b/arch/cris/arch-v32/kernel/signal.c @@ -568,13 +568,12 @@ handle_signal(int canrestart, unsigned long sig, if (ka->sa.sa_flags & SA_ONESHOT) ka->sa.sa_handler = SIG_DFL; - if (!(ka->sa.sa_flags & SA_NODEFER)) { - spin_lock_irq(&current->sighand->siglock); - sigorsets(&current->blocked,&current->blocked,&ka->sa.sa_mask); + spin_lock_irq(&current->sighand->siglock); + sigorsets(&current->blocked,&current->blocked,&ka->sa.sa_mask); + if (!(ka->sa.sa_flags & SA_NODEFER)) sigaddset(&current->blocked,sig); - recalc_sigpending(); - spin_unlock_irq(&current->sighand->siglock); - } + recalc_sigpending(); + spin_unlock_irq(&current->sighand->siglock); } /* diff --git a/arch/frv/kernel/signal.c b/arch/frv/kernel/signal.c index 36a2dffc8ebd..d4ccc0728dfe 100644 --- a/arch/frv/kernel/signal.c +++ b/arch/frv/kernel/signal.c @@ -506,13 +506,12 @@ static void handle_signal(unsigned long sig, siginfo_t *info, else setup_frame(sig, ka, oldset, regs); - if (!(ka->sa.sa_flags & SA_NODEFER)) { - spin_lock_irq(&current->sighand->siglock); - sigorsets(&current->blocked, &current->blocked, &ka->sa.sa_mask); + spin_lock_irq(&current->sighand->siglock); + sigorsets(&current->blocked, &current->blocked, &ka->sa.sa_mask); + if (!(ka->sa.sa_flags & SA_NODEFER)) sigaddset(&current->blocked, sig); - recalc_sigpending(); - spin_unlock_irq(&current->sighand->siglock); - } + recalc_sigpending(); + spin_unlock_irq(&current->sighand->siglock); } /* end handle_signal() */ /*****************************************************************************/ diff --git a/arch/h8300/kernel/signal.c b/arch/h8300/kernel/signal.c index 5aab87eae1f9..f13d5e82d4b9 100644 --- a/arch/h8300/kernel/signal.c +++ b/arch/h8300/kernel/signal.c @@ -488,13 +488,12 @@ handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka, else setup_frame(sig, ka, oldset, regs); - if (!(ka->sa.sa_flags & SA_NODEFER)) { - spin_lock_irq(&current->sighand->siglock); - sigorsets(&current->blocked,&current->blocked,&ka->sa.sa_mask); + spin_lock_irq(&current->sighand->siglock); +
sigorsets(&current->blocked,&current->blocked,&ka->sa.sa_mask); + if (!(ka->sa.sa_flags & SA_NODEFER)) sigaddset(&current->blocked,sig); - recalc_sigpending(); - spin_unlock_irq(&current->sighand->siglock); - } + recalc_sigpending(); + spin_unlock_irq(&current->sighand->siglock); } /* diff --git a/arch/i386/kernel/signal.c b/arch/i386/kernel/signal.c index 89ef7adc63a4..140e340569c6 100644 --- a/arch/i386/kernel/signal.c +++ b/arch/i386/kernel/signal.c @@ -577,10 +577,11 @@ handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka, else ret = setup_frame(sig, ka, oldset, regs); - if (ret && !(ka->sa.sa_flags & SA_NODEFER)) { + if (ret) { spin_lock_irq(&current->sighand->siglock); sigorsets(&current->blocked,&current->blocked,&ka->sa.sa_mask); - sigaddset(&current->blocked,sig); + if (!(ka->sa.sa_flags & SA_NODEFER)) + sigaddset(&current->blocked,sig); recalc_sigpending(); spin_unlock_irq(&current->sighand->siglock); } diff --git a/arch/ia64/kernel/signal.c b/arch/ia64/kernel/signal.c index b8a0a7d257a9..774f34b675cf 100644 --- a/arch/ia64/kernel/signal.c +++ b/arch/ia64/kernel/signal.c @@ -467,15 +467,12 @@ handle_signal (unsigned long sig, struct k_sigaction *ka, siginfo_t *info, sigse if (!setup_frame(sig, ka, info, oldset, scr)) return 0; - if (!(ka->sa.sa_flags & SA_NODEFER)) { - spin_lock_irq(&current->sighand->siglock); - { - sigorsets(&current->blocked, &current->blocked, &ka->sa.sa_mask); - sigaddset(&current->blocked, sig); - recalc_sigpending(); - } - spin_unlock_irq(&current->sighand->siglock); - } + spin_lock_irq(&current->sighand->siglock); + sigorsets(&current->blocked, &current->blocked, &ka->sa.sa_mask); + if (!(ka->sa.sa_flags & SA_NODEFER)) + sigaddset(&current->blocked, sig); + recalc_sigpending(); + spin_unlock_irq(&current->sighand->siglock); return 1; } diff --git a/arch/m32r/kernel/signal.c b/arch/m32r/kernel/signal.c index 5aef7e406ef5..71763f7a1d19 100644 --- a/arch/m32r/kernel/signal.c +++ b/arch/m32r/kernel/signal.c @@ -341,13 +341,12 @@ handle_signal(unsigned long sig, struct k_sigaction *ka, siginfo_t *info, /* Set up the stack frame */
setup_rt_frame(sig, ka, info, oldset, regs); - if (!(ka->sa.sa_flags & SA_NODEFER)) { - spin_lock_irq(&current->sighand->siglock); - sigorsets(&current->blocked,&current->blocked,&ka->sa.sa_mask); + spin_lock_irq(&current->sighand->siglock); + sigorsets(&current->blocked,&current->blocked,&ka->sa.sa_mask); + if (!(ka->sa.sa_flags & SA_NODEFER)) sigaddset(&current->blocked,sig); - recalc_sigpending(); - spin_unlock_irq(&current->sighand->siglock); - } + recalc_sigpending(); + spin_unlock_irq(&current->sighand->siglock); } /* diff --git a/arch/m68knommu/kernel/signal.c b/arch/m68knommu/kernel/signal.c index 30dceb59a462..43a2726c0d0a 100644 --- a/arch/m68knommu/kernel/signal.c +++ b/arch/m68knommu/kernel/signal.c @@ -732,13 +732,12 @@ handle_signal(int sig, struct k_sigaction *ka, siginfo_t *info, if (ka->sa.sa_flags & SA_ONESHOT) ka->sa.sa_handler = SIG_DFL; - if (!(ka->sa.sa_flags & SA_NODEFER)) { - spin_lock_irq(&current->sighand->siglock); - sigorsets(&current->blocked,&current->blocked,&ka->sa.sa_mask); + spin_lock_irq(&current->sighand->siglock); + sigorsets(&current->blocked,&current->blocked,&ka->sa.sa_mask); + if (!(ka->sa.sa_flags & SA_NODEFER)) sigaddset(&current->blocked,sig); - recalc_sigpending(); - spin_unlock_irq(&current->sighand->siglock); - } + recalc_sigpending(); + spin_unlock_irq(&current->sighand->siglock); } /* diff --git a/arch/mips/kernel/irixsig.c b/arch/mips/kernel/irixsig.c index 40244782a8e5..4c114ae21793 100644 --- a/arch/mips/kernel/irixsig.c +++ b/arch/mips/kernel/irixsig.c @@ -155,13 +155,12 @@ static inline void handle_signal(unsigned long sig, siginfo_t *info, else setup_irix_frame(ka, regs, sig, oldset); - if (!(ka->sa.sa_flags & SA_NODEFER)) { - spin_lock_irq(&current->sighand->siglock); - sigorsets(&current->blocked,&current->blocked,&ka->sa.sa_mask); + spin_lock_irq(&current->sighand->siglock); + sigorsets(&current->blocked,&current->blocked,&ka->sa.sa_mask); + if (!(ka->sa.sa_flags & SA_NODEFER)) sigaddset(&current->blocked,sig); - recalc_sigpending(); - spin_unlock_irq(&current->sighand->siglock); - } + recalc_sigpending(); + spin_unlock_irq(&current->sighand->siglock); } asmlinkage
int do_irix_signal(sigset_t *oldset, struct pt_regs *regs) diff --git a/arch/mips/kernel/signal.c b/arch/mips/kernel/signal.c index 65ee15396ffd..0209c1dd1429 100644 --- a/arch/mips/kernel/signal.c +++ b/arch/mips/kernel/signal.c @@ -425,13 +425,12 @@ static inline void handle_signal(unsigned long sig, siginfo_t *info, setup_frame(ka, regs, sig, oldset); #endif - if (!(ka->sa.sa_flags & SA_NODEFER)) { - spin_lock_irq(&current->sighand->siglock); - sigorsets(&current->blocked,&current->blocked,&ka->sa.sa_mask); + spin_lock_irq(&current->sighand->siglock); + sigorsets(&current->blocked,&current->blocked,&ka->sa.sa_mask); + if (!(ka->sa.sa_flags & SA_NODEFER)) sigaddset(&current->blocked,sig); - recalc_sigpending(); - spin_unlock_irq(&current->sighand->siglock); - } + recalc_sigpending(); + spin_unlock_irq(&current->sighand->siglock); } extern int do_signal32(sigset_t *oldset, struct pt_regs *regs); diff --git a/arch/mips/kernel/signal32.c b/arch/mips/kernel/signal32.c index c1a69cf232f9..f6875f023a29 100644 --- a/arch/mips/kernel/signal32.c +++ b/arch/mips/kernel/signal32.c @@ -751,13 +751,12 @@ static inline void handle_signal(unsigned long sig, siginfo_t *info, else setup_frame(ka, regs, sig, oldset); - if (!(ka->sa.sa_flags & SA_NODEFER)) { - spin_lock_irq(&current->sighand->siglock); - sigorsets(&current->blocked,&current->blocked,&ka->sa.sa_mask); + spin_lock_irq(&current->sighand->siglock); + sigorsets(&current->blocked,&current->blocked,&ka->sa.sa_mask); + if (!(ka->sa.sa_flags & SA_NODEFER)) sigaddset(&current->blocked,sig); - recalc_sigpending(); - spin_unlock_irq(&current->sighand->siglock); - } + recalc_sigpending(); + spin_unlock_irq(&current->sighand->siglock); } int do_signal32(sigset_t *oldset, struct pt_regs *regs) diff --git a/arch/parisc/kernel/signal.c b/arch/parisc/kernel/signal.c index 9421bb98ea63..55d71c15e1f7 100644 --- a/arch/parisc/kernel/signal.c +++ b/arch/parisc/kernel/signal.c @@ -517,13 +517,12 @@ handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka, if (!setup_rt_frame(sig, ka, info, oldset, regs, in_syscall)) return 0; -
if (!(ka->sa.sa_flags & SA_NODEFER)) { - spin_lock_irq(&current->sighand->siglock); - sigorsets(&current->blocked,&current->blocked,&ka->sa.sa_mask); + spin_lock_irq(&current->sighand->siglock); + sigorsets(&current->blocked,&current->blocked,&ka->sa.sa_mask); + if (!(ka->sa.sa_flags & SA_NODEFER)) sigaddset(&current->blocked,sig); - recalc_sigpending(); - spin_unlock_irq(&current->sighand->siglock); - } + recalc_sigpending(); + spin_unlock_irq(&current->sighand->siglock); return 1; } diff --git a/arch/ppc/kernel/signal.c b/arch/ppc/kernel/signal.c index 8aaeb6f4e750..2244bf91e593 100644 --- a/arch/ppc/kernel/signal.c +++ b/arch/ppc/kernel/signal.c @@ -759,13 +759,12 @@ int do_signal(sigset_t *oldset, struct pt_regs *regs) else handle_signal(signr, &ka, &info, oldset, regs, newsp); - if (!(ka.sa.sa_flags & SA_NODEFER)) { - spin_lock_irq(&current->sighand->siglock); - sigorsets(&current->blocked,&current->blocked,&ka.sa.sa_mask); + spin_lock_irq(&current->sighand->siglock); + sigorsets(&current->blocked,&current->blocked,&ka.sa.sa_mask); + if (!(ka.sa.sa_flags & SA_NODEFER)) sigaddset(&current->blocked, signr); - recalc_sigpending(); - spin_unlock_irq(&current->sighand->siglock); - } + recalc_sigpending(); + spin_unlock_irq(&current->sighand->siglock); return 1; } diff --git a/arch/ppc64/kernel/signal.c b/arch/ppc64/kernel/signal.c index bf782276984c..49a79a55c32d 100644 --- a/arch/ppc64/kernel/signal.c +++ b/arch/ppc64/kernel/signal.c @@ -481,10 +481,11 @@ static int handle_signal(unsigned long sig, struct k_sigaction *ka, /* Set up Signal Frame */ ret = setup_rt_frame(sig, ka, info, oldset, regs); - if (ret && !(ka->sa.sa_flags & SA_NODEFER)) { + if (ret) { spin_lock_irq(&current->sighand->siglock); sigorsets(&current->blocked, &current->blocked, &ka->sa.sa_mask); - sigaddset(&current->blocked,sig); + if (!(ka->sa.sa_flags & SA_NODEFER)) + sigaddset(&current->blocked,sig); recalc_sigpending(); spin_unlock_irq(&current->sighand->siglock); } diff --git a/arch/ppc64/kernel/signal32.c b/arch/ppc64/kernel/signal32.c index 3c2fa5c284c0..46f4d6cc7fc9 100644 --- a/arch/ppc64/kernel/signal32.c +++
b/arch/ppc64/kernel/signal32.c @@ -976,11 +976,12 @@ int do_signal32(sigset_t *oldset, struct pt_regs *regs) else ret = handle_signal32(signr, &ka, &info, oldset, regs, newsp); - if (ret && !(ka.sa.sa_flags & SA_NODEFER)) { + if (ret) { spin_lock_irq(&current->sighand->siglock); sigorsets(&current->blocked, &current->blocked, &ka.sa.sa_mask); - sigaddset(&current->blocked, signr); + if (!(ka.sa.sa_flags & SA_NODEFER)) + sigaddset(&current->blocked, signr); recalc_sigpending(); spin_unlock_irq(&current->sighand->siglock); } diff --git a/arch/s390/kernel/compat_signal.c b/arch/s390/kernel/compat_signal.c index d05d65ac9694..7358cdb8441f 100644 --- a/arch/s390/kernel/compat_signal.c +++ b/arch/s390/kernel/compat_signal.c @@ -637,12 +637,11 @@ handle_signal32(unsigned long sig, struct k_sigaction *ka, else setup_frame32(sig, ka, oldset, regs); - if (!(ka->sa.sa_flags & SA_NODEFER)) { - spin_lock_irq(&current->sighand->siglock); - sigorsets(&current->blocked,&current->blocked,&ka->sa.sa_mask); + spin_lock_irq(&current->sighand->siglock); + sigorsets(&current->blocked,&current->blocked,&ka->sa.sa_mask); + if (!(ka->sa.sa_flags & SA_NODEFER)) sigaddset(&current->blocked,sig); - recalc_sigpending(); - spin_unlock_irq(&current->sighand->siglock); - } + recalc_sigpending(); + spin_unlock_irq(&current->sighand->siglock); } diff --git a/arch/s390/kernel/signal.c b/arch/s390/kernel/signal.c index 610c1d03e975..6a3f5b7473a9 100644 --- a/arch/s390/kernel/signal.c +++ b/arch/s390/kernel/signal.c @@ -429,13 +429,12 @@ handle_signal(unsigned long sig, struct k_sigaction *ka, else setup_frame(sig, ka, oldset, regs); - if (!(ka->sa.sa_flags & SA_NODEFER)) { - spin_lock_irq(&current->sighand->siglock); - sigorsets(&current->blocked,&current->blocked,&ka->sa.sa_mask); + spin_lock_irq(&current->sighand->siglock); + sigorsets(&current->blocked,&current->blocked,&ka->sa.sa_mask); + if (!(ka->sa.sa_flags & SA_NODEFER)) sigaddset(&current->blocked,sig); - recalc_sigpending(); - spin_unlock_irq(&current->sighand->siglock); - } + recalc_sigpending(); + spin_unlock_irq(&current->sighand->siglock); } /* diff --git
a/arch/sh/kernel/signal.c b/arch/sh/kernel/signal.c index 8022243f0178..b475c4d2405f 100644 --- a/arch/sh/kernel/signal.c +++ b/arch/sh/kernel/signal.c @@ -546,13 +546,12 @@ handle_signal(unsigned long sig, struct k_sigaction *ka, siginfo_t *info, if (ka->sa.sa_flags & SA_ONESHOT) ka->sa.sa_handler = SIG_DFL; - if (!(ka->sa.sa_flags & SA_NODEFER)) { - spin_lock_irq(&current->sighand->siglock); - sigorsets(&current->blocked,&current->blocked,&ka->sa.sa_mask); + spin_lock_irq(&current->sighand->siglock); + sigorsets(&current->blocked,&current->blocked,&ka->sa.sa_mask); + if (!(ka->sa.sa_flags & SA_NODEFER)) sigaddset(&current->blocked,sig); - recalc_sigpending(); - spin_unlock_irq(&current->sighand->siglock); - } + recalc_sigpending(); + spin_unlock_irq(&current->sighand->siglock); } /* diff --git a/arch/sh64/kernel/signal.c b/arch/sh64/kernel/signal.c index c6a14a87c59b..3ea8929e483b 100644 --- a/arch/sh64/kernel/signal.c +++ b/arch/sh64/kernel/signal.c @@ -664,13 +664,12 @@ handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka, else setup_frame(sig, ka, oldset, regs); - if (!(ka->sa.sa_flags & SA_NODEFER)) { - spin_lock_irq(&current->sighand->siglock); - sigorsets(&current->blocked,&current->blocked,&ka->sa.sa_mask); + spin_lock_irq(&current->sighand->siglock); + sigorsets(&current->blocked,&current->blocked,&ka->sa.sa_mask); + if (!(ka->sa.sa_flags & SA_NODEFER)) sigaddset(&current->blocked,sig); - recalc_sigpending(); - spin_unlock_irq(&current->sighand->siglock); - } + recalc_sigpending(); + spin_unlock_irq(&current->sighand->siglock); } /* diff --git a/arch/sparc/kernel/signal.c b/arch/sparc/kernel/signal.c index 011ff35057a5..5f34d7dc2b89 100644 --- a/arch/sparc/kernel/signal.c +++ b/arch/sparc/kernel/signal.c @@ -1034,13 +1034,12 @@ handle_signal(unsigned long signr, struct k_sigaction *ka, else setup_frame(&ka->sa, regs, signr, oldset, info); } - if (!(ka->sa.sa_flags & SA_NOMASK)) { - spin_lock_irq(&current->sighand->siglock); - sigorsets(&current->blocked,&current->blocked,&ka->sa.sa_mask); + spin_lock_irq(&current->sighand->siglock); +
sigorsets(&current->blocked,&current->blocked,&ka->sa.sa_mask); + if (!(ka->sa.sa_flags & SA_NOMASK)) sigaddset(&current->blocked, signr); - recalc_sigpending(); - spin_unlock_irq(&current->sighand->siglock); - } + recalc_sigpending(); + spin_unlock_irq(&current->sighand->siglock); } static inline void syscall_restart(unsigned long orig_i0, struct pt_regs *regs, diff --git a/arch/sparc64/kernel/signal.c b/arch/sparc64/kernel/signal.c index b27934671c35..60f5dfabb1e1 100644 --- a/arch/sparc64/kernel/signal.c +++ b/arch/sparc64/kernel/signal.c @@ -574,13 +574,12 @@ static inline void handle_signal(unsigned long signr, struct k_sigaction *ka, { setup_rt_frame(ka, regs, signr, oldset, (ka->sa.sa_flags & SA_SIGINFO) ? info : NULL); - if (!(ka->sa.sa_flags & SA_NOMASK)) { - spin_lock_irq(&current->sighand->siglock); - sigorsets(&current->blocked,&current->blocked,&ka->sa.sa_mask); + spin_lock_irq(&current->sighand->siglock); + sigorsets(&current->blocked,&current->blocked,&ka->sa.sa_mask); + if (!(ka->sa.sa_flags & SA_NOMASK)) sigaddset(&current->blocked,signr); - recalc_sigpending(); - spin_unlock_irq(&current->sighand->siglock); - } + recalc_sigpending(); + spin_unlock_irq(&current->sighand->siglock); } static inline void syscall_restart(unsigned long orig_i0, struct pt_regs *regs, diff --git a/arch/sparc64/kernel/signal32.c b/arch/sparc64/kernel/signal32.c index f28428f4170e..b1ed23091fbb 100644 --- a/arch/sparc64/kernel/signal32.c +++ b/arch/sparc64/kernel/signal32.c @@ -1325,13 +1325,12 @@ static inline void handle_signal32(unsigned long signr, struct k_sigaction *ka, else setup_frame32(&ka->sa, regs, signr, oldset, info); } - if (!(ka->sa.sa_flags & SA_NOMASK)) { - spin_lock_irq(&current->sighand->siglock); - sigorsets(&current->blocked,&current->blocked,&ka->sa.sa_mask); + spin_lock_irq(&current->sighand->siglock); + sigorsets(&current->blocked,&current->blocked,&ka->sa.sa_mask); + if (!(ka->sa.sa_flags & SA_NOMASK)) sigaddset(&current->blocked,signr); - recalc_sigpending(); - spin_unlock_irq(&current->sighand->siglock); - } + recalc_sigpending(); + spin_unlock_irq(&current->sighand->siglock); }
static inline void syscall_restart32(unsigned long orig_i0, struct pt_regs *regs, diff --git a/arch/um/kernel/signal_kern.c b/arch/um/kernel/signal_kern.c index 7807a3e8c426..03618bd13d55 100644 --- a/arch/um/kernel/signal_kern.c +++ b/arch/um/kernel/signal_kern.c @@ -87,12 +87,12 @@ static int handle_signal(struct pt_regs *regs, unsigned long signr, recalc_sigpending(); spin_unlock_irq(&current->sighand->siglock); force_sigsegv(signr, current); - } - else if(!(ka->sa.sa_flags & SA_NODEFER)){ + } else { spin_lock_irq(&current->sighand->siglock); sigorsets(&current->blocked, &current->blocked, &ka->sa.sa_mask); - sigaddset(&current->blocked, signr); + if(!(ka->sa.sa_flags & SA_NODEFER)) + sigaddset(&current->blocked, signr); recalc_sigpending(); spin_unlock_irq(&current->sighand->siglock); } diff --git a/arch/v850/kernel/signal.c b/arch/v850/kernel/signal.c index 37061e32e1a4..633e4e1b825f 100644 --- a/arch/v850/kernel/signal.c +++ b/arch/v850/kernel/signal.c @@ -462,13 +462,12 @@ handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka, else setup_frame(sig, ka, oldset, regs); - if (!(ka->sa.sa_flags & SA_NODEFER)) { - spin_lock_irq(&current->sighand->siglock); - sigorsets(&current->blocked,&current->blocked,&ka->sa.sa_mask); + spin_lock_irq(&current->sighand->siglock); + sigorsets(&current->blocked,&current->blocked,&ka->sa.sa_mask); + if (!(ka->sa.sa_flags & SA_NODEFER)) sigaddset(&current->blocked,sig); - recalc_sigpending(); - spin_unlock_irq(&current->sighand->siglock); - } + recalc_sigpending(); + spin_unlock_irq(&current->sighand->siglock); } /* diff --git a/arch/x86_64/kernel/signal.c b/arch/x86_64/kernel/signal.c index 98590a989f3d..d642fbf3da29 100644 --- a/arch/x86_64/kernel/signal.c +++ b/arch/x86_64/kernel/signal.c @@ -394,10 +394,11 @@ handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka, #endif ret = setup_rt_frame(sig, ka, info, oldset, regs); - if (ret && !(ka->sa.sa_flags & SA_NODEFER)) { + if (ret) { spin_lock_irq(&current->sighand->siglock); sigorsets(&current->blocked,&current->blocked,&ka->sa.sa_mask); -
sigaddset(&current->blocked,sig); + if (!(ka->sa.sa_flags & SA_NODEFER)) + sigaddset(&current->blocked,sig); recalc_sigpending(); spin_unlock_irq(&current->sighand->siglock); } diff --git a/arch/xtensa/kernel/signal.c b/arch/xtensa/kernel/signal.c index df6e1e17b096..dc42cede9394 100644 --- a/arch/xtensa/kernel/signal.c +++ b/arch/xtensa/kernel/signal.c @@ -702,12 +702,11 @@ int do_signal(struct pt_regs *regs, sigset_t *oldset) if (ka.sa.sa_flags & SA_ONESHOT) ka.sa.sa_handler = SIG_DFL; - if (!(ka.sa.sa_flags & SA_NODEFER)) { - spin_lock_irq(&current->sighand->siglock); - sigorsets(&current->blocked, &current->blocked, &ka.sa.sa_mask); + spin_lock_irq(&current->sighand->siglock); + sigorsets(&current->blocked, &current->blocked, &ka.sa.sa_mask); + if (!(ka.sa.sa_flags & SA_NODEFER)) sigaddset(&current->blocked, signr); - recalc_sigpending(); - spin_unlock_irq(&current->sighand->siglock); - } + recalc_sigpending(); + spin_unlock_irq(&current->sighand->siglock); return 1; } From 5bbe6ab938208bb3d4e16f7a28b11fcddd4f9ff2 Mon Sep 17 00:00:00 2001 From: Benjamin LaHaise Date: Sun, 28 Aug 2005 18:05:17 -0400 Subject: [PATCH 255/584] [PATCH] new name for 2.6.14 We've had Woozy Numbat for a while now. Here's an updated name care of Jeff Garzik and myself. Signed-off-by: Benjamin LaHaise Signed-off-by: Linus Torvalds --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 5acd1fc68d17..3d84df581cf2 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,7 @@ VERSION = 2 PATCHLEVEL = 6 SUBLEVEL = 13 EXTRAVERSION = -NAME=Woozy Numbat +NAME=Affluent Albatross # *DOCUMENTATION* # To see a list of typical targets execute "make help" From 9e2d3cd34a159948dc753a14573e16bffc04dba8 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 28 Aug 2005 03:19:14 +0100 Subject: [PATCH 256/584] [PATCH] mod_devicetable.h fixes * ieee1394_device_id has kernel_ulong_t field after an odd number of __u32 ones.
Since mod_devicetable.h is included both from kernel and from host build helper, we may be in trouble if we are building on 32bit host for 64bit target - userland sees unsigned long long, kernel sees unsigned long and while their sizes match, alignments might not. Fixed by forcing alignment. Fortunately, almost nobody else needs that - the rest of such fields is naturally aligned as it is. * of_device_id has void * in it. Host userland helpers need kernel_ulong_t instead, since their void * might have nothing to do with the kernel one. Fixed in the same way it's done for similar problems in pcmcia_device_id (ifdef __KERNEL__). * pcmcia_device_id has the same problem as ieee1394_device_id. Fixed the same way. Signed-off-by: Al Viro Signed-off-by: Linus Torvalds --- include/linux/mod_devicetable.h | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/include/linux/mod_devicetable.h b/include/linux/mod_devicetable.h index dce53ac1625d..97bbccdbcca3 100644 --- a/include/linux/mod_devicetable.h +++ b/include/linux/mod_devicetable.h @@ -33,7 +33,8 @@ struct ieee1394_device_id { __u32 model_id; __u32 specifier_id; __u32 version; - kernel_ulong_t driver_data; + kernel_ulong_t driver_data + __attribute__((aligned(sizeof(kernel_ulong_t)))); }; @@ -182,7 +183,11 @@ struct of_device_id char name[32]; char type[32]; char compatible[128]; +#if __KERNEL__ void *data; +#else + kernel_ulong_t data; +#endif }; @@ -208,7 +213,8 @@ struct pcmcia_device_id { #ifdef __KERNEL__ const char * prod_id[4]; #else - kernel_ulong_t prod_id[4]; + kernel_ulong_t prod_id[4] + __attribute__((aligned(sizeof(kernel_ulong_t)))); #endif /* not matched against */ From 03ecc6749ab2bfe7baf84b3e0e35018884758fa3 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 28 Aug 2005 03:47:50 +0100 Subject: [PATCH 257/584] [PATCH] missing include in tda80xx Signed-off-by: Al Viro Signed-off-by: Linus Torvalds --- drivers/media/dvb/frontends/tda80xx.c | 1 + 1 file changed, 1 insertion(+) diff 
--git a/drivers/media/dvb/frontends/tda80xx.c b/drivers/media/dvb/frontends/tda80xx.c index 88e125079ca1..d1cabb6a0a13 100644 --- a/drivers/media/dvb/frontends/tda80xx.c +++ b/drivers/media/dvb/frontends/tda80xx.c @@ -30,6 +30,7 @@ #include #include #include +#include #include #include "dvb_frontend.h" From bf4e70e54cf31dcca48d279c7f7e71328eebe749 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 28 Aug 2005 03:52:22 +0100 Subject: [PATCH 258/584] [PATCH] missing include in smc-ultra Signed-off-by: Al Viro Signed-off-by: Linus Torvalds --- drivers/net/smc-ultra.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/smc-ultra.c b/drivers/net/smc-ultra.c index 6d9dae60a697..ba8593ac3f8a 100644 --- a/drivers/net/smc-ultra.c +++ b/drivers/net/smc-ultra.c @@ -68,6 +68,7 @@ static const char version[] = #include #include +#include #include #include "8390.h" From aa7e16d6b88b3b38db0d2ee49ed5e44e7b2045ec Mon Sep 17 00:00:00 2001 From: Jeff Garzik Date: Mon, 29 Aug 2005 15:12:56 -0400 Subject: [PATCH 259/584] [libata sata_nv] NVIDIA ok'd license change from OSL+GPL to GPL --- drivers/scsi/sata_nv.c | 27 +++++++++++++-------------- 1 file changed, 13 insertions(+), 14 deletions(-) diff --git a/drivers/scsi/sata_nv.c b/drivers/scsi/sata_nv.c index 1e10370adc34..a61c1509aebb 100644 --- a/drivers/scsi/sata_nv.c +++ b/drivers/scsi/sata_nv.c @@ -4,21 +4,20 @@ * Copyright 2004 NVIDIA Corp. All rights reserved. * Copyright 2004 Andrew Chew * - * The contents of this file are subject to the Open - * Software License version 1.1 that can be found at - * http://www.opensource.org/licenses/osl-1.1.txt and is included herein - * by reference. * - * Alternatively, the contents of this file may be used under the terms - * of the GNU General Public License version 2 (the "GPL") as distributed - * in the kernel source COPYING file, in which case the provisions of - * the GPL are applicable instead of the above. 
If you wish to allow - * the use of your version of this file only under the terms of the - * GPL and not to allow others to use your version of this file under - * the OSL, indicate your decision by deleting the provisions above and - * replace them with the notice and other provisions required by the GPL. - * If you do not delete the provisions above, a recipient may use your - * version of this file under either the OSL or the GPL. + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; see the file COPYING. If not, write to + * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. * * * libata documentation is available via 'make {ps|pdf}docs', From 5ea68e02766c52c153c62fc423cda659a80e45fa Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Mon, 29 Aug 2005 12:44:40 -0700 Subject: [PATCH 260/584] [SPARC64]: Fix trap state reading for instruction_access_exception. 1) Read ASI_IMMU SFSR not ASI_DMMU. 2) IMMU has no SFAR, read TPC instead 3) Delete old and incorrect comment about the DTLB protection trap having a dependency on the SFSR contents in order to function correctly Signed-off-by: David S. 
Miller --- arch/sparc64/kernel/entry.S | 15 ++++----------- 1 file changed, 4 insertions(+), 11 deletions(-) diff --git a/arch/sparc64/kernel/entry.S b/arch/sparc64/kernel/entry.S index 88332f00094a..6d0476ff4ff8 100644 --- a/arch/sparc64/kernel/entry.S +++ b/arch/sparc64/kernel/entry.S @@ -690,11 +690,6 @@ netbsd_syscall: retl nop - /* These next few routines must be sure to clear the - * SFSR FaultValid bit so that the fast tlb data protection - * handler does not flush the wrong context and lock up the - * box. - */ .globl __do_data_access_exception .globl __do_data_access_exception_tl1 __do_data_access_exception_tl1: @@ -733,9 +728,8 @@ __do_instruction_access_exception_tl1: rdpr %pstate, %g4 wrpr %g4, PSTATE_MG|PSTATE_AG, %pstate mov TLB_SFSR, %g3 - mov DMMU_SFAR, %g5 - ldxa [%g3] ASI_DMMU, %g4 ! Get SFSR - ldxa [%g5] ASI_DMMU, %g5 ! Get SFAR + ldxa [%g3] ASI_IMMU, %g4 ! Get SFSR + rdpr %tpc, %g5 ! IMMU has no SFAR, use TPC stxa %g0, [%g3] ASI_IMMU ! Clear FaultValid bit membar #Sync sethi %hi(109f), %g7 @@ -752,9 +746,8 @@ __do_instruction_access_exception: rdpr %pstate, %g4 wrpr %g4, PSTATE_MG|PSTATE_AG, %pstate mov TLB_SFSR, %g3 - mov DMMU_SFAR, %g5 - ldxa [%g3] ASI_DMMU, %g4 ! Get SFSR - ldxa [%g5] ASI_DMMU, %g5 ! Get SFAR + ldxa [%g3] ASI_IMMU, %g4 ! Get SFSR + rdpr %tpc, %g5 ! IMMU has no SFAR, use TPC stxa %g0, [%g3] ASI_IMMU ! Clear FaultValid bit membar #Sync sethi %hi(109f), %g7 From bde4e4ee9f90142d550e2684dec2c8df302f5f8e Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Mon, 29 Aug 2005 12:44:57 -0700 Subject: [PATCH 261/584] [SPARC64]: Do not call winfix_dax blindly Verify we really are taking a data access exception trap, at TL1, from one of the window spill/fill handlers. Else call a new function, data_access_exception_tl1, to log the error. Signed-off-by: David S. 
Miller --- arch/sparc64/kernel/entry.S | 16 ++++++++++++++++ arch/sparc64/kernel/traps.c | 11 +++++++++++ 2 files changed, 27 insertions(+) diff --git a/arch/sparc64/kernel/entry.S b/arch/sparc64/kernel/entry.S index 6d0476ff4ff8..214cd0ebe832 100644 --- a/arch/sparc64/kernel/entry.S +++ b/arch/sparc64/kernel/entry.S @@ -701,8 +701,24 @@ __do_data_access_exception_tl1: ldxa [%g5] ASI_DMMU, %g5 ! Get SFAR stxa %g0, [%g3] ASI_DMMU ! Clear SFSR.FaultValid bit membar #Sync + rdpr %tt, %g3 + cmp %g3, 0x80 ! first win spill/fill trap + blu,pn %xcc, 1f + cmp %g3, 0xff ! last win spill/fill trap + bgu,pn %xcc, 1f + nop ba,pt %xcc, winfix_dax rdpr %tpc, %g3 +1: sethi %hi(109f), %g7 + ba,pt %xcc, etraptl1 +109: or %g7, %lo(109b), %g7 + mov %l4, %o1 + mov %l5, %o2 + call data_access_exception_tl1 + add %sp, PTREGS_OFF, %o0 + ba,pt %xcc, rtrap + clr %l6 + __do_data_access_exception: rdpr %pstate, %g4 wrpr %g4, PSTATE_MG|PSTATE_AG, %pstate diff --git a/arch/sparc64/kernel/traps.c b/arch/sparc64/kernel/traps.c index 0c9e54b2f0c8..210b3e321c29 100644 --- a/arch/sparc64/kernel/traps.c +++ b/arch/sparc64/kernel/traps.c @@ -220,6 +220,17 @@ void data_access_exception(struct pt_regs *regs, force_sig_info(SIGSEGV, &info, current); } +void data_access_exception_tl1(struct pt_regs *regs, + unsigned long sfsr, unsigned long sfar) +{ + if (notify_die(DIE_TRAP_TL1, "data access exception tl1", regs, + 0, 0x30, SIGTRAP) == NOTIFY_STOP) + return; + + dump_tl1_traplog((struct tl1_traplog *)(regs + 1)); + data_access_exception(regs, sfsr, sfar); +} + #ifdef CONFIG_PCI /* This is really pathetic... */ extern volatile int pci_poke_in_progress; From 6c52a96e6cacb35403b85c3b42db0faf26f3ed85 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Mon, 29 Aug 2005 12:45:11 -0700 Subject: [PATCH 262/584] [SPARC64]: Revamp Spitfire error trap handling. Current uncorrectable error handling was poor enough that the processor could just loop taking the same trap over and over again. 
Fix things up so that we at least get a log message and perhaps even some register state. In the process, much consolidation became possible, particularly with the correctable error handler. Prefix assembler and C function names with "spitfire" to indicate that these are for Ultra-I/II/IIi/IIe only. More work is needed to make these routines robust and featureful to the level of the Ultra-III error handlers. Signed-off-by: David S. Miller --- arch/sparc64/kernel/entry.S | 336 +++++++++++++++++++------------- arch/sparc64/kernel/traps.c | 264 ++++++++++++++----------- arch/sparc64/kernel/ttable.S | 27 +-- arch/sparc64/kernel/unaligned.c | 18 +- arch/sparc64/kernel/winfixup.S | 6 +- include/asm-sparc64/sfafsr.h | 82 ++++++++ 6 files changed, 453 insertions(+), 280 deletions(-) create mode 100644 include/asm-sparc64/sfafsr.h diff --git a/arch/sparc64/kernel/entry.S b/arch/sparc64/kernel/entry.S index 214cd0ebe832..cecdc0a7521f 100644 --- a/arch/sparc64/kernel/entry.S +++ b/arch/sparc64/kernel/entry.S @@ -21,6 +21,7 @@ #include #include #include +#include #define curptr g6 @@ -690,88 +691,102 @@ netbsd_syscall: retl nop - .globl __do_data_access_exception - .globl __do_data_access_exception_tl1 -__do_data_access_exception_tl1: - rdpr %pstate, %g4 - wrpr %g4, PSTATE_MG|PSTATE_AG, %pstate - mov TLB_SFSR, %g3 - mov DMMU_SFAR, %g5 - ldxa [%g3] ASI_DMMU, %g4 ! Get SFSR - ldxa [%g5] ASI_DMMU, %g5 ! Get SFAR - stxa %g0, [%g3] ASI_DMMU ! Clear SFSR.FaultValid bit + /* We need to carefully read the error status, ACK + * the errors, prevent recursive traps, and pass the + * information on to C code for logging. + * + * We pass the AFAR in as-is, and we encode the status + * information as described in asm-sparc64/sfafsr.h + */ + .globl __spitfire_access_error +__spitfire_access_error: + /* Disable ESTATE error reporting so that we do not + * take recursive traps and RED state the processor. 
+ */ + stxa %g0, [%g0] ASI_ESTATE_ERROR_EN membar #Sync + + mov UDBE_UE, %g1 + ldxa [%g0] ASI_AFSR, %g4 ! Get AFSR + + /* __spitfire_cee_trap branches here with AFSR in %g4 and + * UDBE_CE in %g1. It only clears ESTATE_ERR_CE in the + * ESTATE Error Enable register. + */ +__spitfire_cee_trap_continue: + ldxa [%g0] ASI_AFAR, %g5 ! Get AFAR + rdpr %tt, %g3 - cmp %g3, 0x80 ! first win spill/fill trap - blu,pn %xcc, 1f - cmp %g3, 0xff ! last win spill/fill trap - bgu,pn %xcc, 1f + and %g3, 0x1ff, %g3 ! Paranoia + sllx %g3, SFSTAT_TRAP_TYPE_SHIFT, %g3 + or %g4, %g3, %g4 + rdpr %tl, %g3 + cmp %g3, 1 + mov 1, %g3 + bleu %xcc, 1f + sllx %g3, SFSTAT_TL_GT_ONE_SHIFT, %g3 + + or %g4, %g3, %g4 + + /* Read in the UDB error register state, clearing the + * sticky error bits as-needed. We only clear them if + * the UE bit is set. Likewise, __spitfire_cee_trap + * below will only do so if the CE bit is set. + * + * NOTE: UltraSparc-I/II have high and low UDB error + * registers, corresponding to the two UDB units + * present on those chips. UltraSparc-IIi only + * has a single UDB, called "SDB" in the manual. + * For IIi the upper UDB register always reads + * as zero so for our purposes things will just + * work with the checks below. + */ +1: ldxa [%g0] ASI_UDBH_ERROR_R, %g3 + and %g3, 0x3ff, %g7 ! Paranoia + sllx %g7, SFSTAT_UDBH_SHIFT, %g7 + or %g4, %g7, %g4 + andcc %g3, %g1, %g3 ! UDBE_UE or UDBE_CE + be,pn %xcc, 1f nop - ba,pt %xcc, winfix_dax - rdpr %tpc, %g3 -1: sethi %hi(109f), %g7 + stxa %g3, [%g0] ASI_UDB_ERROR_W + membar #Sync + +1: mov 0x18, %g3 + ldxa [%g3] ASI_UDBL_ERROR_R, %g3 + and %g3, 0x3ff, %g7 ! Paranoia + sllx %g7, SFSTAT_UDBL_SHIFT, %g7 + or %g4, %g7, %g4 + andcc %g3, %g1, %g3 ! UDBE_UE or UDBE_CE + be,pn %xcc, 1f + nop + mov 0x18, %g7 + stxa %g3, [%g7] ASI_UDB_ERROR_W + membar #Sync + +1: /* Ok, now that we've latched the error state, + * clear the sticky bits in the AFSR. 
+ */ + stxa %g4, [%g0] ASI_AFSR + membar #Sync + + rdpr %tl, %g2 + cmp %g2, 1 + rdpr %pil, %g2 + bleu,pt %xcc, 1f + wrpr %g0, 15, %pil + ba,pt %xcc, etraptl1 -109: or %g7, %lo(109b), %g7 - mov %l4, %o1 - mov %l5, %o2 - call data_access_exception_tl1 - add %sp, PTREGS_OFF, %o0 - ba,pt %xcc, rtrap - clr %l6 + rd %pc, %g7 -__do_data_access_exception: - rdpr %pstate, %g4 - wrpr %g4, PSTATE_MG|PSTATE_AG, %pstate - mov TLB_SFSR, %g3 - mov DMMU_SFAR, %g5 - ldxa [%g3] ASI_DMMU, %g4 ! Get SFSR - ldxa [%g5] ASI_DMMU, %g5 ! Get SFAR - stxa %g0, [%g3] ASI_DMMU ! Clear SFSR.FaultValid bit - membar #Sync - sethi %hi(109f), %g7 - ba,pt %xcc, etrap -109: or %g7, %lo(109b), %g7 - mov %l4, %o1 - mov %l5, %o2 - call data_access_exception - add %sp, PTREGS_OFF, %o0 - ba,pt %xcc, rtrap - clr %l6 + ba,pt %xcc, 2f + nop - .globl __do_instruction_access_exception - .globl __do_instruction_access_exception_tl1 -__do_instruction_access_exception_tl1: - rdpr %pstate, %g4 - wrpr %g4, PSTATE_MG|PSTATE_AG, %pstate - mov TLB_SFSR, %g3 - ldxa [%g3] ASI_IMMU, %g4 ! Get SFSR - rdpr %tpc, %g5 ! IMMU has no SFAR, use TPC - stxa %g0, [%g3] ASI_IMMU ! Clear FaultValid bit - membar #Sync - sethi %hi(109f), %g7 - ba,pt %xcc, etraptl1 -109: or %g7, %lo(109b), %g7 - mov %l4, %o1 - mov %l5, %o2 - call instruction_access_exception_tl1 - add %sp, PTREGS_OFF, %o0 - ba,pt %xcc, rtrap - clr %l6 +1: ba,pt %xcc, etrap_irq + rd %pc, %g7 -__do_instruction_access_exception: - rdpr %pstate, %g4 - wrpr %g4, PSTATE_MG|PSTATE_AG, %pstate - mov TLB_SFSR, %g3 - ldxa [%g3] ASI_IMMU, %g4 ! Get SFSR - rdpr %tpc, %g5 ! IMMU has no SFAR, use TPC - stxa %g0, [%g3] ASI_IMMU ! 
Clear FaultValid bit - membar #Sync - sethi %hi(109f), %g7 - ba,pt %xcc, etrap -109: or %g7, %lo(109b), %g7 - mov %l4, %o1 +2: mov %l4, %o1 mov %l5, %o2 - call instruction_access_exception + call spitfire_access_error add %sp, PTREGS_OFF, %o0 ba,pt %xcc, rtrap clr %l6 @@ -793,79 +808,124 @@ __do_instruction_access_exception: * as it is the only situation where we can safely record * and log. For trap level >1 we just clear the CE bit * in the AFSR and return. - */ - - /* Our trap handling infrastructure allows us to preserve - * two 64-bit values during etrap for arguments to - * subsequent C code. Therefore we encode the information - * as follows: * - * value 1) Full 64-bits of AFAR - * value 2) Low 33-bits of AFSR, then bits 33-->42 - * are UDBL error status and bits 43-->52 - * are UDBH error status + * This is just like __spitfire_access_error above, but it + * specifically handles correctable errors. If an + * uncorrectable error is indicated in the AFSR we + * will branch directly above to __spitfire_access_error + * to handle it instead. Uncorrectable therefore takes + * priority over correctable, and the error logging + * C code will notice this case by inspecting the + * trap type. */ - .align 64 - .globl cee_trap -cee_trap: - ldxa [%g0] ASI_AFSR, %g1 ! Read AFSR - ldxa [%g0] ASI_AFAR, %g2 ! Read AFAR - sllx %g1, 31, %g1 ! Clear reserved bits - srlx %g1, 31, %g1 ! in AFSR + .globl __spitfire_cee_trap +__spitfire_cee_trap: + ldxa [%g0] ASI_AFSR, %g4 ! Get AFSR + mov 1, %g3 + sllx %g3, SFAFSR_UE_SHIFT, %g3 + andcc %g4, %g3, %g0 ! Check for UE + bne,pn %xcc, __spitfire_access_error + nop - /* NOTE: UltraSparc-I/II have high and low UDB error - * registers, corresponding to the two UDB units - * present on those chips. UltraSparc-IIi only - * has a single UDB, called "SDB" in the manual. - * For IIi the upper UDB register always reads - * as zero so for our purposes things will just - * work with the checks below.
+ /* Ok, in this case we only have a correctable error. + * Indicate we only wish to capture that state in register + * %g1, and we only disable CE error reporting unlike UE + * handling which disables all errors. */ - ldxa [%g0] ASI_UDBL_ERROR_R, %g3 ! Read UDB-Low error status - andcc %g3, (1 << 8), %g4 ! Check CE bit - sllx %g3, (64 - 10), %g3 ! Clear reserved bits - srlx %g3, (64 - 10), %g3 ! in UDB-Low error status + ldxa [%g0] ASI_ESTATE_ERROR_EN, %g3 + andn %g3, ESTATE_ERR_CE, %g3 + stxa %g3, [%g0] ASI_ESTATE_ERROR_EN + membar #Sync - sllx %g3, (33 + 0), %g3 ! Shift up to encoding area - or %g1, %g3, %g1 ! Or it in - be,pn %xcc, 1f ! Branch if CE bit was clear + /* Preserve AFSR in %g4, indicate UDB state to capture in %g1 */ + ba,pt %xcc, __spitfire_cee_trap_continue + mov UDBE_CE, %g1 + + .globl __spitfire_data_access_exception + .globl __spitfire_data_access_exception_tl1 +__spitfire_data_access_exception_tl1: + rdpr %pstate, %g4 + wrpr %g4, PSTATE_MG|PSTATE_AG, %pstate + mov TLB_SFSR, %g3 + mov DMMU_SFAR, %g5 + ldxa [%g3] ASI_DMMU, %g4 ! Get SFSR + ldxa [%g5] ASI_DMMU, %g5 ! Get SFAR + stxa %g0, [%g3] ASI_DMMU ! Clear SFSR.FaultValid bit + membar #Sync + rdpr %tt, %g3 + cmp %g3, 0x80 ! first win spill/fill trap + blu,pn %xcc, 1f + cmp %g3, 0xff ! last win spill/fill trap + bgu,pn %xcc, 1f nop - stxa %g4, [%g0] ASI_UDB_ERROR_W ! Clear CE sticky bit in UDBL - membar #Sync ! Synchronize ASI stores -1: mov 0x18, %g5 ! Addr of UDB-High error status - ldxa [%g5] ASI_UDBH_ERROR_R, %g3 ! Read it + ba,pt %xcc, winfix_dax + rdpr %tpc, %g3 +1: sethi %hi(109f), %g7 + ba,pt %xcc, etraptl1 +109: or %g7, %lo(109b), %g7 + mov %l4, %o1 + mov %l5, %o2 + call spitfire_data_access_exception_tl1 + add %sp, PTREGS_OFF, %o0 + ba,pt %xcc, rtrap + clr %l6 - andcc %g3, (1 << 8), %g4 ! Check CE bit - sllx %g3, (64 - 10), %g3 ! Clear reserved bits - srlx %g3, (64 - 10), %g3 ! in UDB-High error status - sllx %g3, (33 + 10), %g3 ! Shift up to encoding area - or %g1, %g3, %g1 ! 
Or it in - be,pn %xcc, 1f ! Branch if CE bit was clear - nop - nop +__spitfire_data_access_exception: + rdpr %pstate, %g4 + wrpr %g4, PSTATE_MG|PSTATE_AG, %pstate + mov TLB_SFSR, %g3 + mov DMMU_SFAR, %g5 + ldxa [%g3] ASI_DMMU, %g4 ! Get SFSR + ldxa [%g5] ASI_DMMU, %g5 ! Get SFAR + stxa %g0, [%g3] ASI_DMMU ! Clear SFSR.FaultValid bit + membar #Sync + sethi %hi(109f), %g7 + ba,pt %xcc, etrap +109: or %g7, %lo(109b), %g7 + mov %l4, %o1 + mov %l5, %o2 + call spitfire_data_access_exception + add %sp, PTREGS_OFF, %o0 + ba,pt %xcc, rtrap + clr %l6 - stxa %g4, [%g5] ASI_UDB_ERROR_W ! Clear CE sticky bit in UDBH - membar #Sync ! Synchronize ASI stores -1: mov 1, %g5 ! AFSR CE bit is - sllx %g5, 20, %g5 ! bit 20 - stxa %g5, [%g0] ASI_AFSR ! Clear CE sticky bit in AFSR - membar #Sync ! Synchronize ASI stores - sllx %g2, (64 - 41), %g2 ! Clear reserved bits - srlx %g2, (64 - 41), %g2 ! in latched AFAR + .globl __spitfire_insn_access_exception + .globl __spitfire_insn_access_exception_tl1 +__spitfire_insn_access_exception_tl1: + rdpr %pstate, %g4 + wrpr %g4, PSTATE_MG|PSTATE_AG, %pstate + mov TLB_SFSR, %g3 + ldxa [%g3] ASI_IMMU, %g4 ! Get SFSR + rdpr %tpc, %g5 ! IMMU has no SFAR, use TPC + stxa %g0, [%g3] ASI_IMMU ! Clear FaultValid bit + membar #Sync + sethi %hi(109f), %g7 + ba,pt %xcc, etraptl1 +109: or %g7, %lo(109b), %g7 + mov %l4, %o1 + mov %l5, %o2 + call spitfire_insn_access_exception_tl1 + add %sp, PTREGS_OFF, %o0 + ba,pt %xcc, rtrap + clr %l6 - andn %g2, 0x0f, %g2 ! Finish resv bit clearing - mov %g1, %g4 ! Move AFSR+UDB* into save reg - mov %g2, %g5 ! Move AFAR into save reg - rdpr %pil, %g2 - wrpr %g0, 15, %pil - ba,pt %xcc, etrap_irq - rd %pc, %g7 - mov %l4, %o0 - - mov %l5, %o1 - call cee_log - add %sp, PTREGS_OFF, %o2 - ba,a,pt %xcc, rtrap_irq +__spitfire_insn_access_exception: + rdpr %pstate, %g4 + wrpr %g4, PSTATE_MG|PSTATE_AG, %pstate + mov TLB_SFSR, %g3 + ldxa [%g3] ASI_IMMU, %g4 ! Get SFSR + rdpr %tpc, %g5 ! 
IMMU has no SFAR, use TPC + stxa %g0, [%g3] ASI_IMMU ! Clear FaultValid bit + membar #Sync + sethi %hi(109f), %g7 + ba,pt %xcc, etrap +109: or %g7, %lo(109b), %g7 + mov %l4, %o1 + mov %l5, %o2 + call spitfire_insn_access_exception + add %sp, PTREGS_OFF, %o0 + ba,pt %xcc, rtrap + clr %l6 /* Capture I/D/E-cache state into per-cpu error scoreboard. * diff --git a/arch/sparc64/kernel/traps.c b/arch/sparc64/kernel/traps.c index 210b3e321c29..b280b2ef674f 100644 --- a/arch/sparc64/kernel/traps.c +++ b/arch/sparc64/kernel/traps.c @@ -33,6 +33,7 @@ #include #include #include +#include #include #include #include @@ -143,8 +144,7 @@ void do_BUG(const char *file, int line) } #endif -void instruction_access_exception(struct pt_regs *regs, - unsigned long sfsr, unsigned long sfar) +void spitfire_insn_access_exception(struct pt_regs *regs, unsigned long sfsr, unsigned long sfar) { siginfo_t info; @@ -153,8 +153,8 @@ void instruction_access_exception(struct pt_regs *regs, return; if (regs->tstate & TSTATE_PRIV) { - printk("instruction_access_exception: SFSR[%016lx] SFAR[%016lx], going.\n", - sfsr, sfar); + printk("spitfire_insn_access_exception: SFSR[%016lx] " + "SFAR[%016lx], going.\n", sfsr, sfar); die_if_kernel("Iax", regs); } if (test_thread_flag(TIF_32BIT)) { @@ -169,19 +169,17 @@ void instruction_access_exception(struct pt_regs *regs, force_sig_info(SIGSEGV, &info, current); } -void instruction_access_exception_tl1(struct pt_regs *regs, - unsigned long sfsr, unsigned long sfar) +void spitfire_insn_access_exception_tl1(struct pt_regs *regs, unsigned long sfsr, unsigned long sfar) { if (notify_die(DIE_TRAP_TL1, "instruction access exception tl1", regs, 0, 0x8, SIGTRAP) == NOTIFY_STOP) return; dump_tl1_traplog((struct tl1_traplog *)(regs + 1)); - instruction_access_exception(regs, sfsr, sfar); + spitfire_insn_access_exception(regs, sfsr, sfar); } -void data_access_exception(struct pt_regs *regs, - unsigned long sfsr, unsigned long sfar) +void 
spitfire_data_access_exception(struct pt_regs *regs, unsigned long sfsr, unsigned long sfar) { siginfo_t info; @@ -207,8 +205,8 @@ void data_access_exception(struct pt_regs *regs, return; } /* Shit... */ - printk("data_access_exception: SFSR[%016lx] SFAR[%016lx], going.\n", - sfsr, sfar); + printk("spitfire_data_access_exception: SFSR[%016lx] " + "SFAR[%016lx], going.\n", sfsr, sfar); die_if_kernel("Dax", regs); } @@ -220,15 +218,14 @@ void data_access_exception(struct pt_regs *regs, force_sig_info(SIGSEGV, &info, current); } -void data_access_exception_tl1(struct pt_regs *regs, - unsigned long sfsr, unsigned long sfar) +void spitfire_data_access_exception_tl1(struct pt_regs *regs, unsigned long sfsr, unsigned long sfar) { if (notify_die(DIE_TRAP_TL1, "data access exception tl1", regs, 0, 0x30, SIGTRAP) == NOTIFY_STOP) return; dump_tl1_traplog((struct tl1_traplog *)(regs + 1)); - data_access_exception(regs, sfsr, sfar); + spitfire_data_access_exception(regs, sfsr, sfar); } #ifdef CONFIG_PCI @@ -264,54 +261,13 @@ static void spitfire_clean_and_reenable_l1_caches(void) : "memory"); } -void do_iae(struct pt_regs *regs) +static void spitfire_enable_estate_errors(void) { - siginfo_t info; - - spitfire_clean_and_reenable_l1_caches(); - - if (notify_die(DIE_TRAP, "instruction access exception", regs, - 0, 0x8, SIGTRAP) == NOTIFY_STOP) - return; - - info.si_signo = SIGBUS; - info.si_errno = 0; - info.si_code = BUS_OBJERR; - info.si_addr = (void *)0; - info.si_trapno = 0; - force_sig_info(SIGBUS, &info, current); -} - -void do_dae(struct pt_regs *regs) -{ - siginfo_t info; - -#ifdef CONFIG_PCI - if (pci_poke_in_progress && pci_poke_cpu == smp_processor_id()) { - spitfire_clean_and_reenable_l1_caches(); - - pci_poke_faulted = 1; - - /* Why the fuck did they have to change this? 
*/ - if (tlb_type == cheetah || tlb_type == cheetah_plus) - regs->tpc += 4; - - regs->tnpc = regs->tpc + 4; - return; - } -#endif - spitfire_clean_and_reenable_l1_caches(); - - if (notify_die(DIE_TRAP, "data access exception", regs, - 0, 0x30, SIGTRAP) == NOTIFY_STOP) - return; - - info.si_signo = SIGBUS; - info.si_errno = 0; - info.si_code = BUS_OBJERR; - info.si_addr = (void *)0; - info.si_trapno = 0; - force_sig_info(SIGBUS, &info, current); + __asm__ __volatile__("stxa %0, [%%g0] %1\n\t" + "membar #Sync" + : /* no outputs */ + : "r" (ESTATE_ERR_ALL), + "i" (ASI_ESTATE_ERROR_EN)); } static char ecc_syndrome_table[] = { @@ -349,65 +305,15 @@ static char ecc_syndrome_table[] = { 0x0b, 0x48, 0x48, 0x4b, 0x48, 0x4b, 0x4b, 0x4a }; -/* cee_trap in entry.S encodes AFSR/UDBH/UDBL error status - * in the following format. The AFAR is left as is, with - * reserved bits cleared, and is a raw 40-bit physical - * address. - */ -#define CE_STATUS_UDBH_UE (1UL << (43 + 9)) -#define CE_STATUS_UDBH_CE (1UL << (43 + 8)) -#define CE_STATUS_UDBH_ESYNDR (0xffUL << 43) -#define CE_STATUS_UDBH_SHIFT 43 -#define CE_STATUS_UDBL_UE (1UL << (33 + 9)) -#define CE_STATUS_UDBL_CE (1UL << (33 + 8)) -#define CE_STATUS_UDBL_ESYNDR (0xffUL << 33) -#define CE_STATUS_UDBL_SHIFT 33 -#define CE_STATUS_AFSR_MASK (0x1ffffffffUL) -#define CE_STATUS_AFSR_ME (1UL << 32) -#define CE_STATUS_AFSR_PRIV (1UL << 31) -#define CE_STATUS_AFSR_ISAP (1UL << 30) -#define CE_STATUS_AFSR_ETP (1UL << 29) -#define CE_STATUS_AFSR_IVUE (1UL << 28) -#define CE_STATUS_AFSR_TO (1UL << 27) -#define CE_STATUS_AFSR_BERR (1UL << 26) -#define CE_STATUS_AFSR_LDP (1UL << 25) -#define CE_STATUS_AFSR_CP (1UL << 24) -#define CE_STATUS_AFSR_WP (1UL << 23) -#define CE_STATUS_AFSR_EDP (1UL << 22) -#define CE_STATUS_AFSR_UE (1UL << 21) -#define CE_STATUS_AFSR_CE (1UL << 20) -#define CE_STATUS_AFSR_ETS (0xfUL << 16) -#define CE_STATUS_AFSR_ETS_SHIFT 16 -#define CE_STATUS_AFSR_PSYND (0xffffUL << 0) -#define CE_STATUS_AFSR_PSYND_SHIFT 0 - 
-/* Layout of Ecache TAG Parity Syndrome of AFSR */ -#define AFSR_ETSYNDROME_7_0 0x1UL /* E$-tag bus bits <7:0> */ -#define AFSR_ETSYNDROME_15_8 0x2UL /* E$-tag bus bits <15:8> */ -#define AFSR_ETSYNDROME_21_16 0x4UL /* E$-tag bus bits <21:16> */ -#define AFSR_ETSYNDROME_24_22 0x8UL /* E$-tag bus bits <24:22> */ - static char *syndrome_unknown = ""; -asmlinkage void cee_log(unsigned long ce_status, - unsigned long afar, - struct pt_regs *regs) +static void spitfire_log_udb_syndrome(unsigned long afar, unsigned long udbh, unsigned long udbl, unsigned long bit) { - char memmod_str[64]; - char *p; - unsigned short scode, udb_reg; + unsigned short scode; + char memmod_str[64], *p; - printk(KERN_WARNING "CPU[%d]: Correctable ECC Error " - "AFSR[%lx] AFAR[%016lx] UDBL[%lx] UDBH[%lx]\n", - smp_processor_id(), - (ce_status & CE_STATUS_AFSR_MASK), - afar, - ((ce_status >> CE_STATUS_UDBL_SHIFT) & 0x3ffUL), - ((ce_status >> CE_STATUS_UDBH_SHIFT) & 0x3ffUL)); - - udb_reg = ((ce_status >> CE_STATUS_UDBL_SHIFT) & 0x3ffUL); - if (udb_reg & (1 << 8)) { - scode = ecc_syndrome_table[udb_reg & 0xff]; + if (udbl & bit) { + scode = ecc_syndrome_table[udbl & 0xff]; if (prom_getunumber(scode, afar, memmod_str, sizeof(memmod_str)) == -1) p = syndrome_unknown; @@ -418,9 +324,8 @@ asmlinkage void cee_log(unsigned long ce_status, smp_processor_id(), scode, p); } - udb_reg = ((ce_status >> CE_STATUS_UDBH_SHIFT) & 0x3ffUL); - if (udb_reg & (1 << 8)) { - scode = ecc_syndrome_table[udb_reg & 0xff]; + if (udbh & bit) { + scode = ecc_syndrome_table[udbh & 0xff]; if (prom_getunumber(scode, afar, memmod_str, sizeof(memmod_str)) == -1) p = syndrome_unknown; @@ -430,6 +335,127 @@ asmlinkage void cee_log(unsigned long ce_status, "Memory Module \"%s\"\n", smp_processor_id(), scode, p); } + +} + +static void spitfire_cee_log(unsigned long afsr, unsigned long afar, unsigned long udbh, unsigned long udbl, int tl1, struct pt_regs *regs) +{ + + printk(KERN_WARNING "CPU[%d]: Correctable ECC Error " + 
"AFSR[%lx] AFAR[%016lx] UDBL[%lx] UDBH[%lx] TL>1[%d]\n", + smp_processor_id(), afsr, afar, udbl, udbh, tl1); + + spitfire_log_udb_syndrome(afar, udbh, udbl, UDBE_CE); + + /* We always log it, even if someone is listening for this + * trap. + */ + notify_die(DIE_TRAP, "Correctable ECC Error", regs, + 0, TRAP_TYPE_CEE, SIGTRAP); + + /* The Correctable ECC Error trap does not disable I/D caches. So + * we only have to restore the ESTATE Error Enable register. + */ + spitfire_enable_estate_errors(); +} + +static void spitfire_ue_log(unsigned long afsr, unsigned long afar, unsigned long udbh, unsigned long udbl, unsigned long tt, int tl1, struct pt_regs *regs) +{ + siginfo_t info; + + printk(KERN_WARNING "CPU[%d]: Uncorrectable Error AFSR[%lx] " + "AFAR[%lx] UDBL[%lx] UDBH[%ld] TT[%lx] TL>1[%d]\n", + smp_processor_id(), afsr, afar, udbl, udbh, tt, tl1); + + /* XXX add more human friendly logging of the error status + * XXX as is implemented for cheetah + */ + + spitfire_log_udb_syndrome(afar, udbh, udbl, UDBE_UE); + + /* We always log it, even if someone is listening for this + * trap. 
+ */ + notify_die(DIE_TRAP, "Uncorrectable Error", regs, + 0, tt, SIGTRAP); + + if (regs->tstate & TSTATE_PRIV) { + if (tl1) + dump_tl1_traplog((struct tl1_traplog *)(regs + 1)); + die_if_kernel("UE", regs); + } + + /* XXX need more intelligent processing here, such as is implemented + * XXX for cheetah errors, in fact if the E-cache still holds the + * XXX line with bad parity this will loop + */ + + spitfire_clean_and_reenable_l1_caches(); + spitfire_enable_estate_errors(); + + if (test_thread_flag(TIF_32BIT)) { + regs->tpc &= 0xffffffff; + regs->tnpc &= 0xffffffff; + } + info.si_signo = SIGBUS; + info.si_errno = 0; + info.si_code = BUS_OBJERR; + info.si_addr = (void *)0; + info.si_trapno = 0; + force_sig_info(SIGBUS, &info, current); +} + +void spitfire_access_error(struct pt_regs *regs, unsigned long status_encoded, unsigned long afar) +{ + unsigned long afsr, tt, udbh, udbl; + int tl1; + + afsr = (status_encoded & SFSTAT_AFSR_MASK) >> SFSTAT_AFSR_SHIFT; + tt = (status_encoded & SFSTAT_TRAP_TYPE) >> SFSTAT_TRAP_TYPE_SHIFT; + tl1 = (status_encoded & SFSTAT_TL_GT_ONE) ? 1 : 0; + udbl = (status_encoded & SFSTAT_UDBL_MASK) >> SFSTAT_UDBL_SHIFT; + udbh = (status_encoded & SFSTAT_UDBH_MASK) >> SFSTAT_UDBH_SHIFT; + +#ifdef CONFIG_PCI + if (tt == TRAP_TYPE_DAE && + pci_poke_in_progress && pci_poke_cpu == smp_processor_id()) { + spitfire_clean_and_reenable_l1_caches(); + spitfire_enable_estate_errors(); + + pci_poke_faulted = 1; + regs->tnpc = regs->tpc + 4; + return; + } +#endif + + if (afsr & SFAFSR_UE) + spitfire_ue_log(afsr, afar, udbh, udbl, tt, tl1, regs); + + if (tt == TRAP_TYPE_CEE) { + /* Handle the case where we took a CEE trap, but ACK'd + * only the UE state in the UDB error registers. 
+ */ + if (afsr & SFAFSR_UE) { + if (udbh & UDBE_CE) { + __asm__ __volatile__( + "stxa %0, [%1] %2\n\t" + "membar #Sync" + : /* no outputs */ + : "r" (udbh & UDBE_CE), + "r" (0x0), "i" (ASI_UDB_ERROR_W)); + } + if (udbl & UDBE_CE) { + __asm__ __volatile__( + "stxa %0, [%1] %2\n\t" + "membar #Sync" + : /* no outputs */ + : "r" (udbl & UDBE_CE), + "r" (0x18), "i" (ASI_UDB_ERROR_W)); + } + } + + spitfire_cee_log(afsr, afar, udbh, udbl, tl1, regs); + } } int cheetah_pcache_forced_on; diff --git a/arch/sparc64/kernel/ttable.S b/arch/sparc64/kernel/ttable.S index 491bb3681f9d..8365bc1f81f3 100644 --- a/arch/sparc64/kernel/ttable.S +++ b/arch/sparc64/kernel/ttable.S @@ -18,9 +18,10 @@ sparc64_ttable_tl0: tl0_resv000: BOOT_KERNEL BTRAP(0x1) BTRAP(0x2) BTRAP(0x3) tl0_resv004: BTRAP(0x4) BTRAP(0x5) BTRAP(0x6) BTRAP(0x7) tl0_iax: membar #Sync - TRAP_NOSAVE_7INSNS(__do_instruction_access_exception) + TRAP_NOSAVE_7INSNS(__spitfire_insn_access_exception) tl0_resv009: BTRAP(0x9) -tl0_iae: TRAP(do_iae) +tl0_iae: membar #Sync + TRAP_NOSAVE_7INSNS(__spitfire_access_error) tl0_resv00b: BTRAP(0xb) BTRAP(0xc) BTRAP(0xd) BTRAP(0xe) BTRAP(0xf) tl0_ill: membar #Sync TRAP_7INSNS(do_illegal_instruction) @@ -36,9 +37,10 @@ tl0_cwin: CLEAN_WINDOW tl0_div0: TRAP(do_div0) tl0_resv029: BTRAP(0x29) BTRAP(0x2a) BTRAP(0x2b) BTRAP(0x2c) BTRAP(0x2d) BTRAP(0x2e) tl0_resv02f: BTRAP(0x2f) -tl0_dax: TRAP_NOSAVE(__do_data_access_exception) +tl0_dax: TRAP_NOSAVE(__spitfire_data_access_exception) tl0_resv031: BTRAP(0x31) -tl0_dae: TRAP(do_dae) +tl0_dae: membar #Sync + TRAP_NOSAVE_7INSNS(__spitfire_access_error) tl0_resv033: BTRAP(0x33) tl0_mna: TRAP_NOSAVE(do_mna) tl0_lddfmna: TRAP_NOSAVE(do_lddfmna) @@ -73,7 +75,8 @@ tl0_resv05c: BTRAP(0x5c) BTRAP(0x5d) BTRAP(0x5e) BTRAP(0x5f) tl0_ivec: TRAP_IVEC tl0_paw: TRAP(do_paw) tl0_vaw: TRAP(do_vaw) -tl0_cee: TRAP_NOSAVE(cee_trap) +tl0_cee: membar #Sync + TRAP_NOSAVE_7INSNS(__spitfire_cee_trap) tl0_iamiss: #include "itlb_base.S" tl0_damiss: @@ -175,9 +178,10 @@ 
tl0_resv1f0: BTRAPS(0x1f0) BTRAPS(0x1f8) sparc64_ttable_tl1: tl1_resv000: BOOT_KERNEL BTRAPTL1(0x1) BTRAPTL1(0x2) BTRAPTL1(0x3) tl1_resv004: BTRAPTL1(0x4) BTRAPTL1(0x5) BTRAPTL1(0x6) BTRAPTL1(0x7) -tl1_iax: TRAP_NOSAVE(__do_instruction_access_exception_tl1) +tl1_iax: TRAP_NOSAVE(__spitfire_insn_access_exception_tl1) tl1_resv009: BTRAPTL1(0x9) -tl1_iae: TRAPTL1(do_iae_tl1) +tl1_iae: membar #Sync + TRAP_NOSAVE_7INSNS(__spitfire_access_error) tl1_resv00b: BTRAPTL1(0xb) BTRAPTL1(0xc) BTRAPTL1(0xd) BTRAPTL1(0xe) BTRAPTL1(0xf) tl1_ill: TRAPTL1(do_ill_tl1) tl1_privop: BTRAPTL1(0x11) @@ -193,9 +197,10 @@ tl1_cwin: CLEAN_WINDOW tl1_div0: TRAPTL1(do_div0_tl1) tl1_resv029: BTRAPTL1(0x29) BTRAPTL1(0x2a) BTRAPTL1(0x2b) BTRAPTL1(0x2c) tl1_resv02d: BTRAPTL1(0x2d) BTRAPTL1(0x2e) BTRAPTL1(0x2f) -tl1_dax: TRAP_NOSAVE(__do_data_access_exception_tl1) +tl1_dax: TRAP_NOSAVE(__spitfire_data_access_exception_tl1) tl1_resv031: BTRAPTL1(0x31) -tl1_dae: TRAPTL1(do_dae_tl1) +tl1_dae: membar #Sync + TRAP_NOSAVE_7INSNS(__spitfire_access_error) tl1_resv033: BTRAPTL1(0x33) tl1_mna: TRAP_NOSAVE(do_mna) tl1_lddfmna: TRAPTL1(do_lddfmna_tl1) @@ -219,8 +224,8 @@ tl1_paw: TRAPTL1(do_paw_tl1) tl1_vaw: TRAPTL1(do_vaw_tl1) /* The grotty trick to save %g1 into current->thread.cee_stuff - * is because when we take this trap we could be interrupting trap - * code already using the trap alternate global registers. + * is because when we take this trap we could be interrupting + * trap code already using the trap alternate global registers. * * We cross our fingers and pray that this store/load does * not cause yet another CEE trap. 
diff --git a/arch/sparc64/kernel/unaligned.c b/arch/sparc64/kernel/unaligned.c index 11c3e88732e4..da9739f0d437 100644 --- a/arch/sparc64/kernel/unaligned.c +++ b/arch/sparc64/kernel/unaligned.c @@ -349,9 +349,9 @@ int handle_popc(u32 insn, struct pt_regs *regs) extern void do_fpother(struct pt_regs *regs); extern void do_privact(struct pt_regs *regs); -extern void data_access_exception(struct pt_regs *regs, - unsigned long sfsr, - unsigned long sfar); +extern void spitfire_data_access_exception(struct pt_regs *regs, + unsigned long sfsr, + unsigned long sfar); int handle_ldf_stq(u32 insn, struct pt_regs *regs) { @@ -394,14 +394,14 @@ int handle_ldf_stq(u32 insn, struct pt_regs *regs) break; } default: - data_access_exception(regs, 0, addr); + spitfire_data_access_exception(regs, 0, addr); return 1; } if (put_user (first >> 32, (u32 __user *)addr) || __put_user ((u32)first, (u32 __user *)(addr + 4)) || __put_user (second >> 32, (u32 __user *)(addr + 8)) || __put_user ((u32)second, (u32 __user *)(addr + 12))) { - data_access_exception(regs, 0, addr); + spitfire_data_access_exception(regs, 0, addr); return 1; } } else { @@ -414,7 +414,7 @@ int handle_ldf_stq(u32 insn, struct pt_regs *regs) do_privact(regs); return 1; } else if (asi > ASI_SNFL) { - data_access_exception(regs, 0, addr); + spitfire_data_access_exception(regs, 0, addr); return 1; } switch (insn & 0x180000) { @@ -431,7 +431,7 @@ int handle_ldf_stq(u32 insn, struct pt_regs *regs) err |= __get_user (data[i], (u32 __user *)(addr + 4*i)); } if (err && !(asi & 0x2 /* NF */)) { - data_access_exception(regs, 0, addr); + spitfire_data_access_exception(regs, 0, addr); return 1; } if (asi & 0x8) /* Little */ { @@ -534,7 +534,7 @@ void handle_lddfmna(struct pt_regs *regs, unsigned long sfar, unsigned long sfsr *(u64 *)(f->regs + freg) = value; current_thread_info()->fpsaved[0] |= flag; } else { -daex: data_access_exception(regs, sfsr, sfar); +daex: spitfire_data_access_exception(regs, sfsr, sfar); return; } 
advance(regs); @@ -578,7 +578,7 @@ void handle_stdfmna(struct pt_regs *regs, unsigned long sfar, unsigned long sfsr __put_user ((u32)value, (u32 __user *)(sfar + 4))) goto daex; } else { -daex: data_access_exception(regs, sfsr, sfar); +daex: spitfire_data_access_exception(regs, sfsr, sfar); return; } advance(regs); diff --git a/arch/sparc64/kernel/winfixup.S b/arch/sparc64/kernel/winfixup.S index dfbc7e0dcf70..99c809a1e5ac 100644 --- a/arch/sparc64/kernel/winfixup.S +++ b/arch/sparc64/kernel/winfixup.S @@ -318,7 +318,7 @@ fill_fixup_dax: nop rdpr %pstate, %l1 ! Prepare to change globals. mov %g4, %o1 ! Setup args for - mov %g5, %o2 ! final call to data_access_exception. + mov %g5, %o2 ! final call to spitfire_data_access_exception. andn %l1, PSTATE_MM, %l1 ! We want to be in RMO mov %g6, %o7 ! Stash away current. @@ -330,7 +330,7 @@ fill_fixup_dax: mov TSB_REG, %g1 ldxa [%g1] ASI_IMMU, %g5 #endif - call data_access_exception + call spitfire_data_access_exception add %sp, PTREGS_OFF, %o0 b,pt %xcc, rtrap @@ -391,7 +391,7 @@ window_dax_from_user_common: 109: or %g7, %lo(109b), %g7 mov %l4, %o1 mov %l5, %o2 - call data_access_exception + call spitfire_data_access_exception add %sp, PTREGS_OFF, %o0 ba,pt %xcc, rtrap clr %l6 diff --git a/include/asm-sparc64/sfafsr.h b/include/asm-sparc64/sfafsr.h new file mode 100644 index 000000000000..2f792c20b53c --- /dev/null +++ b/include/asm-sparc64/sfafsr.h @@ -0,0 +1,82 @@ +#ifndef _SPARC64_SFAFSR_H +#define _SPARC64_SFAFSR_H + +#include + +/* Spitfire Asynchronous Fault Status register, ASI=0x4C VA<63:0>=0x0 */ + +#define SFAFSR_ME (_AC(1,UL) << SFAFSR_ME_SHIFT) +#define SFAFSR_ME_SHIFT 32 +#define SFAFSR_PRIV (_AC(1,UL) << SFAFSR_PRIV_SHIFT) +#define SFAFSR_PRIV_SHIFT 31 +#define SFAFSR_ISAP (_AC(1,UL) << SFAFSR_ISAP_SHIFT) +#define SFAFSR_ISAP_SHIFT 30 +#define SFAFSR_ETP (_AC(1,UL) << SFAFSR_ETP_SHIFT) +#define SFAFSR_ETP_SHIFT 29 +#define SFAFSR_IVUE (_AC(1,UL) << SFAFSR_IVUE_SHIFT) +#define SFAFSR_IVUE_SHIFT 28 +#define 
SFAFSR_TO (_AC(1,UL) << SFAFSR_TO_SHIFT) +#define SFAFSR_TO_SHIFT 27 +#define SFAFSR_BERR (_AC(1,UL) << SFAFSR_BERR_SHIFT) +#define SFAFSR_BERR_SHIFT 26 +#define SFAFSR_LDP (_AC(1,UL) << SFAFSR_LDP_SHIFT) +#define SFAFSR_LDP_SHIFT 25 +#define SFAFSR_CP (_AC(1,UL) << SFAFSR_CP_SHIFT) +#define SFAFSR_CP_SHIFT 24 +#define SFAFSR_WP (_AC(1,UL) << SFAFSR_WP_SHIFT) +#define SFAFSR_WP_SHIFT 23 +#define SFAFSR_EDP (_AC(1,UL) << SFAFSR_EDP_SHIFT) +#define SFAFSR_EDP_SHIFT 22 +#define SFAFSR_UE (_AC(1,UL) << SFAFSR_UE_SHIFT) +#define SFAFSR_UE_SHIFT 21 +#define SFAFSR_CE (_AC(1,UL) << SFAFSR_CE_SHIFT) +#define SFAFSR_CE_SHIFT 20 +#define SFAFSR_ETS (_AC(0xf,UL) << SFAFSR_ETS_SHIFT) +#define SFAFSR_ETS_SHIFT 16 +#define SFAFSR_PSYND (_AC(0xffff,UL) << SFAFSR_PSYND_SHIFT) +#define SFAFSR_PSYND_SHIFT 0 + +/* UDB Error Register, ASI=0x7f VA<63:0>=0x0(High),0x18(Low) for read + * ASI=0x77 VA<63:0>=0x0(High),0x18(Low) for write + */ + +#define UDBE_UE (_AC(1,UL) << 9) +#define UDBE_CE (_AC(1,UL) << 8) +#define UDBE_E_SYNDR (_AC(0xff,UL) << 0) + +/* The trap handlers for asynchronous errors encode the AFSR and + * other pieces of information into a 64-bit argument for C code + * encoded as follows: + * + * ----------------------------------------------- + * | UDB_H | UDB_L | TL>1 | TT | AFSR | + * ----------------------------------------------- + * 63 54 53 44 42 41 33 32 0 + * + * The AFAR is passed in unchanged. 
+ */ +#define SFSTAT_UDBH_MASK (_AC(0x3ff,UL) << SFSTAT_UDBH_SHIFT) +#define SFSTAT_UDBH_SHIFT 54 +#define SFSTAT_UDBL_MASK (_AC(0x3ff,UL) << SFSTAT_UDBL_SHIFT) +#define SFSTAT_UDBL_SHIFT 44 +#define SFSTAT_TL_GT_ONE (_AC(1,UL) << SFSTAT_TL_GT_ONE_SHIFT) +#define SFSTAT_TL_GT_ONE_SHIFT 42 +#define SFSTAT_TRAP_TYPE (_AC(0x1FF,UL) << SFSTAT_TRAP_TYPE_SHIFT) +#define SFSTAT_TRAP_TYPE_SHIFT 33 +#define SFSTAT_AFSR_MASK (_AC(0x1ffffffff,UL) << SFSTAT_AFSR_SHIFT) +#define SFSTAT_AFSR_SHIFT 0 + +/* ESTATE Error Enable Register, ASI=0x4b VA<63:0>=0x0 */ +#define ESTATE_ERR_CE 0x1 /* Correctable errors */ +#define ESTATE_ERR_NCE 0x2 /* TO, BERR, LDP, ETP, EDP, WP, UE, IVUE */ +#define ESTATE_ERR_ISAP 0x4 /* System address parity error */ +#define ESTATE_ERR_ALL (ESTATE_ERR_CE | \ + ESTATE_ERR_NCE | \ + ESTATE_ERR_ISAP) + +/* The various trap types that report using the above state. */ +#define TRAP_TYPE_IAE 0x0a /* Instruction Access Error */ +#define TRAP_TYPE_DAE 0x32 /* Data Access Error */ +#define TRAP_TYPE_CEE 0x63 /* Correctable ECC Error */ + +#endif /* _SPARC64_SFAFSR_H */ From 3d6364abcfdaedeb34418c2894f61251d48614f6 Mon Sep 17 00:00:00 2001 From: Kumar Gala Date: Mon, 29 Aug 2005 12:45:30 -0700 Subject: [PATCH 263/584] [SPARC64]: remove use of asm/segment.h Removed sparc64 architecture specific users of asm/segment.h and asm-sparc64/segment.h itself Signed-off-by: Kumar Gala Signed-off-by: David S.
Miller --- arch/sparc64/kernel/setup.c | 1 - include/asm-sparc64/processor.h | 1 - include/asm-sparc64/segment.h | 6 ------ 3 files changed, 8 deletions(-) delete mode 100644 include/asm-sparc64/segment.h diff --git a/arch/sparc64/kernel/setup.c b/arch/sparc64/kernel/setup.c index b7e6a91952b2..fbdfed3798d8 100644 --- a/arch/sparc64/kernel/setup.c +++ b/arch/sparc64/kernel/setup.c @@ -33,7 +33,6 @@ #include #include -#include #include #include #include diff --git a/include/asm-sparc64/processor.h b/include/asm-sparc64/processor.h index d0bee2413560..3169f3e2237e 100644 --- a/include/asm-sparc64/processor.h +++ b/include/asm-sparc64/processor.h @@ -18,7 +18,6 @@ #include #include #include -#include #include /* The sparc has no problems with write protection */ diff --git a/include/asm-sparc64/segment.h b/include/asm-sparc64/segment.h deleted file mode 100644 index b03e709fc945..000000000000 --- a/include/asm-sparc64/segment.h +++ /dev/null @@ -1,6 +0,0 @@ -#ifndef __SPARC64_SEGMENT_H -#define __SPARC64_SEGMENT_H - -/* Only here because we have some old header files that expect it.. */ - -#endif From ca7c8d2c1e2a2f2445cb5e00f45b93af57f22c1b Mon Sep 17 00:00:00 2001 From: Kumar Gala Date: Mon, 29 Aug 2005 12:45:44 -0700 Subject: [PATCH 264/584] [SPARC]: remove use of asm/segment.h Removed sparc architecture specific users of asm/segment.h and asm-sparc/segment.h itself Signed-off-by: Kumar Gala Signed-off-by: David S. 
Miller --- arch/sparc/kernel/setup.c | 1 - arch/sparc/kernel/tick14.c | 1 - arch/sparc/kernel/time.c | 1 - arch/sparc/mm/fault.c | 1 - arch/sparc/mm/init.c | 1 - include/asm-sparc/processor.h | 1 - include/asm-sparc/segment.h | 6 ------ include/asm-sparc/system.h | 1 - 8 files changed, 13 deletions(-) delete mode 100644 include/asm-sparc/segment.h diff --git a/arch/sparc/kernel/setup.c b/arch/sparc/kernel/setup.c index 55352ed85e8a..53c192a4982f 100644 --- a/arch/sparc/kernel/setup.c +++ b/arch/sparc/kernel/setup.c @@ -32,7 +32,6 @@ #include #include -#include #include #include #include diff --git a/arch/sparc/kernel/tick14.c b/arch/sparc/kernel/tick14.c index fd8005a3e6bd..591547af4c65 100644 --- a/arch/sparc/kernel/tick14.c +++ b/arch/sparc/kernel/tick14.c @@ -19,7 +19,6 @@ #include #include -#include #include #include #include diff --git a/arch/sparc/kernel/time.c b/arch/sparc/kernel/time.c index 6486cbf2efe9..3b759aefc170 100644 --- a/arch/sparc/kernel/time.c +++ b/arch/sparc/kernel/time.c @@ -32,7 +32,6 @@ #include #include -#include #include #include #include diff --git a/arch/sparc/mm/fault.c b/arch/sparc/mm/fault.c index 37f4107bae66..2bbd53f3cafb 100644 --- a/arch/sparc/mm/fault.c +++ b/arch/sparc/mm/fault.c @@ -23,7 +23,6 @@ #include #include -#include #include #include #include diff --git a/arch/sparc/mm/init.c b/arch/sparc/mm/init.c index ec2e05028a10..c03babaa0498 100644 --- a/arch/sparc/mm/init.c +++ b/arch/sparc/mm/init.c @@ -25,7 +25,6 @@ #include #include -#include #include #include #include diff --git a/include/asm-sparc/processor.h b/include/asm-sparc/processor.h index 32c9699367cf..5a7a1a8d29ac 100644 --- a/include/asm-sparc/processor.h +++ b/include/asm-sparc/processor.h @@ -19,7 +19,6 @@ #include #include #include -#include #include #include diff --git a/include/asm-sparc/segment.h b/include/asm-sparc/segment.h deleted file mode 100644 index a1b7ffc9eec9..000000000000 --- a/include/asm-sparc/segment.h +++ /dev/null @@ -1,6 +0,0 @@ -#ifndef 
__SPARC_SEGMENT_H -#define __SPARC_SEGMENT_H - -/* Only here because we have some old header files that expect it.. */ - -#endif diff --git a/include/asm-sparc/system.h b/include/asm-sparc/system.h index 898562ebe94c..3557781a4bfd 100644 --- a/include/asm-sparc/system.h +++ b/include/asm-sparc/system.h @@ -9,7 +9,6 @@ #include /* NR_CPUS */ #include -#include #include #include #include From 442464a50077ff00454ff8d7628cbe1b8eacc034 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Mon, 29 Aug 2005 12:46:07 -0700 Subject: [PATCH 265/584] [SPARC64]: Make debugging spinlocks usable again. When the spinlock routines were moved out of line into kernel/spinlock.c this made it so that the debugging spinlocks record lock acquisition program counters in the kernel/spinlock.c functions not in their callers. This makes the debugging info kind of useless. So record the correct caller's program counter and now this feature is useful once more. Signed-off-by: David S. Miller --- arch/sparc64/kernel/sparc64_ksyms.c | 23 ----------------- arch/sparc64/lib/debuglocks.c | 40 ++++++++++------------------- include/asm-sparc64/spinlock.h | 38 ++++++++++++++++----------- 3 files changed, 37 insertions(+), 64 deletions(-) diff --git a/arch/sparc64/kernel/sparc64_ksyms.c b/arch/sparc64/kernel/sparc64_ksyms.c index 9202d925a9ce..0764b9330784 100644 --- a/arch/sparc64/kernel/sparc64_ksyms.c +++ b/arch/sparc64/kernel/sparc64_ksyms.c @@ -99,17 +99,6 @@ extern int __ashrdi3(int, int); extern void dump_thread(struct pt_regs *, struct user *); extern int dump_fpu (struct pt_regs * regs, elf_fpregset_t * fpregs); -#if defined(CONFIG_SMP) && defined(CONFIG_DEBUG_SPINLOCK) -extern void _do_spin_lock (spinlock_t *lock, char *str); -extern void _do_spin_unlock (spinlock_t *lock); -extern int _spin_trylock (spinlock_t *lock); -extern void _do_read_lock(rwlock_t *rw, char *str); -extern void _do_read_unlock(rwlock_t *rw, char *str); -extern void _do_write_lock(rwlock_t *rw, char *str); -extern void
_do_write_unlock(rwlock_t *rw); -extern int _do_write_trylock(rwlock_t *rw, char *str); -#endif - extern unsigned long phys_base; extern unsigned long pfn_base; @@ -152,18 +141,6 @@ EXPORT_SYMBOL(_mcount); EXPORT_SYMBOL(cpu_online_map); EXPORT_SYMBOL(phys_cpu_present_map); -/* Spinlock debugging library, optional. */ -#ifdef CONFIG_DEBUG_SPINLOCK -EXPORT_SYMBOL(_do_spin_lock); -EXPORT_SYMBOL(_do_spin_unlock); -EXPORT_SYMBOL(_spin_trylock); -EXPORT_SYMBOL(_do_read_lock); -EXPORT_SYMBOL(_do_read_unlock); -EXPORT_SYMBOL(_do_write_lock); -EXPORT_SYMBOL(_do_write_unlock); -EXPORT_SYMBOL(_do_write_trylock); -#endif - EXPORT_SYMBOL(smp_call_function); #endif /* CONFIG_SMP */ diff --git a/arch/sparc64/lib/debuglocks.c b/arch/sparc64/lib/debuglocks.c index f03344cf784e..7f6ccc4114c5 100644 --- a/arch/sparc64/lib/debuglocks.c +++ b/arch/sparc64/lib/debuglocks.c @@ -12,8 +12,6 @@ #ifdef CONFIG_SMP -#define GET_CALLER(PC) __asm__ __volatile__("mov %%i7, %0" : "=r" (PC)) - static inline void show (char *str, spinlock_t *lock, unsigned long caller) { int cpu = smp_processor_id(); @@ -51,14 +49,13 @@ static inline void show_write (char *str, rwlock_t *lock, unsigned long caller) #undef INIT_STUCK #define INIT_STUCK 100000000 -void _do_spin_lock(spinlock_t *lock, char *str) +void _do_spin_lock(spinlock_t *lock, char *str, unsigned long caller) { - unsigned long caller, val; + unsigned long val; int stuck = INIT_STUCK; int cpu = get_cpu(); int shown = 0; - GET_CALLER(caller); again: __asm__ __volatile__("ldstub [%1], %0" : "=r" (val) @@ -84,12 +81,11 @@ again: put_cpu(); } -int _do_spin_trylock(spinlock_t *lock) +int _do_spin_trylock(spinlock_t *lock, unsigned long caller) { - unsigned long val, caller; + unsigned long val; int cpu = get_cpu(); - GET_CALLER(caller); __asm__ __volatile__("ldstub [%1], %0" : "=r" (val) : "r" (&(lock->lock)) @@ -118,14 +114,13 @@ void _do_spin_unlock(spinlock_t *lock) /* Keep INIT_STUCK the same... 
*/ -void _do_read_lock (rwlock_t *rw, char *str) +void _do_read_lock(rwlock_t *rw, char *str, unsigned long caller) { - unsigned long caller, val; + unsigned long val; int stuck = INIT_STUCK; int cpu = get_cpu(); int shown = 0; - GET_CALLER(caller); wlock_again: /* Wait for any writer to go away. */ while (((long)(rw->lock)) < 0) { @@ -157,15 +152,13 @@ wlock_again: put_cpu(); } -void _do_read_unlock (rwlock_t *rw, char *str) +void _do_read_unlock(rwlock_t *rw, char *str, unsigned long caller) { - unsigned long caller, val; + unsigned long val; int stuck = INIT_STUCK; int cpu = get_cpu(); int shown = 0; - GET_CALLER(caller); - /* Drop our identity _first_. */ rw->reader_pc[cpu] = 0; current->thread.smp_lock_count--; @@ -193,14 +186,13 @@ runlock_again: put_cpu(); } -void _do_write_lock (rwlock_t *rw, char *str) +void _do_write_lock(rwlock_t *rw, char *str, unsigned long caller) { - unsigned long caller, val; + unsigned long val; int stuck = INIT_STUCK; int cpu = get_cpu(); int shown = 0; - GET_CALLER(caller); wlock_again: /* Spin while there is another writer. */ while (((long)rw->lock) < 0) { @@ -278,14 +270,12 @@ wlock_again: put_cpu(); } -void _do_write_unlock(rwlock_t *rw) +void _do_write_unlock(rwlock_t *rw, unsigned long caller) { - unsigned long caller, val; + unsigned long val; int stuck = INIT_STUCK; int shown = 0; - GET_CALLER(caller); - /* Drop our identity _first_ */ rw->writer_pc = 0; rw->writer_cpu = NO_PROC_ID; @@ -313,13 +303,11 @@ wlock_again: } } -int _do_write_trylock (rwlock_t *rw, char *str) +int _do_write_trylock(rwlock_t *rw, char *str, unsigned long caller) { - unsigned long caller, val; + unsigned long val; int cpu = get_cpu(); - GET_CALLER(caller); - /* Try to acuire the write bit. 
*/ __asm__ __volatile__( " mov 1, %%g3\n" diff --git a/include/asm-sparc64/spinlock.h b/include/asm-sparc64/spinlock.h index 9cb93a5c2b4f..d265bf6570fe 100644 --- a/include/asm-sparc64/spinlock.h +++ b/include/asm-sparc64/spinlock.h @@ -132,12 +132,15 @@ do { \ membar("#LoadLoad"); \ } while((__lock)->lock) -extern void _do_spin_lock (spinlock_t *lock, char *str); -extern void _do_spin_unlock (spinlock_t *lock); -extern int _do_spin_trylock (spinlock_t *lock); +extern void _do_spin_lock(spinlock_t *lock, char *str, unsigned long caller); +extern void _do_spin_unlock(spinlock_t *lock); +extern int _do_spin_trylock(spinlock_t *lock, unsigned long caller); -#define _raw_spin_trylock(lp) _do_spin_trylock(lp) -#define _raw_spin_lock(lock) _do_spin_lock(lock, "spin_lock") +#define _raw_spin_trylock(lp) \ + _do_spin_trylock(lp, (unsigned long) __builtin_return_address(0)) +#define _raw_spin_lock(lock) \ + _do_spin_lock(lock, "spin_lock", \ + (unsigned long) __builtin_return_address(0)) #define _raw_spin_unlock(lock) _do_spin_unlock(lock) #define _raw_spin_lock_flags(lock, flags) _raw_spin_lock(lock) @@ -279,37 +282,41 @@ typedef struct { #define RW_LOCK_UNLOCKED (rwlock_t) { 0, 0, 0xff, { } } #define rwlock_init(lp) do { *(lp) = RW_LOCK_UNLOCKED; } while(0) -extern void _do_read_lock(rwlock_t *rw, char *str); -extern void _do_read_unlock(rwlock_t *rw, char *str); -extern void _do_write_lock(rwlock_t *rw, char *str); -extern void _do_write_unlock(rwlock_t *rw); -extern int _do_write_trylock(rwlock_t *rw, char *str); +extern void _do_read_lock(rwlock_t *rw, char *str, unsigned long caller); +extern void _do_read_unlock(rwlock_t *rw, char *str, unsigned long caller); +extern void _do_write_lock(rwlock_t *rw, char *str, unsigned long caller); +extern void _do_write_unlock(rwlock_t *rw, unsigned long caller); +extern int _do_write_trylock(rwlock_t *rw, char *str, unsigned long caller); #define _raw_read_lock(lock) \ do { unsigned long flags; \ local_irq_save(flags); \ - 
_do_read_lock(lock, "read_lock"); \ + _do_read_lock(lock, "read_lock", \ + (unsigned long) __builtin_return_address(0)); \ local_irq_restore(flags); \ } while(0) #define _raw_read_unlock(lock) \ do { unsigned long flags; \ local_irq_save(flags); \ - _do_read_unlock(lock, "read_unlock"); \ + _do_read_unlock(lock, "read_unlock", \ + (unsigned long) __builtin_return_address(0)); \ local_irq_restore(flags); \ } while(0) #define _raw_write_lock(lock) \ do { unsigned long flags; \ local_irq_save(flags); \ - _do_write_lock(lock, "write_lock"); \ + _do_write_lock(lock, "write_lock", \ + (unsigned long) __builtin_return_address(0)); \ local_irq_restore(flags); \ } while(0) #define _raw_write_unlock(lock) \ do { unsigned long flags; \ local_irq_save(flags); \ - _do_write_unlock(lock); \ + _do_write_unlock(lock, \ + (unsigned long) __builtin_return_address(0)); \ local_irq_restore(flags); \ } while(0) @@ -317,7 +324,8 @@ do { unsigned long flags; \ ({ unsigned long flags; \ int val; \ local_irq_save(flags); \ - val = _do_write_trylock(lock, "write_trylock"); \ + val = _do_write_trylock(lock, "write_trylock", \ + (unsigned long) __builtin_return_address(0)); \ local_irq_restore(flags); \ val; \ }) From 4f07118f656c179740cad35b827032e2e29b1210 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Mon, 29 Aug 2005 12:46:22 -0700 Subject: [PATCH 266/584] [SPARC64]: More fully work around Spitfire Errata 51. It appears that a memory barrier soon after a mispredicted branch, not just in the delay slot, can cause the hang condition of this cpu errata. So move them out-of-line, and explicitly put them into a "branch always, predict taken" delay slot which should fully kill this problem. Signed-off-by: David S. 
Miller --- arch/sparc64/kernel/pci_iommu.c | 2 +- arch/sparc64/kernel/process.c | 2 +- arch/sparc64/kernel/sbus.c | 2 +- arch/sparc64/kernel/signal32.c | 22 +++++---- arch/sparc64/kernel/smp.c | 30 ++++++------ arch/sparc64/kernel/sparc64_ksyms.c | 9 ++++ arch/sparc64/lib/Makefile | 2 +- arch/sparc64/lib/debuglocks.c | 16 +++---- arch/sparc64/lib/mb.S | 73 +++++++++++++++++++++++++++++ arch/sparc64/solaris/misc.c | 6 ++- include/asm-sparc64/atomic.h | 8 ++-- include/asm-sparc64/bitops.h | 4 +- include/asm-sparc64/spinlock.h | 4 +- include/asm-sparc64/system.h | 17 ++++--- 14 files changed, 143 insertions(+), 54 deletions(-) create mode 100644 arch/sparc64/lib/mb.S diff --git a/arch/sparc64/kernel/pci_iommu.c b/arch/sparc64/kernel/pci_iommu.c index 2803bc7c2c79..425c60cfea19 100644 --- a/arch/sparc64/kernel/pci_iommu.c +++ b/arch/sparc64/kernel/pci_iommu.c @@ -466,7 +466,7 @@ do_flush_sync: if (!limit) break; udelay(1); - membar("#LoadLoad"); + rmb(); } if (!limit) printk(KERN_WARNING "pci_strbuf_flush: flushflag timeout " diff --git a/arch/sparc64/kernel/process.c b/arch/sparc64/kernel/process.c index 07424b075938..66255434128a 100644 --- a/arch/sparc64/kernel/process.c +++ b/arch/sparc64/kernel/process.c @@ -103,7 +103,7 @@ void cpu_idle(void) * other cpus see our increasing idleness for the buddy * redistribution algorithm. 
-DaveM */ - membar("#StoreStore | #StoreLoad"); + membar_storeload_storestore(); } } diff --git a/arch/sparc64/kernel/sbus.c b/arch/sparc64/kernel/sbus.c index 89f5e019f24c..e09ddf927655 100644 --- a/arch/sparc64/kernel/sbus.c +++ b/arch/sparc64/kernel/sbus.c @@ -147,7 +147,7 @@ static void sbus_strbuf_flush(struct sbus_iommu *iommu, u32 base, unsigned long if (!limit) break; udelay(1); - membar("#LoadLoad"); + rmb(); } if (!limit) printk(KERN_WARNING "sbus_strbuf_flush: flushflag timeout " diff --git a/arch/sparc64/kernel/signal32.c b/arch/sparc64/kernel/signal32.c index b1ed23091fbb..aecccd0df1d1 100644 --- a/arch/sparc64/kernel/signal32.c +++ b/arch/sparc64/kernel/signal32.c @@ -877,11 +877,12 @@ static void new_setup_frame32(struct k_sigaction *ka, struct pt_regs *regs, unsigned long page = (unsigned long) page_address(pte_page(*ptep)); - __asm__ __volatile__( - " membar #StoreStore\n" - " flush %0 + %1" - : : "r" (page), "r" (address & (PAGE_SIZE - 1)) - : "memory"); + wmb(); + __asm__ __volatile__("flush %0 + %1" + : /* no outputs */ + : "r" (page), + "r" (address & (PAGE_SIZE - 1)) + : "memory"); } pte_unmap(ptep); preempt_enable(); @@ -1292,11 +1293,12 @@ static void setup_rt_frame32(struct k_sigaction *ka, struct pt_regs *regs, unsigned long page = (unsigned long) page_address(pte_page(*ptep)); - __asm__ __volatile__( - " membar #StoreStore\n" - " flush %0 + %1" - : : "r" (page), "r" (address & (PAGE_SIZE - 1)) - : "memory"); + wmb(); + __asm__ __volatile__("flush %0 + %1" + : /* no outputs */ + : "r" (page), + "r" (address & (PAGE_SIZE - 1)) + : "memory"); } pte_unmap(ptep); preempt_enable(); diff --git a/arch/sparc64/kernel/smp.c b/arch/sparc64/kernel/smp.c index b9b42491e118..b4fc6a5462b2 100644 --- a/arch/sparc64/kernel/smp.c +++ b/arch/sparc64/kernel/smp.c @@ -144,7 +144,7 @@ void __init smp_callin(void) current->active_mm = &init_mm; while (!cpu_isset(cpuid, smp_commenced_mask)) - membar("#LoadLoad"); + rmb(); cpu_set(cpuid, cpu_online_map); } @@ 
-184,11 +184,11 @@ static inline long get_delta (long *rt, long *master) for (i = 0; i < NUM_ITERS; i++) { t0 = tick_ops->get_tick(); go[MASTER] = 1; - membar("#StoreLoad"); + membar_storeload(); while (!(tm = go[SLAVE])) - membar("#LoadLoad"); + rmb(); go[SLAVE] = 0; - membar("#StoreStore"); + wmb(); t1 = tick_ops->get_tick(); if (t1 - t0 < best_t1 - best_t0) @@ -221,7 +221,7 @@ void smp_synchronize_tick_client(void) go[MASTER] = 1; while (go[MASTER]) - membar("#LoadLoad"); + rmb(); local_irq_save(flags); { @@ -273,21 +273,21 @@ static void smp_synchronize_one_tick(int cpu) /* wait for client to be ready */ while (!go[MASTER]) - membar("#LoadLoad"); + rmb(); /* now let the client proceed into his loop */ go[MASTER] = 0; - membar("#StoreLoad"); + membar_storeload(); spin_lock_irqsave(&itc_sync_lock, flags); { for (i = 0; i < NUM_ROUNDS*NUM_ITERS; i++) { while (!go[MASTER]) - membar("#LoadLoad"); + rmb(); go[MASTER] = 0; - membar("#StoreStore"); + wmb(); go[SLAVE] = tick_ops->get_tick(); - membar("#StoreLoad"); + membar_storeload(); } } spin_unlock_irqrestore(&itc_sync_lock, flags); @@ -927,11 +927,11 @@ void smp_capture(void) smp_processor_id()); #endif penguins_are_doing_time = 1; - membar("#StoreStore | #LoadStore"); + membar_storestore_loadstore(); atomic_inc(&smp_capture_registry); smp_cross_call(&xcall_capture, 0, 0, 0); while (atomic_read(&smp_capture_registry) != ncpus) - membar("#LoadLoad"); + rmb(); #ifdef CAPTURE_DEBUG printk("done\n"); #endif @@ -947,7 +947,7 @@ void smp_release(void) smp_processor_id()); #endif penguins_are_doing_time = 0; - membar("#StoreStore | #StoreLoad"); + membar_storeload_storestore(); atomic_dec(&smp_capture_registry); } } @@ -970,9 +970,9 @@ void smp_penguin_jailcell(int irq, struct pt_regs *regs) save_alternate_globals(global_save); prom_world(1); atomic_inc(&smp_capture_registry); - membar("#StoreLoad | #StoreStore"); + membar_storeload_storestore(); while (penguins_are_doing_time) - membar("#LoadLoad"); + rmb(); 
restore_alternate_globals(global_save); atomic_dec(&smp_capture_registry); prom_world(0); diff --git a/arch/sparc64/kernel/sparc64_ksyms.c b/arch/sparc64/kernel/sparc64_ksyms.c index 0764b9330784..a3ea697f1adb 100644 --- a/arch/sparc64/kernel/sparc64_ksyms.c +++ b/arch/sparc64/kernel/sparc64_ksyms.c @@ -406,3 +406,12 @@ EXPORT_SYMBOL(xor_vis_4); EXPORT_SYMBOL(xor_vis_5); EXPORT_SYMBOL(prom_palette); + +/* memory barriers */ +EXPORT_SYMBOL(mb); +EXPORT_SYMBOL(rmb); +EXPORT_SYMBOL(wmb); +EXPORT_SYMBOL(membar_storeload); +EXPORT_SYMBOL(membar_storeload_storestore); +EXPORT_SYMBOL(membar_storeload_loadload); +EXPORT_SYMBOL(membar_storestore_loadstore); diff --git a/arch/sparc64/lib/Makefile b/arch/sparc64/lib/Makefile index 40dbeec7e5d6..6201f1040982 100644 --- a/arch/sparc64/lib/Makefile +++ b/arch/sparc64/lib/Makefile @@ -12,7 +12,7 @@ lib-y := PeeCeeI.o copy_page.o clear_page.o strlen.o strncmp.o \ U1memcpy.o U1copy_from_user.o U1copy_to_user.o \ U3memcpy.o U3copy_from_user.o U3copy_to_user.o U3patch.o \ copy_in_user.o user_fixup.o memmove.o \ - mcount.o ipcsum.o rwsem.o xor.o find_bit.o delay.o + mcount.o ipcsum.o rwsem.o xor.o find_bit.o delay.o mb.o lib-$(CONFIG_DEBUG_SPINLOCK) += debuglocks.o lib-$(CONFIG_HAVE_DEC_LOCK) += dec_and_lock.o diff --git a/arch/sparc64/lib/debuglocks.c b/arch/sparc64/lib/debuglocks.c index 7f6ccc4114c5..f5f0b5586f01 100644 --- a/arch/sparc64/lib/debuglocks.c +++ b/arch/sparc64/lib/debuglocks.c @@ -61,7 +61,7 @@ again: : "=r" (val) : "r" (&(lock->lock)) : "memory"); - membar("#StoreLoad | #StoreStore"); + membar_storeload_storestore(); if (val) { while (lock->lock) { if (!--stuck) { @@ -69,7 +69,7 @@ again: show(str, lock, caller); stuck = INIT_STUCK; } - membar("#LoadLoad"); + rmb(); } goto again; } @@ -90,7 +90,7 @@ int _do_spin_trylock(spinlock_t *lock, unsigned long caller) : "=r" (val) : "r" (&(lock->lock)) : "memory"); - membar("#StoreLoad | #StoreStore"); + membar_storeload_storestore(); if (!val) { lock->owner_pc = ((unsigned 
int)caller); lock->owner_cpu = cpu; @@ -107,7 +107,7 @@ void _do_spin_unlock(spinlock_t *lock) { lock->owner_pc = 0; lock->owner_cpu = NO_PROC_ID; - membar("#StoreStore | #LoadStore"); + membar_storestore_loadstore(); lock->lock = 0; current->thread.smp_lock_count--; } @@ -129,7 +129,7 @@ wlock_again: show_read(str, rw, caller); stuck = INIT_STUCK; } - membar("#LoadLoad"); + rmb(); } /* Try once to increment the counter. */ __asm__ __volatile__( @@ -142,7 +142,7 @@ wlock_again: "2:" : "=r" (val) : "0" (&(rw->lock)) : "g1", "g7", "memory"); - membar("#StoreLoad | #StoreStore"); + membar_storeload_storestore(); if (val) goto wlock_again; rw->reader_pc[cpu] = ((unsigned int)caller); @@ -201,7 +201,7 @@ wlock_again: show_write(str, rw, caller); stuck = INIT_STUCK; } - membar("#LoadLoad"); + rmb(); } /* Try to acuire the write bit. */ @@ -256,7 +256,7 @@ wlock_again: show_write(str, rw, caller); stuck = INIT_STUCK; } - membar("#LoadLoad"); + rmb(); } goto wlock_again; } diff --git a/arch/sparc64/lib/mb.S b/arch/sparc64/lib/mb.S new file mode 100644 index 000000000000..4004f748619f --- /dev/null +++ b/arch/sparc64/lib/mb.S @@ -0,0 +1,73 @@ +/* mb.S: Out of line memory barriers. + * + * Copyright (C) 2005 David S. Miller (davem@davemloft.net) + */ + + /* These are here in an effort to more fully work around + * Spitfire Errata #51. Essentially, if a memory barrier + * occurs soon after a mispredicted branch, the chip can stop + * executing instructions until a trap occurs. Therefore, if + * interrupts are disabled, the chip can hang forever. + * + * It used to be believed that the memory barrier had to be + * right in the delay slot, but a case has been traced + * recently wherein the memory barrier was one instruction + * after the branch delay slot and the chip still hung. 
The + * offending sequence was the following in sym_wakeup_done() + * of the sym53c8xx_2 driver: + * + * call sym_ccb_from_dsa, 0 + * movge %icc, 0, %l0 + * brz,pn %o0, .LL1303 + * mov %o0, %l2 + * membar #LoadLoad + * + * The branch has to be mispredicted for the bug to occur. + * Therefore, we put the memory barrier explicitly into a + * "branch always, predicted taken" delay slot to avoid the + * problem case. + */ + + .text + +99: retl + nop + + .globl mb +mb: ba,pt %xcc, 99b + membar #LoadLoad | #LoadStore | #StoreStore | #StoreLoad + .size mb, .-mb + + .globl rmb +rmb: ba,pt %xcc, 99b + membar #LoadLoad + .size rmb, .-rmb + + .globl wmb +wmb: ba,pt %xcc, 99b + membar #StoreStore + .size wmb, .-wmb + + .globl membar_storeload +membar_storeload: + ba,pt %xcc, 99b + membar #StoreLoad + .size membar_storeload, .-membar_storeload + + .globl membar_storeload_storestore +membar_storeload_storestore: + ba,pt %xcc, 99b + membar #StoreLoad | #StoreStore + .size membar_storeload_storestore, .-membar_storeload_storestore + + .globl membar_storeload_loadload +membar_storeload_loadload: + ba,pt %xcc, 99b + membar #StoreLoad | #LoadLoad + .size membar_storeload_loadload, .-membar_storeload_loadload + + .globl membar_storestore_loadstore +membar_storestore_loadstore: + ba,pt %xcc, 99b + membar #StoreStore | #LoadStore + .size membar_storestore_loadstore, .-membar_storestore_loadstore diff --git a/arch/sparc64/solaris/misc.c b/arch/sparc64/solaris/misc.c index 15b4cfe07557..302efbcba70e 100644 --- a/arch/sparc64/solaris/misc.c +++ b/arch/sparc64/solaris/misc.c @@ -737,7 +737,8 @@ MODULE_LICENSE("GPL"); extern u32 tl0_solaris[8]; #define update_ttable(x) \ tl0_solaris[3] = (((long)(x) - (long)tl0_solaris - 3) >> 2) | 0x40000000; \ - __asm__ __volatile__ ("membar #StoreStore; flush %0" : : "r" (&tl0_solaris[3])) + wmb(); \ + __asm__ __volatile__ ("flush %0" : : "r" (&tl0_solaris[3])) #else #endif @@ -761,7 +762,8 @@ int init_module(void) entry64_personality_patch |= 
(offsetof(struct task_struct, personality) + (sizeof(unsigned long) - 1)); - __asm__ __volatile__("membar #StoreStore; flush %0" + wmb(); + __asm__ __volatile__("flush %0" : : "r" (&entry64_personality_patch)); return 0; } diff --git a/include/asm-sparc64/atomic.h b/include/asm-sparc64/atomic.h index d80f3379669b..e175afcf2cde 100644 --- a/include/asm-sparc64/atomic.h +++ b/include/asm-sparc64/atomic.h @@ -72,10 +72,10 @@ extern int atomic64_sub_ret(int, atomic64_t *); /* Atomic operations are already serializing */ #ifdef CONFIG_SMP -#define smp_mb__before_atomic_dec() membar("#StoreLoad | #LoadLoad") -#define smp_mb__after_atomic_dec() membar("#StoreLoad | #StoreStore") -#define smp_mb__before_atomic_inc() membar("#StoreLoad | #LoadLoad") -#define smp_mb__after_atomic_inc() membar("#StoreLoad | #StoreStore") +#define smp_mb__before_atomic_dec() membar_storeload_loadload(); +#define smp_mb__after_atomic_dec() membar_storeload_storestore(); +#define smp_mb__before_atomic_inc() membar_storeload_loadload(); +#define smp_mb__after_atomic_inc() membar_storeload_storestore(); #else #define smp_mb__before_atomic_dec() barrier() #define smp_mb__after_atomic_dec() barrier() diff --git a/include/asm-sparc64/bitops.h b/include/asm-sparc64/bitops.h index 9c5e71970287..6388b8376c50 100644 --- a/include/asm-sparc64/bitops.h +++ b/include/asm-sparc64/bitops.h @@ -72,8 +72,8 @@ static inline int __test_and_change_bit(int nr, volatile unsigned long *addr) } #ifdef CONFIG_SMP -#define smp_mb__before_clear_bit() membar("#StoreLoad | #LoadLoad") -#define smp_mb__after_clear_bit() membar("#StoreLoad | #StoreStore") +#define smp_mb__before_clear_bit() membar_storeload_loadload() +#define smp_mb__after_clear_bit() membar_storeload_storestore() #else #define smp_mb__before_clear_bit() barrier() #define smp_mb__after_clear_bit() barrier() diff --git a/include/asm-sparc64/spinlock.h b/include/asm-sparc64/spinlock.h index d265bf6570fe..a02c4370eb42 100644 --- 
a/include/asm-sparc64/spinlock.h +++ b/include/asm-sparc64/spinlock.h @@ -43,7 +43,7 @@ typedef struct { #define spin_is_locked(lp) ((lp)->lock != 0) #define spin_unlock_wait(lp) \ -do { membar("#LoadLoad"); \ +do { rmb(); \ } while((lp)->lock) static inline void _raw_spin_lock(spinlock_t *lock) @@ -129,7 +129,7 @@ typedef struct { #define spin_is_locked(__lock) ((__lock)->lock != 0) #define spin_unlock_wait(__lock) \ do { \ - membar("#LoadLoad"); \ + rmb(); \ } while((__lock)->lock) extern void _do_spin_lock(spinlock_t *lock, char *str, unsigned long caller); diff --git a/include/asm-sparc64/system.h b/include/asm-sparc64/system.h index ee4bdfc6b88f..5e94c05dc2fc 100644 --- a/include/asm-sparc64/system.h +++ b/include/asm-sparc64/system.h @@ -28,6 +28,14 @@ enum sparc_cpu { #define ARCH_SUN4C_SUN4 0 #define ARCH_SUN4 0 +extern void mb(void); +extern void rmb(void); +extern void wmb(void); +extern void membar_storeload(void); +extern void membar_storeload_storestore(void); +extern void membar_storeload_loadload(void); +extern void membar_storestore_loadstore(void); + #endif #define setipl(__new_ipl) \ @@ -78,16 +86,11 @@ enum sparc_cpu { #define nop() __asm__ __volatile__ ("nop") -#define membar(type) __asm__ __volatile__ ("membar " type : : : "memory") -#define mb() \ - membar("#LoadLoad | #LoadStore | #StoreStore | #StoreLoad") -#define rmb() membar("#LoadLoad") -#define wmb() membar("#StoreStore") #define read_barrier_depends() do { } while(0) #define set_mb(__var, __value) \ - do { __var = __value; membar("#StoreLoad | #StoreStore"); } while(0) + do { __var = __value; membar_storeload_storestore(); } while(0) #define set_wmb(__var, __value) \ - do { __var = __value; membar("#StoreStore"); } while(0) + do { __var = __value; wmb(); } while(0) #ifdef CONFIG_SMP #define smp_mb() mb() From bf3a46aa9b96f6eb3a49a568f72a2801c3e830c0 Mon Sep 17 00:00:00 2001 From: Harald Welte Date: Tue, 9 Aug 2005 19:22:01 -0700 Subject: [PATCH 267/584] [NETFILTER]: convert nfmark and 
conntrack mark to 32bit As discussed at netconf'05, we convert nfmark and conntrack-mark to be 32bits even on 64bit architectures. Signed-off-by: Harald Welte Signed-off-by: David S. Miller --- include/linux/netfilter_ipv4/ip_conntrack.h | 2 +- include/linux/skbuff.h | 2 +- net/ipv4/netfilter/ip_conntrack_standalone.c | 2 +- net/ipv4/netfilter/ipt_CLUSTERIP.c | 2 +- net/ipv4/netfilter/ipt_CONNMARK.c | 11 ++++++++--- net/ipv4/netfilter/ipt_MARK.c | 12 ++++++++++++ net/ipv4/netfilter/ipt_connmark.c | 7 +++++++ net/ipv4/netfilter/ipt_mark.c | 7 +++++++ 8 files changed, 38 insertions(+), 7 deletions(-) diff --git a/include/linux/netfilter_ipv4/ip_conntrack.h b/include/linux/netfilter_ipv4/ip_conntrack.h index 08fe5f7d14a0..4ed720f0c4cd 100644 --- a/include/linux/netfilter_ipv4/ip_conntrack.h +++ b/include/linux/netfilter_ipv4/ip_conntrack.h @@ -171,7 +171,7 @@ struct ip_conntrack #endif /* CONFIG_IP_NF_NAT_NEEDED */ #if defined(CONFIG_IP_NF_CONNTRACK_MARK) - unsigned long mark; + u_int32_t mark; #endif /* Traversed often, so hopefully in different cacheline to top */ diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 948527e42a60..2e40f4c9f7a6 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -259,7 +259,7 @@ struct sk_buff { void (*destructor)(struct sk_buff *skb); #ifdef CONFIG_NETFILTER - unsigned long nfmark; + __u32 nfmark; __u32 nfcache; __u32 nfctinfo; struct nf_conntrack *nfct; diff --git a/net/ipv4/netfilter/ip_conntrack_standalone.c b/net/ipv4/netfilter/ip_conntrack_standalone.c index 61798c46e91d..dccd4abab7ae 100644 --- a/net/ipv4/netfilter/ip_conntrack_standalone.c +++ b/net/ipv4/netfilter/ip_conntrack_standalone.c @@ -185,7 +185,7 @@ static int ct_seq_show(struct seq_file *s, void *v) return -ENOSPC; #if defined(CONFIG_IP_NF_CONNTRACK_MARK) - if (seq_printf(s, "mark=%lu ", conntrack->mark)) + if (seq_printf(s, "mark=%u ", conntrack->mark)) return -ENOSPC; #endif diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c 
b/net/ipv4/netfilter/ipt_CLUSTERIP.c index 6706d3a1bc4f..2d05cafec221 100644 --- a/net/ipv4/netfilter/ipt_CLUSTERIP.c +++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c @@ -367,7 +367,7 @@ target(struct sk_buff **pskb, #ifdef DEBUG_CLUSTERP DUMP_TUPLE(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple); #endif - DEBUGP("hash=%u ct_hash=%lu ", hash, ct->mark); + DEBUGP("hash=%u ct_hash=%u ", hash, ct->mark); if (!clusterip_responsible(cipinfo->config, hash)) { DEBUGP("not responsible\n"); return NF_DROP; diff --git a/net/ipv4/netfilter/ipt_CONNMARK.c b/net/ipv4/netfilter/ipt_CONNMARK.c index 30ddd3e18eb7..8ed744157b1a 100644 --- a/net/ipv4/netfilter/ipt_CONNMARK.c +++ b/net/ipv4/netfilter/ipt_CONNMARK.c @@ -40,9 +40,9 @@ target(struct sk_buff **pskb, void *userinfo) { const struct ipt_connmark_target_info *markinfo = targinfo; - unsigned long diff; - unsigned long nfmark; - unsigned long newmark; + u_int32_t diff; + u_int32_t nfmark; + u_int32_t newmark; enum ip_conntrack_info ctinfo; struct ip_conntrack *ct = ip_conntrack_get((*pskb), &ctinfo); @@ -94,6 +94,11 @@ checkentry(const char *tablename, } } + if (matchinfo->mark > 0xffffffff || matchinfo->mask > 0xffffffff) { + printk(KERN_WARNING "CONNMARK: Only supports 32bit mark\n"); + return 0; + } + return 1; } diff --git a/net/ipv4/netfilter/ipt_MARK.c b/net/ipv4/netfilter/ipt_MARK.c index 33c6f9b63b8d..8526398346cf 100644 --- a/net/ipv4/netfilter/ipt_MARK.c +++ b/net/ipv4/netfilter/ipt_MARK.c @@ -76,6 +76,8 @@ checkentry_v0(const char *tablename, unsigned int targinfosize, unsigned int hook_mask) { + struct ipt_mark_target_info *markinfo = targinfo; + if (targinfosize != IPT_ALIGN(sizeof(struct ipt_mark_target_info))) { printk(KERN_WARNING "MARK: targinfosize %u != %Zu\n", targinfosize, @@ -88,6 +90,11 @@ checkentry_v0(const char *tablename, return 0; } + if (markinfo->mark > 0xffffffff) { + printk(KERN_WARNING "MARK: Only supports 32bit wide mark\n"); + return 0; + } + return 1; } @@ -120,6 +127,11 @@ checkentry_v1(const char 
*tablename, return 0; } + if (markinfo->mark > 0xffffffff) { + printk(KERN_WARNING "MARK: Only supports 32bit wide mark\n"); + return 0; + } + return 1; } diff --git a/net/ipv4/netfilter/ipt_connmark.c b/net/ipv4/netfilter/ipt_connmark.c index 2706f96cea55..bf8de47ce004 100644 --- a/net/ipv4/netfilter/ipt_connmark.c +++ b/net/ipv4/netfilter/ipt_connmark.c @@ -54,9 +54,16 @@ checkentry(const char *tablename, unsigned int matchsize, unsigned int hook_mask) { + struct ipt_connmark_info *cm = + (struct ipt_connmark_info *)matchinfo; if (matchsize != IPT_ALIGN(sizeof(struct ipt_connmark_info))) return 0; + if (cm->mark > 0xffffffff || cm->mask > 0xffffffff) { + printk(KERN_WARNING "connmark: only support 32bit mark\n"); + return 0; + } + return 1; } diff --git a/net/ipv4/netfilter/ipt_mark.c b/net/ipv4/netfilter/ipt_mark.c index 8955728127b9..00bef6cdd3f8 100644 --- a/net/ipv4/netfilter/ipt_mark.c +++ b/net/ipv4/netfilter/ipt_mark.c @@ -37,9 +37,16 @@ checkentry(const char *tablename, unsigned int matchsize, unsigned int hook_mask) { + struct ipt_mark_info *minfo = (struct ipt_mark_info *) matchinfo; + if (matchsize != IPT_ALIGN(sizeof(struct ipt_mark_info))) return 0; + if (minfo->mark > 0xffffffff || minfo->mask > 0xffffffff) { + printk(KERN_WARNING "mark: only supports 32bit mark\n"); + return 0; + } + return 1; } From 6869c4d8e066e21623c812c448a05f1ed931c9c6 Mon Sep 17 00:00:00 2001 From: Harald Welte Date: Tue, 9 Aug 2005 19:24:19 -0700 Subject: [PATCH 268/584] [NETFILTER]: reduce netfilter sk_buff enlargement As discussed at netconf'05, we're trying to save every bit in sk_buff. The patch below makes sk_buff 8 bytes smaller. I did some basic testing on my notebook and it seems to work. The only real in-tree user of nfcache was IPVS, who only needs a single bit. Unfortunately I couldn't find some other free bit in sk_buff to stuff that bit into, so I introduced a separate field for them. Maybe the IPVS guys can resolve that to further save space. 
Initially I wanted to shrink pkt_type to three bits (PACKET_HOST and alike are only 6 values defined), but unfortunately the bluetooth code overloads pkt_type :( The conntrack-event-api (out-of-tree) uses nfcache, but Rusty just came up with a way to do it without any skb fields, so it's safe to remove it. - remove all never-implemented 'nfcache' code - don't have ipvs code abuse 'nfcache' field. currently gets its own compile-conditional skb->ipvs_property field. IPVS maintainers can decide to move this bit elsewhere, but nfcache needs to die. - remove skb->nfcache field to save 4 bytes - move skb->nfctinfo into three unused bits to save further 4 bytes Signed-off-by: Harald Welte Signed-off-by: David S. Miller --- include/linux/netfilter.h | 3 +++ include/linux/netfilter_decnet.h | 3 +++ include/linux/netfilter_ipv4.h | 3 +++ include/linux/netfilter_ipv6.h | 3 +++ include/linux/skbuff.h | 10 +++++----- net/bridge/netfilter/ebt_mark.c | 5 ++--- net/core/skbuff.c | 2 -- net/ipv4/ip_output.c | 1 - net/ipv4/ipvs/ip_vs_core.c | 9 +++++---- net/ipv4/ipvs/ip_vs_xmit.c | 2 +- net/ipv4/netfilter/ip_conntrack_core.c | 7 +------ net/ipv4/netfilter/ip_nat_core.c | 1 - net/ipv4/netfilter/ip_nat_standalone.c | 2 -- net/ipv4/netfilter/ip_queue.c | 1 - net/ipv4/netfilter/ip_tables.c | 1 - net/ipv4/netfilter/ipt_CLASSIFY.c | 4 +--- net/ipv4/netfilter/ipt_CONNMARK.c | 4 +--- net/ipv4/netfilter/ipt_DSCP.c | 1 - net/ipv4/netfilter/ipt_ECN.c | 2 -- net/ipv4/netfilter/ipt_MARK.c | 10 ++++------ net/ipv4/netfilter/ipt_REJECT.c | 1 - net/ipv4/netfilter/ipt_TCPMSS.c | 1 - net/ipv4/netfilter/ipt_TOS.c | 1 - net/ipv6/ip6_output.c | 16 ++-------------- net/ipv6/netfilter/ip6_queue.c | 1 - net/ipv6/netfilter/ip6_tables.c | 1 - net/ipv6/netfilter/ip6t_MARK.c | 5 ++--- 27 files changed, 36 insertions(+), 64 deletions(-) diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h index 2e2045482cb1..ec60856408fd 100644 --- a/include/linux/netfilter.h +++ b/include/linux/netfilter.h
@@ -21,10 +21,13 @@ #define NF_STOP 5 #define NF_MAX_VERDICT NF_STOP +/* only for userspace compatibility */ +#ifndef __KERNEL__ /* Generic cache responses from hook functions. <= 0x2000 is used for protocol-flags. */ #define NFC_UNKNOWN 0x4000 #define NFC_ALTERED 0x8000 +#endif #ifdef __KERNEL__ #include diff --git a/include/linux/netfilter_decnet.h b/include/linux/netfilter_decnet.h index 3064eec9cb8e..018979484150 100644 --- a/include/linux/netfilter_decnet.h +++ b/include/linux/netfilter_decnet.h @@ -9,6 +9,8 @@ #include +/* only for userspace compatibility */ +#ifndef __KERNEL__ /* IP Cache bits. */ /* Src IP address. */ #define NFC_DN_SRC 0x0001 @@ -18,6 +20,7 @@ #define NFC_DN_IF_IN 0x0004 /* Output device. */ #define NFC_DN_IF_OUT 0x0008 +#endif /* ! __KERNEL__ */ /* DECnet Hooks */ /* After promisc drops, checksum checks. */ diff --git a/include/linux/netfilter_ipv4.h b/include/linux/netfilter_ipv4.h index 3ebc36afae1a..552815b8193e 100644 --- a/include/linux/netfilter_ipv4.h +++ b/include/linux/netfilter_ipv4.h @@ -8,6 +8,8 @@ #include #include +/* only for userspace compatibility */ +#ifndef __KERNEL__ /* IP Cache bits. */ /* Src IP address. */ #define NFC_IP_SRC 0x0001 @@ -35,6 +37,7 @@ #define NFC_IP_DST_PT 0x0400 /* Something else about the proto */ #define NFC_IP_PROTO_UNKNOWN 0x2000 +#endif /* ! __KERNEL__ */ /* IP Hooks */ /* After promisc drops, checksum checks. */ diff --git a/include/linux/netfilter_ipv6.h b/include/linux/netfilter_ipv6.h index bee7a5ec7c66..20c069a5e4ac 100644 --- a/include/linux/netfilter_ipv6.h +++ b/include/linux/netfilter_ipv6.h @@ -10,6 +10,8 @@ #include +/* only for userspace compatibility */ +#ifndef __KERNEL__ /* IP Cache bits. */ /* Src IP address. */ #define NFC_IP6_SRC 0x0001 @@ -38,6 +40,7 @@ #define NFC_IP6_DST_PT 0x0400 /* Something else about the proto */ #define NFC_IP6_PROTO_UNKNOWN 0x2000 +#endif /* ! 
__KERNEL__ */ /* IP6 Hooks */ diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 2e40f4c9f7a6..4b929c3c1a98 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -190,7 +190,6 @@ struct skb_shared_info { * @end: End pointer * @destructor: Destruct function * @nfmark: Can be used for communication between hooks - * @nfcache: Cache info * @nfct: Associated connection, if any * @nfctinfo: Relationship of this skb to the connection * @nf_bridge: Saved data about a bridged frame - see br_netfilter.c @@ -252,17 +251,18 @@ struct sk_buff { __u8 local_df:1, cloned:1, ip_summed:2, - nohdr:1; - /* 3 bits spare */ + nohdr:1, + nfctinfo:3; __u8 pkt_type; __be16 protocol; void (*destructor)(struct sk_buff *skb); #ifdef CONFIG_NETFILTER __u32 nfmark; - __u32 nfcache; - __u32 nfctinfo; struct nf_conntrack *nfct; +#if defined(CONFIG_IP_VS) || defined(CONFIG_IP_VS_MODULE) + __u8 ipvs_property:1; +#endif #ifdef CONFIG_BRIDGE_NETFILTER struct nf_bridge_info *nf_bridge; #endif diff --git a/net/bridge/netfilter/ebt_mark.c b/net/bridge/netfilter/ebt_mark.c index 02c632b4d325..c93d35ab95c0 100644 --- a/net/bridge/netfilter/ebt_mark.c +++ b/net/bridge/netfilter/ebt_mark.c @@ -23,10 +23,9 @@ static int ebt_target_mark(struct sk_buff **pskb, unsigned int hooknr, { struct ebt_mark_t_info *info = (struct ebt_mark_t_info *)data; - if ((*pskb)->nfmark != info->mark) { + if ((*pskb)->nfmark != info->mark) (*pskb)->nfmark = info->mark; - (*pskb)->nfcache |= NFC_ALTERED; - } + return info->target; } diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 7eab867ede59..096991cb09d9 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -361,7 +361,6 @@ struct sk_buff *skb_clone(struct sk_buff *skb, unsigned int __nocast gfp_mask) n->destructor = NULL; #ifdef CONFIG_NETFILTER C(nfmark); - C(nfcache); C(nfct); nf_conntrack_get(skb->nfct); C(nfctinfo); @@ -424,7 +423,6 @@ static void copy_skb_header(struct sk_buff *new, const struct sk_buff *old) new->destructor = 
NULL; #ifdef CONFIG_NETFILTER new->nfmark = old->nfmark; - new->nfcache = old->nfcache; new->nfct = old->nfct; nf_conntrack_get(old->nfct); new->nfctinfo = old->nfctinfo; diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 80d13103b2b0..766564cb4207 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -392,7 +392,6 @@ static void ip_copy_metadata(struct sk_buff *to, struct sk_buff *from) #endif #ifdef CONFIG_NETFILTER to->nfmark = from->nfmark; - to->nfcache = from->nfcache; /* Connection association is same as pre-frag packet */ nf_conntrack_put(to->nfct); to->nfct = from->nfct; diff --git a/net/ipv4/ipvs/ip_vs_core.c b/net/ipv4/ipvs/ip_vs_core.c index 5fb257dd07cb..3ac7eeca04ac 100644 --- a/net/ipv4/ipvs/ip_vs_core.c +++ b/net/ipv4/ipvs/ip_vs_core.c @@ -22,6 +22,7 @@ * * Changes: * Paul `Rusty' Russell properly handle non-linear skbs + * Harald Welte don't use nfcache * */ @@ -529,7 +530,7 @@ static unsigned int ip_vs_post_routing(unsigned int hooknum, const struct net_device *out, int (*okfn)(struct sk_buff *)) { - if (!((*pskb)->nfcache & NFC_IPVS_PROPERTY)) + if (!((*pskb)->ipvs_property)) return NF_ACCEPT; /* The packet was sent from IPVS, exit this chain */ @@ -701,7 +702,7 @@ static int ip_vs_out_icmp(struct sk_buff **pskb, int *related) /* do the statistics and put it back */ ip_vs_out_stats(cp, skb); - skb->nfcache |= NFC_IPVS_PROPERTY; + skb->ipvs_property = 1; verdict = NF_ACCEPT; out: @@ -739,7 +740,7 @@ ip_vs_out(unsigned int hooknum, struct sk_buff **pskb, EnterFunction(11); - if (skb->nfcache & NFC_IPVS_PROPERTY) + if (skb->ipvs_property) return NF_ACCEPT; iph = skb->nh.iph; @@ -821,7 +822,7 @@ ip_vs_out(unsigned int hooknum, struct sk_buff **pskb, ip_vs_set_state(cp, IP_VS_DIR_OUTPUT, skb, pp); ip_vs_conn_put(cp); - skb->nfcache |= NFC_IPVS_PROPERTY; + skb->ipvs_property = 1; LeaveFunction(11); return NF_ACCEPT; diff --git a/net/ipv4/ipvs/ip_vs_xmit.c b/net/ipv4/ipvs/ip_vs_xmit.c index a8512a3fd08a..3b87482049cf 100644 --- 
a/net/ipv4/ipvs/ip_vs_xmit.c +++ b/net/ipv4/ipvs/ip_vs_xmit.c @@ -127,7 +127,7 @@ ip_vs_dst_reset(struct ip_vs_dest *dest) #define IP_VS_XMIT(skb, rt) \ do { \ - (skb)->nfcache |= NFC_IPVS_PROPERTY; \ + (skb)->ipvs_property = 1; \ (skb)->ip_summed = CHECKSUM_NONE; \ NF_HOOK(PF_INET, NF_IP_LOCAL_OUT, (skb), NULL, \ (rt)->u.dst.dev, dst_output); \ diff --git a/net/ipv4/netfilter/ip_conntrack_core.c b/net/ipv4/netfilter/ip_conntrack_core.c index a7f0c821a9b2..04c3414361d4 100644 --- a/net/ipv4/netfilter/ip_conntrack_core.c +++ b/net/ipv4/netfilter/ip_conntrack_core.c @@ -625,9 +625,6 @@ unsigned int ip_conntrack_in(unsigned int hooknum, return NF_DROP; } - /* FIXME: Do this right please. --RR */ - (*pskb)->nfcache |= NFC_UNKNOWN; - /* Doesn't cover locally-generated broadcast, so not worth it. */ #if 0 /* Ignore broadcast: no `connection'. */ @@ -943,10 +940,8 @@ ip_ct_gather_frags(struct sk_buff *skb, u_int32_t user) skb = ip_defrag(skb, user); local_bh_enable(); - if (skb) { + if (skb) ip_send_check(skb->nh.iph); - skb->nfcache |= NFC_ALTERED; - } return skb; } diff --git a/net/ipv4/netfilter/ip_nat_core.c b/net/ipv4/netfilter/ip_nat_core.c index 739b6dde1c82..ed4d731880f7 100644 --- a/net/ipv4/netfilter/ip_nat_core.c +++ b/net/ipv4/netfilter/ip_nat_core.c @@ -321,7 +321,6 @@ manip_pkt(u_int16_t proto, { struct iphdr *iph; - (*pskb)->nfcache |= NFC_ALTERED; if (!skb_ip_make_writable(pskb, iphdroff + sizeof(*iph))) return 0; diff --git a/net/ipv4/netfilter/ip_nat_standalone.c b/net/ipv4/netfilter/ip_nat_standalone.c index 91d5ea1dbbc9..9ecba979033a 100644 --- a/net/ipv4/netfilter/ip_nat_standalone.c +++ b/net/ipv4/netfilter/ip_nat_standalone.c @@ -73,8 +73,6 @@ ip_nat_fn(unsigned int hooknum, IP_NF_ASSERT(!((*pskb)->nh.iph->frag_off & htons(IP_MF|IP_OFFSET))); - (*pskb)->nfcache |= NFC_UNKNOWN; - /* If we had a hardware checksum before, it's now invalid */ if ((*pskb)->ip_summed == CHECKSUM_HW) if (skb_checksum_help(*pskb, (out == NULL))) diff --git 
a/net/ipv4/netfilter/ip_queue.c b/net/ipv4/netfilter/ip_queue.c index c6baa8174389..bc0af8d8e910 100644 --- a/net/ipv4/netfilter/ip_queue.c +++ b/net/ipv4/netfilter/ip_queue.c @@ -392,7 +392,6 @@ ipq_mangle_ipv4(ipq_verdict_msg_t *v, struct ipq_queue_entry *e) return -ENOMEM; memcpy(e->skb->data, v->payload, v->data_len); e->skb->ip_summed = CHECKSUM_NONE; - e->skb->nfcache |= NFC_ALTERED; /* * Extra routing may needed on local out, as the QUEUE target never diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index c88dfcd38c56..ff8d85d2070d 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -312,7 +312,6 @@ ipt_do_table(struct sk_buff **pskb, do { IP_NF_ASSERT(e); IP_NF_ASSERT(back); - (*pskb)->nfcache |= e->nfcache; if (ip_packet_match(ip, indev, outdev, &e->ip, offset)) { struct ipt_entry_target *t; diff --git a/net/ipv4/netfilter/ipt_CLASSIFY.c b/net/ipv4/netfilter/ipt_CLASSIFY.c index 9842e6e23184..dab78d8bd494 100644 --- a/net/ipv4/netfilter/ipt_CLASSIFY.c +++ b/net/ipv4/netfilter/ipt_CLASSIFY.c @@ -32,10 +32,8 @@ target(struct sk_buff **pskb, { const struct ipt_classify_target_info *clinfo = targinfo; - if((*pskb)->priority != clinfo->priority) { + if((*pskb)->priority != clinfo->priority) (*pskb)->priority = clinfo->priority; - (*pskb)->nfcache |= NFC_ALTERED; - } return IPT_CONTINUE; } diff --git a/net/ipv4/netfilter/ipt_CONNMARK.c b/net/ipv4/netfilter/ipt_CONNMARK.c index 8ed744157b1a..134638021339 100644 --- a/net/ipv4/netfilter/ipt_CONNMARK.c +++ b/net/ipv4/netfilter/ipt_CONNMARK.c @@ -61,10 +61,8 @@ target(struct sk_buff **pskb, case IPT_CONNMARK_RESTORE: nfmark = (*pskb)->nfmark; diff = (ct->mark ^ nfmark) & markinfo->mask; - if (diff != 0) { + if (diff != 0) (*pskb)->nfmark = nfmark ^ diff; - (*pskb)->nfcache |= NFC_ALTERED; - } break; } } diff --git a/net/ipv4/netfilter/ipt_DSCP.c b/net/ipv4/netfilter/ipt_DSCP.c index 3ea4509099f9..975476fef27a 100644 --- a/net/ipv4/netfilter/ipt_DSCP.c +++ 
b/net/ipv4/netfilter/ipt_DSCP.c @@ -51,7 +51,6 @@ target(struct sk_buff **pskb, sizeof(diffs), (*pskb)->nh.iph->check ^ 0xFFFF)); - (*pskb)->nfcache |= NFC_ALTERED; } return IPT_CONTINUE; } diff --git a/net/ipv4/netfilter/ipt_ECN.c b/net/ipv4/netfilter/ipt_ECN.c index 94a0ce1c1c9d..f63a9bc0e4d2 100644 --- a/net/ipv4/netfilter/ipt_ECN.c +++ b/net/ipv4/netfilter/ipt_ECN.c @@ -43,7 +43,6 @@ set_ect_ip(struct sk_buff **pskb, const struct ipt_ECN_info *einfo) sizeof(diffs), (*pskb)->nh.iph->check ^0xFFFF)); - (*pskb)->nfcache |= NFC_ALTERED; } return 1; } @@ -87,7 +86,6 @@ set_ect_tcp(struct sk_buff **pskb, const struct ipt_ECN_info *einfo, int inward) tcph->check = csum_fold(csum_partial((char *)diffs, sizeof(diffs), tcph->check^0xFFFF)); - (*pskb)->nfcache |= NFC_ALTERED; return 1; } diff --git a/net/ipv4/netfilter/ipt_MARK.c b/net/ipv4/netfilter/ipt_MARK.c index 8526398346cf..52b4f2c296bf 100644 --- a/net/ipv4/netfilter/ipt_MARK.c +++ b/net/ipv4/netfilter/ipt_MARK.c @@ -29,10 +29,9 @@ target_v0(struct sk_buff **pskb, { const struct ipt_mark_target_info *markinfo = targinfo; - if((*pskb)->nfmark != markinfo->mark) { + if((*pskb)->nfmark != markinfo->mark) (*pskb)->nfmark = markinfo->mark; - (*pskb)->nfcache |= NFC_ALTERED; - } + return IPT_CONTINUE; } @@ -61,10 +60,9 @@ target_v1(struct sk_buff **pskb, break; } - if((*pskb)->nfmark != mark) { + if((*pskb)->nfmark != mark) (*pskb)->nfmark = mark; - (*pskb)->nfcache |= NFC_ALTERED; - } + return IPT_CONTINUE; } diff --git a/net/ipv4/netfilter/ipt_REJECT.c b/net/ipv4/netfilter/ipt_REJECT.c index 915696446020..f115a84a4ac6 100644 --- a/net/ipv4/netfilter/ipt_REJECT.c +++ b/net/ipv4/netfilter/ipt_REJECT.c @@ -156,7 +156,6 @@ static void send_reset(struct sk_buff *oldskb, int hook) /* This packet will not be the same as the other: clear nf fields */ nf_reset(nskb); - nskb->nfcache = 0; nskb->nfmark = 0; #ifdef CONFIG_BRIDGE_NETFILTER nf_bridge_put(nskb->nf_bridge); diff --git a/net/ipv4/netfilter/ipt_TCPMSS.c 
b/net/ipv4/netfilter/ipt_TCPMSS.c index 7b84a254440e..949288319ca8 100644 --- a/net/ipv4/netfilter/ipt_TCPMSS.c +++ b/net/ipv4/netfilter/ipt_TCPMSS.c @@ -190,7 +190,6 @@ ipt_tcpmss_target(struct sk_buff **pskb, newmss); retmodified: - (*pskb)->nfcache |= NFC_UNKNOWN | NFC_ALTERED; return IPT_CONTINUE; } diff --git a/net/ipv4/netfilter/ipt_TOS.c b/net/ipv4/netfilter/ipt_TOS.c index 85c70d240f8b..49abb7eef0a4 100644 --- a/net/ipv4/netfilter/ipt_TOS.c +++ b/net/ipv4/netfilter/ipt_TOS.c @@ -46,7 +46,6 @@ target(struct sk_buff **pskb, sizeof(diffs), (*pskb)->nh.iph->check ^0xFFFF)); - (*pskb)->nfcache |= NFC_ALTERED; } return IPT_CONTINUE; } diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index ae652ca14bc9..590d2b797197 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -185,19 +185,6 @@ int ip6_route_me_harder(struct sk_buff *skb) } #endif -static inline int ip6_maybe_reroute(struct sk_buff *skb) -{ -#ifdef CONFIG_NETFILTER - if (skb->nfcache & NFC_ALTERED){ - if (ip6_route_me_harder(skb) != 0){ - kfree_skb(skb); - return -EINVAL; - } - } -#endif /* CONFIG_NETFILTER */ - return dst_output(skb); -} - /* * xmit an sk_buff (used by TCP) */ @@ -266,7 +253,8 @@ int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl, mtu = dst_mtu(dst); if ((skb->len <= mtu) || ipfragok) { IP6_INC_STATS(IPSTATS_MIB_OUTREQUESTS); - return NF_HOOK(PF_INET6, NF_IP6_LOCAL_OUT, skb, NULL, dst->dev, ip6_maybe_reroute); + return NF_HOOK(PF_INET6, NF_IP6_LOCAL_OUT, skb, NULL, dst->dev, + dst_output); } if (net_ratelimit()) diff --git a/net/ipv6/netfilter/ip6_queue.c b/net/ipv6/netfilter/ip6_queue.c index a16df5b27c84..83ccedceed17 100644 --- a/net/ipv6/netfilter/ip6_queue.c +++ b/net/ipv6/netfilter/ip6_queue.c @@ -388,7 +388,6 @@ ipq_mangle_ipv6(ipq_verdict_msg_t *v, struct ipq_queue_entry *e) return -ENOMEM; memcpy(e->skb->data, v->payload, v->data_len); e->skb->ip_summed = CHECKSUM_NONE; - e->skb->nfcache |= NFC_ALTERED; /* * Extra routing may needed on 
local out, as the QUEUE target never diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index 73034511c8db..41a67cf6e33a 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -401,7 +401,6 @@ ip6t_do_table(struct sk_buff **pskb, do { IP_NF_ASSERT(e); IP_NF_ASSERT(back); - (*pskb)->nfcache |= e->nfcache; if (ip6_packet_match(*pskb, indev, outdev, &e->ipv6, &protoff, &offset)) { struct ip6t_entry_target *t; diff --git a/net/ipv6/netfilter/ip6t_MARK.c b/net/ipv6/netfilter/ip6t_MARK.c index d09ceb05013a..81924fcc5857 100644 --- a/net/ipv6/netfilter/ip6t_MARK.c +++ b/net/ipv6/netfilter/ip6t_MARK.c @@ -28,10 +28,9 @@ target(struct sk_buff **pskb, { const struct ip6t_mark_target_info *markinfo = targinfo; - if((*pskb)->nfmark != markinfo->mark) { + if((*pskb)->nfmark != markinfo->mark) (*pskb)->nfmark = markinfo->mark; - (*pskb)->nfcache |= NFC_ALTERED; - } + return IP6T_CONTINUE; } From 8728b834b226ffcf2c94a58530090e292af2a7bf Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 9 Aug 2005 19:25:21 -0700 Subject: [PATCH 269/584] [NET]: Kill skb->list Remove the "list" member of struct sk_buff, as it is entirely redundant. All SKB list removal callers know which list the SKB is on, so storing this in sk_buff does nothing other than taking up some space. Two tricky bits were SCTP, which I took care of, and two ATM drivers which Francois Romieu fixed up. Signed-off-by: David S. 
Miller Signed-off-by: Francois Romieu --- drivers/atm/nicstar.c | 157 +++++++++++++++---------------- drivers/atm/nicstar.h | 16 +++- drivers/atm/zatm.c | 8 +- drivers/bluetooth/bfusb.c | 8 +- drivers/ieee1394/ieee1394_core.c | 4 +- drivers/isdn/act2000/capi.c | 2 +- drivers/net/shaper.c | 50 +--------- drivers/net/wan/sdla_fr.c | 22 ++--- drivers/usb/net/usbnet.c | 21 ++--- include/linux/skbuff.h | 16 +--- net/atm/ipcommon.c | 3 - net/ax25/ax25_subr.c | 2 +- net/core/skbuff.c | 57 +++++------ net/decnet/af_decnet.c | 2 +- net/decnet/dn_nsp_out.c | 2 +- net/econet/af_econet.c | 4 +- net/ipv4/tcp.c | 2 +- net/ipv4/tcp_input.c | 29 +++--- net/ipv4/tcp_output.c | 6 +- net/irda/irlap_frame.c | 6 -- net/lapb/lapb_subr.c | 2 +- net/llc/af_llc.c | 2 +- net/llc/llc_conn.c | 6 +- net/netrom/nr_subr.c | 2 +- net/rose/rose_subr.c | 2 +- net/sctp/socket.c | 4 +- net/sctp/ulpqueue.c | 63 ++++++++----- net/unix/garbage.c | 12 +-- net/x25/x25_subr.c | 2 +- 29 files changed, 229 insertions(+), 283 deletions(-) diff --git a/drivers/atm/nicstar.c b/drivers/atm/nicstar.c index b2a7b754fd14..a0e3bd861f1c 100644 --- a/drivers/atm/nicstar.c +++ b/drivers/atm/nicstar.c @@ -214,8 +214,7 @@ static int __devinit ns_init_card(int i, struct pci_dev *pcidev); static void __devinit ns_init_card_error(ns_dev *card, int error); static scq_info *get_scq(int size, u32 scd); static void free_scq(scq_info *scq, struct atm_vcc *vcc); -static void push_rxbufs(ns_dev *card, u32 type, u32 handle1, u32 addr1, - u32 handle2, u32 addr2); +static void push_rxbufs(ns_dev *, struct sk_buff *); static irqreturn_t ns_irq_handler(int irq, void *dev_id, struct pt_regs *regs); static int ns_open(struct atm_vcc *vcc); static void ns_close(struct atm_vcc *vcc); @@ -766,6 +765,7 @@ static int __devinit ns_init_card(int i, struct pci_dev *pcidev) ns_init_card_error(card, error); return error; } + NS_SKB_CB(hb)->buf_type = BUF_NONE; skb_queue_tail(&card->hbpool.queue, hb); card->hbpool.count++; } @@ -786,9 +786,10 @@ 
static int __devinit ns_init_card(int i, struct pci_dev *pcidev) ns_init_card_error(card, error); return error; } + NS_SKB_CB(lb)->buf_type = BUF_LG; skb_queue_tail(&card->lbpool.queue, lb); skb_reserve(lb, NS_SMBUFSIZE); - push_rxbufs(card, BUF_LG, (u32) lb, (u32) virt_to_bus(lb->data), 0, 0); + push_rxbufs(card, lb); /* Due to the implementation of push_rxbufs() this is 1, not 0 */ if (j == 1) { @@ -822,9 +823,10 @@ static int __devinit ns_init_card(int i, struct pci_dev *pcidev) ns_init_card_error(card, error); return error; } + NS_SKB_CB(sb)->buf_type = BUF_SM; skb_queue_tail(&card->sbpool.queue, sb); skb_reserve(sb, NS_AAL0_HEADER); - push_rxbufs(card, BUF_SM, (u32) sb, (u32) virt_to_bus(sb->data), 0, 0); + push_rxbufs(card, sb); } /* Test for strange behaviour which leads to crashes */ if ((bcount = ns_stat_sfbqc_get(readl(card->membase + STAT))) < card->sbnr.min) @@ -852,6 +854,7 @@ static int __devinit ns_init_card(int i, struct pci_dev *pcidev) ns_init_card_error(card, error); return error; } + NS_SKB_CB(iovb)->buf_type = BUF_NONE; skb_queue_tail(&card->iovpool.queue, iovb); card->iovpool.count++; } @@ -1078,12 +1081,18 @@ static void free_scq(scq_info *scq, struct atm_vcc *vcc) /* The handles passed must be pointers to the sk_buff containing the small or large buffer(s) cast to u32. 
*/ -static void push_rxbufs(ns_dev *card, u32 type, u32 handle1, u32 addr1, - u32 handle2, u32 addr2) +static void push_rxbufs(ns_dev *card, struct sk_buff *skb) { + struct ns_skb_cb *cb = NS_SKB_CB(skb); + u32 handle1, addr1; + u32 handle2, addr2; u32 stat; unsigned long flags; + /* *BARF* */ + handle2 = addr2 = 0; + handle1 = (u32)skb; + addr1 = (u32)virt_to_bus(skb->data); #ifdef GENERAL_DEBUG if (!addr1) @@ -1093,7 +1102,7 @@ static void push_rxbufs(ns_dev *card, u32 type, u32 handle1, u32 addr1, stat = readl(card->membase + STAT); card->sbfqc = ns_stat_sfbqc_get(stat); card->lbfqc = ns_stat_lfbqc_get(stat); - if (type == BUF_SM) + if (cb->buf_type == BUF_SM) { if (!addr2) { @@ -1111,7 +1120,7 @@ static void push_rxbufs(ns_dev *card, u32 type, u32 handle1, u32 addr1, } } } - else /* type == BUF_LG */ + else /* buf_type == BUF_LG */ { if (!addr2) { @@ -1132,26 +1141,26 @@ static void push_rxbufs(ns_dev *card, u32 type, u32 handle1, u32 addr1, if (addr2) { - if (type == BUF_SM) + if (cb->buf_type == BUF_SM) { if (card->sbfqc >= card->sbnr.max) { - skb_unlink((struct sk_buff *) handle1); + skb_unlink((struct sk_buff *) handle1, &card->sbpool.queue); dev_kfree_skb_any((struct sk_buff *) handle1); - skb_unlink((struct sk_buff *) handle2); + skb_unlink((struct sk_buff *) handle2, &card->sbpool.queue); dev_kfree_skb_any((struct sk_buff *) handle2); return; } else card->sbfqc += 2; } - else /* (type == BUF_LG) */ + else /* (buf_type == BUF_LG) */ { if (card->lbfqc >= card->lbnr.max) { - skb_unlink((struct sk_buff *) handle1); + skb_unlink((struct sk_buff *) handle1, &card->lbpool.queue); dev_kfree_skb_any((struct sk_buff *) handle1); - skb_unlink((struct sk_buff *) handle2); + skb_unlink((struct sk_buff *) handle2, &card->lbpool.queue); dev_kfree_skb_any((struct sk_buff *) handle2); return; } @@ -1166,12 +1175,12 @@ static void push_rxbufs(ns_dev *card, u32 type, u32 handle1, u32 addr1, writel(handle2, card->membase + DR2); writel(addr1, card->membase + DR1); 
writel(handle1, card->membase + DR0); - writel(NS_CMD_WRITE_FREEBUFQ | (u32) type, card->membase + CMD); + writel(NS_CMD_WRITE_FREEBUFQ | cb->buf_type, card->membase + CMD); spin_unlock_irqrestore(&card->res_lock, flags); XPRINTK("nicstar%d: Pushing %s buffers at 0x%x and 0x%x.\n", card->index, - (type == BUF_SM ? "small" : "large"), addr1, addr2); + (cb->buf_type == BUF_SM ? "small" : "large"), addr1, addr2); } if (!card->efbie && card->sbfqc >= card->sbnr.min && @@ -1322,9 +1331,10 @@ static irqreturn_t ns_irq_handler(int irq, void *dev_id, struct pt_regs *regs) card->efbie = 0; break; } + NS_SKB_CB(sb)->buf_type = BUF_SM; skb_queue_tail(&card->sbpool.queue, sb); skb_reserve(sb, NS_AAL0_HEADER); - push_rxbufs(card, BUF_SM, (u32) sb, (u32) virt_to_bus(sb->data), 0, 0); + push_rxbufs(card, sb); } card->sbfqc = i; process_rsq(card); @@ -1348,9 +1358,10 @@ static irqreturn_t ns_irq_handler(int irq, void *dev_id, struct pt_regs *regs) card->efbie = 0; break; } + NS_SKB_CB(lb)->buf_type = BUF_LG; skb_queue_tail(&card->lbpool.queue, lb); skb_reserve(lb, NS_SMBUFSIZE); - push_rxbufs(card, BUF_LG, (u32) lb, (u32) virt_to_bus(lb->data), 0, 0); + push_rxbufs(card, lb); } card->lbfqc = i; process_rsq(card); @@ -2227,6 +2238,7 @@ static void dequeue_rx(ns_dev *card, ns_rsqe *rsqe) recycle_rx_buf(card, skb); return; } + NS_SKB_CB(iovb)->buf_type = BUF_NONE; } else if (--card->iovpool.count < card->iovnr.min) @@ -2234,6 +2246,7 @@ static void dequeue_rx(ns_dev *card, ns_rsqe *rsqe) struct sk_buff *new_iovb; if ((new_iovb = alloc_skb(NS_IOVBUFSIZE, GFP_ATOMIC)) != NULL) { + NS_SKB_CB(iovb)->buf_type = BUF_NONE; skb_queue_tail(&card->iovpool.queue, new_iovb); card->iovpool.count++; } @@ -2264,7 +2277,7 @@ static void dequeue_rx(ns_dev *card, ns_rsqe *rsqe) if (NS_SKB(iovb)->iovcnt == 1) { - if (skb->list != &card->sbpool.queue) + if (NS_SKB_CB(skb)->buf_type != BUF_SM) { printk("nicstar%d: Expected a small buffer, and this is not one.\n", card->index); @@ -2278,7 +2291,7 @@ 
static void dequeue_rx(ns_dev *card, ns_rsqe *rsqe) } else /* NS_SKB(iovb)->iovcnt >= 2 */ { - if (skb->list != &card->lbpool.queue) + if (NS_SKB_CB(skb)->buf_type != BUF_LG) { printk("nicstar%d: Expected a large buffer, and this is not one.\n", card->index); @@ -2322,8 +2335,7 @@ static void dequeue_rx(ns_dev *card, ns_rsqe *rsqe) /* skb points to a small buffer */ if (!atm_charge(vcc, skb->truesize)) { - push_rxbufs(card, BUF_SM, (u32) skb, (u32) virt_to_bus(skb->data), - 0, 0); + push_rxbufs(card, skb); atomic_inc(&vcc->stats->rx_drop); } else @@ -2350,8 +2362,7 @@ static void dequeue_rx(ns_dev *card, ns_rsqe *rsqe) { if (!atm_charge(vcc, sb->truesize)) { - push_rxbufs(card, BUF_SM, (u32) sb, (u32) virt_to_bus(sb->data), - 0, 0); + push_rxbufs(card, sb); atomic_inc(&vcc->stats->rx_drop); } else @@ -2367,16 +2378,14 @@ static void dequeue_rx(ns_dev *card, ns_rsqe *rsqe) atomic_inc(&vcc->stats->rx); } - push_rxbufs(card, BUF_LG, (u32) skb, - (u32) virt_to_bus(skb->data), 0, 0); + push_rxbufs(card, skb); } else /* len > NS_SMBUFSIZE, the usual case */ { if (!atm_charge(vcc, skb->truesize)) { - push_rxbufs(card, BUF_LG, (u32) skb, - (u32) virt_to_bus(skb->data), 0, 0); + push_rxbufs(card, skb); atomic_inc(&vcc->stats->rx_drop); } else @@ -2394,8 +2403,7 @@ static void dequeue_rx(ns_dev *card, ns_rsqe *rsqe) atomic_inc(&vcc->stats->rx); } - push_rxbufs(card, BUF_SM, (u32) sb, (u32) virt_to_bus(sb->data), - 0, 0); + push_rxbufs(card, sb); } @@ -2430,6 +2438,7 @@ static void dequeue_rx(ns_dev *card, ns_rsqe *rsqe) card->hbpool.count++; } } + NS_SKB_CB(hb)->buf_type = BUF_NONE; } else if (--card->hbpool.count < card->hbnr.min) @@ -2437,6 +2446,7 @@ static void dequeue_rx(ns_dev *card, ns_rsqe *rsqe) struct sk_buff *new_hb; if ((new_hb = dev_alloc_skb(NS_HBUFSIZE)) != NULL) { + NS_SKB_CB(new_hb)->buf_type = BUF_NONE; skb_queue_tail(&card->hbpool.queue, new_hb); card->hbpool.count++; } @@ -2444,6 +2454,7 @@ static void dequeue_rx(ns_dev *card, ns_rsqe *rsqe) { if ((new_hb 
= dev_alloc_skb(NS_HBUFSIZE)) != NULL) { + NS_SKB_CB(new_hb)->buf_type = BUF_NONE; skb_queue_tail(&card->hbpool.queue, new_hb); card->hbpool.count++; } @@ -2473,8 +2484,7 @@ static void dequeue_rx(ns_dev *card, ns_rsqe *rsqe) remaining = len - iov->iov_len; iov++; /* Free the small buffer */ - push_rxbufs(card, BUF_SM, (u32) sb, (u32) virt_to_bus(sb->data), - 0, 0); + push_rxbufs(card, sb); /* Copy all large buffers to the huge buffer and free them */ for (j = 1; j < NS_SKB(iovb)->iovcnt; j++) @@ -2485,8 +2495,7 @@ static void dequeue_rx(ns_dev *card, ns_rsqe *rsqe) skb_put(hb, tocopy); iov++; remaining -= tocopy; - push_rxbufs(card, BUF_LG, (u32) lb, - (u32) virt_to_bus(lb->data), 0, 0); + push_rxbufs(card, lb); } #ifdef EXTRA_DEBUG if (remaining != 0 || hb->len != len) @@ -2527,9 +2536,10 @@ static void ns_sb_destructor(struct sk_buff *sb) sb = __dev_alloc_skb(NS_SMSKBSIZE, GFP_KERNEL); if (sb == NULL) break; + NS_SKB_CB(sb)->buf_type = BUF_SM; skb_queue_tail(&card->sbpool.queue, sb); skb_reserve(sb, NS_AAL0_HEADER); - push_rxbufs(card, BUF_SM, (u32) sb, (u32) virt_to_bus(sb->data), 0, 0); + push_rxbufs(card, sb); } while (card->sbfqc < card->sbnr.min); } @@ -2550,9 +2560,10 @@ static void ns_lb_destructor(struct sk_buff *lb) lb = __dev_alloc_skb(NS_LGSKBSIZE, GFP_KERNEL); if (lb == NULL) break; + NS_SKB_CB(lb)->buf_type = BUF_LG; skb_queue_tail(&card->lbpool.queue, lb); skb_reserve(lb, NS_SMBUFSIZE); - push_rxbufs(card, BUF_LG, (u32) lb, (u32) virt_to_bus(lb->data), 0, 0); + push_rxbufs(card, lb); } while (card->lbfqc < card->lbnr.min); } @@ -2569,6 +2580,7 @@ static void ns_hb_destructor(struct sk_buff *hb) hb = __dev_alloc_skb(NS_HBUFSIZE, GFP_KERNEL); if (hb == NULL) break; + NS_SKB_CB(hb)->buf_type = BUF_NONE; skb_queue_tail(&card->hbpool.queue, hb); card->hbpool.count++; } @@ -2577,45 +2589,25 @@ static void ns_hb_destructor(struct sk_buff *hb) #endif /* NS_USE_DESTRUCTORS */ - static void recycle_rx_buf(ns_dev *card, struct sk_buff *skb) { - if (skb->list 
== &card->sbpool.queue) - push_rxbufs(card, BUF_SM, (u32) skb, (u32) virt_to_bus(skb->data), 0, 0); - else if (skb->list == &card->lbpool.queue) - push_rxbufs(card, BUF_LG, (u32) skb, (u32) virt_to_bus(skb->data), 0, 0); - else - { - printk("nicstar%d: What kind of rx buffer is this?\n", card->index); - dev_kfree_skb_any(skb); - } -} + struct ns_skb_cb *cb = NS_SKB_CB(skb); + if (unlikely(cb->buf_type == BUF_NONE)) { + printk("nicstar%d: What kind of rx buffer is this?\n", card->index); + dev_kfree_skb_any(skb); + } else + push_rxbufs(card, skb); +} static void recycle_iovec_rx_bufs(ns_dev *card, struct iovec *iov, int count) { - struct sk_buff *skb; - - for (; count > 0; count--) - { - skb = (struct sk_buff *) (iov++)->iov_base; - if (skb->list == &card->sbpool.queue) - push_rxbufs(card, BUF_SM, (u32) skb, (u32) virt_to_bus(skb->data), - 0, 0); - else if (skb->list == &card->lbpool.queue) - push_rxbufs(card, BUF_LG, (u32) skb, (u32) virt_to_bus(skb->data), - 0, 0); - else - { - printk("nicstar%d: What kind of rx buffer is this?\n", card->index); - dev_kfree_skb_any(skb); - } - } + while (count-- > 0) + recycle_rx_buf(card, (struct sk_buff *) (iov++)->iov_base); } - static void recycle_iov_buf(ns_dev *card, struct sk_buff *iovb) { if (card->iovpool.count < card->iovnr.max) @@ -2631,7 +2623,7 @@ static void recycle_iov_buf(ns_dev *card, struct sk_buff *iovb) static void dequeue_sm_buf(ns_dev *card, struct sk_buff *sb) { - skb_unlink(sb); + skb_unlink(sb, &card->sbpool.queue); #ifdef NS_USE_DESTRUCTORS if (card->sbfqc < card->sbnr.min) #else @@ -2640,10 +2632,10 @@ static void dequeue_sm_buf(ns_dev *card, struct sk_buff *sb) struct sk_buff *new_sb; if ((new_sb = dev_alloc_skb(NS_SMSKBSIZE)) != NULL) { + NS_SKB_CB(new_sb)->buf_type = BUF_SM; skb_queue_tail(&card->sbpool.queue, new_sb); skb_reserve(new_sb, NS_AAL0_HEADER); - push_rxbufs(card, BUF_SM, (u32) new_sb, - (u32) virt_to_bus(new_sb->data), 0, 0); + push_rxbufs(card, new_sb); } } if (card->sbfqc < 
card->sbnr.init) @@ -2652,10 +2644,10 @@ static void dequeue_sm_buf(ns_dev *card, struct sk_buff *sb) struct sk_buff *new_sb; if ((new_sb = dev_alloc_skb(NS_SMSKBSIZE)) != NULL) { + NS_SKB_CB(new_sb)->buf_type = BUF_SM; skb_queue_tail(&card->sbpool.queue, new_sb); skb_reserve(new_sb, NS_AAL0_HEADER); - push_rxbufs(card, BUF_SM, (u32) new_sb, - (u32) virt_to_bus(new_sb->data), 0, 0); + push_rxbufs(card, new_sb); } } } @@ -2664,7 +2656,7 @@ static void dequeue_sm_buf(ns_dev *card, struct sk_buff *sb) static void dequeue_lg_buf(ns_dev *card, struct sk_buff *lb) { - skb_unlink(lb); + skb_unlink(lb, &card->lbpool.queue); #ifdef NS_USE_DESTRUCTORS if (card->lbfqc < card->lbnr.min) #else @@ -2673,10 +2665,10 @@ static void dequeue_lg_buf(ns_dev *card, struct sk_buff *lb) struct sk_buff *new_lb; if ((new_lb = dev_alloc_skb(NS_LGSKBSIZE)) != NULL) { + NS_SKB_CB(new_lb)->buf_type = BUF_LG; skb_queue_tail(&card->lbpool.queue, new_lb); skb_reserve(new_lb, NS_SMBUFSIZE); - push_rxbufs(card, BUF_LG, (u32) new_lb, - (u32) virt_to_bus(new_lb->data), 0, 0); + push_rxbufs(card, new_lb); } } if (card->lbfqc < card->lbnr.init) @@ -2685,10 +2677,10 @@ static void dequeue_lg_buf(ns_dev *card, struct sk_buff *lb) struct sk_buff *new_lb; if ((new_lb = dev_alloc_skb(NS_LGSKBSIZE)) != NULL) { + NS_SKB_CB(new_lb)->buf_type = BUF_LG; skb_queue_tail(&card->lbpool.queue, new_lb); skb_reserve(new_lb, NS_SMBUFSIZE); - push_rxbufs(card, BUF_LG, (u32) new_lb, - (u32) virt_to_bus(new_lb->data), 0, 0); + push_rxbufs(card, new_lb); } } } @@ -2880,9 +2872,10 @@ static int ns_ioctl(struct atm_dev *dev, unsigned int cmd, void __user *arg) sb = __dev_alloc_skb(NS_SMSKBSIZE, GFP_KERNEL); if (sb == NULL) return -ENOMEM; + NS_SKB_CB(sb)->buf_type = BUF_SM; skb_queue_tail(&card->sbpool.queue, sb); skb_reserve(sb, NS_AAL0_HEADER); - push_rxbufs(card, BUF_SM, (u32) sb, (u32) virt_to_bus(sb->data), 0, 0); + push_rxbufs(card, sb); } break; @@ -2894,9 +2887,10 @@ static int ns_ioctl(struct atm_dev *dev, unsigned 
int cmd, void __user *arg) lb = __dev_alloc_skb(NS_LGSKBSIZE, GFP_KERNEL); if (lb == NULL) return -ENOMEM; + NS_SKB_CB(lb)->buf_type = BUF_LG; skb_queue_tail(&card->lbpool.queue, lb); skb_reserve(lb, NS_SMBUFSIZE); - push_rxbufs(card, BUF_LG, (u32) lb, (u32) virt_to_bus(lb->data), 0, 0); + push_rxbufs(card, lb); } break; @@ -2923,6 +2917,7 @@ static int ns_ioctl(struct atm_dev *dev, unsigned int cmd, void __user *arg) hb = __dev_alloc_skb(NS_HBUFSIZE, GFP_KERNEL); if (hb == NULL) return -ENOMEM; + NS_SKB_CB(hb)->buf_type = BUF_NONE; ns_grab_int_lock(card, flags); skb_queue_tail(&card->hbpool.queue, hb); card->hbpool.count++; @@ -2953,6 +2948,7 @@ static int ns_ioctl(struct atm_dev *dev, unsigned int cmd, void __user *arg) iovb = alloc_skb(NS_IOVBUFSIZE, GFP_KERNEL); if (iovb == NULL) return -ENOMEM; + NS_SKB_CB(iovb)->buf_type = BUF_NONE; ns_grab_int_lock(card, flags); skb_queue_tail(&card->iovpool.queue, iovb); card->iovpool.count++; @@ -2979,17 +2975,12 @@ static int ns_ioctl(struct atm_dev *dev, unsigned int cmd, void __user *arg) } - static void which_list(ns_dev *card, struct sk_buff *skb) { - printk("It's a %s buffer.\n", skb->list == &card->sbpool.queue ? - "small" : skb->list == &card->lbpool.queue ? "large" : - skb->list == &card->hbpool.queue ? "huge" : - skb->list == &card->iovpool.queue ? "iovec" : "unknown"); + printk("skb buf_type: 0x%08x\n", NS_SKB_CB(skb)->buf_type); } - static void ns_poll(unsigned long arg) { int i; diff --git a/drivers/atm/nicstar.h b/drivers/atm/nicstar.h index ea83c46c8ba5..5997bcb45b59 100644 --- a/drivers/atm/nicstar.h +++ b/drivers/atm/nicstar.h @@ -103,8 +103,14 @@ #define NS_IOREMAP_SIZE 4096 -#define BUF_SM 0x00000000 /* These two are used for push_rxbufs() */ -#define BUF_LG 0x00000001 /* CMD, Write_FreeBufQ, LBUF bit */ +/* + * BUF_XX distinguish the Rx buffers depending on their (small/large) size. + * BUF_SM and BUF_LG are both used by the driver and the device. + * BUF_NONE is only used by the driver.
+ */ +#define BUF_SM 0x00000000 /* These two are used for push_rxbufs() */ +#define BUF_LG 0x00000001 /* CMD, Write_FreeBufQ, LBUF bit */ +#define BUF_NONE 0xffffffff /* Software only: */ #define NS_HBUFSIZE 65568 /* Size of max. AAL5 PDU */ #define NS_MAX_IOVECS (2 + (65568 - NS_SMBUFSIZE) / \ @@ -684,6 +690,12 @@ enum ns_regs /* Device driver structures ***************************************************/ +struct ns_skb_cb { + u32 buf_type; /* BUF_SM/BUF_LG/BUF_NONE */ +}; + +#define NS_SKB_CB(skb) ((struct ns_skb_cb *)((skb)->cb)) + typedef struct tsq_info { void *org; diff --git a/drivers/atm/zatm.c b/drivers/atm/zatm.c index a2b236a966e0..85fee9530fa9 100644 --- a/drivers/atm/zatm.c +++ b/drivers/atm/zatm.c @@ -417,10 +417,13 @@ printk("dummy: 0x%08lx, 0x%08lx\n",dummy[0],dummy[1]); chan = (here[3] & uPD98401_AAL5_CHAN) >> uPD98401_AAL5_CHAN_SHIFT; if (chan < zatm_dev->chans && zatm_dev->rx_map[chan]) { + int pos; + vcc = zatm_dev->rx_map[chan]; + pos = ZATM_VCC(vcc)->pool; - if (skb == zatm_dev->last_free[ZATM_VCC(vcc)->pool]) - zatm_dev->last_free[ZATM_VCC(vcc)->pool] = NULL; - skb_unlink(skb); + if (skb == zatm_dev->last_free[pos]) + zatm_dev->last_free[pos] = NULL; + skb_unlink(skb, zatm_dev->pool + pos); } else { printk(KERN_ERR DEV_LABEL "(itf %d): RX indication " diff --git a/drivers/bluetooth/bfusb.c b/drivers/bluetooth/bfusb.c index c42d7e6ac1c5..e8d2a340356d 100644 --- a/drivers/bluetooth/bfusb.c +++ b/drivers/bluetooth/bfusb.c @@ -158,7 +158,7 @@ static int bfusb_send_bulk(struct bfusb *bfusb, struct sk_buff *skb) if (err) { BT_ERR("%s bulk tx submit failed urb %p err %d", bfusb->hdev->name, urb, err); - skb_unlink(skb); + skb_unlink(skb, &bfusb->pending_q); usb_free_urb(urb); } else atomic_inc(&bfusb->pending_tx); @@ -212,7 +212,7 @@ static void bfusb_tx_complete(struct urb *urb, struct pt_regs *regs) read_lock(&bfusb->lock); - skb_unlink(skb); + skb_unlink(skb, &bfusb->pending_q); skb_queue_tail(&bfusb->completed_q, skb); bfusb_tx_wakeup(bfusb); @@
-253,7 +253,7 @@ static int bfusb_rx_submit(struct bfusb *bfusb, struct urb *urb) if (err) { BT_ERR("%s bulk rx submit failed urb %p err %d", bfusb->hdev->name, urb, err); - skb_unlink(skb); + skb_unlink(skb, &bfusb->pending_q); kfree_skb(skb); usb_free_urb(urb); } @@ -398,7 +398,7 @@ static void bfusb_rx_complete(struct urb *urb, struct pt_regs *regs) buf += len; } - skb_unlink(skb); + skb_unlink(skb, &bfusb->pending_q); kfree_skb(skb); bfusb_rx_submit(bfusb, urb); diff --git a/drivers/ieee1394/ieee1394_core.c b/drivers/ieee1394/ieee1394_core.c index b248d89de8b4..d633770fac8e 100644 --- a/drivers/ieee1394/ieee1394_core.c +++ b/drivers/ieee1394/ieee1394_core.c @@ -681,7 +681,7 @@ static void handle_packet_response(struct hpsb_host *host, int tcode, return; } - __skb_unlink(skb, skb->list); + __skb_unlink(skb, &host->pending_packet_queue); if (packet->state == hpsb_queued) { packet->sendtime = jiffies; @@ -989,7 +989,7 @@ void abort_timedouts(unsigned long __opaque) packet = (struct hpsb_packet *)skb->data; if (time_before(packet->sendtime + expire, jiffies)) { - __skb_unlink(skb, skb->list); + __skb_unlink(skb, &host->pending_packet_queue); packet->state = hpsb_complete; packet->ack_code = ACKX_TIMEOUT; queue_packet_complete(packet); diff --git a/drivers/isdn/act2000/capi.c b/drivers/isdn/act2000/capi.c index afa46681f983..6ae6eb322111 100644 --- a/drivers/isdn/act2000/capi.c +++ b/drivers/isdn/act2000/capi.c @@ -606,7 +606,7 @@ handle_ack(act2000_card *card, act2000_chan *chan, __u8 blocknr) { if ((((m->msg.data_b3_req.fakencci >> 8) & 0xff) == chan->ncci) && (m->msg.data_b3_req.blocknr == blocknr)) { /* found corresponding DATA_B3_REQ */ - skb_unlink(tmp); + skb_unlink(tmp, &card->ackq); chan->queued -= m->msg.data_b3_req.datalen; if (m->msg.data_b3_req.flags) ret = m->msg.data_b3_req.datalen; diff --git a/drivers/net/shaper.c b/drivers/net/shaper.c index 3ad0b6751f6f..221354eea21f 100644 --- a/drivers/net/shaper.c +++ b/drivers/net/shaper.c @@ -156,52 +156,6 @@ 
static int shaper_start_xmit(struct sk_buff *skb, struct net_device *dev) SHAPERCB(skb)->shapelen= shaper_clocks(shaper,skb); -#ifdef SHAPER_COMPLEX /* and broken.. */ - - while(ptr && ptr!=(struct sk_buff *)&shaper->sendq) - { - if(ptr->pripri - && jiffies - SHAPERCB(ptr)->shapeclock < SHAPER_MAXSLIP) - { - struct sk_buff *tmp=ptr->prev; - - /* - * It goes before us therefore we slip the length - * of the new frame. - */ - - SHAPERCB(ptr)->shapeclock+=SHAPERCB(skb)->shapelen; - SHAPERCB(ptr)->shapelatency+=SHAPERCB(skb)->shapelen; - - /* - * The packet may have slipped so far back it - * fell off. - */ - if(SHAPERCB(ptr)->shapelatency > SHAPER_LATENCY) - { - skb_unlink(ptr); - dev_kfree_skb(ptr); - } - ptr=tmp; - } - else - break; - } - if(ptr==NULL || ptr==(struct sk_buff *)&shaper->sendq) - skb_queue_head(&shaper->sendq,skb); - else - { - struct sk_buff *tmp; - /* - * Set the packet clock out time according to the - * frames ahead. Im sure a bit of thought could drop - * this loop. - */ - for(tmp=skb_peek(&shaper->sendq); tmp!=NULL && tmp!=ptr; tmp=tmp->next) - SHAPERCB(skb)->shapeclock+=tmp->shapelen; - skb_append(ptr,skb); - } -#else { struct sk_buff *tmp; /* @@ -220,7 +174,7 @@ static int shaper_start_xmit(struct sk_buff *skb, struct net_device *dev) } else skb_queue_tail(&shaper->sendq, skb); } -#endif + if(sh_debug) printk("Frame queued.\n"); if(skb_queue_len(&shaper->sendq)>SHAPER_QLEN) @@ -302,7 +256,7 @@ static void shaper_kick(struct shaper *shaper) * Pull the frame and get interrupts back on. 
*/ - skb_unlink(skb); + skb_unlink(skb, &shaper->sendq); if (shaper->recovery < SHAPERCB(skb)->shapeclock + SHAPERCB(skb)->shapelen) shaper->recovery = SHAPERCB(skb)->shapeclock + SHAPERCB(skb)->shapelen; diff --git a/drivers/net/wan/sdla_fr.c b/drivers/net/wan/sdla_fr.c index c5f5e62aab8b..0497dbdb8631 100644 --- a/drivers/net/wan/sdla_fr.c +++ b/drivers/net/wan/sdla_fr.c @@ -445,7 +445,7 @@ void s508_s514_unlock(sdla_t *card, unsigned long *smp_flags); void s508_s514_lock(sdla_t *card, unsigned long *smp_flags); unsigned short calc_checksum (char *, int); -static int setup_fr_header(struct sk_buff** skb, +static int setup_fr_header(struct sk_buff *skb, struct net_device* dev, char op_mode); @@ -1372,7 +1372,7 @@ static int if_send(struct sk_buff* skb, struct net_device* dev) /* Move the if_header() code to here. By inserting frame * relay header in if_header() we would break the * tcpdump and other packet sniffers */ - chan->fr_header_len = setup_fr_header(&skb,dev,chan->common.usedby); + chan->fr_header_len = setup_fr_header(skb,dev,chan->common.usedby); if (chan->fr_header_len < 0 ){ ++chan->ifstats.tx_dropped; ++card->wandev.stats.tx_dropped; @@ -1597,8 +1597,6 @@ static int setup_for_delayed_transmit(struct net_device* dev, return 1; } - skb_unlink(skb); - chan->transmit_length = len; chan->delay_skb = skb; @@ -4871,18 +4869,15 @@ static void unconfig_fr (sdla_t *card) } } -static int setup_fr_header(struct sk_buff **skb_orig, struct net_device* dev, +static int setup_fr_header(struct sk_buff *skb, struct net_device* dev, char op_mode) { - struct sk_buff *skb = *skb_orig; fr_channel_t *chan=dev->priv; - if (op_mode == WANPIPE){ - + if (op_mode == WANPIPE) { chan->fr_header[0]=Q922_UI; switch (htons(skb->protocol)){ - case ETH_P_IP: chan->fr_header[1]=NLPID_IP; break; @@ -4894,16 +4889,14 @@ static int setup_fr_header(struct sk_buff **skb_orig, struct net_device* dev, } /* If we are in bridging mode, we must apply - * an Ethernet header */ - if (op_mode == 
BRIDGE || op_mode == BRIDGE_NODE){ - - + * an Ethernet header + */ + if (op_mode == BRIDGE || op_mode == BRIDGE_NODE) { /* Encapsulate the packet as a bridged Ethernet frame. */ #ifdef DEBUG printk(KERN_INFO "%s: encapsulating skb for frame relay\n", dev->name); #endif - chan->fr_header[0] = 0x03; chan->fr_header[1] = 0x00; chan->fr_header[2] = 0x80; @@ -4916,7 +4909,6 @@ static int setup_fr_header(struct sk_buff **skb_orig, struct net_device* dev, /* Yuck. */ skb->protocol = ETH_P_802_3; return 8; - } return 0; diff --git a/drivers/usb/net/usbnet.c b/drivers/usb/net/usbnet.c index 4528a00c45b0..a2f67245f6da 100644 --- a/drivers/usb/net/usbnet.c +++ b/drivers/usb/net/usbnet.c @@ -2903,19 +2903,18 @@ static struct net_device_stats *usbnet_get_stats (struct net_device *net) * completion callbacks. 2.5 should have fixed those bugs... */ -static void defer_bh (struct usbnet *dev, struct sk_buff *skb) +static void defer_bh(struct usbnet *dev, struct sk_buff *skb, struct sk_buff_head *list) { - struct sk_buff_head *list = skb->list; unsigned long flags; - spin_lock_irqsave (&list->lock, flags); - __skb_unlink (skb, list); - spin_unlock (&list->lock); - spin_lock (&dev->done.lock); - __skb_queue_tail (&dev->done, skb); + spin_lock_irqsave(&list->lock, flags); + __skb_unlink(skb, list); + spin_unlock(&list->lock); + spin_lock(&dev->done.lock); + __skb_queue_tail(&dev->done, skb); if (dev->done.qlen == 1) - tasklet_schedule (&dev->bh); - spin_unlock_irqrestore (&dev->done.lock, flags); + tasklet_schedule(&dev->bh); + spin_unlock_irqrestore(&dev->done.lock, flags); } /* some work can't be done in tasklets, so we use keventd @@ -3120,7 +3119,7 @@ block: break; } - defer_bh (dev, skb); + defer_bh(dev, skb, &dev->rxq); if (urb) { if (netif_running (dev->net) @@ -3490,7 +3489,7 @@ static void tx_complete (struct urb *urb, struct pt_regs *regs) urb->dev = NULL; entry->state = tx_done; - defer_bh (dev, skb); + defer_bh(dev, skb, &dev->txq); } 
/*-------------------------------------------------------------------------*/ diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 4b929c3c1a98..76c68851474c 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -204,7 +204,6 @@ struct sk_buff { struct sk_buff *next; struct sk_buff *prev; - struct sk_buff_head *list; struct sock *sk; struct timeval stamp; struct net_device *dev; @@ -597,7 +596,6 @@ static inline void __skb_queue_head(struct sk_buff_head *list, { struct sk_buff *prev, *next; - newsk->list = list; list->qlen++; prev = (struct sk_buff *)list; next = prev->next; @@ -622,7 +620,6 @@ static inline void __skb_queue_tail(struct sk_buff_head *list, { struct sk_buff *prev, *next; - newsk->list = list; list->qlen++; next = (struct sk_buff *)list; prev = next->prev; @@ -655,7 +652,6 @@ static inline struct sk_buff *__skb_dequeue(struct sk_buff_head *list) next->prev = prev; prev->next = next; result->next = result->prev = NULL; - result->list = NULL; } return result; } @@ -664,7 +660,7 @@ static inline struct sk_buff *__skb_dequeue(struct sk_buff_head *list) /* * Insert a packet on a list. */ -extern void skb_insert(struct sk_buff *old, struct sk_buff *newsk); +extern void skb_insert(struct sk_buff *old, struct sk_buff *newsk, struct sk_buff_head *list); static inline void __skb_insert(struct sk_buff *newsk, struct sk_buff *prev, struct sk_buff *next, struct sk_buff_head *list) @@ -672,24 +668,23 @@ static inline void __skb_insert(struct sk_buff *newsk, newsk->next = next; newsk->prev = prev; next->prev = prev->next = newsk; - newsk->list = list; list->qlen++; } /* * Place a packet after a given packet in a list. 
*/ -extern void skb_append(struct sk_buff *old, struct sk_buff *newsk); -static inline void __skb_append(struct sk_buff *old, struct sk_buff *newsk) +extern void skb_append(struct sk_buff *old, struct sk_buff *newsk, struct sk_buff_head *list); +static inline void __skb_append(struct sk_buff *old, struct sk_buff *newsk, struct sk_buff_head *list) { - __skb_insert(newsk, old, old->next, old->list); + __skb_insert(newsk, old, old->next, list); } /* * remove sk_buff from list. _Must_ be called atomically, and with * the list known.. */ -extern void skb_unlink(struct sk_buff *skb); +extern void skb_unlink(struct sk_buff *skb, struct sk_buff_head *list); static inline void __skb_unlink(struct sk_buff *skb, struct sk_buff_head *list) { struct sk_buff *next, *prev; @@ -698,7 +693,6 @@ static inline void __skb_unlink(struct sk_buff *skb, struct sk_buff_head *list) next = skb->next; prev = skb->prev; skb->next = skb->prev = NULL; - skb->list = NULL; next->prev = prev; prev->next = next; } diff --git a/net/atm/ipcommon.c b/net/atm/ipcommon.c index 181a3002d8ad..4b1faca5013f 100644 --- a/net/atm/ipcommon.c +++ b/net/atm/ipcommon.c @@ -34,7 +34,6 @@ void skb_migrate(struct sk_buff_head *from,struct sk_buff_head *to) { - struct sk_buff *skb; unsigned long flags; struct sk_buff *skb_from = (struct sk_buff *) from; struct sk_buff *skb_to = (struct sk_buff *) to; @@ -47,8 +46,6 @@ void skb_migrate(struct sk_buff_head *from,struct sk_buff_head *to) prev->next = skb_to; to->prev->next = from->next; to->prev = from->prev; - for (skb = from->next; skb != skb_to; skb = skb->next) - skb->list = to; to->qlen += from->qlen; spin_unlock(&to->lock); from->prev = skb_from; diff --git a/net/ax25/ax25_subr.c b/net/ax25/ax25_subr.c index 99694b57f6f5..eb7343c10a9f 100644 --- a/net/ax25/ax25_subr.c +++ b/net/ax25/ax25_subr.c @@ -76,7 +76,7 @@ void ax25_requeue_frames(ax25_cb *ax25) if (skb_prev == NULL) skb_queue_head(&ax25->write_queue, skb); else - skb_append(skb_prev, skb); + 
skb_append(skb_prev, skb, &ax25->write_queue); skb_prev = skb; } } diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 096991cb09d9..e6564b0a6839 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -281,8 +281,6 @@ void kfree_skbmem(struct sk_buff *skb) void __kfree_skb(struct sk_buff *skb) { - BUG_ON(skb->list != NULL); - dst_release(skb->dst); #ifdef CONFIG_XFRM secpath_put(skb->sp); @@ -333,7 +331,6 @@ struct sk_buff *skb_clone(struct sk_buff *skb, unsigned int __nocast gfp_mask) #define C(x) n->x = skb->x n->next = n->prev = NULL; - n->list = NULL; n->sk = NULL; C(stamp); C(dev); @@ -403,7 +400,6 @@ static void copy_skb_header(struct sk_buff *new, const struct sk_buff *old) */ unsigned long offset = new->data - old->data; - new->list = NULL; new->sk = NULL; new->dev = old->dev; new->real_dev = old->real_dev; @@ -1342,50 +1338,43 @@ void skb_queue_tail(struct sk_buff_head *list, struct sk_buff *newsk) __skb_queue_tail(list, newsk); spin_unlock_irqrestore(&list->lock, flags); } + /** * skb_unlink - remove a buffer from a list * @skb: buffer to remove + * @list: list to use * - * Place a packet after a given packet in a list. The list locks are taken - * and this function is atomic with respect to other list locked calls + * Remove a packet from a list. The list locks are taken and this + * function is atomic with respect to other list locked calls * - * Works even without knowing the list it is sitting on, which can be - * handy at times. It also means that THE LIST MUST EXIST when you - * unlink. Thus a list must have its contents unlinked before it is - * destroyed. + * You must know what list the SKB is on. 
*/ -void skb_unlink(struct sk_buff *skb) +void skb_unlink(struct sk_buff *skb, struct sk_buff_head *list) { - struct sk_buff_head *list = skb->list; + unsigned long flags; - if (list) { - unsigned long flags; - - spin_lock_irqsave(&list->lock, flags); - if (skb->list == list) - __skb_unlink(skb, skb->list); - spin_unlock_irqrestore(&list->lock, flags); - } + spin_lock_irqsave(&list->lock, flags); + __skb_unlink(skb, list); + spin_unlock_irqrestore(&list->lock, flags); } - /** * skb_append - append a buffer * @old: buffer to insert after * @newsk: buffer to insert + * @list: list to use * * Place a packet after a given packet in a list. The list locks are taken * and this function is atomic with respect to other list locked calls. * A buffer cannot be placed on two lists at the same time. */ - -void skb_append(struct sk_buff *old, struct sk_buff *newsk) +void skb_append(struct sk_buff *old, struct sk_buff *newsk, struct sk_buff_head *list) { unsigned long flags; - spin_lock_irqsave(&old->list->lock, flags); - __skb_append(old, newsk); - spin_unlock_irqrestore(&old->list->lock, flags); + spin_lock_irqsave(&list->lock, flags); + __skb_append(old, newsk, list); + spin_unlock_irqrestore(&list->lock, flags); } @@ -1393,19 +1382,21 @@ void skb_append(struct sk_buff *old, struct sk_buff *newsk) * skb_insert - insert a buffer * @old: buffer to insert before * @newsk: buffer to insert + * @list: list to use + * + * Place a packet before a given packet in a list. The list locks are + * taken and this function is atomic with respect to other list locked + * calls. * - * Place a packet before a given packet in a list. The list locks are taken - * and this function is atomic with respect to other list locked calls * A buffer cannot be placed on two lists at the same time. 
*/ - -void skb_insert(struct sk_buff *old, struct sk_buff *newsk) +void skb_insert(struct sk_buff *old, struct sk_buff *newsk, struct sk_buff_head *list) { unsigned long flags; - spin_lock_irqsave(&old->list->lock, flags); - __skb_insert(newsk, old->prev, old, old->list); - spin_unlock_irqrestore(&old->list->lock, flags); + spin_lock_irqsave(&list->lock, flags); + __skb_insert(newsk, old->prev, old, list); + spin_unlock_irqrestore(&list->lock, flags); } #if 0 diff --git a/net/decnet/af_decnet.c b/net/decnet/af_decnet.c index acdd18e6adb2..0c30409fe9e5 100644 --- a/net/decnet/af_decnet.c +++ b/net/decnet/af_decnet.c @@ -1763,7 +1763,7 @@ static int dn_recvmsg(struct kiocb *iocb, struct socket *sock, nskb = skb->next; if (skb->len == 0) { - skb_unlink(skb); + skb_unlink(skb, queue); kfree_skb(skb); /* * N.B. Don't refer to skb or cb after this point diff --git a/net/decnet/dn_nsp_out.c b/net/decnet/dn_nsp_out.c index 8cce1fdbda90..e0bebf4bbcad 100644 --- a/net/decnet/dn_nsp_out.c +++ b/net/decnet/dn_nsp_out.c @@ -479,7 +479,7 @@ int dn_nsp_check_xmit_queue(struct sock *sk, struct sk_buff *skb, struct sk_buff xmit_count = cb2->xmit_count; segnum = cb2->segnum; /* Remove and drop ack'ed packet */ - skb_unlink(ack); + skb_unlink(ack, q); kfree_skb(ack); ack = NULL; diff --git a/net/econet/af_econet.c b/net/econet/af_econet.c index de691e119e17..b807a314269e 100644 --- a/net/econet/af_econet.c +++ b/net/econet/af_econet.c @@ -869,7 +869,7 @@ static void aun_tx_ack(unsigned long seq, int result) foundit: tx_result(skb->sk, eb->cookie, result); - skb_unlink(skb); + skb_unlink(skb, &aun_queue); spin_unlock_irqrestore(&aun_queue_lock, flags); kfree_skb(skb); } @@ -947,7 +947,7 @@ static void ab_cleanup(unsigned long h) { tx_result(skb->sk, eb->cookie, ECTYPE_TRANSMIT_NOT_PRESENT); - skb_unlink(skb); + skb_unlink(skb, &aun_queue); kfree_skb(skb); } skb = newskb; diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 69b1fcf70077..d2696af46c70 100644 --- a/net/ipv4/tcp.c +++ 
b/net/ipv4/tcp.c @@ -975,7 +975,7 @@ do_fault: if (!skb->len) { if (sk->sk_send_head == skb) sk->sk_send_head = NULL; - __skb_unlink(skb, skb->list); + __skb_unlink(skb, &sk->sk_write_queue); sk_stream_free_skb(sk, skb); } diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 53a8a5399f1e..ffa24025cd02 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -2085,7 +2085,7 @@ static int tcp_clean_rtx_queue(struct sock *sk, __s32 *seq_rtt_p, s32 *seq_usrtt seq_rtt = now - scb->when; tcp_dec_pcount_approx(&tp->fackets_out, skb); tcp_packets_out_dec(tp, skb); - __skb_unlink(skb, skb->list); + __skb_unlink(skb, &sk->sk_write_queue); sk_stream_free_skb(sk, skb); } @@ -2853,7 +2853,7 @@ static void tcp_ofo_queue(struct sock *sk) if (!after(TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt)) { SOCK_DEBUG(sk, "ofo packet was already received \n"); - __skb_unlink(skb, skb->list); + __skb_unlink(skb, &tp->out_of_order_queue); __kfree_skb(skb); continue; } @@ -2861,7 +2861,7 @@ static void tcp_ofo_queue(struct sock *sk) tp->rcv_nxt, TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq); - __skb_unlink(skb, skb->list); + __skb_unlink(skb, &tp->out_of_order_queue); __skb_queue_tail(&sk->sk_receive_queue, skb); tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq; if(skb->h.th->fin) @@ -3027,7 +3027,7 @@ drop: u32 end_seq = TCP_SKB_CB(skb)->end_seq; if (seq == TCP_SKB_CB(skb1)->end_seq) { - __skb_append(skb1, skb); + __skb_append(skb1, skb, &tp->out_of_order_queue); if (!tp->rx_opt.num_sacks || tp->selective_acks[0].end_seq != seq) @@ -3071,7 +3071,7 @@ drop: tcp_dsack_extend(tp, TCP_SKB_CB(skb1)->seq, end_seq); break; } - __skb_unlink(skb1, skb1->list); + __skb_unlink(skb1, &tp->out_of_order_queue); tcp_dsack_extend(tp, TCP_SKB_CB(skb1)->seq, TCP_SKB_CB(skb1)->end_seq); __kfree_skb(skb1); } @@ -3088,8 +3088,9 @@ add_sack: * simplifies code) */ static void -tcp_collapse(struct sock *sk, struct sk_buff *head, - struct sk_buff *tail, u32 start, u32 end) +tcp_collapse(struct sock *sk, struct 
sk_buff_head *list, + struct sk_buff *head, struct sk_buff *tail, + u32 start, u32 end) { struct sk_buff *skb; @@ -3099,7 +3100,7 @@ tcp_collapse(struct sock *sk, struct sk_buff *head, /* No new bits? It is possible on ofo queue. */ if (!before(start, TCP_SKB_CB(skb)->end_seq)) { struct sk_buff *next = skb->next; - __skb_unlink(skb, skb->list); + __skb_unlink(skb, list); __kfree_skb(skb); NET_INC_STATS_BH(LINUX_MIB_TCPRCVCOLLAPSED); skb = next; @@ -3145,7 +3146,7 @@ tcp_collapse(struct sock *sk, struct sk_buff *head, nskb->mac.raw = nskb->head + (skb->mac.raw-skb->head); memcpy(nskb->cb, skb->cb, sizeof(skb->cb)); TCP_SKB_CB(nskb)->seq = TCP_SKB_CB(nskb)->end_seq = start; - __skb_insert(nskb, skb->prev, skb, skb->list); + __skb_insert(nskb, skb->prev, skb, list); sk_stream_set_owner_r(nskb, sk); /* Copy data, releasing collapsed skbs. */ @@ -3164,7 +3165,7 @@ tcp_collapse(struct sock *sk, struct sk_buff *head, } if (!before(start, TCP_SKB_CB(skb)->end_seq)) { struct sk_buff *next = skb->next; - __skb_unlink(skb, skb->list); + __skb_unlink(skb, list); __kfree_skb(skb); NET_INC_STATS_BH(LINUX_MIB_TCPRCVCOLLAPSED); skb = next; @@ -3200,7 +3201,8 @@ static void tcp_collapse_ofo_queue(struct sock *sk) if (skb == (struct sk_buff *)&tp->out_of_order_queue || after(TCP_SKB_CB(skb)->seq, end) || before(TCP_SKB_CB(skb)->end_seq, start)) { - tcp_collapse(sk, head, skb, start, end); + tcp_collapse(sk, &tp->out_of_order_queue, + head, skb, start, end); head = skb; if (skb == (struct sk_buff *)&tp->out_of_order_queue) break; @@ -3237,7 +3239,8 @@ static int tcp_prune_queue(struct sock *sk) tp->rcv_ssthresh = min(tp->rcv_ssthresh, 4U * tp->advmss); tcp_collapse_ofo_queue(sk); - tcp_collapse(sk, sk->sk_receive_queue.next, + tcp_collapse(sk, &sk->sk_receive_queue, + sk->sk_receive_queue.next, (struct sk_buff*)&sk->sk_receive_queue, tp->copied_seq, tp->rcv_nxt); sk_stream_mem_reclaim(sk); @@ -3462,7 +3465,7 @@ static void tcp_check_urg(struct sock * sk, struct tcphdr * th) struct 
sk_buff *skb = skb_peek(&sk->sk_receive_queue); tp->copied_seq++; if (skb && !before(tp->copied_seq, TCP_SKB_CB(skb)->end_seq)) { - __skb_unlink(skb, skb->list); + __skb_unlink(skb, &sk->sk_receive_queue); __kfree_skb(skb); } } diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index dd30dd137b74..a4d1eb9a0926 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -505,7 +505,7 @@ static int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len, unsigned /* Link BUFF into the send queue. */ skb_header_release(buff); - __skb_append(skb, buff); + __skb_append(skb, buff, &sk->sk_write_queue); return 0; } @@ -893,7 +893,7 @@ static int tso_fragment(struct sock *sk, struct sk_buff *skb, unsigned int len, /* Link BUFF into the send queue. */ skb_header_release(buff); - __skb_append(skb, buff); + __skb_append(skb, buff, &sk->sk_write_queue); return 0; } @@ -1238,7 +1238,7 @@ static void tcp_retrans_try_collapse(struct sock *sk, struct sk_buff *skb, int m tcp_skb_pcount(next_skb) != 1); /* Ok. We will be able to collapse the packet. 
*/ - __skb_unlink(next_skb, next_skb->list); + __skb_unlink(next_skb, &sk->sk_write_queue); memcpy(skb_put(skb, next_skb_size), next_skb->data, next_skb_size); diff --git a/net/irda/irlap_frame.c b/net/irda/irlap_frame.c index 6dafbb43b529..eb65b4925b51 100644 --- a/net/irda/irlap_frame.c +++ b/net/irda/irlap_frame.c @@ -988,9 +988,6 @@ void irlap_resend_rejected_frames(struct irlap_cb *self, int command) IRDA_DEBUG(0, "%s(), unable to copy\n", __FUNCTION__); return; } - /* Unlink tx_skb from list */ - tx_skb->next = tx_skb->prev = NULL; - tx_skb->list = NULL; /* Clear old Nr field + poll bit */ tx_skb->data[1] &= 0x0f; @@ -1063,9 +1060,6 @@ void irlap_resend_rejected_frame(struct irlap_cb *self, int command) IRDA_DEBUG(0, "%s(), unable to copy\n", __FUNCTION__); return; } - /* Unlink tx_skb from list */ - tx_skb->next = tx_skb->prev = NULL; - tx_skb->list = NULL; /* Clear old Nr field + poll bit */ tx_skb->data[1] &= 0x0f; diff --git a/net/lapb/lapb_subr.c b/net/lapb/lapb_subr.c index 5de05a0bc0ff..8b5eefd70f03 100644 --- a/net/lapb/lapb_subr.c +++ b/net/lapb/lapb_subr.c @@ -78,7 +78,7 @@ void lapb_requeue_frames(struct lapb_cb *lapb) if (!skb_prev) skb_queue_head(&lapb->write_queue, skb); else - skb_append(skb_prev, skb); + skb_append(skb_prev, skb, &lapb->write_queue); skb_prev = skb; } } diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c index 20b4cfebd74c..f49b82da8264 100644 --- a/net/llc/af_llc.c +++ b/net/llc/af_llc.c @@ -714,7 +714,7 @@ static int llc_ui_recvmsg(struct kiocb *iocb, struct socket *sock, if (uaddr) memcpy(uaddr, llc_ui_skb_cb(skb), sizeof(*uaddr)); msg->msg_namelen = sizeof(*uaddr); - if (!skb->list) { + if (!skb->next) { dgram_free: kfree_skb(skb); } diff --git a/net/llc/llc_conn.c b/net/llc/llc_conn.c index eba812a9c69c..571548619469 100644 --- a/net/llc/llc_conn.c +++ b/net/llc/llc_conn.c @@ -71,7 +71,11 @@ int llc_conn_state_process(struct sock *sk, struct sk_buff *skb) if (!ev->ind_prim && !ev->cfm_prim) { /* indicate or confirm not 
required */ - if (!skb->list) + /* XXX this is not very pretty, perhaps we should store + * XXX indicate/confirm-needed state in the llc_conn_state_ev + * XXX control block of the SKB instead? -DaveM + */ + if (!skb->next) goto out_kfree_skb; goto out_skb_put; } diff --git a/net/netrom/nr_subr.c b/net/netrom/nr_subr.c index 0627347b14b8..252c1b3ecd78 100644 --- a/net/netrom/nr_subr.c +++ b/net/netrom/nr_subr.c @@ -77,7 +77,7 @@ void nr_requeue_frames(struct sock *sk) if (skb_prev == NULL) skb_queue_head(&sk->sk_write_queue, skb); else - skb_append(skb_prev, skb); + skb_append(skb_prev, skb, &sk->sk_write_queue); skb_prev = skb; } } diff --git a/net/rose/rose_subr.c b/net/rose/rose_subr.c index 7db7e1cedc3a..ae135e27799b 100644 --- a/net/rose/rose_subr.c +++ b/net/rose/rose_subr.c @@ -74,7 +74,7 @@ void rose_requeue_frames(struct sock *sk) if (skb_prev == NULL) skb_queue_head(&sk->sk_write_queue, skb); else - skb_append(skb_prev, skb); + skb_append(skb_prev, skb, &sk->sk_write_queue); skb_prev = skb; } } diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 091a66f06a35..4454afe4727e 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -4892,7 +4892,7 @@ static void sctp_sock_migrate(struct sock *oldsk, struct sock *newsk, sctp_skb_for_each(skb, &oldsk->sk_receive_queue, tmp) { event = sctp_skb2event(skb); if (event->asoc == assoc) { - __skb_unlink(skb, skb->list); + __skb_unlink(skb, &oldsk->sk_receive_queue); __skb_queue_tail(&newsk->sk_receive_queue, skb); } } @@ -4921,7 +4921,7 @@ static void sctp_sock_migrate(struct sock *oldsk, struct sock *newsk, sctp_skb_for_each(skb, &oldsp->pd_lobby, tmp) { event = sctp_skb2event(skb); if (event->asoc == assoc) { - __skb_unlink(skb, skb->list); + __skb_unlink(skb, &oldsp->pd_lobby); __skb_queue_tail(queue, skb); } } diff --git a/net/sctp/ulpqueue.c b/net/sctp/ulpqueue.c index 8bbc279d6c99..ec2c857eae7f 100644 --- a/net/sctp/ulpqueue.c +++ b/net/sctp/ulpqueue.c @@ -50,9 +50,9 @@ /* Forward declarations for internal 
helpers. */ static struct sctp_ulpevent * sctp_ulpq_reasm(struct sctp_ulpq *ulpq, - struct sctp_ulpevent *); + struct sctp_ulpevent *); static struct sctp_ulpevent * sctp_ulpq_order(struct sctp_ulpq *, - struct sctp_ulpevent *); + struct sctp_ulpevent *); /* 1st Level Abstractions */ @@ -125,7 +125,9 @@ int sctp_ulpq_tail_data(struct sctp_ulpq *ulpq, struct sctp_chunk *chunk, event = sctp_ulpq_order(ulpq, event); } - /* Send event to the ULP. */ + /* Send event to the ULP. 'event' is the sctp_ulpevent for + * very first SKB on the 'temp' list. + */ if (event) sctp_ulpq_tail_event(ulpq, event); @@ -158,14 +160,18 @@ static int sctp_ulpq_clear_pd(struct sctp_ulpq *ulpq) return sctp_clear_pd(ulpq->asoc->base.sk); } - - +/* If the SKB of 'event' is on a list, it is the first such member + * of that list. + */ int sctp_ulpq_tail_event(struct sctp_ulpq *ulpq, struct sctp_ulpevent *event) { struct sock *sk = ulpq->asoc->base.sk; - struct sk_buff_head *queue; + struct sk_buff_head *queue, *skb_list; + struct sk_buff *skb = sctp_event2skb(event); int clear_pd = 0; + skb_list = (struct sk_buff_head *) skb->prev; + /* If the socket is just going to throw this away, do not * even try to deliver it. */ @@ -197,10 +203,10 @@ int sctp_ulpq_tail_event(struct sctp_ulpq *ulpq, struct sctp_ulpevent *event) /* If we are harvesting multiple skbs they will be * collected on a list. */ - if (sctp_event2skb(event)->list) - sctp_skb_list_tail(sctp_event2skb(event)->list, queue); + if (skb_list) + sctp_skb_list_tail(skb_list, queue); else - __skb_queue_tail(queue, sctp_event2skb(event)); + __skb_queue_tail(queue, skb); /* Did we just complete partial delivery and need to get * rolling again? 
Move pending data to the receive @@ -214,10 +220,11 @@ int sctp_ulpq_tail_event(struct sctp_ulpq *ulpq, struct sctp_ulpevent *event) return 1; out_free: - if (sctp_event2skb(event)->list) - sctp_queue_purge_ulpevents(sctp_event2skb(event)->list); + if (skb_list) + sctp_queue_purge_ulpevents(skb_list); else sctp_ulpevent_free(event); + return 0; } @@ -269,7 +276,7 @@ static inline void sctp_ulpq_store_reasm(struct sctp_ulpq *ulpq, * payload was fragmented on the way and ip had to reassemble them. * We add the rest of skb's to the first skb's fraglist. */ -static struct sctp_ulpevent *sctp_make_reassembled_event(struct sk_buff *f_frag, struct sk_buff *l_frag) +static struct sctp_ulpevent *sctp_make_reassembled_event(struct sk_buff_head *queue, struct sk_buff *f_frag, struct sk_buff *l_frag) { struct sk_buff *pos; struct sctp_ulpevent *event; @@ -294,7 +301,7 @@ static struct sctp_ulpevent *sctp_make_reassembled_event(struct sk_buff *f_frag, skb_shinfo(f_frag)->frag_list = pos; /* Remove the first fragment from the reassembly queue. */ - __skb_unlink(f_frag, f_frag->list); + __skb_unlink(f_frag, queue); while (pos) { pnext = pos->next; @@ -304,7 +311,7 @@ static struct sctp_ulpevent *sctp_make_reassembled_event(struct sk_buff *f_frag, f_frag->data_len += pos->len; /* Remove the fragment from the reassembly queue. */ - __skb_unlink(pos, pos->list); + __skb_unlink(pos, queue); /* Break if we have reached the last fragment. */ if (pos == l_frag) @@ -375,7 +382,7 @@ static inline struct sctp_ulpevent *sctp_ulpq_retrieve_reassembled(struct sctp_u done: return retval; found: - retval = sctp_make_reassembled_event(first_frag, pos); + retval = sctp_make_reassembled_event(&ulpq->reasm, first_frag, pos); if (retval) retval->msg_flags |= MSG_EOR; goto done; @@ -435,7 +442,7 @@ static inline struct sctp_ulpevent *sctp_ulpq_retrieve_partial(struct sctp_ulpq * further. 
*/ done: - retval = sctp_make_reassembled_event(first_frag, last_frag); + retval = sctp_make_reassembled_event(&ulpq->reasm, first_frag, last_frag); if (retval && is_last) retval->msg_flags |= MSG_EOR; @@ -527,7 +534,7 @@ static inline struct sctp_ulpevent *sctp_ulpq_retrieve_first(struct sctp_ulpq *u * further. */ done: - retval = sctp_make_reassembled_event(first_frag, last_frag); + retval = sctp_make_reassembled_event(&ulpq->reasm, first_frag, last_frag); return retval; } @@ -537,6 +544,7 @@ done: static inline void sctp_ulpq_retrieve_ordered(struct sctp_ulpq *ulpq, struct sctp_ulpevent *event) { + struct sk_buff_head *event_list; struct sk_buff *pos, *tmp; struct sctp_ulpevent *cevent; struct sctp_stream *in; @@ -547,6 +555,8 @@ static inline void sctp_ulpq_retrieve_ordered(struct sctp_ulpq *ulpq, ssn = event->ssn; in = &ulpq->asoc->ssnmap->in; + event_list = (struct sk_buff_head *) sctp_event2skb(event)->prev; + /* We are holding the chunks by stream, by SSN. */ sctp_skb_for_each(pos, &ulpq->lobby, tmp) { cevent = (struct sctp_ulpevent *) pos->cb; @@ -567,10 +577,10 @@ static inline void sctp_ulpq_retrieve_ordered(struct sctp_ulpq *ulpq, /* Found it, so mark in the ssnmap. */ sctp_ssn_next(in, sid); - __skb_unlink(pos, pos->list); + __skb_unlink(pos, &ulpq->lobby); /* Attach all gathered skbs to the event. 
*/ - __skb_queue_tail(sctp_event2skb(event)->list, pos); + __skb_queue_tail(event_list, pos); } } @@ -626,7 +636,7 @@ static inline void sctp_ulpq_store_ordered(struct sctp_ulpq *ulpq, } static struct sctp_ulpevent *sctp_ulpq_order(struct sctp_ulpq *ulpq, - struct sctp_ulpevent *event) + struct sctp_ulpevent *event) { __u16 sid, ssn; struct sctp_stream *in; @@ -667,7 +677,7 @@ static inline void sctp_ulpq_reap_ordered(struct sctp_ulpq *ulpq) { struct sk_buff *pos, *tmp; struct sctp_ulpevent *cevent; - struct sctp_ulpevent *event = NULL; + struct sctp_ulpevent *event; struct sctp_stream *in; struct sk_buff_head temp; __u16 csid, cssn; @@ -675,6 +685,8 @@ static inline void sctp_ulpq_reap_ordered(struct sctp_ulpq *ulpq) in = &ulpq->asoc->ssnmap->in; /* We are holding the chunks by stream, by SSN. */ + skb_queue_head_init(&temp); + event = NULL; sctp_skb_for_each(pos, &ulpq->lobby, tmp) { cevent = (struct sctp_ulpevent *) pos->cb; csid = cevent->stream; @@ -686,19 +698,20 @@ static inline void sctp_ulpq_reap_ordered(struct sctp_ulpq *ulpq) /* Found it, so mark in the ssnmap. */ sctp_ssn_next(in, csid); - __skb_unlink(pos, pos->list); + __skb_unlink(pos, &ulpq->lobby); if (!event) { /* Create a temporary list to collect chunks on. */ event = sctp_skb2event(pos); - skb_queue_head_init(&temp); __skb_queue_tail(&temp, sctp_event2skb(event)); } else { /* Attach all gathered skbs to the event. */ - __skb_queue_tail(sctp_event2skb(event)->list, pos); + __skb_queue_tail(&temp, pos); } } - /* Send event to the ULP. */ + /* Send event to the ULP. 'event' is the sctp_ulpevent for + * very first SKB on the 'temp' list. 
+ */ if (event) sctp_ulpq_tail_event(ulpq, event); } diff --git a/net/unix/garbage.c b/net/unix/garbage.c index 4bd95c8f5934..46252d2807bb 100644 --- a/net/unix/garbage.c +++ b/net/unix/garbage.c @@ -286,16 +286,16 @@ void unix_gc(void) skb = skb_peek(&s->sk_receive_queue); while (skb && skb != (struct sk_buff *)&s->sk_receive_queue) { - nextsk=skb->next; + nextsk = skb->next; /* * Do we have file descriptors ? */ - if(UNIXCB(skb).fp) - { - __skb_unlink(skb, skb->list); - __skb_queue_tail(&hitlist,skb); + if (UNIXCB(skb).fp) { + __skb_unlink(skb, + &s->sk_receive_queue); + __skb_queue_tail(&hitlist, skb); } - skb=nextsk; + skb = nextsk; } spin_unlock(&s->sk_receive_queue.lock); } diff --git a/net/x25/x25_subr.c b/net/x25/x25_subr.c index 7fd872ad0c20..e20cfadad4d9 100644 --- a/net/x25/x25_subr.c +++ b/net/x25/x25_subr.c @@ -80,7 +80,7 @@ void x25_requeue_frames(struct sock *sk) if (!skb_prev) skb_queue_head(&sk->sk_write_queue, skb); else - skb_append(skb_prev, skb); + skb_append(skb_prev, skb, &sk->sk_write_queue); skb_prev = skb; } } From abc3bc58047efa72ee9c2e208cbeb73d261ad703 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Tue, 9 Aug 2005 19:25:56 -0700 Subject: [PATCH 270/584] [NET]: Kill skb->tc_classid Signed-off-by: Patrick McHardy Signed-off-by: David S. 
Miller --- include/linux/skbuff.h | 3 --- include/net/act_api.h | 2 +- net/core/skbuff.c | 2 -- net/sched/act_api.c | 7 +------ net/sched/gact.c | 2 +- net/sched/ipt.c | 2 +- net/sched/mirred.c | 2 +- net/sched/pedit.c | 2 +- net/sched/police.c | 3 ++- net/sched/simple.c | 2 +- 10 files changed, 9 insertions(+), 18 deletions(-) diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 76c68851474c..f10a8b9628b0 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -196,7 +196,6 @@ struct skb_shared_info { * @private: Data which is private to the HIPPI implementation * @tc_index: Traffic control index * @tc_verd: traffic control verdict - * @tc_classid: traffic control classid */ struct sk_buff { @@ -275,9 +274,7 @@ struct sk_buff { __u32 tc_index; /* traffic control index */ #ifdef CONFIG_NET_CLS_ACT __u32 tc_verd; /* traffic control verdict */ - __u32 tc_classid; /* traffic control classid */ #endif - #endif diff --git a/include/net/act_api.h b/include/net/act_api.h index ed00a995f576..b55eb7c7f033 100644 --- a/include/net/act_api.h +++ b/include/net/act_api.h @@ -63,7 +63,7 @@ struct tc_action_ops __u32 type; /* TBD to match kind */ __u32 capab; /* capabilities includes 4 bit version */ struct module *owner; - int (*act)(struct sk_buff **, struct tc_action *); + int (*act)(struct sk_buff **, struct tc_action *, struct tcf_result *); int (*get_stats)(struct sk_buff *, struct tc_action *); int (*dump)(struct sk_buff *, struct tc_action *,int , int); int (*cleanup)(struct tc_action *, int bind); diff --git a/net/core/skbuff.c b/net/core/skbuff.c index e6564b0a6839..8896e6f8aa42 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -300,7 +300,6 @@ void __kfree_skb(struct sk_buff *skb) skb->tc_index = 0; #ifdef CONFIG_NET_CLS_ACT skb->tc_verd = 0; - skb->tc_classid = 0; #endif #endif @@ -376,7 +375,6 @@ struct sk_buff *skb_clone(struct sk_buff *skb, unsigned int __nocast gfp_mask) n->tc_verd = CLR_TC_OK2MUNGE(n->tc_verd); n->tc_verd = 
CLR_TC_MUNGED(n->tc_verd); C(input_dev); - C(tc_classid); #endif #endif diff --git a/net/sched/act_api.c b/net/sched/act_api.c index 249c61936ea0..c896a0118a32 100644 --- a/net/sched/act_api.c +++ b/net/sched/act_api.c @@ -165,7 +165,7 @@ int tcf_action_exec(struct sk_buff *skb, struct tc_action *act, while ((a = act) != NULL) { repeat: if (a->ops && a->ops->act) { - ret = a->ops->act(&skb, a); + ret = a->ops->act(&skb, a, res); if (TC_MUNGED & skb->tc_verd) { /* copied already, allow trampling */ skb->tc_verd = SET_TC_OK2MUNGE(skb->tc_verd); @@ -179,11 +179,6 @@ repeat: act = a->next; } exec_done: - if (skb->tc_classid > 0) { - res->classid = skb->tc_classid; - res->class = 0; - skb->tc_classid = 0; - } return ret; } diff --git a/net/sched/gact.c b/net/sched/gact.c index a811c89fef7f..d1c6d542912a 100644 --- a/net/sched/gact.c +++ b/net/sched/gact.c @@ -135,7 +135,7 @@ tcf_gact_cleanup(struct tc_action *a, int bind) } static int -tcf_gact(struct sk_buff **pskb, struct tc_action *a) +tcf_gact(struct sk_buff **pskb, struct tc_action *a, struct tcf_result *res) { struct tcf_gact *p = PRIV(a, gact); struct sk_buff *skb = *pskb; diff --git a/net/sched/ipt.c b/net/sched/ipt.c index b114d994d523..f50136eed211 100644 --- a/net/sched/ipt.c +++ b/net/sched/ipt.c @@ -201,7 +201,7 @@ tcf_ipt_cleanup(struct tc_action *a, int bind) } static int -tcf_ipt(struct sk_buff **pskb, struct tc_action *a) +tcf_ipt(struct sk_buff **pskb, struct tc_action *a, struct tcf_result *res) { int ret = 0, result = 0; struct tcf_ipt *p = PRIV(a, ipt); diff --git a/net/sched/mirred.c b/net/sched/mirred.c index f309ce336803..20d06916dc0b 100644 --- a/net/sched/mirred.c +++ b/net/sched/mirred.c @@ -158,7 +158,7 @@ tcf_mirred_cleanup(struct tc_action *a, int bind) } static int -tcf_mirred(struct sk_buff **pskb, struct tc_action *a) +tcf_mirred(struct sk_buff **pskb, struct tc_action *a, struct tcf_result *res) { struct tcf_mirred *p = PRIV(a, mirred); struct net_device *dev; diff --git 
a/net/sched/pedit.c b/net/sched/pedit.c index 678be6a645fb..767d24f4610e 100644 --- a/net/sched/pedit.c +++ b/net/sched/pedit.c @@ -130,7 +130,7 @@ tcf_pedit_cleanup(struct tc_action *a, int bind) } static int -tcf_pedit(struct sk_buff **pskb, struct tc_action *a) +tcf_pedit(struct sk_buff **pskb, struct tc_action *a, struct tcf_result *res) { struct tcf_pedit *p = PRIV(a, pedit); struct sk_buff *skb = *pskb; diff --git a/net/sched/police.c b/net/sched/police.c index c03545faf523..eb39fb2f39b6 100644 --- a/net/sched/police.c +++ b/net/sched/police.c @@ -284,7 +284,8 @@ static int tcf_act_police_cleanup(struct tc_action *a, int bind) return 0; } -static int tcf_act_police(struct sk_buff **pskb, struct tc_action *a) +static int tcf_act_police(struct sk_buff **pskb, struct tc_action *a, + struct tcf_result *res) { psched_time_t now; struct sk_buff *skb = *pskb; diff --git a/net/sched/simple.c b/net/sched/simple.c index 3ab4c675ab5d..8a6ae4f491e8 100644 --- a/net/sched/simple.c +++ b/net/sched/simple.c @@ -44,7 +44,7 @@ static DEFINE_RWLOCK(simp_lock); #include #include -static int tcf_simp(struct sk_buff **pskb, struct tc_action *a) +static int tcf_simp(struct sk_buff **pskb, struct tc_action *a, struct tcf_result *res) { struct sk_buff *skb = *pskb; struct tcf_defact *p = PRIV(a, defact); From ac3247baf8ecadf168642e3898b0212c29c79715 Mon Sep 17 00:00:00 2001 From: Harald Welte Date: Tue, 9 Aug 2005 19:28:03 -0700 Subject: [PATCH 271/584] [NETFILTER]: connection tracking event notifiers This adds a notifier chain based event mechanism for ip_conntrack state changes. As opposed to the previous implementations in patch-o-matic, we do no longer need a field in the skb to achieve this. Thanks to the valuable input from Patrick McHardy and Rusty on the idea of a per_cpu implementation. Signed-off-by: Harald Welte Signed-off-by: David S. 
Miller --- include/linux/netfilter_ipv4/ip_conntrack.h | 144 ++++++++++++++++++ .../linux/netfilter_ipv4/ip_conntrack_core.h | 17 ++- net/ipv4/netfilter/Kconfig | 10 ++ net/ipv4/netfilter/ip_conntrack_core.c | 122 ++++++++++++++- net/ipv4/netfilter/ip_conntrack_ftp.c | 12 +- net/ipv4/netfilter/ip_conntrack_proto_icmp.c | 1 + net/ipv4/netfilter/ip_conntrack_proto_sctp.c | 2 + net/ipv4/netfilter/ip_conntrack_proto_tcp.c | 4 + net/ipv4/netfilter/ip_conntrack_proto_udp.c | 3 +- net/ipv4/netfilter/ip_conntrack_standalone.c | 10 ++ 10 files changed, 311 insertions(+), 14 deletions(-) diff --git a/include/linux/netfilter_ipv4/ip_conntrack.h b/include/linux/netfilter_ipv4/ip_conntrack.h index 4ed720f0c4cd..ae1270c97b50 100644 --- a/include/linux/netfilter_ipv4/ip_conntrack.h +++ b/include/linux/netfilter_ipv4/ip_conntrack.h @@ -65,6 +65,63 @@ enum ip_conntrack_status { /* Both together */ IPS_NAT_DONE_MASK = (IPS_DST_NAT_DONE | IPS_SRC_NAT_DONE), + + /* Connection is dying (removed from lists), can not be unset. 
*/ + IPS_DYING_BIT = 9, + IPS_DYING = (1 << IPS_DYING_BIT), +}; + +/* Connection tracking event bits */ +enum ip_conntrack_events +{ + /* New conntrack */ + IPCT_NEW_BIT = 0, + IPCT_NEW = (1 << IPCT_NEW_BIT), + + /* Expected connection */ + IPCT_RELATED_BIT = 1, + IPCT_RELATED = (1 << IPCT_RELATED_BIT), + + /* Destroyed conntrack */ + IPCT_DESTROY_BIT = 2, + IPCT_DESTROY = (1 << IPCT_DESTROY_BIT), + + /* Timer has been refreshed */ + IPCT_REFRESH_BIT = 3, + IPCT_REFRESH = (1 << IPCT_REFRESH_BIT), + + /* Status has changed */ + IPCT_STATUS_BIT = 4, + IPCT_STATUS = (1 << IPCT_STATUS_BIT), + + /* Update of protocol info */ + IPCT_PROTOINFO_BIT = 5, + IPCT_PROTOINFO = (1 << IPCT_PROTOINFO_BIT), + + /* Volatile protocol info */ + IPCT_PROTOINFO_VOLATILE_BIT = 6, + IPCT_PROTOINFO_VOLATILE = (1 << IPCT_PROTOINFO_VOLATILE_BIT), + + /* New helper for conntrack */ + IPCT_HELPER_BIT = 7, + IPCT_HELPER = (1 << IPCT_HELPER_BIT), + + /* Update of helper info */ + IPCT_HELPINFO_BIT = 8, + IPCT_HELPINFO = (1 << IPCT_HELPINFO_BIT), + + /* Volatile helper info */ + IPCT_HELPINFO_VOLATILE_BIT = 9, + IPCT_HELPINFO_VOLATILE = (1 << IPCT_HELPINFO_VOLATILE_BIT), + + /* NAT info */ + IPCT_NATINFO_BIT = 10, + IPCT_NATINFO = (1 << IPCT_NATINFO_BIT), +}; + +enum ip_conntrack_expect_events { + IPEXP_NEW_BIT = 0, + IPEXP_NEW = (1 << IPEXP_NEW_BIT), }; #ifdef __KERNEL__ @@ -280,6 +337,11 @@ static inline int is_confirmed(struct ip_conntrack *ct) return test_bit(IPS_CONFIRMED_BIT, &ct->status); } +static inline int is_dying(struct ip_conntrack *ct) +{ + return test_bit(IPS_DYING_BIT, &ct->status); +} + extern unsigned int ip_conntrack_htable_size; struct ip_conntrack_stat @@ -303,6 +365,88 @@ struct ip_conntrack_stat #define CONNTRACK_STAT_INC(count) (__get_cpu_var(ip_conntrack_stat).count++) +#ifdef CONFIG_IP_NF_CONNTRACK_EVENTS +#include + +struct ip_conntrack_ecache { + struct ip_conntrack *ct; + unsigned int events; +}; +DECLARE_PER_CPU(struct ip_conntrack_ecache, ip_conntrack_ecache); + 
+#define CONNTRACK_ECACHE(x) (__get_cpu_var(ip_conntrack_ecache).x) + +extern struct notifier_block *ip_conntrack_chain; +extern struct notifier_block *ip_conntrack_expect_chain; + +static inline int ip_conntrack_register_notifier(struct notifier_block *nb) +{ + return notifier_chain_register(&ip_conntrack_chain, nb); +} + +static inline int ip_conntrack_unregister_notifier(struct notifier_block *nb) +{ + return notifier_chain_unregister(&ip_conntrack_chain, nb); +} + +static inline int +ip_conntrack_expect_register_notifier(struct notifier_block *nb) +{ + return notifier_chain_register(&ip_conntrack_expect_chain, nb); +} + +static inline int +ip_conntrack_expect_unregister_notifier(struct notifier_block *nb) +{ + return notifier_chain_unregister(&ip_conntrack_expect_chain, nb); +} + +static inline void +ip_conntrack_event_cache(enum ip_conntrack_events event, + const struct sk_buff *skb) +{ + struct ip_conntrack_ecache *ecache = + &__get_cpu_var(ip_conntrack_ecache); + + if (unlikely((struct ip_conntrack *) skb->nfct != ecache->ct)) { + if (net_ratelimit()) { + printk(KERN_ERR "ctevent: skb->ct != ecache->ct !!!\n"); + dump_stack(); + } + } + ecache->events |= event; +} + +extern void +ip_conntrack_deliver_cached_events_for(const struct ip_conntrack *ct); +extern void ip_conntrack_event_cache_init(const struct sk_buff *skb); + +static inline void ip_conntrack_event(enum ip_conntrack_events event, + struct ip_conntrack *ct) +{ + if (is_confirmed(ct) && !is_dying(ct)) + notifier_call_chain(&ip_conntrack_chain, event, ct); +} + +static inline void +ip_conntrack_expect_event(enum ip_conntrack_expect_events event, + struct ip_conntrack_expect *exp) +{ + notifier_call_chain(&ip_conntrack_expect_chain, event, exp); +} +#else /* CONFIG_IP_NF_CONNTRACK_EVENTS */ +static inline void ip_conntrack_event_cache(enum ip_conntrack_events event, + const struct sk_buff *skb) {} +static inline void ip_conntrack_event(enum ip_conntrack_events event, + struct ip_conntrack *ct) {} 
+static inline void ip_conntrack_deliver_cached_events_for( + struct ip_conntrack *ct) {} +static inline void ip_conntrack_event_cache_init(const struct sk_buff *skb) {} +static inline void +ip_conntrack_expect_event(enum ip_conntrack_expect_events event, + struct ip_conntrack_expect *exp) {} +#endif /* CONFIG_IP_NF_CONNTRACK_EVENTS */ + #ifdef CONFIG_IP_NF_NAT_NEEDED static inline int ip_nat_initialized(struct ip_conntrack *conntrack, enum ip_nat_manip_type manip) diff --git a/include/linux/netfilter_ipv4/ip_conntrack_core.h b/include/linux/netfilter_ipv4/ip_conntrack_core.h index 694aec9b4784..46eeea1e2733 100644 --- a/include/linux/netfilter_ipv4/ip_conntrack_core.h +++ b/include/linux/netfilter_ipv4/ip_conntrack_core.h @@ -38,12 +38,21 @@ extern int __ip_conntrack_confirm(struct sk_buff **pskb); /* Confirm a connection: returns NF_DROP if packet must be dropped. */ static inline int ip_conntrack_confirm(struct sk_buff **pskb) { - if ((*pskb)->nfct - && !is_confirmed((struct ip_conntrack *)(*pskb)->nfct)) - return __ip_conntrack_confirm(pskb); - return NF_ACCEPT; + struct ip_conntrack *ct = (struct ip_conntrack *)(*pskb)->nfct; + int ret = NF_ACCEPT; + + if (ct && !is_confirmed(ct)) + ret = __ip_conntrack_confirm(pskb); + ip_conntrack_deliver_cached_events_for(ct); + + return ret; } +#ifdef CONFIG_IP_NF_CONNTRACK_EVENTS +struct ip_conntrack_ecache; +extern void __ip_ct_deliver_cached_events(struct ip_conntrack_ecache *ec); +#endif + extern struct list_head *ip_conntrack_hash; extern struct list_head ip_conntrack_expect_list; extern rwlock_t ip_conntrack_lock; diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig index 46d4cb1c06f0..ff3393eba924 100644 --- a/net/ipv4/netfilter/Kconfig +++ b/net/ipv4/netfilter/Kconfig @@ -40,6 +40,16 @@ config IP_NF_CONNTRACK_MARK of packets, but this mark value is kept in the conntrack session instead of the individual packets. 
+config IP_NF_CONNTRACK_EVENTS + bool "Connection tracking events" + depends on IP_NF_CONNTRACK + help + If this option is enabled, the connection tracking code will + provide a notifier chain that can be used by other kernel code + to get notified about changes in the connection tracking state. + + If unsure, say `N'. + config IP_NF_CT_PROTO_SCTP tristate 'SCTP protocol connection tracking support (EXPERIMENTAL)' depends on IP_NF_CONNTRACK && EXPERIMENTAL diff --git a/net/ipv4/netfilter/ip_conntrack_core.c b/net/ipv4/netfilter/ip_conntrack_core.c index 04c3414361d4..caf89deae116 100644 --- a/net/ipv4/netfilter/ip_conntrack_core.c +++ b/net/ipv4/netfilter/ip_conntrack_core.c @@ -37,6 +37,7 @@ #include #include #include +#include /* ip_conntrack_lock protects the main hash table, protocol/helper/expected registrations, conntrack timers*/ @@ -49,7 +50,7 @@ #include #include -#define IP_CONNTRACK_VERSION "2.1" +#define IP_CONNTRACK_VERSION "2.2" #if 0 #define DEBUGP printk @@ -76,6 +77,81 @@ unsigned int ip_ct_log_invalid; static LIST_HEAD(unconfirmed); static int ip_conntrack_vmalloc; +#ifdef CONFIG_IP_NF_CONNTRACK_EVENTS +struct notifier_block *ip_conntrack_chain; +struct notifier_block *ip_conntrack_expect_chain; + +DEFINE_PER_CPU(struct ip_conntrack_ecache, ip_conntrack_ecache); + +static inline void __deliver_cached_events(struct ip_conntrack_ecache *ecache) +{ + if (is_confirmed(ecache->ct) && !is_dying(ecache->ct) && ecache->events) + notifier_call_chain(&ip_conntrack_chain, ecache->events, + ecache->ct); + ecache->events = 0; +} + +void __ip_ct_deliver_cached_events(struct ip_conntrack_ecache *ecache) +{ + __deliver_cached_events(ecache); +} + +/* Deliver all cached events for a particular conntrack.
This is called + * by code prior to async packet handling or freeing the skb */ +void +ip_conntrack_deliver_cached_events_for(const struct ip_conntrack *ct) +{ + struct ip_conntrack_ecache *ecache = + &__get_cpu_var(ip_conntrack_ecache); + + if (!ct) + return; + + if (ecache->ct == ct) { + DEBUGP("ecache: delivering event for %p\n", ct); + __deliver_cached_events(ecache); + } else { + if (net_ratelimit()) + printk(KERN_WARNING "ecache: want to deliver for %p, " + "but cache has %p\n", ct, ecache->ct); + } + + /* signal that events have already been delivered */ + ecache->ct = NULL; +} + +/* Deliver cached events for old pending events, if current conntrack != old */ +void ip_conntrack_event_cache_init(const struct sk_buff *skb) +{ + struct ip_conntrack *ct = (struct ip_conntrack *) skb->nfct; + struct ip_conntrack_ecache *ecache = + &__get_cpu_var(ip_conntrack_ecache); + + /* take care of delivering potentially old events */ + if (ecache->ct != ct) { + enum ip_conntrack_info ctinfo; + /* we have to check, since at startup the cache is NULL */ + if (likely(ecache->ct)) { + DEBUGP("ecache: entered for different conntrack: " + "ecache->ct=%p, skb->nfct=%p.
delivering " + "events\n", ecache->ct, ct); + __deliver_cached_events(ecache); + ip_conntrack_put(ecache->ct); + } else { + DEBUGP("ecache: entered for conntrack %p, " + "cache was clean before\n", ct); + } + + /* initialize for this conntrack/packet */ + ecache->ct = ip_conntrack_get(skb, &ctinfo); + /* ecache->events cleared by __deliver_cached_events() */ + } else { + DEBUGP("ecache: re-entered for conntrack %p.\n", ct); + } +} + +#endif /* CONFIG_IP_NF_CONNTRACK_EVENTS */ + DEFINE_PER_CPU(struct ip_conntrack_stat, ip_conntrack_stat); void @@ -223,6 +299,8 @@ destroy_conntrack(struct nf_conntrack *nfct) IP_NF_ASSERT(atomic_read(&nfct->use) == 0); IP_NF_ASSERT(!timer_pending(&ct->timeout)); + set_bit(IPS_DYING_BIT, &ct->status); + /* To make sure we don't get any weird locking issues here: * destroy_conntrack() MUST NOT be called with a write lock * to ip_conntrack_lock!!! -HW */ @@ -261,6 +339,7 @@ static void death_by_timeout(unsigned long ul_conntrack) { struct ip_conntrack *ct = (void *)ul_conntrack; + ip_conntrack_event(IPCT_DESTROY, ct); write_lock_bh(&ip_conntrack_lock); /* Inside lock so preempt is disabled on module removal path. * Otherwise we can get spurious warnings. */ @@ -374,6 +453,16 @@ __ip_conntrack_confirm(struct sk_buff **pskb) set_bit(IPS_CONFIRMED_BIT, &ct->status); CONNTRACK_STAT_INC(insert); write_unlock_bh(&ip_conntrack_lock); + if (ct->helper) + ip_conntrack_event_cache(IPCT_HELPER, *pskb); +#ifdef CONFIG_IP_NF_NAT_NEEDED + if (test_bit(IPS_SRC_NAT_DONE_BIT, &ct->status) || + test_bit(IPS_DST_NAT_DONE_BIT, &ct->status)) + ip_conntrack_event_cache(IPCT_NATINFO, *pskb); +#endif + ip_conntrack_event_cache(master_ct(ct) ? + IPCT_RELATED : IPCT_NEW, *pskb); + return NF_ACCEPT; } @@ -607,7 +696,7 @@ unsigned int ip_conntrack_in(unsigned int hooknum, struct ip_conntrack *ct; enum ip_conntrack_info ctinfo; struct ip_conntrack_protocol *proto; - int set_reply; + int set_reply = 0; int ret; /* Previously seen (loopback or untracked)? Ignore.
*/ @@ -666,6 +755,8 @@ unsigned int ip_conntrack_in(unsigned int hooknum, IP_NF_ASSERT((*pskb)->nfct); + ip_conntrack_event_cache_init(*pskb); + ret = proto->packet(ct, *pskb, ctinfo); if (ret < 0) { /* Invalid: inverse of the return code tells @@ -676,8 +767,8 @@ unsigned int ip_conntrack_in(unsigned int hooknum, return -ret; } - if (set_reply) - set_bit(IPS_SEEN_REPLY_BIT, &ct->status); + if (set_reply && !test_and_set_bit(IPS_SEEN_REPLY_BIT, &ct->status)) + ip_conntrack_event_cache(IPCT_STATUS, *pskb); return ret; } @@ -824,6 +915,7 @@ int ip_conntrack_expect_related(struct ip_conntrack_expect *expect) evict_oldest_expect(expect->master); ip_conntrack_expect_insert(expect); + ip_conntrack_expect_event(IPEXP_NEW, expect); ret = 0; out: write_unlock_bh(&ip_conntrack_lock); @@ -861,8 +953,10 @@ int ip_conntrack_helper_register(struct ip_conntrack_helper *me) static inline int unhelp(struct ip_conntrack_tuple_hash *i, const struct ip_conntrack_helper *me) { - if (tuplehash_to_ctrack(i)->helper == me) + if (tuplehash_to_ctrack(i)->helper == me) { + ip_conntrack_event(IPCT_HELPER, tuplehash_to_ctrack(i)); tuplehash_to_ctrack(i)->helper = NULL; + } return 0; } @@ -924,6 +1018,7 @@ void ip_ct_refresh_acct(struct ip_conntrack *ct, if (del_timer(&ct->timeout)) { ct->timeout.expires = jiffies + extra_jiffies; add_timer(&ct->timeout); + ip_conntrack_event_cache(IPCT_REFRESH, skb); } ct_add_counters(ct, ctinfo, skb); write_unlock_bh(&ip_conntrack_lock); @@ -1012,6 +1107,23 @@ ip_ct_iterate_cleanup(int (*iter)(struct ip_conntrack *i, void *), void *data) ip_conntrack_put(ct); } + +#ifdef CONFIG_IP_NF_CONNTRACK_EVENTS + { + /* we need to deliver all cached events in order to drop + * the reference counts */ + int cpu; + for_each_cpu(cpu) { + struct ip_conntrack_ecache *ecache = + &per_cpu(ip_conntrack_ecache, cpu); + if (ecache->ct) { + __ip_ct_deliver_cached_events(ecache); + ip_conntrack_put(ecache->ct); + ecache->ct = NULL; + } + } + } +#endif } /* Fast function for those 
who don't want to parse /proc (and I don't diff --git a/net/ipv4/netfilter/ip_conntrack_ftp.c b/net/ipv4/netfilter/ip_conntrack_ftp.c index 7a3b773be3f9..9658896f899a 100644 --- a/net/ipv4/netfilter/ip_conntrack_ftp.c +++ b/net/ipv4/netfilter/ip_conntrack_ftp.c @@ -262,7 +262,8 @@ static int find_nl_seq(u32 seq, const struct ip_ct_ftp_master *info, int dir) } /* We don't update if it's older than what we have. */ -static void update_nl_seq(u32 nl_seq, struct ip_ct_ftp_master *info, int dir) +static void update_nl_seq(u32 nl_seq, struct ip_ct_ftp_master *info, int dir, + struct sk_buff *skb) { unsigned int i, oldest = NUM_SEQ_TO_REMEMBER; @@ -276,10 +277,13 @@ static void update_nl_seq(u32 nl_seq, struct ip_ct_ftp_master *info, int dir) oldest = i; } - if (info->seq_aft_nl_num[dir] < NUM_SEQ_TO_REMEMBER) + if (info->seq_aft_nl_num[dir] < NUM_SEQ_TO_REMEMBER) { info->seq_aft_nl[dir][info->seq_aft_nl_num[dir]++] = nl_seq; - else if (oldest != NUM_SEQ_TO_REMEMBER) + ip_conntrack_event_cache(IPCT_HELPINFO_VOLATILE, skb); + } else if (oldest != NUM_SEQ_TO_REMEMBER) { info->seq_aft_nl[dir][oldest] = nl_seq; + ip_conntrack_event_cache(IPCT_HELPINFO_VOLATILE, skb); + } } static int help(struct sk_buff **pskb, @@ -439,7 +443,7 @@ out_update_nl: /* Now if this ends in \n, update ftp info. Seq may have been * adjusted by NAT code. 
*/ if (ends_in_nl) - update_nl_seq(seq, ct_ftp_info,dir); + update_nl_seq(seq, ct_ftp_info,dir, *pskb); out: spin_unlock_bh(&ip_ftp_lock); return ret; diff --git a/net/ipv4/netfilter/ip_conntrack_proto_icmp.c b/net/ipv4/netfilter/ip_conntrack_proto_icmp.c index 602c74db3252..dca1f63d6f51 100644 --- a/net/ipv4/netfilter/ip_conntrack_proto_icmp.c +++ b/net/ipv4/netfilter/ip_conntrack_proto_icmp.c @@ -102,6 +102,7 @@ static int icmp_packet(struct ip_conntrack *ct, ct->timeout.function((unsigned long)ct); } else { atomic_inc(&ct->proto.icmp.count); + ip_conntrack_event_cache(IPCT_PROTOINFO_VOLATILE, skb); ip_ct_refresh_acct(ct, ctinfo, skb, ip_ct_icmp_timeout); } diff --git a/net/ipv4/netfilter/ip_conntrack_proto_sctp.c b/net/ipv4/netfilter/ip_conntrack_proto_sctp.c index 31d75390bf12..3d5f878a07d1 100644 --- a/net/ipv4/netfilter/ip_conntrack_proto_sctp.c +++ b/net/ipv4/netfilter/ip_conntrack_proto_sctp.c @@ -404,6 +404,8 @@ static int sctp_packet(struct ip_conntrack *conntrack, } conntrack->proto.sctp.state = newconntrack; + if (oldsctpstate != newconntrack) + ip_conntrack_event_cache(IPCT_PROTOINFO, skb); write_unlock_bh(&sctp_lock); } diff --git a/net/ipv4/netfilter/ip_conntrack_proto_tcp.c b/net/ipv4/netfilter/ip_conntrack_proto_tcp.c index 809dfed766d4..a569ad1ee4d9 100644 --- a/net/ipv4/netfilter/ip_conntrack_proto_tcp.c +++ b/net/ipv4/netfilter/ip_conntrack_proto_tcp.c @@ -973,6 +973,10 @@ static int tcp_packet(struct ip_conntrack *conntrack, ? 
ip_ct_tcp_timeout_max_retrans : *tcp_timeouts[new_state]; write_unlock_bh(&tcp_lock); + ip_conntrack_event_cache(IPCT_PROTOINFO_VOLATILE, skb); + if (new_state != old_state) + ip_conntrack_event_cache(IPCT_PROTOINFO, skb); + if (!test_bit(IPS_SEEN_REPLY_BIT, &conntrack->status)) { /* If only reply is a RST, we can consider ourselves not to have an established connection: this is a fairly common diff --git a/net/ipv4/netfilter/ip_conntrack_proto_udp.c b/net/ipv4/netfilter/ip_conntrack_proto_udp.c index 8c1eaba098d4..6066eaf4d825 100644 --- a/net/ipv4/netfilter/ip_conntrack_proto_udp.c +++ b/net/ipv4/netfilter/ip_conntrack_proto_udp.c @@ -73,7 +73,8 @@ static int udp_packet(struct ip_conntrack *conntrack, ip_ct_refresh_acct(conntrack, ctinfo, skb, ip_ct_udp_timeout_stream); /* Also, more likely to be important, and not a probe */ - set_bit(IPS_ASSURED_BIT, &conntrack->status); + if (!test_and_set_bit(IPS_ASSURED_BIT, &conntrack->status)) + ip_conntrack_event_cache(IPCT_STATUS, skb); } else ip_ct_refresh_acct(conntrack, ctinfo, skb, ip_ct_udp_timeout); diff --git a/net/ipv4/netfilter/ip_conntrack_standalone.c b/net/ipv4/netfilter/ip_conntrack_standalone.c index dccd4abab7ae..f0880004115d 100644 --- a/net/ipv4/netfilter/ip_conntrack_standalone.c +++ b/net/ipv4/netfilter/ip_conntrack_standalone.c @@ -402,6 +402,7 @@ static unsigned int ip_confirm(unsigned int hooknum, const struct net_device *out, int (*okfn)(struct sk_buff *)) { + ip_conntrack_event_cache_init(*pskb); /* We've seen it coming out the other side: confirm it */ return ip_conntrack_confirm(pskb); } @@ -419,6 +420,7 @@ static unsigned int ip_conntrack_help(unsigned int hooknum, ct = ip_conntrack_get(*pskb, &ctinfo); if (ct && ct->helper) { unsigned int ret; + ip_conntrack_event_cache_init(*pskb); ret = ct->helper->help(pskb, ct, ctinfo); if (ret != NF_ACCEPT) return ret; @@ -889,6 +891,7 @@ static int init_or_cleanup(int init) return ret; cleanup: + synchronize_net(); #ifdef CONFIG_SYSCTL 
unregister_sysctl_table(ip_ct_sysctl_header); cleanup_localinops: @@ -971,6 +974,13 @@ void need_ip_conntrack(void) { } +#ifdef CONFIG_IP_NF_CONNTRACK_EVENTS +EXPORT_SYMBOL_GPL(ip_conntrack_chain); +EXPORT_SYMBOL_GPL(ip_conntrack_expect_chain); +EXPORT_SYMBOL_GPL(ip_conntrack_register_notifier); +EXPORT_SYMBOL_GPL(ip_conntrack_unregister_notifier); +EXPORT_PER_CPU_SYMBOL_GPL(ip_conntrack_ecache); +#endif EXPORT_SYMBOL(ip_conntrack_protocol_register); EXPORT_SYMBOL(ip_conntrack_protocol_unregister); EXPORT_SYMBOL(ip_ct_get_tuple); From f9e815b376dc19e6afc551cd755ac64e9e42d81f Mon Sep 17 00:00:00 2001 From: Harald Welte Date: Tue, 9 Aug 2005 19:30:24 -0700 Subject: [PATCH 272/584] [NETFITLER]: Add nfnetlink layer. Introduce "nfnetlink" (netfilter netlink) layer. This layer is used as transport layer for all userspace communication of the new upcoming netfilter subsystems, such as ctnetlink, nfnetlink_queue and some day even the mythical pkttables ;) Signed-off-by: Harald Welte Signed-off-by: David S. 
Miller --- include/linux/netfilter/nfnetlink.h | 145 ++++++++++++ net/Kconfig | 2 + net/Makefile | 1 + net/netfilter/Kconfig | 5 + net/netfilter/Makefile | 1 + net/netfilter/nfnetlink.c | 343 ++++++++++++++++++++++++++++ 6 files changed, 497 insertions(+) create mode 100644 include/linux/netfilter/nfnetlink.h create mode 100644 net/netfilter/Kconfig create mode 100644 net/netfilter/Makefile create mode 100644 net/netfilter/nfnetlink.c diff --git a/include/linux/netfilter/nfnetlink.h b/include/linux/netfilter/nfnetlink.h new file mode 100644 index 000000000000..8f1bfb8d650b --- /dev/null +++ b/include/linux/netfilter/nfnetlink.h @@ -0,0 +1,145 @@ +#ifndef _NFNETLINK_H +#define _NFNETLINK_H +#include + +/* nfnetlink groups: Up to 32 maximum */ +#define NF_NETLINK_CONNTRACK_NEW 0x00000001 +#define NF_NETLINK_CONNTRACK_UPDATE 0x00000002 +#define NF_NETLINK_CONNTRACK_DESTROY 0x00000004 +#define NF_NETLINK_CONNTRACK_EXP_NEW 0x00000008 +#define NF_NETLINK_CONNTRACK_EXP_UPDATE 0x00000010 +#define NF_NETLINK_CONNTRACK_EXP_DESTROY 0x00000020 + +/* Generic structure for encapsulation of optional netfilter information. + * It is reminiscent of sockaddr, but with sa_family replaced + * with attribute type. + * ! This should someday be put somewhere generic as now rtnetlink and + * ! nfnetlink use the same attributes methods. - J. Schulist.
+ */ + +struct nfattr +{ + u_int16_t nfa_len; + u_int16_t nfa_type; +} __attribute__ ((packed)); + +/* FIXME: Shamelessly copy and pasted from rtnetlink.h, it's time + * to put this in a generic file */ + +#define NFA_ALIGNTO 4 +#define NFA_ALIGN(len) (((len) + NFA_ALIGNTO - 1) & ~(NFA_ALIGNTO - 1)) +#define NFA_OK(nfa,len) ((len) > 0 && (nfa)->nfa_len >= sizeof(struct nfattr) \ + && (nfa)->nfa_len <= (len)) +#define NFA_NEXT(nfa,attrlen) ((attrlen) -= NFA_ALIGN((nfa)->nfa_len), \ + (struct nfattr *)(((char *)(nfa)) + NFA_ALIGN((nfa)->nfa_len))) +#define NFA_LENGTH(len) (NFA_ALIGN(sizeof(struct nfattr)) + (len)) +#define NFA_SPACE(len) NFA_ALIGN(NFA_LENGTH(len)) +#define NFA_DATA(nfa) ((void *)(((char *)(nfa)) + NFA_LENGTH(0))) +#define NFA_PAYLOAD(nfa) ((int)((nfa)->nfa_len) - NFA_LENGTH(0)) +#define NFA_NEST(skb, type) \ +({ struct nfattr *__start = (struct nfattr *) (skb)->tail; \ + NFA_PUT(skb, type, 0, NULL); \ + __start; }) +#define NFA_NEST_END(skb, start) \ +({ (start)->nfa_len = ((skb)->tail - (unsigned char *) (start)); \ + (skb)->len; }) +#define NFA_NEST_CANCEL(skb, start) \ +({ if (start) \ + skb_trim(skb, (unsigned char *) (start) - (skb)->data); \ + -1; }) + +/* General form of address family dependent message. + */ +struct nfgenmsg { + u_int8_t nfgen_family; /* AF_xxx */ + u_int8_t version; /* nfnetlink version */ + u_int16_t res_id; /* resource id */ +} __attribute__ ((packed)); + +#define NFNETLINK_V1 1 + +#define NFM_NFA(n) ((struct nfattr *)(((char *)(n)) \ + + NLMSG_ALIGN(sizeof(struct nfgenmsg)))) +#define NFM_PAYLOAD(n) NLMSG_PAYLOAD(n, sizeof(struct nfgenmsg)) + +/* netfilter netlink message types are split in two pieces: + * 8 bit subsystem, 8bit operation. 
+ */ + +#define NFNL_SUBSYS_ID(x) ((x & 0xff00) >> 8) +#define NFNL_MSG_TYPE(x) (x & 0x00ff) + +enum nfnl_subsys_id { + NFNL_SUBSYS_NONE = 0, + NFNL_SUBSYS_CTNETLINK, + NFNL_SUBSYS_CTNETLINK_EXP, + NFNL_SUBSYS_IPTNETLINK, + NFNL_SUBSYS_QUEUE, + NFNL_SUBSYS_ULOG, + NFNL_SUBSYS_COUNT, +}; + +#ifdef __KERNEL__ + +#include + +struct nfnl_callback +{ + kernel_cap_t cap_required; /* capabilities required for this msg */ + int (*call)(struct sock *nl, struct sk_buff *skb, + struct nlmsghdr *nlh, struct nfattr *cda[], int *errp); +}; + +struct nfnetlink_subsystem +{ + const char *name; + __u8 subsys_id; /* nfnetlink subsystem ID */ + __u8 cb_count; /* number of callbacks */ + u_int32_t attr_count; /* number of nfattr's */ + struct nfnl_callback *cb; /* callback for individual types */ +}; + +extern void __nfa_fill(struct sk_buff *skb, int attrtype, + int attrlen, const void *data); +#define NFA_PUT(skb, attrtype, attrlen, data) \ +({ if (skb_tailroom(skb) < (int)NFA_SPACE(attrlen)) goto nfattr_failure; \ + __nfa_fill(skb, attrtype, attrlen, data); }) + +extern struct semaphore nfnl_sem; + +#define nfnl_shlock() down(&nfnl_sem) +#define nfnl_shlock_nowait() down_trylock(&nfnl_sem) + +#define nfnl_shunlock() do { up(&nfnl_sem); \ + if(nfnl && nfnl->sk_receive_queue.qlen) \ + nfnl->sk_data_ready(nfnl, 0); \ + } while(0) + +extern void nfnl_lock(void); +extern void nfnl_unlock(void); + +extern int nfnetlink_subsys_register(struct nfnetlink_subsystem *n); +extern int nfnetlink_subsys_unregister(struct nfnetlink_subsystem *n); + +extern int nfattr_parse(struct nfattr *tb[], int maxattr, + struct nfattr *nfa, int len); + +#define nfattr_parse_nested(tb, max, nfa) \ + nfattr_parse((tb), (max), NFA_DATA((nfa)), NFA_PAYLOAD((nfa))) + +#define nfattr_bad_size(tb, max, cta_min) \ +({ int __i, __res = 0; \ + for (__i=0; __i, + * (C) 2002-2005 by Harald Welte + * (C) 2005 by Pablo Neira Ayuso + * + * Initial netfilter messages via netlink development funded and + * generally made 
possible by Network Robots, Inc. (www.networkrobots.com) + * + * Further development of this code funded by Astaro AG (http://www.astaro.com) + * + * This software may be used and distributed according to the terms + * of the GNU General Public License, incorporated herein by reference. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +MODULE_LICENSE("GPL"); + +static char __initdata nfversion[] = "0.30"; + +#if 0 +#define DEBUGP printk +#else +#define DEBUGP(format, args...) +#endif + +static struct sock *nfnl = NULL; +static struct nfnetlink_subsystem *subsys_table[NFNL_SUBSYS_COUNT]; +DECLARE_MUTEX(nfnl_sem); + +void nfnl_lock(void) +{ + nfnl_shlock(); +} + +void nfnl_unlock(void) +{ + nfnl_shunlock(); +} + +int nfnetlink_subsys_register(struct nfnetlink_subsystem *n) +{ + DEBUGP("registering subsystem ID %u\n", n->subsys_id); + + /* If the netlink socket wasn't created, then fail */ + if (!nfnl) + return -1; + + nfnl_lock(); + subsys_table[n->subsys_id] = n; + nfnl_unlock(); + + return 0; +} + +int nfnetlink_subsys_unregister(struct nfnetlink_subsystem *n) +{ + DEBUGP("unregistering subsystem ID %u\n", n->subsys_id); + + nfnl_lock(); + subsys_table[n->subsys_id] = NULL; + nfnl_unlock(); + + return 0; +} + +static inline struct nfnetlink_subsystem *nfnetlink_get_subsys(u_int16_t type) +{ + u_int8_t subsys_id = NFNL_SUBSYS_ID(type); + + if (subsys_id >= NFNL_SUBSYS_COUNT + || subsys_table[subsys_id] == NULL) + return NULL; + + return subsys_table[subsys_id]; +} + +static inline struct nfnl_callback * +nfnetlink_find_client(u_int16_t type, struct nfnetlink_subsystem *ss) +{ + u_int8_t cb_id = NFNL_MSG_TYPE(type); + + if (cb_id >= ss->cb_count) { + DEBUGP("msgtype %u >= %u, returning\n", type, ss->cb_count); + return NULL; + } + + return &ss->cb[cb_id]; +} + +void __nfa_fill(struct sk_buff *skb, 
int attrtype, int attrlen, + const void *data) +{ + struct nfattr *nfa; + int size = NFA_LENGTH(attrlen); + + nfa = (struct nfattr *)skb_put(skb, NFA_ALIGN(size)); + nfa->nfa_type = attrtype; + nfa->nfa_len = size; + memcpy(NFA_DATA(nfa), data, attrlen); +} + +int nfattr_parse(struct nfattr *tb[], int maxattr, struct nfattr *nfa, int len) +{ + memset(tb, 0, sizeof(struct nfattr *) * maxattr); + + while (NFA_OK(nfa, len)) { + unsigned flavor = nfa->nfa_type; + if (flavor && flavor <= maxattr) + tb[flavor-1] = nfa; + nfa = NFA_NEXT(nfa, len); + } + + return 0; +} + +/** + * nfnetlink_check_attributes - check and parse nfnetlink attributes + * + * subsys: nfnl subsystem for which this message is to be parsed + * nlmsghdr: netlink message to be checked/parsed + * cda: array of pointers, needs to be at least subsys->attr_count big + * + */ +static int +nfnetlink_check_attributes(struct nfnetlink_subsystem *subsys, + struct nlmsghdr *nlh, struct nfattr *cda[]) +{ + int min_len; + + memset(cda, 0, sizeof(struct nfattr *) * subsys->attr_count); + + /* check attribute lengths. */ + min_len = NLMSG_ALIGN(sizeof(struct nfgenmsg)); + if (nlh->nlmsg_len < min_len) + return -EINVAL; + + if (nlh->nlmsg_len > min_len) { + struct nfattr *attr = NFM_NFA(NLMSG_DATA(nlh)); + int attrlen = nlh->nlmsg_len - NLMSG_ALIGN(min_len); + + while (NFA_OK(attr, attrlen)) { + unsigned flavor = attr->nfa_type; + if (flavor) { + if (flavor > subsys->attr_count) + return -EINVAL; + cda[flavor - 1] = attr; + } + attr = NFA_NEXT(attr, attrlen); + } + } else + return -EINVAL; + + return 0; +} + +int nfnetlink_send(struct sk_buff *skb, u32 pid, unsigned group, int echo) +{ + int allocation = in_interrupt() ? 
GFP_ATOMIC : GFP_KERNEL; + int err = 0; + + NETLINK_CB(skb).dst_groups = group; + if (echo) + atomic_inc(&skb->users); + netlink_broadcast(nfnl, skb, pid, group, allocation); + if (echo) + err = netlink_unicast(nfnl, skb, pid, MSG_DONTWAIT); + + return err; +} + +int nfnetlink_unicast(struct sk_buff *skb, u_int32_t pid, int flags) +{ + return netlink_unicast(nfnl, skb, pid, flags); +} + +/* Process one complete nfnetlink message. */ +static inline int nfnetlink_rcv_msg(struct sk_buff *skb, + struct nlmsghdr *nlh, int *errp) +{ + struct nfnl_callback *nc; + struct nfnetlink_subsystem *ss; + int type, err = 0; + + DEBUGP("entered; subsys=%u, msgtype=%u\n", + NFNL_SUBSYS_ID(nlh->nlmsg_type), + NFNL_MSG_TYPE(nlh->nlmsg_type)); + + /* Only requests are handled by kernel now. */ + if (!(nlh->nlmsg_flags & NLM_F_REQUEST)) { + DEBUGP("received non-request message\n"); + return 0; + } + + /* All the messages must at least contain nfgenmsg */ + if (nlh->nlmsg_len < + NLMSG_LENGTH(NLMSG_ALIGN(sizeof(struct nfgenmsg)))) { + DEBUGP("received message was too short\n"); + return 0; + } + + type = nlh->nlmsg_type; + ss = nfnetlink_get_subsys(type); + if (!ss) + goto err_inval; + + nc = nfnetlink_find_client(type, ss); + if (!nc) { + DEBUGP("unable to find client for type %d\n", type); + goto err_inval; + } + + if (nc->cap_required && + !cap_raised(NETLINK_CB(skb).eff_cap, nc->cap_required)) { + DEBUGP("permission denied for type %d\n", type); + *errp = -EPERM; + return -1; + } + + { + struct nfattr *cda[ss->attr_count]; + + memset(cda, 0, ss->attr_count*sizeof(struct nfattr *)); + + err = nfnetlink_check_attributes(ss, nlh, cda); + if (err < 0) + goto err_inval; + + err = nc->call(nfnl, skb, nlh, cda, errp); + *errp = err; + return err; + } + +err_inval: + *errp = -EINVAL; + return -1; +} + +/* Process one packet of messages. 
*/ +static inline int nfnetlink_rcv_skb(struct sk_buff *skb) +{ + int err; + struct nlmsghdr *nlh; + + while (skb->len >= NLMSG_SPACE(0)) { + u32 rlen; + + nlh = (struct nlmsghdr *)skb->data; + if (nlh->nlmsg_len < sizeof(struct nlmsghdr) + || skb->len < nlh->nlmsg_len) + return 0; + rlen = NLMSG_ALIGN(nlh->nlmsg_len); + if (rlen > skb->len) + rlen = skb->len; + if (nfnetlink_rcv_msg(skb, nlh, &err)) { + if (!err) + return -1; + netlink_ack(skb, nlh, err); + } else + if (nlh->nlmsg_flags & NLM_F_ACK) + netlink_ack(skb, nlh, 0); + skb_pull(skb, rlen); + } + + return 0; +} + +static void nfnetlink_rcv(struct sock *sk, int len) +{ + do { + struct sk_buff *skb; + + if (nfnl_shlock_nowait()) + return; + + while ((skb = skb_dequeue(&sk->sk_receive_queue)) != NULL) { + if (nfnetlink_rcv_skb(skb)) { + if (skb->len) + skb_queue_head(&sk->sk_receive_queue, + skb); + else + kfree_skb(skb); + break; + } + kfree_skb(skb); + } + + up(&nfnl_sem); + } while(nfnl && nfnl->sk_receive_queue.qlen); +} + +void __exit nfnetlink_exit(void) +{ + printk("Removing netfilter NETLINK layer.\n"); + sock_release(nfnl->sk_socket); + return; +} + +int __init nfnetlink_init(void) +{ + printk("Netfilter messages via NETLINK v%s.\n", nfversion); + + nfnl = netlink_kernel_create(NETLINK_NETFILTER, nfnetlink_rcv); + if (!nfnl) { + printk(KERN_ERR "cannot initialize nfnetlink!\n"); + return -1; + } + + return 0; +} + +module_init(nfnetlink_init); +module_exit(nfnetlink_exit); + +EXPORT_SYMBOL_GPL(nfnetlink_subsys_register); +EXPORT_SYMBOL_GPL(nfnetlink_subsys_unregister); +EXPORT_SYMBOL_GPL(nfnetlink_send); +EXPORT_SYMBOL_GPL(nfnetlink_unicast); +EXPORT_SYMBOL_GPL(nfattr_parse); +EXPORT_SYMBOL_GPL(__nfa_fill); From b0573dea1fb32ebc72ffa05980fd840df1d80860 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Tue, 9 Aug 2005 19:30:51 -0700 Subject: [PATCH 273/584] [NET]: Introduce SO_{SND,RCV}BUFFORCE socket options Allows overriding of sysctl_{wmem,rmem}_max Signed-off-by: Patrick McHardy Signed-off-by:
David S. Miller --- include/asm-alpha/socket.h | 2 ++ include/asm-arm/socket.h | 2 ++ include/asm-arm26/socket.h | 2 ++ include/asm-cris/socket.h | 2 ++ include/asm-frv/socket.h | 2 ++ include/asm-h8300/socket.h | 2 ++ include/asm-i386/socket.h | 2 ++ include/asm-ia64/socket.h | 2 ++ include/asm-m32r/socket.h | 2 ++ include/asm-m68k/socket.h | 2 ++ include/asm-mips/socket.h | 2 ++ include/asm-parisc/socket.h | 2 ++ include/asm-ppc/socket.h | 2 ++ include/asm-ppc64/socket.h | 2 ++ include/asm-s390/socket.h | 2 ++ include/asm-sh/socket.h | 2 ++ include/asm-sparc/socket.h | 2 ++ include/asm-sparc64/socket.h | 2 ++ include/asm-v850/socket.h | 2 ++ include/asm-x86_64/socket.h | 2 ++ include/asm-xtensa/socket.h | 2 ++ net/core/sock.c | 18 ++++++++++++++++-- 22 files changed, 58 insertions(+), 2 deletions(-) diff --git a/include/asm-alpha/socket.h b/include/asm-alpha/socket.h index d00259d3dc78..b5193229132a 100644 --- a/include/asm-alpha/socket.h +++ b/include/asm-alpha/socket.h @@ -25,6 +25,8 @@ #define SO_ERROR 0x1007 #define SO_SNDBUF 0x1001 #define SO_RCVBUF 0x1002 +#define SO_SNDBUFFORCE 0x100a +#define SO_RCVBUFFORCE 0x100b #define SO_RCVLOWAT 0x1010 #define SO_SNDLOWAT 0x1011 #define SO_RCVTIMEO 0x1012 diff --git a/include/asm-arm/socket.h b/include/asm-arm/socket.h index 46d20585d951..3c51da6438c9 100644 --- a/include/asm-arm/socket.h +++ b/include/asm-arm/socket.h @@ -14,6 +14,8 @@ #define SO_BROADCAST 6 #define SO_SNDBUF 7 #define SO_RCVBUF 8 +#define SO_SNDBUFFORCE 32 +#define SO_RCVBUFFORCE 33 #define SO_KEEPALIVE 9 #define SO_OOBINLINE 10 #define SO_NO_CHECK 11 diff --git a/include/asm-arm26/socket.h b/include/asm-arm26/socket.h index 46d20585d951..3c51da6438c9 100644 --- a/include/asm-arm26/socket.h +++ b/include/asm-arm26/socket.h @@ -14,6 +14,8 @@ #define SO_BROADCAST 6 #define SO_SNDBUF 7 #define SO_RCVBUF 8 +#define SO_SNDBUFFORCE 32 +#define SO_RCVBUFFORCE 33 #define SO_KEEPALIVE 9 #define SO_OOBINLINE 10 #define SO_NO_CHECK 11 diff --git 
a/include/asm-cris/socket.h b/include/asm-cris/socket.h index f159b4f165f7..8b1da3e58c55 100644 --- a/include/asm-cris/socket.h +++ b/include/asm-cris/socket.h @@ -16,6 +16,8 @@ #define SO_BROADCAST 6 #define SO_SNDBUF 7 #define SO_RCVBUF 8 +#define SO_SNDBUFFORCE 32 +#define SO_RCVBUFFORCE 33 #define SO_KEEPALIVE 9 #define SO_OOBINLINE 10 #define SO_NO_CHECK 11 diff --git a/include/asm-frv/socket.h b/include/asm-frv/socket.h index c3be17c7de4b..7177f8b9817c 100644 --- a/include/asm-frv/socket.h +++ b/include/asm-frv/socket.h @@ -14,6 +14,8 @@ #define SO_BROADCAST 6 #define SO_SNDBUF 7 #define SO_RCVBUF 8 +#define SO_SNDBUFFORCE 32 +#define SO_RCVBUFFORCE 33 #define SO_KEEPALIVE 9 #define SO_OOBINLINE 10 #define SO_NO_CHECK 11 diff --git a/include/asm-h8300/socket.h b/include/asm-h8300/socket.h index af33b8525dcf..d98cf85bafc1 100644 --- a/include/asm-h8300/socket.h +++ b/include/asm-h8300/socket.h @@ -14,6 +14,8 @@ #define SO_BROADCAST 6 #define SO_SNDBUF 7 #define SO_RCVBUF 8 +#define SO_SNDBUFFORCE 32 +#define SO_RCVBUFFORCE 33 #define SO_KEEPALIVE 9 #define SO_OOBINLINE 10 #define SO_NO_CHECK 11 diff --git a/include/asm-i386/socket.h b/include/asm-i386/socket.h index 07f6b38ad140..802ae76195b7 100644 --- a/include/asm-i386/socket.h +++ b/include/asm-i386/socket.h @@ -14,6 +14,8 @@ #define SO_BROADCAST 6 #define SO_SNDBUF 7 #define SO_RCVBUF 8 +#define SO_SNDBUFFORCE 32 +#define SO_RCVBUFFORCE 33 #define SO_KEEPALIVE 9 #define SO_OOBINLINE 10 #define SO_NO_CHECK 11 diff --git a/include/asm-ia64/socket.h b/include/asm-ia64/socket.h index 21a9f10d6baa..a255006fb7b5 100644 --- a/include/asm-ia64/socket.h +++ b/include/asm-ia64/socket.h @@ -23,6 +23,8 @@ #define SO_BROADCAST 6 #define SO_SNDBUF 7 #define SO_RCVBUF 8 +#define SO_SNDBUFFORCE 32 +#define SO_RCVBUFFORCE 33 #define SO_KEEPALIVE 9 #define SO_OOBINLINE 10 #define SO_NO_CHECK 11 diff --git a/include/asm-m32r/socket.h b/include/asm-m32r/socket.h index 159519d99042..8b6680f223c0 100644 --- 
a/include/asm-m32r/socket.h +++ b/include/asm-m32r/socket.h @@ -14,6 +14,8 @@ #define SO_BROADCAST 6 #define SO_SNDBUF 7 #define SO_RCVBUF 8 +#define SO_SNDBUFFORCE 32 +#define SO_RCVBUFFORCE 33 #define SO_KEEPALIVE 9 #define SO_OOBINLINE 10 #define SO_NO_CHECK 11 diff --git a/include/asm-m68k/socket.h b/include/asm-m68k/socket.h index 8d0b9fc2d07e..f578ca4b776a 100644 --- a/include/asm-m68k/socket.h +++ b/include/asm-m68k/socket.h @@ -14,6 +14,8 @@ #define SO_BROADCAST 6 #define SO_SNDBUF 7 #define SO_RCVBUF 8 +#define SO_SNDBUFFORCE 32 +#define SO_RCVBUFFORCE 33 #define SO_KEEPALIVE 9 #define SO_OOBINLINE 10 #define SO_NO_CHECK 11 diff --git a/include/asm-mips/socket.h b/include/asm-mips/socket.h index 020b4db70ee5..d478a86294ee 100644 --- a/include/asm-mips/socket.h +++ b/include/asm-mips/socket.h @@ -37,6 +37,8 @@ To add: #define SO_REUSEPORT 0x0200 /* Allow local address and port reuse. */ #define SO_ERROR 0x1007 /* get error status and clear */ #define SO_SNDBUF 0x1001 /* Send buffer size. */ #define SO_RCVBUF 0x1002 /* Receive buffer. 
*/ +#define SO_SNDBUFFORCE 0x100a +#define SO_RCVBUFFORCE 0x100b #define SO_SNDLOWAT 0x1003 /* send low-water mark */ #define SO_RCVLOWAT 0x1004 /* receive low-water mark */ #define SO_SNDTIMEO 0x1005 /* send timeout */ diff --git a/include/asm-parisc/socket.h b/include/asm-parisc/socket.h index 4a77996c1862..1bf54dc53c10 100644 --- a/include/asm-parisc/socket.h +++ b/include/asm-parisc/socket.h @@ -16,6 +16,8 @@ /* To add :#define SO_REUSEPORT 0x0200 */ #define SO_SNDBUF 0x1001 #define SO_RCVBUF 0x1002 +#define SO_SNDBUFFORCE 0x100a +#define SO_RCVBUFFORCE 0x100b #define SO_SNDLOWAT 0x1003 #define SO_RCVLOWAT 0x1004 #define SO_SNDTIMEO 0x1005 diff --git a/include/asm-ppc/socket.h b/include/asm-ppc/socket.h index 4134376b0f66..296e1a3469d0 100644 --- a/include/asm-ppc/socket.h +++ b/include/asm-ppc/socket.h @@ -20,6 +20,8 @@ #define SO_BROADCAST 6 #define SO_SNDBUF 7 #define SO_RCVBUF 8 +#define SO_SNDBUFFORCE 32 +#define SO_RCVBUFFORCE 33 #define SO_KEEPALIVE 9 #define SO_OOBINLINE 10 #define SO_NO_CHECK 11 diff --git a/include/asm-ppc64/socket.h b/include/asm-ppc64/socket.h index 59e00dfc8b8e..9e1af8eb2d96 100644 --- a/include/asm-ppc64/socket.h +++ b/include/asm-ppc64/socket.h @@ -21,6 +21,8 @@ #define SO_BROADCAST 6 #define SO_SNDBUF 7 #define SO_RCVBUF 8 +#define SO_SNDBUFFORCE 32 +#define SO_RCVBUFFORCE 33 #define SO_KEEPALIVE 9 #define SO_OOBINLINE 10 #define SO_NO_CHECK 11 diff --git a/include/asm-s390/socket.h b/include/asm-s390/socket.h index 0e96eeca4e6b..15a5298c8744 100644 --- a/include/asm-s390/socket.h +++ b/include/asm-s390/socket.h @@ -22,6 +22,8 @@ #define SO_BROADCAST 6 #define SO_SNDBUF 7 #define SO_RCVBUF 8 +#define SO_SNDBUFFORCE 32 +#define SO_RCVBUFFORCE 33 #define SO_KEEPALIVE 9 #define SO_OOBINLINE 10 #define SO_NO_CHECK 11 diff --git a/include/asm-sh/socket.h b/include/asm-sh/socket.h index dde696c3b4c7..553904ff9336 100644 --- a/include/asm-sh/socket.h +++ b/include/asm-sh/socket.h @@ -14,6 +14,8 @@ #define SO_BROADCAST 6 #define 
SO_SNDBUF 7 #define SO_RCVBUF 8 +#define SO_RCVBUFFORCE 32 +#define SO_SNDBUFFORCE 33 #define SO_KEEPALIVE 9 #define SO_OOBINLINE 10 #define SO_NO_CHECK 11 diff --git a/include/asm-sparc/socket.h b/include/asm-sparc/socket.h index c1154e3ecfdf..09575b608adb 100644 --- a/include/asm-sparc/socket.h +++ b/include/asm-sparc/socket.h @@ -29,6 +29,8 @@ #define SO_SNDBUF 0x1001 #define SO_RCVBUF 0x1002 +#define SO_SNDBUFFORCE 0x100a +#define SO_RCVBUFFORCE 0x100b #define SO_ERROR 0x1007 #define SO_TYPE 0x1008 diff --git a/include/asm-sparc64/socket.h b/include/asm-sparc64/socket.h index 865547a23908..59987dad3359 100644 --- a/include/asm-sparc64/socket.h +++ b/include/asm-sparc64/socket.h @@ -29,6 +29,8 @@ #define SO_SNDBUF 0x1001 #define SO_RCVBUF 0x1002 +#define SO_SNDBUFFORCE 0x100a +#define SO_RCVBUFFORCE 0x100b #define SO_ERROR 0x1007 #define SO_TYPE 0x1008 diff --git a/include/asm-v850/socket.h b/include/asm-v850/socket.h index 213b852af53e..0240d366a0a4 100644 --- a/include/asm-v850/socket.h +++ b/include/asm-v850/socket.h @@ -14,6 +14,8 @@ #define SO_BROADCAST 6 #define SO_SNDBUF 7 #define SO_RCVBUF 8 +#define SO_SNDBUFFORCE 32 +#define SO_RCVBUFFORCE 33 #define SO_KEEPALIVE 9 #define SO_OOBINLINE 10 #define SO_NO_CHECK 11 diff --git a/include/asm-x86_64/socket.h b/include/asm-x86_64/socket.h index d9a252ea8210..f2cdbeae5d5b 100644 --- a/include/asm-x86_64/socket.h +++ b/include/asm-x86_64/socket.h @@ -14,6 +14,8 @@ #define SO_BROADCAST 6 #define SO_SNDBUF 7 #define SO_RCVBUF 8 +#define SO_SNDBUFFORCE 32 +#define SO_RCVBUFFORCE 33 #define SO_KEEPALIVE 9 #define SO_OOBINLINE 10 #define SO_NO_CHECK 11 diff --git a/include/asm-xtensa/socket.h b/include/asm-xtensa/socket.h index daccd05a14cd..00f83f3a6d72 100644 --- a/include/asm-xtensa/socket.h +++ b/include/asm-xtensa/socket.h @@ -24,6 +24,8 @@ #define SO_BROADCAST 6 #define SO_SNDBUF 7 #define SO_RCVBUF 8 +#define SO_SNDBUFFORCE 32 +#define SO_RCVBUFFORCE 33 #define SO_KEEPALIVE 9 #define SO_OOBINLINE 10 #define 
SO_NO_CHECK 11 diff --git a/net/core/sock.c b/net/core/sock.c index 12f6d9a2a522..51a5e7ddee85 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -260,7 +260,7 @@ int sock_setsockopt(struct socket *sock, int level, int optname, if (val > sysctl_wmem_max) val = sysctl_wmem_max; - +set_sndbuf: sk->sk_userlocks |= SOCK_SNDBUF_LOCK; if ((val * 2) < SOCK_MIN_SNDBUF) sk->sk_sndbuf = SOCK_MIN_SNDBUF; @@ -274,6 +274,13 @@ int sock_setsockopt(struct socket *sock, int level, int optname, sk->sk_write_space(sk); break; + case SO_SNDBUFFORCE: + if (!capable(CAP_NET_ADMIN)) { + ret = -EPERM; + break; + } + goto set_sndbuf; + case SO_RCVBUF: /* Don't error on this BSD doesn't and if you think about it this is right. Otherwise apps have to @@ -282,7 +289,7 @@ int sock_setsockopt(struct socket *sock, int level, int optname, if (val > sysctl_rmem_max) val = sysctl_rmem_max; - +set_rcvbuf: sk->sk_userlocks |= SOCK_RCVBUF_LOCK; /* FIXME: is this lower bound the right one? */ if ((val * 2) < SOCK_MIN_RCVBUF) @@ -291,6 +298,13 @@ int sock_setsockopt(struct socket *sock, int level, int optname, sk->sk_rcvbuf = val * 2; break; + case SO_RCVBUFFORCE: + if (!capable(CAP_NET_ADMIN)) { + ret = -EPERM; + break; + } + goto set_rcvbuf; + case SO_KEEPALIVE: #ifdef CONFIG_INET if (sk->sk_protocol == IPPROTO_TCP) From 6f1cf16582160c4839f05007c978743911aa022b Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Tue, 9 Aug 2005 19:31:17 -0700 Subject: [PATCH 274/584] [NET]: Remove HIPPI private from skbuff.h This removes the private element from skbuff, that is only used by HIPPI. Instead it uses skb->cb[] to hold the additional data that is needed in the output path from hard_header to device driver. PS: The only qdisc that might potentially corrupt this cb[] is if netem was used over HIPPI. I will take care of that by fixing netem to use skb->stamp. I don't expect many users of netem over HIPPI Signed-off-by: Stephen Hemminger Signed-off-by: David S. 
Miller --- drivers/net/rrunner.c | 3 ++- include/linux/hippidevice.h | 5 +++++ include/linux/skbuff.h | 6 ------ net/802/hippi.c | 4 +++- net/core/skbuff.c | 3 --- 5 files changed, 10 insertions(+), 11 deletions(-) diff --git a/drivers/net/rrunner.c b/drivers/net/rrunner.c index 12a86f96d973..ec1a18d189a1 100644 --- a/drivers/net/rrunner.c +++ b/drivers/net/rrunner.c @@ -1429,6 +1429,7 @@ static int rr_start_xmit(struct sk_buff *skb, struct net_device *dev) { struct rr_private *rrpriv = netdev_priv(dev); struct rr_regs __iomem *regs = rrpriv->regs; + struct hippi_cb *hcb = (struct hippi_cb *) skb->cb; struct ring_ctrl *txctrl; unsigned long flags; u32 index, len = skb->len; @@ -1460,7 +1461,7 @@ static int rr_start_xmit(struct sk_buff *skb, struct net_device *dev) ifield = (u32 *)skb_push(skb, 8); ifield[0] = 0; - ifield[1] = skb->private.ifield; + ifield[1] = hcb->ifield; /* * We don't need the lock before we are actually going to start diff --git a/include/linux/hippidevice.h b/include/linux/hippidevice.h index 9debe6bbe5f0..9bc3b688d2ee 100644 --- a/include/linux/hippidevice.h +++ b/include/linux/hippidevice.h @@ -26,6 +26,11 @@ #include #ifdef __KERNEL__ + +struct hippi_cb { + __u32 ifield; +}; + extern unsigned short hippi_type_trans(struct sk_buff *skb, struct net_device *dev); diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index f10a8b9628b0..4aeadb102589 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -193,7 +193,6 @@ struct skb_shared_info { * @nfct: Associated connection, if any * @nfctinfo: Relationship of this skb to the connection * @nf_bridge: Saved data about a bridged frame - see br_netfilter.c - * @private: Data which is private to the HIPPI implementation * @tc_index: Traffic control index * @tc_verd: traffic control verdict */ @@ -265,11 +264,6 @@ struct sk_buff { struct nf_bridge_info *nf_bridge; #endif #endif /* CONFIG_NETFILTER */ -#if defined(CONFIG_HIPPI) - union { - __u32 ifield; - } private; -#endif #ifdef 
CONFIG_NET_SCHED __u32 tc_index; /* traffic control index */ #ifdef CONFIG_NET_CLS_ACT diff --git a/net/802/hippi.c b/net/802/hippi.c index 051e8af56a77..cb45ae1310cb 100644 --- a/net/802/hippi.c +++ b/net/802/hippi.c @@ -51,6 +51,7 @@ static int hippi_header(struct sk_buff *skb, struct net_device *dev, unsigned len) { struct hippi_hdr *hip = (struct hippi_hdr *)skb_push(skb, HIPPI_HLEN); + struct hippi_cb *hcb = (struct hippi_cb *) skb->cb; if (!len){ len = skb->len - HIPPI_HLEN; @@ -84,9 +85,10 @@ static int hippi_header(struct sk_buff *skb, struct net_device *dev, if (daddr) { memcpy(hip->le.dest_switch_addr, daddr + 3, 3); - memcpy(&skb->private.ifield, daddr + 2, 4); + memcpy(&hcb->ifield, daddr + 2, 4); return HIPPI_HLEN; } + hcb->ifield = 0; return -((int)HIPPI_HLEN); } diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 8896e6f8aa42..16df7bd77e78 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -365,9 +365,6 @@ struct sk_buff *skb_clone(struct sk_buff *skb, unsigned int __nocast gfp_mask) nf_bridge_get(skb->nf_bridge); #endif #endif /*CONFIG_NETFILTER*/ -#if defined(CONFIG_HIPPI) - C(private); -#endif #ifdef CONFIG_NET_SCHED C(tc_index); #ifdef CONFIG_NET_CLS_ACT From 080774a243f56ce2195ace96fba3d18548ee48ce Mon Sep 17 00:00:00 2001 From: Harald Welte Date: Tue, 9 Aug 2005 19:32:58 -0700 Subject: [PATCH 275/584] [NETFILTER]: Add ctnetlink subsystem Add ctnetlink subsystem for userspace-access to ip_conntrack table. This allows reading and updating of existing entries, as well as creating new ones (and new expect's) via nfnetlink. Please note the 'strange' byte order: nfattr (tag+length) are in host byte order, while the payload is always guaranteed to be in network byte order. This allows a simple userspace process to encapsulate netlink messages into arch-independent udp packets by just processing/swapping the headers and not knowing anything about the actual payload. 
Signed-off-by: Harald Welte Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/linux/netfilter/nfnetlink.h | 3 +- include/linux/netfilter/nfnetlink_conntrack.h | 123 ++ include/linux/netfilter_ipv4/ip_conntrack.h | 46 +- .../linux/netfilter_ipv4/ip_conntrack_core.h | 5 + .../netfilter_ipv4/ip_conntrack_helper.h | 2 + .../netfilter_ipv4/ip_conntrack_protocol.h | 24 +- .../linux/netfilter_ipv4/ip_nat_protocol.h | 25 +- net/ipv4/netfilter/Kconfig | 7 + net/ipv4/netfilter/Makefile | 4 + net/ipv4/netfilter/ip_conntrack_core.c | 281 ++- net/ipv4/netfilter/ip_conntrack_netlink.c | 1588 +++++++++++++++++ net/ipv4/netfilter/ip_conntrack_proto_icmp.c | 64 +- net/ipv4/netfilter/ip_conntrack_proto_sctp.c | 7 +- net/ipv4/netfilter/ip_conntrack_proto_tcp.c | 23 + net/ipv4/netfilter/ip_conntrack_proto_udp.c | 5 + net/ipv4/netfilter/ip_conntrack_standalone.c | 38 +- net/ipv4/netfilter/ip_nat_core.c | 99 +- net/ipv4/netfilter/ip_nat_proto_icmp.c | 9 +- net/ipv4/netfilter/ip_nat_proto_tcp.c | 10 +- net/ipv4/netfilter/ip_nat_proto_udp.c | 9 +- net/ipv4/netfilter/ip_nat_proto_unknown.c | 2 +- net/ipv4/netfilter/ip_nat_standalone.c | 2 + net/netfilter/nfnetlink.c | 1 + 23 files changed, 2277 insertions(+), 100 deletions(-) create mode 100644 include/linux/netfilter/nfnetlink_conntrack.h create mode 100644 net/ipv4/netfilter/ip_conntrack_netlink.c diff --git a/include/linux/netfilter/nfnetlink.h b/include/linux/netfilter/nfnetlink.h index 8f1bfb8d650b..ace7a7be0742 100644 --- a/include/linux/netfilter/nfnetlink.h +++ b/include/linux/netfilter/nfnetlink.h @@ -56,7 +56,7 @@ struct nfgenmsg { u_int16_t res_id; /* resource id */ } __attribute__ ((packed)); -#define NFNETLINK_V1 1 +#define NFNETLINK_V0 0 #define NFM_NFA(n) ((struct nfattr *)(((char *)(n)) \ + NLMSG_ALIGN(sizeof(struct nfgenmsg)))) @@ -81,6 +81,7 @@ enum nfnl_subsys_id { #ifdef __KERNEL__ +#include #include struct nfnl_callback diff --git a/include/linux/netfilter/nfnetlink_conntrack.h 
b/include/linux/netfilter/nfnetlink_conntrack.h new file mode 100644 index 000000000000..fb528e0e3bd9 --- /dev/null +++ b/include/linux/netfilter/nfnetlink_conntrack.h @@ -0,0 +1,123 @@ +#ifndef _IPCONNTRACK_NETLINK_H +#define _IPCONNTRACK_NETLINK_H +#include + +enum cntl_msg_types { + IPCTNL_MSG_CT_NEW, + IPCTNL_MSG_CT_GET, + IPCTNL_MSG_CT_DELETE, + IPCTNL_MSG_CT_GET_CTRZERO, + + IPCTNL_MSG_MAX +}; + +enum ctnl_exp_msg_types { + IPCTNL_MSG_EXP_NEW, + IPCTNL_MSG_EXP_GET, + IPCTNL_MSG_EXP_DELETE, + + IPCTNL_MSG_EXP_MAX +}; + + +enum ctattr_type { + CTA_UNSPEC, + CTA_TUPLE_ORIG, + CTA_TUPLE_REPLY, + CTA_STATUS, + CTA_PROTOINFO, + CTA_HELP, + CTA_NAT, + CTA_TIMEOUT, + CTA_MARK, + CTA_COUNTERS_ORIG, + CTA_COUNTERS_REPLY, + CTA_USE, + CTA_EXPECT, + CTA_ID, + __CTA_MAX +}; +#define CTA_MAX (__CTA_MAX - 1) + +enum ctattr_tuple { + CTA_TUPLE_UNSPEC, + CTA_TUPLE_IP, + CTA_TUPLE_PROTO, + __CTA_TUPLE_MAX +}; +#define CTA_TUPLE_MAX (__CTA_TUPLE_MAX - 1) + +enum ctattr_ip { + CTA_IP_UNSPEC, + CTA_IP_V4_SRC, + CTA_IP_V4_DST, + CTA_IP_V6_SRC, + CTA_IP_V6_DST, + __CTA_IP_MAX +}; +#define CTA_IP_MAX (__CTA_IP_MAX - 1) + +enum ctattr_l4proto { + CTA_PROTO_UNSPEC, + CTA_PROTO_NUM, + CTA_PROTO_SRC_PORT, + CTA_PROTO_DST_PORT, + CTA_PROTO_ICMP_ID, + CTA_PROTO_ICMP_TYPE, + CTA_PROTO_ICMP_CODE, + __CTA_PROTO_MAX +}; +#define CTA_PROTO_MAX (__CTA_PROTO_MAX - 1) + +enum ctattr_protoinfo { + CTA_PROTOINFO_UNSPEC, + CTA_PROTOINFO_TCP_STATE, + __CTA_PROTOINFO_MAX +}; +#define CTA_PROTOINFO_MAX (__CTA_PROTOINFO_MAX - 1) + +enum ctattr_counters { + CTA_COUNTERS_UNSPEC, + CTA_COUNTERS_PACKETS, + CTA_COUNTERS_BYTES, + __CTA_COUNTERS_MAX +}; +#define CTA_COUNTERS_MAX (__CTA_COUNTERS_MAX - 1) + +enum ctattr_nat { + CTA_NAT_UNSPEC, + CTA_NAT_MINIP, + CTA_NAT_MAXIP, + CTA_NAT_PROTO, + __CTA_NAT_MAX +}; +#define CTA_NAT_MAX (__CTA_NAT_MAX - 1) + +enum ctattr_protonat { + CTA_PROTONAT_UNSPEC, + CTA_PROTONAT_PORT_MIN, + CTA_PROTONAT_PORT_MAX, + __CTA_PROTONAT_MAX +}; +#define CTA_PROTONAT_MAX 
(__CTA_PROTONAT_MAX - 1) + +enum ctattr_expect { + CTA_EXPECT_UNSPEC, + CTA_EXPECT_TUPLE, + CTA_EXPECT_MASK, + CTA_EXPECT_TIMEOUT, + CTA_EXPECT_ID, + __CTA_EXPECT_MAX +}; +#define CTA_EXPECT_MAX (__CTA_EXPECT_MAX - 1) + +enum ctattr_help { + CTA_HELP_UNSPEC, + CTA_HELP_NAME, + __CTA_HELP_MAX +}; +#define CTA_HELP_MAX (__CTA_HELP_MAX - 1) + +#define CTA_HELP_MAXNAMESIZE 32 + +#endif /* _IPCONNTRACK_NETLINK_H */ diff --git a/include/linux/netfilter_ipv4/ip_conntrack.h b/include/linux/netfilter_ipv4/ip_conntrack.h index ae1270c97b50..ff2c1c6001f9 100644 --- a/include/linux/netfilter_ipv4/ip_conntrack.h +++ b/include/linux/netfilter_ipv4/ip_conntrack.h @@ -209,6 +209,9 @@ struct ip_conntrack /* Current number of expected connections */ unsigned int expecting; + /* Unique ID that identifies this conntrack*/ + unsigned int id; + /* Helper, if any. */ struct ip_conntrack_helper *helper; @@ -257,6 +260,9 @@ struct ip_conntrack_expect /* Usage count. */ atomic_t use; + /* Unique ID */ + unsigned int id; + #ifdef CONFIG_IP_NF_NAT_NEEDED /* This is the original per-proto part, used to map the * expected connection the way the recipient expects. */ @@ -296,7 +302,12 @@ ip_conntrack_get(const struct sk_buff *skb, enum ip_conntrack_info *ctinfo) } /* decrement reference count on a conntrack */ -extern void ip_conntrack_put(struct ip_conntrack *ct); +static inline void +ip_conntrack_put(struct ip_conntrack *ct) +{ + IP_NF_ASSERT(ct); + nf_conntrack_put(&ct->ct_general); +} /* call to create an explicit dependency on ip_conntrack. 
*/ extern void need_ip_conntrack(void); @@ -331,6 +342,39 @@ extern void ip_ct_iterate_cleanup(int (*iter)(struct ip_conntrack *i, void *data), void *data); +extern struct ip_conntrack_helper * +__ip_conntrack_helper_find_byname(const char *); +extern struct ip_conntrack_helper * +ip_conntrack_helper_find_get(const struct ip_conntrack_tuple *tuple); +extern void ip_conntrack_helper_put(struct ip_conntrack_helper *helper); + +extern struct ip_conntrack_protocol * +__ip_conntrack_proto_find(u_int8_t protocol); +extern struct ip_conntrack_protocol * +ip_conntrack_proto_find_get(u_int8_t protocol); +extern void ip_conntrack_proto_put(struct ip_conntrack_protocol *proto); + +extern void ip_ct_remove_expectations(struct ip_conntrack *ct); + +extern struct ip_conntrack *ip_conntrack_alloc(struct ip_conntrack_tuple *, + struct ip_conntrack_tuple *); + +extern void ip_conntrack_free(struct ip_conntrack *ct); + +extern void ip_conntrack_hash_insert(struct ip_conntrack *ct); + +extern struct ip_conntrack_expect * +__ip_conntrack_expect_find(const struct ip_conntrack_tuple *tuple); + +extern struct ip_conntrack_expect * +ip_conntrack_expect_find_get(const struct ip_conntrack_tuple *tuple); + +extern struct ip_conntrack_tuple_hash * +__ip_conntrack_find(const struct ip_conntrack_tuple *tuple, + const struct ip_conntrack *ignored_conntrack); + +extern void ip_conntrack_flush(void); + /* It's confirmed if it is, or has been in the hash table. 
*/ static inline int is_confirmed(struct ip_conntrack *ct) { diff --git a/include/linux/netfilter_ipv4/ip_conntrack_core.h b/include/linux/netfilter_ipv4/ip_conntrack_core.h index 46eeea1e2733..fbf6c3e41647 100644 --- a/include/linux/netfilter_ipv4/ip_conntrack_core.h +++ b/include/linux/netfilter_ipv4/ip_conntrack_core.h @@ -2,6 +2,9 @@ #define _IP_CONNTRACK_CORE_H #include +#define MAX_IP_CT_PROTO 256 +extern struct ip_conntrack_protocol *ip_ct_protos[MAX_IP_CT_PROTO]; + /* This header is used to share core functionality between the standalone connection tracking module, and the compatibility layer's use of connection tracking. */ @@ -53,6 +56,8 @@ struct ip_conntrack_ecache; extern void __ip_ct_deliver_cached_events(struct ip_conntrack_ecache *ec); #endif +extern void __ip_ct_expect_unlink_destroy(struct ip_conntrack_expect *exp); + extern struct list_head *ip_conntrack_hash; extern struct list_head ip_conntrack_expect_list; extern rwlock_t ip_conntrack_lock; diff --git a/include/linux/netfilter_ipv4/ip_conntrack_helper.h b/include/linux/netfilter_ipv4/ip_conntrack_helper.h index 3692daa93dec..8d69279ccfe4 100644 --- a/include/linux/netfilter_ipv4/ip_conntrack_helper.h +++ b/include/linux/netfilter_ipv4/ip_conntrack_helper.h @@ -24,6 +24,8 @@ struct ip_conntrack_helper int (*help)(struct sk_buff **pskb, struct ip_conntrack *ct, enum ip_conntrack_info conntrackinfo); + + int (*to_nfattr)(struct sk_buff *skb, const struct ip_conntrack *ct); }; extern int ip_conntrack_helper_register(struct ip_conntrack_helper *); diff --git a/include/linux/netfilter_ipv4/ip_conntrack_protocol.h b/include/linux/netfilter_ipv4/ip_conntrack_protocol.h index e20b57c5e1b7..b6b99be8632a 100644 --- a/include/linux/netfilter_ipv4/ip_conntrack_protocol.h +++ b/include/linux/netfilter_ipv4/ip_conntrack_protocol.h @@ -2,6 +2,7 @@ #ifndef _IP_CONNTRACK_PROTOCOL_H #define _IP_CONNTRACK_PROTOCOL_H #include +#include struct seq_file; @@ -47,22 +48,22 @@ struct ip_conntrack_protocol int 
(*error)(struct sk_buff *skb, enum ip_conntrack_info *ctinfo, unsigned int hooknum); + /* convert protoinfo to nfnetink attributes */ + int (*to_nfattr)(struct sk_buff *skb, struct nfattr *nfa, + const struct ip_conntrack *ct); + + int (*tuple_to_nfattr)(struct sk_buff *skb, + const struct ip_conntrack_tuple *t); + int (*nfattr_to_tuple)(struct nfattr *tb[], + struct ip_conntrack_tuple *t); + /* Module (if any) which this is connected to. */ struct module *me; }; -#define MAX_IP_CT_PROTO 256 -extern struct ip_conntrack_protocol *ip_ct_protos[MAX_IP_CT_PROTO]; - /* Protocol registration. */ extern int ip_conntrack_protocol_register(struct ip_conntrack_protocol *proto); extern void ip_conntrack_protocol_unregister(struct ip_conntrack_protocol *proto); - -static inline struct ip_conntrack_protocol *ip_ct_find_proto(u_int8_t protocol) -{ - return ip_ct_protos[protocol]; -} - /* Existing built-in protocols */ extern struct ip_conntrack_protocol ip_conntrack_protocol_tcp; extern struct ip_conntrack_protocol ip_conntrack_protocol_udp; @@ -73,6 +74,11 @@ extern int ip_conntrack_protocol_tcp_init(void); /* Log invalid packets */ extern unsigned int ip_ct_log_invalid; +extern int ip_ct_port_tuple_to_nfattr(struct sk_buff *, + const struct ip_conntrack_tuple *); +extern int ip_ct_port_nfattr_to_tuple(struct nfattr *tb[], + struct ip_conntrack_tuple *); + #ifdef CONFIG_SYSCTL #ifdef DEBUG_INVALID_PACKETS #define LOG_INVALID(proto) \ diff --git a/include/linux/netfilter_ipv4/ip_nat_protocol.h b/include/linux/netfilter_ipv4/ip_nat_protocol.h index 129708c22386..ef63aa991a06 100644 --- a/include/linux/netfilter_ipv4/ip_nat_protocol.h +++ b/include/linux/netfilter_ipv4/ip_nat_protocol.h @@ -4,6 +4,9 @@ #include #include +#include +#include + struct iphdr; struct ip_nat_range; @@ -15,6 +18,8 @@ struct ip_nat_protocol /* Protocol number. */ unsigned int protonum; + struct module *me; + /* Translate a packet to the target according to manip type. Return true if succeeded. 
*/ int (*manip_pkt)(struct sk_buff **pskb, @@ -43,19 +48,20 @@ struct ip_nat_protocol unsigned int (*print_range)(char *buffer, const struct ip_nat_range *range); -}; -#define MAX_IP_NAT_PROTO 256 -extern struct ip_nat_protocol *ip_nat_protos[MAX_IP_NAT_PROTO]; + int (*range_to_nfattr)(struct sk_buff *skb, + const struct ip_nat_range *range); + + int (*nfattr_to_range)(struct nfattr *tb[], + struct ip_nat_range *range); +}; /* Protocol registration. */ extern int ip_nat_protocol_register(struct ip_nat_protocol *proto); extern void ip_nat_protocol_unregister(struct ip_nat_protocol *proto); -static inline struct ip_nat_protocol *ip_nat_find_proto(u_int8_t protocol) -{ - return ip_nat_protos[protocol]; -} +extern struct ip_nat_protocol *ip_nat_proto_find_get(u_int8_t protocol); +extern void ip_nat_proto_put(struct ip_nat_protocol *proto); /* Built-in protocols. */ extern struct ip_nat_protocol ip_nat_protocol_tcp; @@ -67,4 +73,9 @@ extern int init_protocols(void) __init; extern void cleanup_protocols(void); extern struct ip_nat_protocol *find_nat_proto(u_int16_t protonum); +extern int ip_nat_port_range_to_nfattr(struct sk_buff *skb, + const struct ip_nat_range *range); +extern int ip_nat_port_nfattr_to_range(struct nfattr *tb[], + struct ip_nat_range *range); + #endif /*_IP_NAT_PROTO_H*/ diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig index ff3393eba924..e47ba39eb657 100644 --- a/net/ipv4/netfilter/Kconfig +++ b/net/ipv4/netfilter/Kconfig @@ -702,5 +702,12 @@ config IP_NF_ARP_MANGLE Allows altering the ARP packet payload: source and destination hardware and network addresses. 
+config IP_NF_CONNTRACK_NETLINK + tristate 'Connection tracking netlink interface' + depends on IP_NF_CONNTRACK && NETFILTER_NETLINK + help + This option enables support for a netlink-based userspace interface + + endmenu diff --git a/net/ipv4/netfilter/Makefile b/net/ipv4/netfilter/Makefile index 45796d5924dd..abf2a7d1a584 100644 --- a/net/ipv4/netfilter/Makefile +++ b/net/ipv4/netfilter/Makefile @@ -9,6 +9,10 @@ iptable_nat-objs := ip_nat_standalone.o ip_nat_rule.o ip_nat_core.o ip_nat_helpe # connection tracking obj-$(CONFIG_IP_NF_CONNTRACK) += ip_conntrack.o +# conntrack netlink interface +obj-$(CONFIG_IP_NF_CONNTRACK_NETLINK) += ip_conntrack_netlink.o + + # SCTP protocol connection tracking obj-$(CONFIG_IP_NF_CT_PROTO_SCTP) += ip_conntrack_proto_sctp.o diff --git a/net/ipv4/netfilter/ip_conntrack_core.c b/net/ipv4/netfilter/ip_conntrack_core.c index caf89deae116..d9fddae8d787 100644 --- a/net/ipv4/netfilter/ip_conntrack_core.c +++ b/net/ipv4/netfilter/ip_conntrack_core.c @@ -50,7 +50,7 @@ #include #include -#define IP_CONNTRACK_VERSION "2.2" +#define IP_CONNTRACK_VERSION "2.3" #if 0 #define DEBUGP printk @@ -77,6 +77,8 @@ unsigned int ip_ct_log_invalid; static LIST_HEAD(unconfirmed); static int ip_conntrack_vmalloc; +static unsigned int ip_conntrack_next_id = 1; +static unsigned int ip_conntrack_expect_next_id = 1; #ifdef CONFIG_IP_NF_CONNTRACK_EVENTS struct notifier_block *ip_conntrack_chain; struct notifier_block *ip_conntrack_expect_chain; @@ -154,13 +156,6 @@ void ip_conntrack_event_cache_init(const struct sk_buff *skb) DEFINE_PER_CPU(struct ip_conntrack_stat, ip_conntrack_stat); -void -ip_conntrack_put(struct ip_conntrack *ct) -{ - IP_NF_ASSERT(ct); - nf_conntrack_put(&ct->ct_general); -} - static int ip_conntrack_hash_rnd_initted; static unsigned int ip_conntrack_hash_rnd; @@ -222,6 +217,12 @@ static void unlink_expect(struct ip_conntrack_expect *exp) exp->master->expecting--; } +void __ip_ct_expect_unlink_destroy(struct ip_conntrack_expect *exp) +{ + 
unlink_expect(exp); + ip_conntrack_expect_put(exp); +} + static void expectation_timed_out(unsigned long ul_expect) { struct ip_conntrack_expect *exp = (void *)ul_expect; @@ -232,6 +233,33 @@ static void expectation_timed_out(unsigned long ul_expect) ip_conntrack_expect_put(exp); } +struct ip_conntrack_expect * +__ip_conntrack_expect_find(const struct ip_conntrack_tuple *tuple) +{ + struct ip_conntrack_expect *i; + + list_for_each_entry(i, &ip_conntrack_expect_list, list) { + if (ip_ct_tuple_mask_cmp(tuple, &i->tuple, &i->mask)) { + atomic_inc(&i->use); + return i; + } + } + return NULL; +} + +/* Just find a expectation corresponding to a tuple. */ +struct ip_conntrack_expect * +ip_conntrack_expect_find_get(const struct ip_conntrack_tuple *tuple) +{ + struct ip_conntrack_expect *i; + + read_lock_bh(&ip_conntrack_lock); + i = __ip_conntrack_expect_find(tuple); + read_unlock_bh(&ip_conntrack_lock); + + return i; +} + /* If an expectation for this connection is found, it gets delete from * global list then returned. */ static struct ip_conntrack_expect * @@ -256,7 +284,7 @@ find_expectation(const struct ip_conntrack_tuple *tuple) } /* delete all expectations for this conntrack */ -static void remove_expectations(struct ip_conntrack *ct) +void ip_ct_remove_expectations(struct ip_conntrack *ct) { struct ip_conntrack_expect *i, *tmp; @@ -286,7 +314,7 @@ clean_from_lists(struct ip_conntrack *ct) LIST_DELETE(&ip_conntrack_hash[hr], &ct->tuplehash[IP_CT_DIR_REPLY]); /* Destroy all pending expectations */ - remove_expectations(ct); + ip_ct_remove_expectations(ct); } static void @@ -304,7 +332,7 @@ destroy_conntrack(struct nf_conntrack *nfct) /* To make sure we don't get any weird locking issues here: * destroy_conntrack() MUST NOT be called with a write lock * to ip_conntrack_lock!!! 
-HW */ - proto = ip_ct_find_proto(ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.protonum); + proto = __ip_conntrack_proto_find(ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.protonum); if (proto && proto->destroy) proto->destroy(ct); @@ -316,7 +344,7 @@ destroy_conntrack(struct nf_conntrack *nfct) * except TFTP can create an expectation on the first packet, * before connection is in the list, so we need to clean here, * too. */ - remove_expectations(ct); + ip_ct_remove_expectations(ct); /* We overload first tuple to link into unconfirmed list. */ if (!is_confirmed(ct)) { @@ -331,8 +359,7 @@ destroy_conntrack(struct nf_conntrack *nfct) ip_conntrack_put(ct->master); DEBUGP("destroy_conntrack: returning ct=%p to slab\n", ct); - kmem_cache_free(ip_conntrack_cachep, ct); - atomic_dec(&ip_conntrack_count); + ip_conntrack_free(ct); } static void death_by_timeout(unsigned long ul_conntrack) @@ -359,7 +386,7 @@ conntrack_tuple_cmp(const struct ip_conntrack_tuple_hash *i, && ip_ct_tuple_equal(tuple, &i->tuple); } -static struct ip_conntrack_tuple_hash * +struct ip_conntrack_tuple_hash * __ip_conntrack_find(const struct ip_conntrack_tuple *tuple, const struct ip_conntrack *ignored_conntrack) { @@ -394,6 +421,29 @@ ip_conntrack_find_get(const struct ip_conntrack_tuple *tuple, return h; } +static void __ip_conntrack_hash_insert(struct ip_conntrack *ct, + unsigned int hash, + unsigned int repl_hash) +{ + ct->id = ++ip_conntrack_next_id; + list_prepend(&ip_conntrack_hash[hash], + &ct->tuplehash[IP_CT_DIR_ORIGINAL].list); + list_prepend(&ip_conntrack_hash[repl_hash], + &ct->tuplehash[IP_CT_DIR_REPLY].list); +} + +void ip_conntrack_hash_insert(struct ip_conntrack *ct) +{ + unsigned int hash, repl_hash; + + hash = hash_conntrack(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple); + repl_hash = hash_conntrack(&ct->tuplehash[IP_CT_DIR_REPLY].tuple); + + write_lock_bh(&ip_conntrack_lock); + __ip_conntrack_hash_insert(ct, hash, repl_hash); + write_unlock_bh(&ip_conntrack_lock); +} + /* Confirm a connection 
given skb; places it in hash table */ int __ip_conntrack_confirm(struct sk_buff **pskb) @@ -440,10 +490,7 @@ __ip_conntrack_confirm(struct sk_buff **pskb) /* Remove from unconfirmed list */ list_del(&ct->tuplehash[IP_CT_DIR_ORIGINAL].list); - list_prepend(&ip_conntrack_hash[hash], - &ct->tuplehash[IP_CT_DIR_ORIGINAL]); - list_prepend(&ip_conntrack_hash[repl_hash], - &ct->tuplehash[IP_CT_DIR_REPLY]); + __ip_conntrack_hash_insert(ct, hash, repl_hash); /* Timer relative to confirmation time, not original setting time, otherwise we'd get timer wrap in weird delay cases. */ @@ -527,34 +574,84 @@ static inline int helper_cmp(const struct ip_conntrack_helper *i, return ip_ct_tuple_mask_cmp(rtuple, &i->tuple, &i->mask); } -static struct ip_conntrack_helper *ip_ct_find_helper(const struct ip_conntrack_tuple *tuple) +static struct ip_conntrack_helper * +__ip_conntrack_helper_find( const struct ip_conntrack_tuple *tuple) { return LIST_FIND(&helpers, helper_cmp, struct ip_conntrack_helper *, tuple); } -/* Allocate a new conntrack: we return -ENOMEM if classification - failed due to stress. Otherwise it really is unclassifiable. */ -static struct ip_conntrack_tuple_hash * -init_conntrack(const struct ip_conntrack_tuple *tuple, - struct ip_conntrack_protocol *protocol, - struct sk_buff *skb) +struct ip_conntrack_helper * +ip_conntrack_helper_find_get( const struct ip_conntrack_tuple *tuple) +{ + struct ip_conntrack_helper *helper; + + /* need ip_conntrack_lock to assure that helper exists until + * try_module_get() is called */ + read_lock_bh(&ip_conntrack_lock); + + helper = __ip_conntrack_helper_find(tuple); + if (helper) { + /* need to increase module usage count to assure helper will + * not go away while the caller is e.g. 
busy putting a + * conntrack in the hash that uses the helper */ + if (!try_module_get(helper->me)) + helper = NULL; + } + + read_unlock_bh(&ip_conntrack_lock); + + return helper; +} + +void ip_conntrack_helper_put(struct ip_conntrack_helper *helper) +{ + module_put(helper->me); +} + +struct ip_conntrack_protocol * +__ip_conntrack_proto_find(u_int8_t protocol) +{ + return ip_ct_protos[protocol]; +} + +/* this is guaranteed to always return a valid protocol helper, since + * it falls back to generic_protocol */ +struct ip_conntrack_protocol * +ip_conntrack_proto_find_get(u_int8_t protocol) +{ + struct ip_conntrack_protocol *p; + + preempt_disable(); + p = __ip_conntrack_proto_find(protocol); + if (p) { + if (!try_module_get(p->me)) + p = &ip_conntrack_generic_protocol; + } + preempt_enable(); + + return p; +} + +void ip_conntrack_proto_put(struct ip_conntrack_protocol *p) +{ + module_put(p->me); +} + +struct ip_conntrack *ip_conntrack_alloc(struct ip_conntrack_tuple *orig, + struct ip_conntrack_tuple *repl) { struct ip_conntrack *conntrack; - struct ip_conntrack_tuple repl_tuple; - size_t hash; - struct ip_conntrack_expect *exp; if (!ip_conntrack_hash_rnd_initted) { get_random_bytes(&ip_conntrack_hash_rnd, 4); ip_conntrack_hash_rnd_initted = 1; } - hash = hash_conntrack(tuple); - if (ip_conntrack_max && atomic_read(&ip_conntrack_count) >= ip_conntrack_max) { + unsigned int hash = hash_conntrack(orig); /* Try dropping from this hash chain. 
*/ if (!early_drop(&ip_conntrack_hash[hash])) { if (net_ratelimit()) @@ -565,31 +662,58 @@ init_conntrack(const struct ip_conntrack_tuple *tuple, } } - if (!ip_ct_invert_tuple(&repl_tuple, tuple, protocol)) { - DEBUGP("Can't invert tuple.\n"); - return NULL; - } - conntrack = kmem_cache_alloc(ip_conntrack_cachep, GFP_ATOMIC); if (!conntrack) { DEBUGP("Can't allocate conntrack.\n"); - return ERR_PTR(-ENOMEM); + return NULL; } memset(conntrack, 0, sizeof(*conntrack)); atomic_set(&conntrack->ct_general.use, 1); conntrack->ct_general.destroy = destroy_conntrack; - conntrack->tuplehash[IP_CT_DIR_ORIGINAL].tuple = *tuple; - conntrack->tuplehash[IP_CT_DIR_REPLY].tuple = repl_tuple; - if (!protocol->new(conntrack, skb)) { - kmem_cache_free(ip_conntrack_cachep, conntrack); - return NULL; - } + conntrack->tuplehash[IP_CT_DIR_ORIGINAL].tuple = *orig; + conntrack->tuplehash[IP_CT_DIR_REPLY].tuple = *repl; /* Don't set timer yet: wait for confirmation */ init_timer(&conntrack->timeout); conntrack->timeout.data = (unsigned long)conntrack; conntrack->timeout.function = death_by_timeout; + atomic_inc(&ip_conntrack_count); + + return conntrack; +} + +void +ip_conntrack_free(struct ip_conntrack *conntrack) +{ + atomic_dec(&ip_conntrack_count); + kmem_cache_free(ip_conntrack_cachep, conntrack); +} + +/* Allocate a new conntrack: we return -ENOMEM if classification + * failed due to stress. 
Otherwise it really is unclassifiable */ +static struct ip_conntrack_tuple_hash * +init_conntrack(struct ip_conntrack_tuple *tuple, + struct ip_conntrack_protocol *protocol, + struct sk_buff *skb) +{ + struct ip_conntrack *conntrack; + struct ip_conntrack_tuple repl_tuple; + struct ip_conntrack_expect *exp; + + if (!ip_ct_invert_tuple(&repl_tuple, tuple, protocol)) { + DEBUGP("Can't invert tuple.\n"); + return NULL; + } + + if (!(conntrack = ip_conntrack_alloc(tuple, &repl_tuple))) + return NULL; + + if (!protocol->new(conntrack, skb)) { + ip_conntrack_free(conntrack); + return NULL; + } + write_lock_bh(&ip_conntrack_lock); exp = find_expectation(tuple); @@ -610,7 +734,7 @@ init_conntrack(const struct ip_conntrack_tuple *tuple, nf_conntrack_get(&conntrack->master->ct_general); CONNTRACK_STAT_INC(expect_new); } else { - conntrack->helper = ip_ct_find_helper(&repl_tuple); + conntrack->helper = __ip_conntrack_helper_find(&repl_tuple); CONNTRACK_STAT_INC(new); } @@ -618,7 +742,6 @@ init_conntrack(const struct ip_conntrack_tuple *tuple, /* Overload tuple linked list to put us in unconfirmed list. */ list_add(&conntrack->tuplehash[IP_CT_DIR_ORIGINAL].list, &unconfirmed); - atomic_inc(&ip_conntrack_count); write_unlock_bh(&ip_conntrack_lock); if (exp) { @@ -729,7 +852,7 @@ unsigned int ip_conntrack_in(unsigned int hooknum, } #endif - proto = ip_ct_find_proto((*pskb)->nh.iph->protocol); + proto = __ip_conntrack_proto_find((*pskb)->nh.iph->protocol); /* It may be an special packet, error, unclean... * inverse of the return code tells to the netfilter @@ -777,7 +900,7 @@ int invert_tuplepr(struct ip_conntrack_tuple *inverse, const struct ip_conntrack_tuple *orig) { return ip_ct_invert_tuple(inverse, orig, - ip_ct_find_proto(orig->dst.protonum)); + __ip_conntrack_proto_find(orig->dst.protonum)); } /* Would two expected things clash? 
*/ @@ -857,6 +980,8 @@ static void ip_conntrack_expect_insert(struct ip_conntrack_expect *exp) exp->timeout.expires = jiffies + exp->master->helper->timeout * HZ; add_timer(&exp->timeout); + exp->id = ++ip_conntrack_expect_next_id; + atomic_inc(&exp->use); CONNTRACK_STAT_INC(expect_create); } @@ -936,7 +1061,7 @@ void ip_conntrack_alter_reply(struct ip_conntrack *conntrack, conntrack->tuplehash[IP_CT_DIR_REPLY].tuple = *newreply; if (!conntrack->master && conntrack->expecting == 0) - conntrack->helper = ip_ct_find_helper(newreply); + conntrack->helper = __ip_conntrack_helper_find(newreply); write_unlock_bh(&ip_conntrack_lock); } @@ -950,6 +1075,19 @@ int ip_conntrack_helper_register(struct ip_conntrack_helper *me) return 0; } +struct ip_conntrack_helper * +__ip_conntrack_helper_find_byname(const char *name) +{ + struct ip_conntrack_helper *h; + + list_for_each_entry(h, &helpers, list) { + if (!strcmp(h->name, name)) + return h; + } + + return NULL; +} + static inline int unhelp(struct ip_conntrack_tuple_hash *i, const struct ip_conntrack_helper *me) { @@ -1025,6 +1163,39 @@ void ip_ct_refresh_acct(struct ip_conntrack *ct, } } +#if defined(CONFIG_IP_NF_CONNTRACK_NETLINK) || \ + defined(CONFIG_IP_NF_CONNTRACK_NETLINK_MODULE) +/* Generic function for tcp/udp/sctp/dccp and alike. 
This needs to be + * in ip_conntrack_core, since we don't want the protocols to autoload + * or depend on ctnetlink */ +int ip_ct_port_tuple_to_nfattr(struct sk_buff *skb, + const struct ip_conntrack_tuple *tuple) +{ + NFA_PUT(skb, CTA_PROTO_SRC_PORT, sizeof(u_int16_t), + &tuple->src.u.tcp.port); + NFA_PUT(skb, CTA_PROTO_DST_PORT, sizeof(u_int16_t), + &tuple->dst.u.tcp.port); + return 0; + +nfattr_failure: + return -1; +} + +int ip_ct_port_nfattr_to_tuple(struct nfattr *tb[], + struct ip_conntrack_tuple *t) +{ + if (!tb[CTA_PROTO_SRC_PORT-1] || !tb[CTA_PROTO_DST_PORT-1]) + return -EINVAL; + + t->src.u.tcp.port = + *(u_int16_t *)NFA_DATA(tb[CTA_PROTO_SRC_PORT-1]); + t->dst.u.tcp.port = + *(u_int16_t *)NFA_DATA(tb[CTA_PROTO_DST_PORT-1]); + + return 0; +} +#endif + /* Returns new sk_buff, or NULL */ struct sk_buff * ip_ct_gather_frags(struct sk_buff *skb, u_int32_t user) @@ -1203,16 +1374,13 @@ static void free_conntrack_hash(void) * ip_conntrack_htable_size)); } -/* Mishearing the voices in his head, our hero wonders how he's - supposed to kill the mall. */ -void ip_conntrack_cleanup(void) +void ip_conntrack_flush() { - ip_ct_attach = NULL; /* This makes sure all current packets have passed through netfilter framework. Roll on, two-stage module delete... */ synchronize_net(); - + i_see_dead_people: ip_ct_iterate_cleanup(kill_all, NULL); if (atomic_read(&ip_conntrack_count) != 0) { @@ -1222,7 +1390,14 @@ void ip_conntrack_cleanup(void) /* wait until all references to ip_conntrack_untracked are dropped */ while (atomic_read(&ip_conntrack_untracked.ct_general.use) > 1) schedule(); +} +/* Mishearing the voices in his head, our hero wonders how he's + supposed to kill the mall. 
*/ +void ip_conntrack_cleanup(void) +{ + ip_ct_attach = NULL; + ip_conntrack_flush(); kmem_cache_destroy(ip_conntrack_cachep); kmem_cache_destroy(ip_conntrack_expect_cachep); free_conntrack_hash(); diff --git a/net/ipv4/netfilter/ip_conntrack_netlink.c b/net/ipv4/netfilter/ip_conntrack_netlink.c new file mode 100644 index 000000000000..f43ec18c9166 --- /dev/null +++ b/net/ipv4/netfilter/ip_conntrack_netlink.c @@ -0,0 +1,1588 @@ +/* Connection tracking via netlink socket. Allows for user space + * protocol helpers and general trouble making from userspace. + * + * (C) 2001 by Jay Schulist + * (C) 2002-2005 by Harald Welte + * (C) 2003 by Patrick Mchardy + * (C) 2005 by Pablo Neira Ayuso + * + * I've reworked this stuff to use attributes instead of conntrack + * structures. 5.44 am. I need more tea. --pablo 05/07/11. + * + * Initial connection tracking via netlink development funded and + * generally made possible by Network Robots, Inc. (www.networkrobots.com) + * + * Further development of this code funded by Astaro AG (http://www.astaro.com) + * + * This software may be used and distributed according to the terms + * of the GNU General Public License, incorporated herein by reference. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +MODULE_LICENSE("GPL"); + +static char __initdata version[] = "0.90"; + +#if 0 +#define DEBUGP printk +#else +#define DEBUGP(format, args...) 
+#endif + + +static inline int +ctnetlink_dump_tuples_proto(struct sk_buff *skb, + const struct ip_conntrack_tuple *tuple) +{ + struct ip_conntrack_protocol *proto; + + NFA_PUT(skb, CTA_PROTO_NUM, sizeof(u_int8_t), &tuple->dst.protonum); + + proto = ip_conntrack_proto_find_get(tuple->dst.protonum); + if (proto && proto->tuple_to_nfattr) + return proto->tuple_to_nfattr(skb, tuple); + + return 0; + +nfattr_failure: + return -1; +} + +static inline int +ctnetlink_dump_tuples(struct sk_buff *skb, + const struct ip_conntrack_tuple *tuple) +{ + struct nfattr *nest_parms; + + nest_parms = NFA_NEST(skb, CTA_TUPLE_IP); + NFA_PUT(skb, CTA_IP_V4_SRC, sizeof(u_int32_t), &tuple->src.ip); + NFA_PUT(skb, CTA_IP_V4_DST, sizeof(u_int32_t), &tuple->dst.ip); + NFA_NEST_END(skb, nest_parms); + + nest_parms = NFA_NEST(skb, CTA_TUPLE_PROTO); + ctnetlink_dump_tuples_proto(skb, tuple); + NFA_NEST_END(skb, nest_parms); + + return 0; + +nfattr_failure: + return -1; +} + +static inline int +ctnetlink_dump_status(struct sk_buff *skb, const struct ip_conntrack *ct) +{ + u_int32_t status = htonl((u_int32_t) ct->status); + NFA_PUT(skb, CTA_STATUS, sizeof(status), &status); + return 0; + +nfattr_failure: + return -1; +} + +static inline int +ctnetlink_dump_timeout(struct sk_buff *skb, const struct ip_conntrack *ct) +{ + long timeout_l = ct->timeout.expires - jiffies; + u_int32_t timeout; + + if (timeout_l < 0) + timeout = 0; + else + timeout = htonl(timeout_l / HZ); + + NFA_PUT(skb, CTA_TIMEOUT, sizeof(timeout), &timeout); + return 0; + +nfattr_failure: + return -1; +} + +static inline int +ctnetlink_dump_protoinfo(struct sk_buff *skb, const struct ip_conntrack *ct) +{ + struct ip_conntrack_protocol *proto = ip_conntrack_proto_find_get(ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum); + + struct nfattr *nest_proto; + int ret; + + if (!proto || !proto->to_nfattr) + return 0; + + nest_proto = NFA_NEST(skb, CTA_PROTOINFO); + + ret = proto->to_nfattr(skb, nest_proto, ct); + + 
ip_conntrack_proto_put(proto); + + NFA_NEST_END(skb, nest_proto); + + return ret; + +nfattr_failure: + return -1; +} + +static inline int +ctnetlink_dump_helpinfo(struct sk_buff *skb, const struct ip_conntrack *ct) +{ + struct nfattr *nest_helper; + + if (!ct->helper) + return 0; + + nest_helper = NFA_NEST(skb, CTA_HELP); + NFA_PUT(skb, CTA_HELP_NAME, CTA_HELP_MAXNAMESIZE, &ct->helper->name); + + if (ct->helper->to_nfattr) + ct->helper->to_nfattr(skb, ct); + + NFA_NEST_END(skb, nest_helper); + + return 0; + +nfattr_failure: + return -1; +} + +#ifdef CONFIG_IP_NF_CT_ACCT +static inline int +ctnetlink_dump_counters(struct sk_buff *skb, const struct ip_conntrack *ct, + enum ip_conntrack_dir dir) +{ + enum ctattr_type type = dir ? CTA_COUNTERS_REPLY: CTA_COUNTERS_ORIG; + struct nfattr *nest_count = NFA_NEST(skb, type); + u_int64_t tmp; + + tmp = cpu_to_be64(ct->counters[dir].packets); + NFA_PUT(skb, CTA_COUNTERS_PACKETS, sizeof(u_int64_t), &tmp); + + tmp = cpu_to_be64(ct->counters[dir].bytes); + NFA_PUT(skb, CTA_COUNTERS_BYTES, sizeof(u_int64_t), &tmp); + + NFA_NEST_END(skb, nest_count); + + return 0; + +nfattr_failure: + return -1; +} +#else +#define ctnetlink_dump_counters(a, b, c) (0) +#endif + +#ifdef CONFIG_IP_NF_CONNTRACK_MARK +static inline int +ctnetlink_dump_mark(struct sk_buff *skb, const struct ip_conntrack *ct) +{ + u_int32_t mark = htonl(ct->mark); + + NFA_PUT(skb, CTA_MARK, sizeof(u_int32_t), &mark); + return 0; + +nfattr_failure: + return -1; +} +#else +#define ctnetlink_dump_mark(a, b) (0) +#endif + +static inline int +ctnetlink_dump_id(struct sk_buff *skb, const struct ip_conntrack *ct) +{ + u_int32_t id = htonl(ct->id); + NFA_PUT(skb, CTA_ID, sizeof(u_int32_t), &id); + return 0; + +nfattr_failure: + return -1; +} + +static inline int +ctnetlink_dump_use(struct sk_buff *skb, const struct ip_conntrack *ct) +{ + unsigned int use = htonl(atomic_read(&ct->ct_general.use)); + + NFA_PUT(skb, CTA_USE, sizeof(u_int32_t), &use); + return 0; + +nfattr_failure: + 
return -1; +} + +#define tuple(ct, dir) (&(ct)->tuplehash[dir].tuple) + +static int +ctnetlink_fill_info(struct sk_buff *skb, u32 pid, u32 seq, + int event, int nowait, + const struct ip_conntrack *ct) +{ + struct nlmsghdr *nlh; + struct nfgenmsg *nfmsg; + struct nfattr *nest_parms; + unsigned char *b; + + b = skb->tail; + + event |= NFNL_SUBSYS_CTNETLINK << 8; + nlh = NLMSG_PUT(skb, pid, seq, event, sizeof(struct nfgenmsg)); + nfmsg = NLMSG_DATA(nlh); + + nlh->nlmsg_flags = (nowait && pid) ? NLM_F_MULTI : 0; + nfmsg->nfgen_family = AF_INET; + nfmsg->version = NFNETLINK_V0; + nfmsg->res_id = 0; + + nest_parms = NFA_NEST(skb, CTA_TUPLE_ORIG); + if (ctnetlink_dump_tuples(skb, tuple(ct, IP_CT_DIR_ORIGINAL)) < 0) + goto nfattr_failure; + NFA_NEST_END(skb, nest_parms); + + nest_parms = NFA_NEST(skb, CTA_TUPLE_REPLY); + if (ctnetlink_dump_tuples(skb, tuple(ct, IP_CT_DIR_REPLY)) < 0) + goto nfattr_failure; + NFA_NEST_END(skb, nest_parms); + + if (ctnetlink_dump_status(skb, ct) < 0 || + ctnetlink_dump_timeout(skb, ct) < 0 || + ctnetlink_dump_counters(skb, ct, IP_CT_DIR_ORIGINAL) < 0 || + ctnetlink_dump_counters(skb, ct, IP_CT_DIR_REPLY) < 0 || + ctnetlink_dump_protoinfo(skb, ct) < 0 || + ctnetlink_dump_helpinfo(skb, ct) < 0 || + ctnetlink_dump_mark(skb, ct) < 0 || + ctnetlink_dump_id(skb, ct) < 0 || + ctnetlink_dump_use(skb, ct) < 0) + goto nfattr_failure; + + nlh->nlmsg_len = skb->tail - b; + return skb->len; + +nlmsg_failure: +nfattr_failure: + skb_trim(skb, b - skb->data); + return -1; +} + +#ifdef CONFIG_IP_NF_CONNTRACK_EVENTS +static int ctnetlink_conntrack_event(struct notifier_block *this, + unsigned long events, void *ptr) +{ + struct nlmsghdr *nlh; + struct nfgenmsg *nfmsg; + struct nfattr *nest_parms; + struct ip_conntrack *ct = (struct ip_conntrack *)ptr; + struct sk_buff *skb; + unsigned int type; + unsigned char *b; + unsigned int flags = 0, groups; + + /* ignore our fake conntrack entry */ + if (ct == &ip_conntrack_untracked) + return NOTIFY_DONE; + + if 
(events & IPCT_DESTROY) { + type = IPCTNL_MSG_CT_DELETE; + groups = NF_NETLINK_CONNTRACK_DESTROY; + goto alloc_skb; + } + if (events & (IPCT_NEW | IPCT_RELATED)) { + type = IPCTNL_MSG_CT_NEW; + flags = NLM_F_CREATE|NLM_F_EXCL; + /* dump everything */ + events = ~0UL; + groups = NF_NETLINK_CONNTRACK_NEW; + goto alloc_skb; + } + if (events & (IPCT_STATUS | + IPCT_PROTOINFO | + IPCT_HELPER | + IPCT_HELPINFO | + IPCT_NATINFO)) { + type = IPCTNL_MSG_CT_NEW; + groups = NF_NETLINK_CONNTRACK_UPDATE; + goto alloc_skb; + } + + return NOTIFY_DONE; + +alloc_skb: + /* FIXME: Check if there are any listeners before, don't hurt performance */ + + skb = alloc_skb(NLMSG_GOODSIZE, GFP_ATOMIC); + if (!skb) + return NOTIFY_DONE; + + b = skb->tail; + + type |= NFNL_SUBSYS_CTNETLINK << 8; + nlh = NLMSG_PUT(skb, 0, 0, type, sizeof(struct nfgenmsg)); + nfmsg = NLMSG_DATA(nlh); + + nlh->nlmsg_flags = flags; + nfmsg->nfgen_family = AF_INET; + nfmsg->version = NFNETLINK_V0; + nfmsg->res_id = 0; + + nest_parms = NFA_NEST(skb, CTA_TUPLE_ORIG); + if (ctnetlink_dump_tuples(skb, tuple(ct, IP_CT_DIR_ORIGINAL)) < 0) + goto nfattr_failure; + NFA_NEST_END(skb, nest_parms); + + nest_parms = NFA_NEST(skb, CTA_TUPLE_REPLY); + if (ctnetlink_dump_tuples(skb, tuple(ct, IP_CT_DIR_REPLY)) < 0) + goto nfattr_failure; + NFA_NEST_END(skb, nest_parms); + + /* NAT stuff is now a status flag */ + if ((events & IPCT_STATUS || events & IPCT_NATINFO) + && ctnetlink_dump_status(skb, ct) < 0) + goto nfattr_failure; + if (events & IPCT_REFRESH + && ctnetlink_dump_timeout(skb, ct) < 0) + goto nfattr_failure; + if (events & IPCT_PROTOINFO + && ctnetlink_dump_protoinfo(skb, ct) < 0) + goto nfattr_failure; + if (events & IPCT_HELPINFO + && ctnetlink_dump_helpinfo(skb, ct) < 0) + goto nfattr_failure; + + if (ctnetlink_dump_counters(skb, ct, IP_CT_DIR_ORIGINAL) < 0 || + ctnetlink_dump_counters(skb, ct, IP_CT_DIR_REPLY) < 0) + goto nfattr_failure; + + nlh->nlmsg_len = skb->tail - b; + nfnetlink_send(skb, 0, groups, 0); + 
return NOTIFY_DONE; + +nlmsg_failure: +nfattr_failure: + kfree_skb(skb); + return NOTIFY_DONE; +} +#endif /* CONFIG_IP_NF_CONNTRACK_EVENTS */ + +static int ctnetlink_done(struct netlink_callback *cb) +{ + DEBUGP("entered %s\n", __FUNCTION__); + return 0; +} + +static int +ctnetlink_dump_table(struct sk_buff *skb, struct netlink_callback *cb) +{ + struct ip_conntrack *ct = NULL; + struct ip_conntrack_tuple_hash *h; + struct list_head *i; + u_int32_t *id = (u_int32_t *) &cb->args[1]; + + DEBUGP("entered %s, last bucket=%lu id=%u\n", __FUNCTION__, + cb->args[0], *id); + + read_lock_bh(&ip_conntrack_lock); + for (; cb->args[0] < ip_conntrack_htable_size; cb->args[0]++, *id = 0) { + list_for_each(i, &ip_conntrack_hash[cb->args[0]]) { + h = (struct ip_conntrack_tuple_hash *) i; + if (DIRECTION(h) != IP_CT_DIR_ORIGINAL) + continue; + ct = tuplehash_to_ctrack(h); + if (ct->id <= *id) + continue; + if (ctnetlink_fill_info(skb, NETLINK_CB(cb->skb).pid, + cb->nlh->nlmsg_seq, + IPCTNL_MSG_CT_NEW, + 1, ct) < 0) + goto out; + *id = ct->id; + } + } +out: + read_unlock_bh(&ip_conntrack_lock); + + DEBUGP("leaving, last bucket=%lu id=%u\n", cb->args[0], *id); + + return skb->len; +} + +#ifdef CONFIG_IP_NF_CT_ACCT +static int +ctnetlink_dump_table_w(struct sk_buff *skb, struct netlink_callback *cb) +{ + struct ip_conntrack *ct = NULL; + struct ip_conntrack_tuple_hash *h; + struct list_head *i; + u_int32_t *id = (u_int32_t *) &cb->args[1]; + + DEBUGP("entered %s, last bucket=%u id=%u\n", __FUNCTION__, + cb->args[0], *id); + + write_lock_bh(&ip_conntrack_lock); + for (; cb->args[0] < ip_conntrack_htable_size; cb->args[0]++, *id = 0) { + list_for_each(i, &ip_conntrack_hash[cb->args[0]]) { + h = (struct ip_conntrack_tuple_hash *) i; + if (DIRECTION(h) != IP_CT_DIR_ORIGINAL) + continue; + ct = tuplehash_to_ctrack(h); + if (ct->id <= *id) + continue; + if (ctnetlink_fill_info(skb, NETLINK_CB(cb->skb).pid, + cb->nlh->nlmsg_seq, + IPCTNL_MSG_CT_NEW, + 1, ct) < 0) + goto out; + *id = ct->id; 
+ + memset(&ct->counters, 0, sizeof(ct->counters)); + } + } +out: + write_unlock_bh(&ip_conntrack_lock); + + DEBUGP("leaving, last bucket=%lu id=%u\n", cb->args[0], *id); + + return skb->len; +} +#endif + +static const int cta_min_ip[CTA_IP_MAX] = { + [CTA_IP_V4_SRC-1] = sizeof(u_int32_t), + [CTA_IP_V4_DST-1] = sizeof(u_int32_t), +}; + +static inline int +ctnetlink_parse_tuple_ip(struct nfattr *attr, struct ip_conntrack_tuple *tuple) +{ + struct nfattr *tb[CTA_IP_MAX]; + + DEBUGP("entered %s\n", __FUNCTION__); + + memset(tb, 0, CTA_IP_MAX * sizeof(tb)); + + if (nfattr_parse_nested(tb, CTA_IP_MAX, attr) < 0) + goto nfattr_failure; + + if (nfattr_bad_size(tb, CTA_IP_MAX, cta_min_ip)) + return -EINVAL; + + if (!tb[CTA_IP_V4_SRC-1]) + return -EINVAL; + tuple->src.ip = *(u_int32_t *)NFA_DATA(tb[CTA_IP_V4_SRC-1]); + + if (!tb[CTA_IP_V4_DST-1]) + return -EINVAL; + tuple->dst.ip = *(u_int32_t *)NFA_DATA(tb[CTA_IP_V4_DST-1]); + + DEBUGP("leaving\n"); + + return 0; + +nfattr_failure: + return -1; +} + +static const int cta_min_proto[CTA_PROTO_MAX] = { + [CTA_PROTO_NUM-1] = sizeof(u_int16_t), + [CTA_PROTO_SRC_PORT-1] = sizeof(u_int16_t), + [CTA_PROTO_DST_PORT-1] = sizeof(u_int16_t), + [CTA_PROTO_ICMP_TYPE-1] = sizeof(u_int8_t), + [CTA_PROTO_ICMP_CODE-1] = sizeof(u_int8_t), + [CTA_PROTO_ICMP_ID-1] = sizeof(u_int16_t), +}; + +static inline int +ctnetlink_parse_tuple_proto(struct nfattr *attr, + struct ip_conntrack_tuple *tuple) +{ + struct nfattr *tb[CTA_PROTO_MAX]; + struct ip_conntrack_protocol *proto; + int ret = 0; + + DEBUGP("entered %s\n", __FUNCTION__); + + memset(tb, 0, CTA_PROTO_MAX * sizeof(tb)); + + if (nfattr_parse_nested(tb, CTA_PROTO_MAX, attr) < 0) + goto nfattr_failure; + + if (nfattr_bad_size(tb, CTA_PROTO_MAX, cta_min_proto)) + return -EINVAL; + + if (!tb[CTA_PROTO_NUM-1]) + return -EINVAL; + tuple->dst.protonum = *(u_int16_t *)NFA_DATA(tb[CTA_PROTO_NUM-1]); + + proto = ip_conntrack_proto_find_get(tuple->dst.protonum); + + if (likely(proto && 
proto->nfattr_to_tuple)) { + ret = proto->nfattr_to_tuple(tb, tuple); + ip_conntrack_proto_put(proto); + } + + return ret; + +nfattr_failure: + return -1; +} + +static inline int +ctnetlink_parse_tuple(struct nfattr *cda[], struct ip_conntrack_tuple *tuple, + enum ctattr_tuple type) +{ + struct nfattr *tb[CTA_TUPLE_MAX]; + int err; + + DEBUGP("entered %s\n", __FUNCTION__); + + memset(tb, 0, sizeof(tb)); /* tb is an array: sizeof(tb) already covers every slot */ + memset(tuple, 0, sizeof(*tuple)); + + if (nfattr_parse_nested(tb, CTA_TUPLE_MAX, cda[type-1]) < 0) + goto nfattr_failure; + + if (!tb[CTA_TUPLE_IP-1]) + return -EINVAL; + + err = ctnetlink_parse_tuple_ip(tb[CTA_TUPLE_IP-1], tuple); + if (err < 0) + return err; + + if (!tb[CTA_TUPLE_PROTO-1]) + return -EINVAL; + + err = ctnetlink_parse_tuple_proto(tb[CTA_TUPLE_PROTO-1], tuple); + if (err < 0) + return err; + + /* orig and expect tuples get DIR_ORIGINAL */ + if (type == CTA_TUPLE_REPLY) + tuple->dst.dir = IP_CT_DIR_REPLY; + else + tuple->dst.dir = IP_CT_DIR_ORIGINAL; + + DUMP_TUPLE(tuple); + + DEBUGP("leaving\n"); + + return 0; + +nfattr_failure: + return -1; +} + +#ifdef CONFIG_IP_NF_NAT_NEEDED +static const int cta_min_protonat[CTA_PROTONAT_MAX] = { + [CTA_PROTONAT_PORT_MIN-1] = sizeof(u_int16_t), + [CTA_PROTONAT_PORT_MAX-1] = sizeof(u_int16_t), +}; + +static int ctnetlink_parse_nat_proto(struct nfattr *attr, + const struct ip_conntrack *ct, + struct ip_nat_range *range) +{ + struct nfattr *tb[CTA_PROTONAT_MAX]; + struct ip_nat_protocol *npt; + + DEBUGP("entered %s\n", __FUNCTION__); + + memset(tb, 0, sizeof(tb)); /* tb is an array: sizeof(tb) already covers every slot */ + + if (nfattr_parse_nested(tb, CTA_PROTONAT_MAX, attr) < 0) + goto nfattr_failure; + + if (nfattr_bad_size(tb, CTA_PROTONAT_MAX, cta_min_protonat)) + goto nfattr_failure; + + npt = ip_nat_proto_find_get(ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum); + if (!npt) + return 0; + + if (!npt->nfattr_to_range) { + ip_nat_proto_put(npt); + return 0; + } + + /* nfattr_to_range returns 1 if it parsed, 0 if not, neg.
on error */ + if (npt->nfattr_to_range(tb, range) > 0) + range->flags |= IP_NAT_RANGE_PROTO_SPECIFIED; + + ip_nat_proto_put(npt); + + DEBUGP("leaving\n"); + return 0; + +nfattr_failure: + return -1; +} + +static inline int +ctnetlink_parse_nat(struct nfattr *cda[], + const struct ip_conntrack *ct, struct ip_nat_range *range) +{ + struct nfattr *tb[CTA_NAT_MAX]; + int err; + + DEBUGP("entered %s\n", __FUNCTION__); + + memset(tb, 0, sizeof(tb)); /* tb is an array: sizeof(tb) already covers every slot */ + memset(range, 0, sizeof(*range)); + + if (nfattr_parse_nested(tb, CTA_NAT_MAX, cda[CTA_NAT-1]) < 0) + goto nfattr_failure; + + if (tb[CTA_NAT_MINIP-1]) + range->min_ip = *(u_int32_t *)NFA_DATA(tb[CTA_NAT_MINIP-1]); + + if (!tb[CTA_NAT_MAXIP-1]) + range->max_ip = range->min_ip; + else + range->max_ip = *(u_int32_t *)NFA_DATA(tb[CTA_NAT_MAXIP-1]); + + if (range->min_ip) + range->flags |= IP_NAT_RANGE_MAP_IPS; + + if (!tb[CTA_NAT_PROTO-1]) + return 0; + + err = ctnetlink_parse_nat_proto(tb[CTA_NAT_PROTO-1], ct, range); + if (err < 0) + return err; + + DEBUGP("leaving\n"); + return 0; + +nfattr_failure: + return -1; +} +#endif + +static inline int +ctnetlink_parse_help(struct nfattr *attr, char **helper_name) +{ + struct nfattr *tb[CTA_HELP_MAX]; + + DEBUGP("entered %s\n", __FUNCTION__); + memset(tb, 0, sizeof(tb)); /* tb is an array: sizeof(tb) already covers every slot */ + + if (nfattr_parse_nested(tb, CTA_HELP_MAX, attr) < 0) + goto nfattr_failure; + + if (!tb[CTA_HELP_NAME-1]) + return -EINVAL; + + *helper_name = NFA_DATA(tb[CTA_HELP_NAME-1]); + + return 0; + +nfattr_failure: + return -1; +} + +static int +ctnetlink_del_conntrack(struct sock *ctnl, struct sk_buff *skb, + struct nlmsghdr *nlh, struct nfattr *cda[], int *errp) +{ + struct ip_conntrack_tuple_hash *h; + struct ip_conntrack_tuple tuple; + struct ip_conntrack *ct; + int err = 0; + + DEBUGP("entered %s\n", __FUNCTION__); + + if (cda[CTA_TUPLE_ORIG-1]) + err = ctnetlink_parse_tuple(cda, &tuple, CTA_TUPLE_ORIG); + else if (cda[CTA_TUPLE_REPLY-1]) + err = ctnetlink_parse_tuple(cda, &tuple,
CTA_TUPLE_REPLY); + else { + /* Flush the whole table */ + ip_conntrack_flush(); + return 0; + } + + if (err < 0) + return err; + + h = ip_conntrack_find_get(&tuple, NULL); + if (!h) { + DEBUGP("tuple not found in conntrack hash\n"); + return -ENOENT; + } + + ct = tuplehash_to_ctrack(h); + + if (cda[CTA_ID-1]) { + u_int32_t id = ntohl(*(u_int32_t *)NFA_DATA(cda[CTA_ID-1])); + if (ct->id != id) { + ip_conntrack_put(ct); + return -ENOENT; + } + } + if (del_timer(&ct->timeout)) { + ip_conntrack_put(ct); + ct->timeout.function((unsigned long)ct); + return 0; + } + ip_conntrack_put(ct); + DEBUGP("leaving\n"); + + return 0; +} + +static int +ctnetlink_get_conntrack(struct sock *ctnl, struct sk_buff *skb, + struct nlmsghdr *nlh, struct nfattr *cda[], int *errp) +{ + struct ip_conntrack_tuple_hash *h; + struct ip_conntrack_tuple tuple; + struct ip_conntrack *ct; + struct sk_buff *skb2 = NULL; + int err = 0; + + DEBUGP("entered %s\n", __FUNCTION__); + + if (nlh->nlmsg_flags & NLM_F_DUMP) { + struct nfgenmsg *msg = NLMSG_DATA(nlh); + u32 rlen; + + if (msg->nfgen_family != AF_INET) + return -EAFNOSUPPORT; + + if (NFNL_MSG_TYPE(nlh->nlmsg_type) == + IPCTNL_MSG_CT_GET_CTRZERO) { +#ifdef CONFIG_IP_NF_CT_ACCT + if ((*errp = netlink_dump_start(ctnl, skb, nlh, + ctnetlink_dump_table_w, + ctnetlink_done)) != 0) + return -EINVAL; +#else + return -ENOTSUPP; +#endif + } else { + if ((*errp = netlink_dump_start(ctnl, skb, nlh, + ctnetlink_dump_table, + ctnetlink_done)) != 0) + return -EINVAL; + } + + rlen = NLMSG_ALIGN(nlh->nlmsg_len); + if (rlen > skb->len) + rlen = skb->len; + skb_pull(skb, rlen); + return 0; + } + + if (cda[CTA_TUPLE_ORIG-1]) + err = ctnetlink_parse_tuple(cda, &tuple, CTA_TUPLE_ORIG); + else if (cda[CTA_TUPLE_REPLY-1]) + err = ctnetlink_parse_tuple(cda, &tuple, CTA_TUPLE_REPLY); + else + return -EINVAL; + + if (err < 0) + return err; + + h = ip_conntrack_find_get(&tuple, NULL); + if (!h) { + DEBUGP("tuple not found in conntrack hash"); + return -ENOENT; + } + 
DEBUGP("tuple found\n"); + ct = tuplehash_to_ctrack(h); + + err = -ENOMEM; + skb2 = alloc_skb(NLMSG_GOODSIZE, GFP_ATOMIC); + if (!skb2) { + ip_conntrack_put(ct); + return -ENOMEM; + } + NETLINK_CB(skb2).dst_pid = NETLINK_CB(skb).pid; + + err = ctnetlink_fill_info(skb2, NETLINK_CB(skb).pid, nlh->nlmsg_seq, + IPCTNL_MSG_CT_NEW, 1, ct); + ip_conntrack_put(ct); + if (err <= 0) + goto out; + + err = netlink_unicast(ctnl, skb2, NETLINK_CB(skb).pid, MSG_DONTWAIT); + if (err < 0) + goto out; + + DEBUGP("leaving\n"); + return 0; + +out: + if (skb2) + kfree_skb(skb2); + return -1; +} + +static inline int +ctnetlink_change_status(struct ip_conntrack *ct, struct nfattr *cda[]) +{ + unsigned long d, status = *(u_int32_t *)NFA_DATA(cda[CTA_STATUS-1]); + d = ct->status ^ status; + + if (d & (IPS_EXPECTED|IPS_CONFIRMED|IPS_DYING)) + /* unchangeable */ + return -EINVAL; + + if (d & IPS_SEEN_REPLY && !(status & IPS_SEEN_REPLY)) + /* SEEN_REPLY bit can only be set */ + return -EINVAL; + + + if (d & IPS_ASSURED && !(status & IPS_ASSURED)) + /* ASSURED bit can only be set */ + return -EINVAL; + + if (cda[CTA_NAT-1]) { +#ifndef CONFIG_IP_NF_NAT_NEEDED + return -EINVAL; +#else + unsigned int hooknum; + struct ip_nat_range range; + + if (ctnetlink_parse_nat(cda, ct, &range) < 0) + return -EINVAL; + + DEBUGP("NAT: %u.%u.%u.%u-%u.%u.%u.%u:%u-%u\n", + NIPQUAD(range.min_ip), NIPQUAD(range.max_ip), + htons(range.min.all), htons(range.max.all)); + + /* This is tricky but it works. 
ip_nat_setup_info needs the + * hook number as parameter, so let's do the correct + * conversion and run away */ + if (status & IPS_SRC_NAT_DONE) + hooknum = NF_IP_POST_ROUTING; /* IP_NAT_MANIP_SRC */ + else if (status & IPS_DST_NAT_DONE) + hooknum = NF_IP_PRE_ROUTING; /* IP_NAT_MANIP_DST */ + else + return -EINVAL; /* Missing NAT flags */ + + DEBUGP("NAT status: %lu\n", + status & (IPS_NAT_MASK | IPS_NAT_DONE_MASK)); + + if (ip_nat_initialized(ct, hooknum)) + return -EEXIST; + ip_nat_setup_info(ct, &range, hooknum); + + DEBUGP("NAT status after setup_info: %lu\n", + ct->status & (IPS_NAT_MASK | IPS_NAT_DONE_MASK)); +#endif + } + + /* Be careful here, modifying NAT bits can screw up things, + * so don't let users modify them directly if they don't pass + * ip_nat_range. */ + ct->status |= status & ~(IPS_NAT_DONE_MASK | IPS_NAT_MASK); + return 0; +} + + +static inline int +ctnetlink_change_helper(struct ip_conntrack *ct, struct nfattr *cda[]) +{ + struct ip_conntrack_helper *helper; + char *helpname; + int err; + + DEBUGP("entered %s\n", __FUNCTION__); + + /* don't change helper of sibling connections */ + if (ct->master) + return -EINVAL; + + err = ctnetlink_parse_help(cda[CTA_HELP-1], &helpname); + if (err < 0) + return err; + + helper = __ip_conntrack_helper_find_byname(helpname); + if (!helper) { + if (!strcmp(helpname, "")) + helper = NULL; + else + return -EINVAL; + } + + if (ct->helper) { + if (!helper) { + /* we had a helper before ... 
*/ + ip_ct_remove_expectations(ct); + ct->helper = NULL; + } else { + /* need to zero data of old helper */ + memset(&ct->help, 0, sizeof(ct->help)); + } + } + + ct->helper = helper; + + return 0; +} + +static inline int +ctnetlink_change_timeout(struct ip_conntrack *ct, struct nfattr *cda[]) +{ + u_int32_t timeout = ntohl(*(u_int32_t *)NFA_DATA(cda[CTA_TIMEOUT-1])); + + if (!del_timer(&ct->timeout)) + return -ETIME; + + ct->timeout.expires = jiffies + timeout * HZ; + add_timer(&ct->timeout); + + return 0; +} + +static int +ctnetlink_change_conntrack(struct ip_conntrack *ct, struct nfattr *cda[]) +{ + int err; + + DEBUGP("entered %s\n", __FUNCTION__); + + if (cda[CTA_HELP-1]) { + err = ctnetlink_change_helper(ct, cda); + if (err < 0) + return err; + } + + if (cda[CTA_TIMEOUT-1]) { + err = ctnetlink_change_timeout(ct, cda); + if (err < 0) + return err; + } + + if (cda[CTA_STATUS-1]) { + err = ctnetlink_change_status(ct, cda); + if (err < 0) + return err; + } + + DEBUGP("all done\n"); + return 0; +} + +static int +ctnetlink_create_conntrack(struct nfattr *cda[], + struct ip_conntrack_tuple *otuple, + struct ip_conntrack_tuple *rtuple) +{ + struct ip_conntrack *ct; + int err = -EINVAL; + + DEBUGP("entered %s\n", __FUNCTION__); + + ct = ip_conntrack_alloc(otuple, rtuple); + if (ct == NULL || IS_ERR(ct)) + return -ENOMEM; + + if (!cda[CTA_TIMEOUT-1]) + goto err; + ct->timeout.expires = ntohl(*(u_int32_t *)NFA_DATA(cda[CTA_TIMEOUT-1])); + + ct->timeout.expires = jiffies + ct->timeout.expires * HZ; + ct->status |= IPS_CONFIRMED; + + err = ctnetlink_change_status(ct, cda); + if (err < 0) + goto err; + + ct->helper = ip_conntrack_helper_find_get(rtuple); + + add_timer(&ct->timeout); + ip_conntrack_hash_insert(ct); + + if (ct->helper) + ip_conntrack_helper_put(ct->helper); + + DEBUGP("conntrack with id %u inserted\n", ct->id); + return 0; + +err: + ip_conntrack_free(ct); + return err; +} + +static int +ctnetlink_new_conntrack(struct sock *ctnl, struct sk_buff *skb, + struct 
nlmsghdr *nlh, struct nfattr *cda[], int *errp) +{ + struct ip_conntrack_tuple otuple, rtuple; + struct ip_conntrack_tuple_hash *h = NULL; + int err = 0; + + DEBUGP("entered %s\n", __FUNCTION__); + + if (cda[CTA_TUPLE_ORIG-1]) { + err = ctnetlink_parse_tuple(cda, &otuple, CTA_TUPLE_ORIG); + if (err < 0) + return err; + } + + if (cda[CTA_TUPLE_REPLY-1]) { + err = ctnetlink_parse_tuple(cda, &rtuple, CTA_TUPLE_REPLY); + if (err < 0) + return err; + } + + write_lock_bh(&ip_conntrack_lock); + if (cda[CTA_TUPLE_ORIG-1]) + h = __ip_conntrack_find(&otuple, NULL); + else if (cda[CTA_TUPLE_REPLY-1]) + h = __ip_conntrack_find(&rtuple, NULL); + + if (h == NULL) { + write_unlock_bh(&ip_conntrack_lock); + DEBUGP("no such conntrack, create new\n"); + err = -ENOENT; + if (nlh->nlmsg_flags & NLM_F_CREATE) + err = ctnetlink_create_conntrack(cda, &otuple, &rtuple); + return err; /* lock already released above; out_unlock would unlock twice */ + } else { + /* we only allow nat config for new conntracks */ + if (cda[CTA_NAT-1]) { + err = -EINVAL; + goto out_unlock; + } + } + + /* We manipulate the conntrack inside the global conntrack table lock, + * so there's no need to increase the refcount */ + DEBUGP("conntrack found\n"); + err = -EEXIST; + if (!(nlh->nlmsg_flags & NLM_F_EXCL)) + err = ctnetlink_change_conntrack(tuplehash_to_ctrack(h), cda); + +out_unlock: + write_unlock_bh(&ip_conntrack_lock); + return err; +} + +/*********************************************************************** + * EXPECT + ***********************************************************************/ + +static inline int +ctnetlink_exp_dump_tuple(struct sk_buff *skb, + const struct ip_conntrack_tuple *tuple, + enum ctattr_expect type) +{ + struct nfattr *nest_parms = NFA_NEST(skb, type); + + if (ctnetlink_dump_tuples(skb, tuple) < 0) + goto nfattr_failure; + + NFA_NEST_END(skb, nest_parms); + + return 0; + +nfattr_failure: + return -1; +} + +static inline int +ctnetlink_exp_dump_expect(struct sk_buff *skb, + const struct ip_conntrack_expect *exp) +{ + u_int32_t timeout
= htonl((exp->timeout.expires - jiffies) / HZ); + u_int32_t id = htonl(exp->id); + struct nfattr *nest_parms = NFA_NEST(skb, CTA_EXPECT); + + if (ctnetlink_exp_dump_tuple(skb, &exp->tuple, CTA_EXPECT_TUPLE) < 0) + goto nfattr_failure; + if (ctnetlink_exp_dump_tuple(skb, &exp->mask, CTA_EXPECT_MASK) < 0) + goto nfattr_failure; + + NFA_PUT(skb, CTA_EXPECT_TIMEOUT, sizeof(timeout), &timeout); + NFA_PUT(skb, CTA_EXPECT_ID, sizeof(u_int32_t), &id); + NFA_NEST_END(skb, nest_parms); + + return 0; + +nfattr_failure: + return -1; +} + +static int +ctnetlink_exp_fill_info(struct sk_buff *skb, u32 pid, u32 seq, + int event, + int nowait, + const struct ip_conntrack_expect *exp) +{ + struct nlmsghdr *nlh; + struct nfgenmsg *nfmsg; + unsigned char *b; + + b = skb->tail; + + event |= NFNL_SUBSYS_CTNETLINK_EXP << 8; + nlh = NLMSG_PUT(skb, pid, seq, event, sizeof(struct nfgenmsg)); + nfmsg = NLMSG_DATA(nlh); + + nlh->nlmsg_flags = (nowait && pid) ? NLM_F_MULTI : 0; + nfmsg->nfgen_family = AF_INET; + nfmsg->version = NFNETLINK_V0; + nfmsg->res_id = 0; + + if (ctnetlink_exp_dump_expect(skb, exp) < 0) + goto nfattr_failure; + + nlh->nlmsg_len = skb->tail - b; + return skb->len; + +nlmsg_failure: +nfattr_failure: + skb_trim(skb, b - skb->data); + return -1; +} + +#ifdef CONFIG_IP_NF_CONNTRACK_EVENTS +static int ctnetlink_expect_event(struct notifier_block *this, + unsigned long events, void *ptr) +{ + struct nlmsghdr *nlh; + struct nfgenmsg *nfmsg; + struct ip_conntrack_expect *exp = (struct ip_conntrack_expect *)ptr; + struct sk_buff *skb; + unsigned int type; + unsigned char *b; + int flags = 0; + u16 proto; + + if (events & IPEXP_NEW) { + type = IPCTNL_MSG_EXP_NEW; + flags = NLM_F_CREATE|NLM_F_EXCL; + } else + return NOTIFY_DONE; + + skb = alloc_skb(NLMSG_GOODSIZE, GFP_ATOMIC); + if (!skb) + return NOTIFY_DONE; + + b = skb->tail; + + type |= NFNL_SUBSYS_CTNETLINK_EXP << 8; /* expectation messages belong to the EXP subsystem, as in ctnetlink_exp_fill_info */ + nlh = NLMSG_PUT(skb, 0, 0, type, sizeof(struct nfgenmsg)); + nfmsg = NLMSG_DATA(nlh); + + nlh->nlmsg_flags =
flags; + nfmsg->nfgen_family = AF_INET; + nfmsg->version = NFNETLINK_V0; + nfmsg->res_id = 0; + + if (ctnetlink_exp_dump_expect(skb, exp) < 0) + goto nfattr_failure; + + nlh->nlmsg_len = skb->tail - b; + proto = exp->tuple.dst.protonum; + nfnetlink_send(skb, 0, NF_NETLINK_CONNTRACK_EXP_NEW, 0); + return NOTIFY_DONE; + +nlmsg_failure: +nfattr_failure: + kfree_skb(skb); + return NOTIFY_DONE; +} +#endif + +static int +ctnetlink_exp_dump_table(struct sk_buff *skb, struct netlink_callback *cb) +{ + struct ip_conntrack_expect *exp = NULL; + struct list_head *i; + u_int32_t *id = (u_int32_t *) &cb->args[0]; + + DEBUGP("entered %s, last id=%llu\n", __FUNCTION__, *id); + + read_lock_bh(&ip_conntrack_lock); + list_for_each(i, &ip_conntrack_expect_list) { + exp = (struct ip_conntrack_expect *) i; + if (exp->id <= *id) + continue; + if (ctnetlink_exp_fill_info(skb, NETLINK_CB(cb->skb).pid, + cb->nlh->nlmsg_seq, + IPCTNL_MSG_EXP_NEW, + 1, exp) < 0) + goto out; + *id = exp->id; + } +out: + read_unlock_bh(&ip_conntrack_lock); + + DEBUGP("leaving, last id=%llu\n", *id); + + return skb->len; +} + +static int +ctnetlink_get_expect(struct sock *ctnl, struct sk_buff *skb, + struct nlmsghdr *nlh, struct nfattr *cda[], int *errp) +{ + struct ip_conntrack_tuple tuple; + struct ip_conntrack_expect *exp; + struct sk_buff *skb2; + int err = 0; + + DEBUGP("entered %s\n", __FUNCTION__); + + if (nlh->nlmsg_flags & NLM_F_DUMP) { + struct nfgenmsg *msg = NLMSG_DATA(nlh); + u32 rlen; + + if (msg->nfgen_family != AF_INET) + return -EAFNOSUPPORT; + + if ((*errp = netlink_dump_start(ctnl, skb, nlh, + ctnetlink_exp_dump_table, + ctnetlink_done)) != 0) + return -EINVAL; + rlen = NLMSG_ALIGN(nlh->nlmsg_len); + if (rlen > skb->len) + rlen = skb->len; + skb_pull(skb, rlen); + return 0; + } + + if (cda[CTA_TUPLE_ORIG-1]) + err = ctnetlink_parse_tuple(cda, &tuple, CTA_TUPLE_ORIG); + else if (cda[CTA_TUPLE_REPLY-1]) + err = ctnetlink_parse_tuple(cda, &tuple, CTA_TUPLE_REPLY); + else + return -EINVAL; + + if 
(err < 0) + return err; + + exp = ip_conntrack_expect_find_get(&tuple); + if (!exp) + return -ENOENT; + + err = -ENOMEM; + skb2 = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); + if (!skb2) + goto out; + NETLINK_CB(skb2).dst_pid = NETLINK_CB(skb).pid; + + err = ctnetlink_exp_fill_info(skb2, NETLINK_CB(skb).pid, + nlh->nlmsg_seq, IPCTNL_MSG_EXP_NEW, + 1, exp); + if (err <= 0) + goto out; + + ip_conntrack_expect_put(exp); + + err = netlink_unicast(ctnl, skb2, NETLINK_CB(skb).pid, MSG_DONTWAIT); + if (err < 0) + goto free; + + return err; + +out: + ip_conntrack_expect_put(exp); +free: + if (skb2) + kfree_skb(skb2); + return err; +} + +static int +ctnetlink_del_expect(struct sock *ctnl, struct sk_buff *skb, + struct nlmsghdr *nlh, struct nfattr *cda[], int *errp) +{ + struct ip_conntrack_expect *exp, *tmp; + struct ip_conntrack_tuple tuple; + struct ip_conntrack_helper *h; + int err; + + /* delete by tuple needs either orig or reply tuple */ + if (cda[CTA_TUPLE_ORIG-1]) + err = ctnetlink_parse_tuple(cda, &tuple, CTA_TUPLE_ORIG); + else if (cda[CTA_TUPLE_REPLY-1]) + err = ctnetlink_parse_tuple(cda, &tuple, CTA_TUPLE_REPLY); + else if (cda[CTA_HELP_NAME-1]) { + char *name = NFA_DATA(cda[CTA_HELP_NAME-1]); + + /* delete all expectations for this helper */ + write_lock_bh(&ip_conntrack_lock); + h = __ip_conntrack_helper_find_byname(name); + if (!h) { + write_unlock_bh(&ip_conntrack_lock); + return -EINVAL; + } + list_for_each_entry_safe(exp, tmp, &ip_conntrack_expect_list, + list) { + if (exp->master->helper == h + && del_timer(&exp->timeout)) + __ip_ct_expect_unlink_destroy(exp); + } + write_unlock_bh(&ip_conntrack_lock); /* must pair with write_lock_bh above */ + return 0; + } else { + /* This basically means we have to flush everything*/ + write_lock_bh(&ip_conntrack_lock); + list_for_each_entry_safe(exp, tmp, &ip_conntrack_expect_list, + list) { + if (del_timer(&exp->timeout)) + __ip_ct_expect_unlink_destroy(exp); + } + write_unlock_bh(&ip_conntrack_lock); + return 0; + } + + if (err < 0) + return err; + + /* bump usage
count to 2 */ + exp = ip_conntrack_expect_find_get(&tuple); + if (!exp) + return -ENOENT; + + if (cda[CTA_EXPECT_ID-1]) { + u_int32_t id = *(u_int32_t *)NFA_DATA(cda[CTA_EXPECT_ID-1]); + if (exp->id != ntohl(id)) { + ip_conntrack_expect_put(exp); + return -ENOENT; + } + } + + /* after list removal, usage count == 1 */ + ip_conntrack_unexpect_related(exp); + /* have to put what we 'get' above. after this line usage count == 0 */ + ip_conntrack_expect_put(exp); + + return 0; +} +static int +ctnetlink_change_expect(struct ip_conntrack_expect *x, struct nfattr *cda[]) +{ + return -EOPNOTSUPP; +} + +static int +ctnetlink_create_expect(struct nfattr *cda[]) +{ + struct ip_conntrack_tuple tuple, mask, master_tuple; + struct ip_conntrack_tuple_hash *h = NULL; + struct ip_conntrack_expect *exp; + struct ip_conntrack *ct; + int err = 0; + + DEBUGP("entered %s\n", __FUNCTION__); + + err = ctnetlink_parse_tuple(cda, &tuple, CTA_EXPECT_TUPLE); + if (err < 0) + return err; + err = ctnetlink_parse_tuple(cda, &mask, CTA_EXPECT_MASK); /* fill mask, not tuple: it is copied into exp->mask below */ + if (err < 0) + return err; + + if (cda[CTA_TUPLE_ORIG-1]) + err = ctnetlink_parse_tuple(cda, &master_tuple, CTA_TUPLE_ORIG); + else if (cda[CTA_TUPLE_REPLY-1]) + err = ctnetlink_parse_tuple(cda, &master_tuple, + CTA_TUPLE_REPLY); + else + return -EINVAL; + + if (err < 0) + return err; + + /* Look for master conntrack of this expectation */ + h = ip_conntrack_find_get(&master_tuple, NULL); + if (!h) + return -ENOENT; + ct = tuplehash_to_ctrack(h); + + if (!ct->helper) { + /* such conntrack hasn't got any helper, abort */ + err = -EINVAL; + goto out; + } + + exp = ip_conntrack_expect_alloc(ct); + if (!exp) { + err = -ENOMEM; + goto out; + } + + exp->expectfn = NULL; + exp->master = ct; + memcpy(&exp->tuple, &tuple, sizeof(struct ip_conntrack_tuple)); + memcpy(&exp->mask, &mask, sizeof(struct ip_conntrack_tuple)); + + err = ip_conntrack_expect_related(exp); + ip_conntrack_expect_put(exp); + +out: + ip_conntrack_put(tuplehash_to_ctrack(h)); + return
err; +} + +static int +ctnetlink_new_expect(struct sock *ctnl, struct sk_buff *skb, + struct nlmsghdr *nlh, struct nfattr *cda[], int *errp) +{ + struct ip_conntrack_tuple tuple; + struct ip_conntrack_expect *exp; + int err = 0; + + DEBUGP("entered %s\n", __FUNCTION__); + + if (!cda[CTA_EXPECT_TUPLE-1] || !cda[CTA_EXPECT_MASK-1]) + return -EINVAL; + + err = ctnetlink_parse_tuple(cda, &tuple, CTA_EXPECT_TUPLE); + if (err < 0) + return err; + + write_lock_bh(&ip_conntrack_lock); + exp = __ip_conntrack_expect_find(&tuple); + + if (!exp) { + write_unlock_bh(&ip_conntrack_lock); + err = -ENOENT; + if (nlh->nlmsg_flags & NLM_F_CREATE) + err = ctnetlink_create_expect(cda); + return err; + } + + err = -EEXIST; + if (!(nlh->nlmsg_flags & NLM_F_EXCL)) + err = ctnetlink_change_expect(exp, cda); + write_unlock_bh(&ip_conntrack_lock); + + DEBUGP("leaving\n"); + + return err; +} + +#ifdef CONFIG_IP_NF_CONNTRACK_EVENTS +static struct notifier_block ctnl_notifier = { + .notifier_call = ctnetlink_conntrack_event, +}; + +static struct notifier_block ctnl_notifier_exp = { + .notifier_call = ctnetlink_expect_event, +}; +#endif + +static struct nfnl_callback ctnl_cb[IPCTNL_MSG_MAX] = { + [IPCTNL_MSG_CT_NEW] = { .call = ctnetlink_new_conntrack, + .cap_required = CAP_NET_ADMIN }, + [IPCTNL_MSG_CT_GET] = { .call = ctnetlink_get_conntrack, + .cap_required = CAP_NET_ADMIN }, + [IPCTNL_MSG_CT_DELETE] = { .call = ctnetlink_del_conntrack, + .cap_required = CAP_NET_ADMIN }, + [IPCTNL_MSG_CT_GET_CTRZERO] = { .call = ctnetlink_get_conntrack, + .cap_required = CAP_NET_ADMIN }, +}; + +static struct nfnl_callback ctnl_exp_cb[IPCTNL_MSG_MAX] = { + [IPCTNL_MSG_EXP_GET] = { .call = ctnetlink_get_expect, + .cap_required = CAP_NET_ADMIN }, + [IPCTNL_MSG_EXP_NEW] = { .call = ctnetlink_new_expect, + .cap_required = CAP_NET_ADMIN }, + [IPCTNL_MSG_EXP_DELETE] = { .call = ctnetlink_del_expect, + .cap_required = CAP_NET_ADMIN }, +}; + +static struct nfnetlink_subsystem ctnl_subsys = { + .name = "conntrack", + 
.subsys_id = NFNL_SUBSYS_CTNETLINK, + .cb_count = IPCTNL_MSG_MAX, + .attr_count = CTA_MAX, + .cb = ctnl_cb, +}; + +static struct nfnetlink_subsystem ctnl_exp_subsys = { + .name = "conntrack_expect", + .subsys_id = NFNL_SUBSYS_CTNETLINK_EXP, + .cb_count = IPCTNL_MSG_EXP_MAX, + .attr_count = CTA_MAX, + .cb = ctnl_exp_cb, +}; + +static int __init ctnetlink_init(void) +{ + int ret; + + printk("ctnetlink v%s: registering with nfnetlink.\n", version); + ret = nfnetlink_subsys_register(&ctnl_subsys); + if (ret < 0) { + printk("ctnetlink_init: cannot register with nfnetlink.\n"); + goto err_out; + } + + ret = nfnetlink_subsys_register(&ctnl_exp_subsys); + if (ret < 0) { + printk("ctnetlink_init: cannot register exp with nfnetlink.\n"); + goto err_unreg_subsys; + } + +#ifdef CONFIG_IP_NF_CONNTRACK_EVENTS + ret = ip_conntrack_register_notifier(&ctnl_notifier); + if (ret < 0) { + printk("ctnetlink_init: cannot register notifier.\n"); + goto err_unreg_exp_subsys; + } + + ret = ip_conntrack_expect_register_notifier(&ctnl_notifier_exp); + if (ret < 0) { + printk("ctnetlink_init: cannot expect register notifier.\n"); + goto err_unreg_notifier; + } +#endif + + return 0; + +#ifdef CONFIG_IP_NF_CONNTRACK_EVENTS +err_unreg_notifier: + ip_conntrack_unregister_notifier(&ctnl_notifier); +err_unreg_exp_subsys: + nfnetlink_subsys_unregister(&ctnl_exp_subsys); +#endif +err_unreg_subsys: + nfnetlink_subsys_unregister(&ctnl_subsys); +err_out: + return ret; +} + +static void __exit ctnetlink_exit(void) +{ + printk("ctnetlink: unregistering from nfnetlink.\n"); + +#ifdef CONFIG_IP_NF_CONNTRACK_EVENTS + ip_conntrack_unregister_notifier(&ctnl_notifier_exp); + ip_conntrack_unregister_notifier(&ctnl_notifier); +#endif + + nfnetlink_subsys_unregister(&ctnl_exp_subsys); + nfnetlink_subsys_unregister(&ctnl_subsys); + return; +} + +module_init(ctnetlink_init); +module_exit(ctnetlink_exit); diff --git a/net/ipv4/netfilter/ip_conntrack_proto_icmp.c b/net/ipv4/netfilter/ip_conntrack_proto_icmp.c index 
dca1f63d6f51..3f90cb9979ac 100644 --- a/net/ipv4/netfilter/ip_conntrack_proto_icmp.c +++ b/net/ipv4/netfilter/ip_conntrack_proto_icmp.c @@ -109,16 +109,17 @@ static int icmp_packet(struct ip_conntrack *ct, return NF_ACCEPT; } +static u_int8_t valid_new[] = { + [ICMP_ECHO] = 1, + [ICMP_TIMESTAMP] = 1, + [ICMP_INFO_REQUEST] = 1, + [ICMP_ADDRESS] = 1 +}; + /* Called when a new connection for this protocol found. */ static int icmp_new(struct ip_conntrack *conntrack, const struct sk_buff *skb) { - static u_int8_t valid_new[] - = { [ICMP_ECHO] = 1, - [ICMP_TIMESTAMP] = 1, - [ICMP_INFO_REQUEST] = 1, - [ICMP_ADDRESS] = 1 }; - if (conntrack->tuplehash[0].tuple.dst.u.icmp.type >= sizeof(valid_new) || !valid_new[conntrack->tuplehash[0].tuple.dst.u.icmp.type]) { /* Can't create a new ICMP `conn' with this. */ @@ -159,11 +160,12 @@ icmp_error_message(struct sk_buff *skb, return NF_ACCEPT; } - innerproto = ip_ct_find_proto(inside->ip.protocol); + innerproto = ip_conntrack_proto_find_get(inside->ip.protocol); dataoff = skb->nh.iph->ihl*4 + sizeof(inside->icmp) + inside->ip.ihl*4; /* Are they talking about one of our connections? */ if (!ip_ct_get_tuple(&inside->ip, skb, dataoff, &origtuple, innerproto)) { DEBUGP("icmp_error: ! get_tuple p=%u", inside->ip.protocol); + ip_conntrack_proto_put(innerproto); return NF_ACCEPT; } @@ -171,8 +173,10 @@ icmp_error_message(struct sk_buff *skb, been preserved inside the ICMP. 
*/ if (!ip_ct_invert_tuple(&innertuple, &origtuple, innerproto)) { DEBUGP("icmp_error_track: Can't invert tuple\n"); + ip_conntrack_proto_put(innerproto); return NF_ACCEPT; } + ip_conntrack_proto_put(innerproto); *ctinfo = IP_CT_RELATED; @@ -266,6 +270,47 @@ checksum_skipped: return icmp_error_message(skb, ctinfo, hooknum); } +#if defined(CONFIG_IP_NF_CONNTRACK_NETLINK) || \ + defined(CONFIG_IP_NF_CONNTRACK_NETLINK_MODULE) +static int icmp_tuple_to_nfattr(struct sk_buff *skb, + const struct ip_conntrack_tuple *t) +{ + NFA_PUT(skb, CTA_PROTO_ICMP_ID, sizeof(u_int16_t), + &t->src.u.icmp.id); + NFA_PUT(skb, CTA_PROTO_ICMP_TYPE, sizeof(u_int8_t), + &t->dst.u.icmp.type); + NFA_PUT(skb, CTA_PROTO_ICMP_CODE, sizeof(u_int8_t), + &t->dst.u.icmp.code); + + if (t->dst.u.icmp.type >= sizeof(valid_new) + || !valid_new[t->dst.u.icmp.type]) + return -EINVAL; + + return 0; + +nfattr_failure: + return -1; +} + +static int icmp_nfattr_to_tuple(struct nfattr *tb[], + struct ip_conntrack_tuple *tuple) +{ + if (!tb[CTA_PROTO_ICMP_TYPE-1] + || !tb[CTA_PROTO_ICMP_CODE-1] + || !tb[CTA_PROTO_ICMP_ID-1]) + return -1; + + tuple->dst.u.icmp.type = + *(u_int8_t *)NFA_DATA(tb[CTA_PROTO_ICMP_TYPE-1]); + tuple->dst.u.icmp.code = + *(u_int8_t *)NFA_DATA(tb[CTA_PROTO_ICMP_CODE-1]); + tuple->src.u.icmp.id = + *(u_int16_t *)NFA_DATA(tb[CTA_PROTO_ICMP_ID-1]); /* id is 16 bits, matching icmp_tuple_to_nfattr */ + + return 0; +} +#endif + struct ip_conntrack_protocol ip_conntrack_protocol_icmp = { .proto = IPPROTO_ICMP, @@ -277,4 +322,9 @@ struct ip_conntrack_protocol ip_conntrack_protocol_icmp = .packet = icmp_packet, .new = icmp_new, .error = icmp_error, +#if defined(CONFIG_IP_NF_CONNTRACK_NETLINK) || \ + defined(CONFIG_IP_NF_CONNTRACK_NETLINK_MODULE) + .tuple_to_nfattr = icmp_tuple_to_nfattr, + .nfattr_to_tuple = icmp_nfattr_to_tuple, +#endif }; diff --git a/net/ipv4/netfilter/ip_conntrack_proto_sctp.c b/net/ipv4/netfilter/ip_conntrack_proto_sctp.c index 3d5f878a07d1..a875f35e576d 100644 --- a/net/ipv4/netfilter/ip_conntrack_proto_sctp.c +++
b/net/ipv4/netfilter/ip_conntrack_proto_sctp.c @@ -505,7 +505,12 @@ static struct ip_conntrack_protocol ip_conntrack_protocol_sctp = { .packet = sctp_packet, .new = sctp_new, .destroy = NULL, - .me = THIS_MODULE + .me = THIS_MODULE, +#if defined(CONFIG_IP_NF_CONNTRACK_NETLINK) || \ + defined(CONFIG_IP_NF_CONNTRACK_NETLINK_MODULE) + .tuple_to_nfattr = ip_ct_port_tuple_to_nfattr, + .nfattr_to_tuple = ip_ct_port_nfattr_to_tuple, +#endif }; #ifdef CONFIG_SYSCTL diff --git a/net/ipv4/netfilter/ip_conntrack_proto_tcp.c b/net/ipv4/netfilter/ip_conntrack_proto_tcp.c index a569ad1ee4d9..c2bce22d4031 100644 --- a/net/ipv4/netfilter/ip_conntrack_proto_tcp.c +++ b/net/ipv4/netfilter/ip_conntrack_proto_tcp.c @@ -336,6 +336,23 @@ static int tcp_print_conntrack(struct seq_file *s, return seq_printf(s, "%s ", tcp_conntrack_names[state]); } +#if defined(CONFIG_IP_NF_CONNTRACK_NETLINK) || \ + defined(CONFIG_IP_NF_CONNTRACK_NETLINK_MODULE) +static int tcp_to_nfattr(struct sk_buff *skb, struct nfattr *nfa, + const struct ip_conntrack *ct) +{ + read_lock_bh(&tcp_lock); + NFA_PUT(skb, CTA_PROTOINFO_TCP_STATE, sizeof(u_int8_t), + &ct->proto.tcp.state); + read_unlock_bh(&tcp_lock); + return 0; + +nfattr_failure: + read_unlock_bh(&tcp_lock); /* NOTE(review): presumably reached from NFA_PUT with tcp_lock still held */ + return -1; +} +#endif + static unsigned int get_conntrack_index(const struct tcphdr *tcph) { if (tcph->rst) return TCP_RST_SET; @@ -1100,4 +1117,10 @@ struct ip_conntrack_protocol ip_conntrack_protocol_tcp = .packet = tcp_packet, .new = tcp_new, .error = tcp_error, +#if defined(CONFIG_IP_NF_CONNTRACK_NETLINK) || \ + defined(CONFIG_IP_NF_CONNTRACK_NETLINK_MODULE) + .to_nfattr = tcp_to_nfattr, + .tuple_to_nfattr = ip_ct_port_tuple_to_nfattr, + .nfattr_to_tuple = ip_ct_port_nfattr_to_tuple, +#endif }; diff --git a/net/ipv4/netfilter/ip_conntrack_proto_udp.c b/net/ipv4/netfilter/ip_conntrack_proto_udp.c index 6066eaf4d825..14130169cbfd 100644 --- a/net/ipv4/netfilter/ip_conntrack_proto_udp.c +++ b/net/ipv4/netfilter/ip_conntrack_proto_udp.c @@ -145,4 +145,9 @@ struct
ip_conntrack_protocol ip_conntrack_protocol_udp = .packet = udp_packet, .new = udp_new, .error = udp_error, +#if defined(CONFIG_IP_NF_CONNTRACK_NETLINK) || \ + defined(CONFIG_IP_NF_CONNTRACK_NETLINK_MODULE) + .tuple_to_nfattr = ip_ct_port_tuple_to_nfattr, + .nfattr_to_tuple = ip_ct_port_nfattr_to_tuple, +#endif }; diff --git a/net/ipv4/netfilter/ip_conntrack_standalone.c b/net/ipv4/netfilter/ip_conntrack_standalone.c index f0880004115d..ca97c3ac2f2a 100644 --- a/net/ipv4/netfilter/ip_conntrack_standalone.c +++ b/net/ipv4/netfilter/ip_conntrack_standalone.c @@ -5,7 +5,7 @@ */ /* (C) 1999-2001 Paul `Rusty' Russell - * (C) 2002-2004 Netfilter Core Team + * (C) 2002-2005 Netfilter Core Team * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -147,8 +147,7 @@ static int ct_seq_show(struct seq_file *s, void *v) if (DIRECTION(hash)) return 0; - proto = ip_ct_find_proto(conntrack->tuplehash[IP_CT_DIR_ORIGINAL] - .tuple.dst.protonum); + proto = __ip_conntrack_proto_find(conntrack->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum); IP_NF_ASSERT(proto); if (seq_printf(s, "%-8s %u %ld ", @@ -283,7 +282,7 @@ static int exp_seq_show(struct seq_file *s, void *v) seq_printf(s, "proto=%u ", expect->tuple.dst.protonum); print_tuple(s, &expect->tuple, - ip_ct_find_proto(expect->tuple.dst.protonum)); + __ip_conntrack_proto_find(expect->tuple.dst.protonum)); return seq_putc(s, '\n'); } @@ -992,12 +991,16 @@ EXPORT_SYMBOL(ip_conntrack_helper_register); EXPORT_SYMBOL(ip_conntrack_helper_unregister); EXPORT_SYMBOL(ip_ct_iterate_cleanup); EXPORT_SYMBOL(ip_ct_refresh_acct); -EXPORT_SYMBOL(ip_ct_protos); -EXPORT_SYMBOL(ip_ct_find_proto); + EXPORT_SYMBOL(ip_conntrack_expect_alloc); EXPORT_SYMBOL(ip_conntrack_expect_put); +EXPORT_SYMBOL_GPL(ip_conntrack_expect_find_get); EXPORT_SYMBOL(ip_conntrack_expect_related); EXPORT_SYMBOL(ip_conntrack_unexpect_related); 
+EXPORT_SYMBOL_GPL(ip_conntrack_expect_list); +EXPORT_SYMBOL_GPL(__ip_conntrack_expect_find); +EXPORT_SYMBOL_GPL(__ip_ct_expect_unlink_destroy); + EXPORT_SYMBOL(ip_conntrack_tuple_taken); EXPORT_SYMBOL(ip_ct_gather_frags); EXPORT_SYMBOL(ip_conntrack_htable_size); @@ -1005,7 +1008,28 @@ EXPORT_SYMBOL(ip_conntrack_lock); EXPORT_SYMBOL(ip_conntrack_hash); EXPORT_SYMBOL(ip_conntrack_untracked); EXPORT_SYMBOL_GPL(ip_conntrack_find_get); -EXPORT_SYMBOL_GPL(ip_conntrack_put); #ifdef CONFIG_IP_NF_NAT_NEEDED EXPORT_SYMBOL(ip_conntrack_tcp_update); #endif + +EXPORT_SYMBOL_GPL(ip_conntrack_flush); +EXPORT_SYMBOL_GPL(__ip_conntrack_find); + +EXPORT_SYMBOL_GPL(ip_conntrack_alloc); +EXPORT_SYMBOL_GPL(ip_conntrack_free); +EXPORT_SYMBOL_GPL(ip_conntrack_hash_insert); + +EXPORT_SYMBOL_GPL(ip_ct_remove_expectations); + +EXPORT_SYMBOL_GPL(ip_conntrack_helper_find_get); +EXPORT_SYMBOL_GPL(ip_conntrack_helper_put); +EXPORT_SYMBOL_GPL(__ip_conntrack_helper_find_byname); + +EXPORT_SYMBOL_GPL(ip_conntrack_proto_find_get); +EXPORT_SYMBOL_GPL(ip_conntrack_proto_put); +EXPORT_SYMBOL_GPL(__ip_conntrack_proto_find); +#if defined(CONFIG_IP_NF_CONNTRACK_NETLINK) || \ + defined(CONFIG_IP_NF_CONNTRACK_NETLINK_MODULE) +EXPORT_SYMBOL_GPL(ip_ct_port_tuple_to_nfattr); +EXPORT_SYMBOL_GPL(ip_ct_port_nfattr_to_tuple); +#endif diff --git a/net/ipv4/netfilter/ip_nat_core.c b/net/ipv4/netfilter/ip_nat_core.c index ed4d731880f7..567c802fecf0 100644 --- a/net/ipv4/netfilter/ip_nat_core.c +++ b/net/ipv4/netfilter/ip_nat_core.c @@ -47,8 +47,39 @@ DEFINE_RWLOCK(ip_nat_lock); static unsigned int ip_nat_htable_size; static struct list_head *bysource; + +#define MAX_IP_NAT_PROTO 256 struct ip_nat_protocol *ip_nat_protos[MAX_IP_NAT_PROTO]; +static inline struct ip_nat_protocol * +__ip_nat_proto_find(u_int8_t protonum) +{ + return ip_nat_protos[protonum]; +} + +struct ip_nat_protocol * +ip_nat_proto_find_get(u_int8_t protonum) +{ + struct ip_nat_protocol *p; + + /* we need to disable preemption to make sure 'p' 
doesn't get + * removed until we've grabbed the reference */ + preempt_disable(); + p = __ip_nat_proto_find(protonum); + if (p) { + if (!try_module_get(p->me)) + p = &ip_nat_unknown_protocol; + } + preempt_enable(); + + return p; +} + +void +ip_nat_proto_put(struct ip_nat_protocol *p) +{ + module_put(p->me); +} /* We keep an extra hash for each conntrack, for fast searching. */ static inline unsigned int @@ -103,7 +134,8 @@ static int in_range(const struct ip_conntrack_tuple *tuple, const struct ip_nat_range *range) { - struct ip_nat_protocol *proto = ip_nat_find_proto(tuple->dst.protonum); + struct ip_nat_protocol *proto = + __ip_nat_proto_find(tuple->dst.protonum); /* If we are supposed to map IPs, then we must be in the range specified, otherwise let this drag us onto a new src IP. */ @@ -216,8 +248,7 @@ get_unique_tuple(struct ip_conntrack_tuple *tuple, struct ip_conntrack *conntrack, enum ip_nat_manip_type maniptype) { - struct ip_nat_protocol *proto - = ip_nat_find_proto(orig_tuple->dst.protonum); + struct ip_nat_protocol *proto; /* 1) If this srcip/proto/src-proto-part is currently mapped, and that same mapping gives a unique tuple within the given @@ -242,14 +273,20 @@ get_unique_tuple(struct ip_conntrack_tuple *tuple, /* 3) The per-protocol part of the manip is made to map into the range to make a unique tuple. */ + proto = ip_nat_proto_find_get(orig_tuple->dst.protonum); + /* Only bother mapping if it's not already in range and unique */ if ((!(range->flags & IP_NAT_RANGE_PROTO_SPECIFIED) || proto->in_range(tuple, maniptype, &range->min, &range->max)) - && !ip_nat_used_tuple(tuple, conntrack)) + && !ip_nat_used_tuple(tuple, conntrack)) { + ip_nat_proto_put(proto); return; + } /* Last change: get protocol to try to obtain unique tuple. 
*/ proto->unique_tuple(tuple, range, maniptype, conntrack); + + ip_nat_proto_put(proto); } unsigned int @@ -320,6 +357,7 @@ manip_pkt(u_int16_t proto, enum ip_nat_manip_type maniptype) { struct iphdr *iph; + struct ip_nat_protocol *p; if (!skb_ip_make_writable(pskb, iphdroff + sizeof(*iph))) return 0; @@ -327,9 +365,12 @@ manip_pkt(u_int16_t proto, iph = (void *)(*pskb)->data + iphdroff; /* Manipulate protcol part. */ - if (!ip_nat_find_proto(proto)->manip_pkt(pskb, iphdroff, - target, maniptype)) + p = ip_nat_proto_find_get(proto); + if (!p->manip_pkt(pskb, iphdroff, target, maniptype)) { + ip_nat_proto_put(p); return 0; + } + ip_nat_proto_put(p); iph = (void *)(*pskb)->data + iphdroff; @@ -425,7 +466,8 @@ int icmp_reply_translation(struct sk_buff **pskb, if (!ip_ct_get_tuple(&inside->ip, *pskb, (*pskb)->nh.iph->ihl*4 + sizeof(struct icmphdr) + inside->ip.ihl*4, - &inner, ip_ct_find_proto(inside->ip.protocol))) + &inner, + __ip_conntrack_proto_find(inside->ip.protocol))) return 0; /* Change inner back to look like incoming packet. 
We do the @@ -495,6 +537,49 @@ void ip_nat_protocol_unregister(struct ip_nat_protocol *proto) synchronize_net(); } +#if defined(CONFIG_IP_NF_CONNTRACK_NETLINK) || \ + defined(CONFIG_IP_NF_CONNTRACK_NETLINK_MODULE) +int +ip_nat_port_range_to_nfattr(struct sk_buff *skb, + const struct ip_nat_range *range) +{ + NFA_PUT(skb, CTA_PROTONAT_PORT_MIN, sizeof(u_int16_t), + &range->min.tcp.port); + NFA_PUT(skb, CTA_PROTONAT_PORT_MAX, sizeof(u_int16_t), + &range->max.tcp.port); + + return 0; + +nfattr_failure: + return -1; +} + +int +ip_nat_port_nfattr_to_range(struct nfattr *tb[], struct ip_nat_range *range) +{ + int ret = 0; + + /* we have to return whether we actually parsed something or not */ + + if (tb[CTA_PROTONAT_PORT_MIN-1]) { + ret = 1; + range->min.tcp.port = + *(u_int16_t *)NFA_DATA(tb[CTA_PROTONAT_PORT_MIN-1]); + } + + if (!tb[CTA_PROTONAT_PORT_MAX-1]) { + if (ret) + range->max.tcp.port = range->min.tcp.port; + } else { + ret = 1; + range->max.tcp.port = + *(u_int16_t *)NFA_DATA(tb[CTA_PROTONAT_PORT_MAX-1]); + } + + return ret; +} +#endif + int __init ip_nat_init(void) { size_t i; diff --git a/net/ipv4/netfilter/ip_nat_proto_icmp.c b/net/ipv4/netfilter/ip_nat_proto_icmp.c index 6596c9ee1655..38fdfc2093c4 100644 --- a/net/ipv4/netfilter/ip_nat_proto_icmp.c +++ b/net/ipv4/netfilter/ip_nat_proto_icmp.c @@ -107,10 +107,15 @@ icmp_print_range(char *buffer, const struct ip_nat_range *range) } struct ip_nat_protocol ip_nat_protocol_icmp -= { "ICMP", IPPROTO_ICMP, += { "ICMP", IPPROTO_ICMP, THIS_MODULE, icmp_manip_pkt, icmp_in_range, icmp_unique_tuple, icmp_print, - icmp_print_range + icmp_print_range, +#if defined(CONFIG_IP_NF_CONNTRACK_NETLINK) || \ + defined(CONFIG_IP_NF_CONNTRACK_NETLINK_MODULE) + ip_nat_port_range_to_nfattr, + ip_nat_port_nfattr_to_range, +#endif }; diff --git a/net/ipv4/netfilter/ip_nat_proto_tcp.c b/net/ipv4/netfilter/ip_nat_proto_tcp.c index a98e36d2b3c6..f03cd0f0c2bf 100644 --- a/net/ipv4/netfilter/ip_nat_proto_tcp.c +++ 
b/net/ipv4/netfilter/ip_nat_proto_tcp.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include @@ -170,10 +171,15 @@ tcp_print_range(char *buffer, const struct ip_nat_range *range) } struct ip_nat_protocol ip_nat_protocol_tcp -= { "TCP", IPPROTO_TCP, += { "TCP", IPPROTO_TCP, THIS_MODULE, tcp_manip_pkt, tcp_in_range, tcp_unique_tuple, tcp_print, - tcp_print_range + tcp_print_range, +#if defined(CONFIG_IP_NF_CONNTRACK_NETLINK) || \ + defined(CONFIG_IP_NF_CONNTRACK_NETLINK_MODULE) + ip_nat_port_range_to_nfattr, + ip_nat_port_nfattr_to_range, +#endif }; diff --git a/net/ipv4/netfilter/ip_nat_proto_udp.c b/net/ipv4/netfilter/ip_nat_proto_udp.c index 9f66e5625664..7a4e66ecbc0a 100644 --- a/net/ipv4/netfilter/ip_nat_proto_udp.c +++ b/net/ipv4/netfilter/ip_nat_proto_udp.c @@ -157,10 +157,15 @@ udp_print_range(char *buffer, const struct ip_nat_range *range) } struct ip_nat_protocol ip_nat_protocol_udp -= { "UDP", IPPROTO_UDP, += { "UDP", IPPROTO_UDP, THIS_MODULE, udp_manip_pkt, udp_in_range, udp_unique_tuple, udp_print, - udp_print_range + udp_print_range, +#if defined(CONFIG_IP_NF_CONNTRACK_NETLINK) || \ + defined(CONFIG_IP_NF_CONNTRACK_NETLINK_MODULE) + ip_nat_port_range_to_nfattr, + ip_nat_port_nfattr_to_range, +#endif }; diff --git a/net/ipv4/netfilter/ip_nat_proto_unknown.c b/net/ipv4/netfilter/ip_nat_proto_unknown.c index f5525bd58d16..512d8f2fb824 100644 --- a/net/ipv4/netfilter/ip_nat_proto_unknown.c +++ b/net/ipv4/netfilter/ip_nat_proto_unknown.c @@ -61,7 +61,7 @@ unknown_print_range(char *buffer, const struct ip_nat_range *range) } struct ip_nat_protocol ip_nat_unknown_protocol = { - "unknown", 0, + "unknown", 0, THIS_MODULE, unknown_manip_pkt, unknown_in_range, unknown_unique_tuple, diff --git a/net/ipv4/netfilter/ip_nat_standalone.c b/net/ipv4/netfilter/ip_nat_standalone.c index 9ecba979033a..89db052add81 100644 --- a/net/ipv4/netfilter/ip_nat_standalone.c +++ b/net/ipv4/netfilter/ip_nat_standalone.c @@ -394,6 +394,8 @@ 
module_exit(fini); EXPORT_SYMBOL(ip_nat_setup_info); EXPORT_SYMBOL(ip_nat_protocol_register); EXPORT_SYMBOL(ip_nat_protocol_unregister); +EXPORT_SYMBOL_GPL(ip_nat_proto_find_get); +EXPORT_SYMBOL_GPL(ip_nat_proto_put); EXPORT_SYMBOL(ip_nat_cheat_check); EXPORT_SYMBOL(ip_nat_mangle_tcp_packet); EXPORT_SYMBOL(ip_nat_mangle_udp_packet); diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c index 710acd77cc4c..b0ed57981847 100644 --- a/net/netfilter/nfnetlink.c +++ b/net/netfilter/nfnetlink.c @@ -121,6 +121,7 @@ void __nfa_fill(struct sk_buff *skb, int attrtype, int attrlen, nfa->nfa_type = attrtype; nfa->nfa_len = size; memcpy(NFA_DATA(nfa), data, attrlen); + memset(NFA_DATA(nfa) + attrlen, 0, NFA_ALIGN(size) - size); } int nfattr_parse(struct nfattr *tb[], int maxattr, struct nfattr *nfa, int len) From 83e3609eba3818f6e18b8bf9442195169ac306b7 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 9 Aug 2005 19:33:31 -0700 Subject: [PATCH 276/584] [REQSK]: Move the syn_table destroy from tcp_listen_stop to reqsk_queue_destroy Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. 
Miller --- include/net/request_sock.h | 5 ++++- net/core/request_sock.c | 26 ++++++++++++++++++++++++++ net/ipv4/tcp.c | 35 ++++++++++------------------------- 3 files changed, 40 insertions(+), 26 deletions(-) diff --git a/include/net/request_sock.h b/include/net/request_sock.h index 72fd6f5e86b1..334717bf9ef6 100644 --- a/include/net/request_sock.h +++ b/include/net/request_sock.h @@ -89,6 +89,7 @@ struct listen_sock { int qlen_young; int clock_hand; u32 hash_rnd; + u32 nr_table_entries; struct request_sock *syn_table[0]; }; @@ -129,11 +130,13 @@ static inline struct listen_sock *reqsk_queue_yank_listen_sk(struct request_sock return lopt; } -static inline void reqsk_queue_destroy(struct request_sock_queue *queue) +static inline void __reqsk_queue_destroy(struct request_sock_queue *queue) { kfree(reqsk_queue_yank_listen_sk(queue)); } +extern void reqsk_queue_destroy(struct request_sock_queue *queue); + static inline struct request_sock * reqsk_queue_yank_acceptq(struct request_sock_queue *queue) { diff --git a/net/core/request_sock.c b/net/core/request_sock.c index bb55675f0685..4e99ce5c08f2 100644 --- a/net/core/request_sock.c +++ b/net/core/request_sock.c @@ -53,6 +53,7 @@ int reqsk_queue_alloc(struct request_sock_queue *queue, get_random_bytes(&lopt->hash_rnd, sizeof(lopt->hash_rnd)); rwlock_init(&queue->syn_wait_lock); queue->rskq_accept_head = queue->rskq_accept_head = NULL; + lopt->nr_table_entries = nr_table_entries; write_lock_bh(&queue->syn_wait_lock); queue->listen_opt = lopt; @@ -62,3 +63,28 @@ int reqsk_queue_alloc(struct request_sock_queue *queue, } EXPORT_SYMBOL(reqsk_queue_alloc); + +void reqsk_queue_destroy(struct request_sock_queue *queue) +{ + /* make all the listen_opt local to us */ + struct listen_sock *lopt = reqsk_queue_yank_listen_sk(queue); + + if (lopt->qlen != 0) { + int i; + + for (i = 0; i < lopt->nr_table_entries; i++) { + struct request_sock *req; + + while ((req = lopt->syn_table[i]) != NULL) { + lopt->syn_table[i] = req->dl_next; + 
lopt->qlen--; + reqsk_free(req); + } + } + } + + BUG_TRAP(lopt->qlen == 0); + kfree(lopt); +} + +EXPORT_SYMBOL(reqsk_queue_destroy); diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index d2696af46c70..42a2e2ccd430 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -487,7 +487,7 @@ int tcp_listen_start(struct sock *sk) } sk->sk_state = TCP_CLOSE; - reqsk_queue_destroy(&tp->accept_queue); + __reqsk_queue_destroy(&tp->accept_queue); return -EADDRINUSE; } @@ -499,38 +499,23 @@ int tcp_listen_start(struct sock *sk) static void tcp_listen_stop (struct sock *sk) { struct tcp_sock *tp = tcp_sk(sk); - struct listen_sock *lopt; struct request_sock *acc_req; struct request_sock *req; - int i; tcp_delete_keepalive_timer(sk); /* make all the listen_opt local to us */ - lopt = reqsk_queue_yank_listen_sk(&tp->accept_queue); acc_req = reqsk_queue_yank_acceptq(&tp->accept_queue); - if (lopt->qlen) { - for (i = 0; i < TCP_SYNQ_HSIZE; i++) { - while ((req = lopt->syn_table[i]) != NULL) { - lopt->syn_table[i] = req->dl_next; - lopt->qlen--; - reqsk_free(req); - - /* Following specs, it would be better either to send FIN - * (and enter FIN-WAIT-1, it is normal close) - * or to send active reset (abort). - * Certainly, it is pretty dangerous while synflood, but it is - * bad justification for our negligence 8) - * To be honest, we are not able to make either - * of the variants now. --ANK - */ - } - } - } - BUG_TRAP(!lopt->qlen); - - kfree(lopt); + /* Following specs, it would be better either to send FIN + * (and enter FIN-WAIT-1, it is normal close) + * or to send active reset (abort). + * Certainly, it is pretty dangerous while synflood, but it is + * bad justification for our negligence 8) + * To be honest, we are not able to make either + * of the variants now. 
--ANK + */ + reqsk_queue_destroy(&tp->accept_queue); while ((req = acc_req) != NULL) { struct sock *child = req->sk; From b6b99eb5409d75ae35390057cd28f3aedfbd4cf4 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Tue, 9 Aug 2005 19:33:51 -0700 Subject: [PATCH 277/584] [NET]: Reduce tc_index/tc_verd to u16 Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/linux/skbuff.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 4aeadb102589..af4f02e98243 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -265,9 +265,9 @@ struct sk_buff { #endif #endif /* CONFIG_NETFILTER */ #ifdef CONFIG_NET_SCHED - __u32 tc_index; /* traffic control index */ + __u16 tc_index; /* traffic control index */ #ifdef CONFIG_NET_CLS_ACT - __u32 tc_verd; /* traffic control verdict */ + __u16 tc_verd; /* traffic control verdict */ #endif #endif From f2ccd8fa06c8e302116e71df372f5c1f83432e03 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 9 Aug 2005 19:34:12 -0700 Subject: [PATCH 278/584] [NET]: Kill skb->real_dev Bonding just wants the device before the skb_bond() decapsulation occurs, so simply pass that original device into packet_type->func() as an argument. It remains to be seen whether we can use this same exact thing to get rid of skb->input_dev as well. Signed-off-by: David S. 
Miller --- drivers/block/aoe/aoenet.c | 2 +- drivers/net/bonding/bond_3ad.c | 11 ++++------- drivers/net/bonding/bond_3ad.h | 2 +- drivers/net/bonding/bond_alb.c | 5 ++--- drivers/net/hamradio/bpqether.c | 4 ++-- drivers/net/pppoe.c | 6 ++++-- drivers/net/wan/hdlc_generic.c | 2 +- drivers/net/wan/lapbether.c | 2 +- drivers/net/wan/syncppp.c | 2 +- include/linux/if_vlan.h | 1 - include/linux/netdevice.h | 10 ++++++---- include/linux/skbuff.h | 2 -- include/net/arp.h | 2 +- include/net/ax25.h | 2 +- include/net/datalink.h | 2 +- include/net/ip.h | 2 +- include/net/ipv6.h | 3 ++- include/net/llc.h | 8 +++++--- include/net/p8022.h | 3 ++- include/net/psnap.h | 2 +- include/net/x25.h | 2 +- net/802/p8022.c | 3 ++- net/802/psnap.c | 7 ++++--- net/8021q/vlan.h | 2 +- net/8021q/vlan_dev.c | 2 +- net/appletalk/aarp.c | 2 +- net/appletalk/ddp.c | 6 +++--- net/ax25/ax25_in.c | 8 ++++---- net/core/dev.c | 35 ++++++++++++++++++--------------- net/core/skbuff.c | 2 -- net/decnet/af_decnet.c | 2 +- net/decnet/dn_route.c | 2 +- net/econet/af_econet.c | 2 +- net/ipv4/arp.c | 4 ++-- net/ipv4/ip_input.c | 2 +- net/ipv4/ipconfig.c | 8 ++++---- net/ipv6/ip6_input.c | 2 +- net/ipx/af_ipx.c | 2 +- net/irda/irlap_frame.c | 2 +- net/irda/irmod.c | 2 +- net/llc/llc_core.c | 3 ++- net/llc/llc_input.c | 4 ++-- net/netrom/nr_dev.c | 2 +- net/packet/af_packet.c | 6 +++--- net/x25/x25_dev.c | 2 +- 45 files changed, 96 insertions(+), 91 deletions(-) diff --git a/drivers/block/aoe/aoenet.c b/drivers/block/aoe/aoenet.c index 9e6f51c528b0..4be976940f69 100644 --- a/drivers/block/aoe/aoenet.c +++ b/drivers/block/aoe/aoenet.c @@ -120,7 +120,7 @@ aoenet_xmit(struct sk_buff *sl) * (1) len doesn't include the header by default. I want this. 
*/ static int -aoenet_rcv(struct sk_buff *skb, struct net_device *ifp, struct packet_type *pt) +aoenet_rcv(struct sk_buff *skb, struct net_device *ifp, struct packet_type *pt, struct net_device *orig_dev) { struct aoe_hdr *h; u32 n; diff --git a/drivers/net/bonding/bond_3ad.c b/drivers/net/bonding/bond_3ad.c index a2e8dda5afac..d2f34d5a8083 100644 --- a/drivers/net/bonding/bond_3ad.c +++ b/drivers/net/bonding/bond_3ad.c @@ -2419,22 +2419,19 @@ out: return 0; } -int bond_3ad_lacpdu_recv(struct sk_buff *skb, struct net_device *dev, struct packet_type* ptype) +int bond_3ad_lacpdu_recv(struct sk_buff *skb, struct net_device *dev, struct packet_type* ptype, struct net_device *orig_dev) { struct bonding *bond = dev->priv; struct slave *slave = NULL; int ret = NET_RX_DROP; - if (!(dev->flags & IFF_MASTER)) { + if (!(dev->flags & IFF_MASTER)) goto out; - } read_lock(&bond->lock); - slave = bond_get_slave_by_dev((struct bonding *)dev->priv, - skb->real_dev); - if (slave == NULL) { + slave = bond_get_slave_by_dev((struct bonding *)dev->priv, orig_dev); + if (!slave) goto out_unlock; - } bond_3ad_rx_indication((struct lacpdu *) skb->data, slave, skb->len); diff --git a/drivers/net/bonding/bond_3ad.h b/drivers/net/bonding/bond_3ad.h index f46823894187..673a30af5660 100644 --- a/drivers/net/bonding/bond_3ad.h +++ b/drivers/net/bonding/bond_3ad.h @@ -295,6 +295,6 @@ void bond_3ad_adapter_duplex_changed(struct slave *slave); void bond_3ad_handle_link_change(struct slave *slave, char link); int bond_3ad_get_active_agg_info(struct bonding *bond, struct ad_info *ad_info); int bond_3ad_xmit_xor(struct sk_buff *skb, struct net_device *dev); -int bond_3ad_lacpdu_recv(struct sk_buff *skb, struct net_device *dev, struct packet_type* ptype); +int bond_3ad_lacpdu_recv(struct sk_buff *skb, struct net_device *dev, struct packet_type* ptype, struct net_device *orig_dev); #endif //__BOND_3AD_H__ diff --git a/drivers/net/bonding/bond_alb.c b/drivers/net/bonding/bond_alb.c index 
19e829b567d0..f8fce3961197 100644 --- a/drivers/net/bonding/bond_alb.c +++ b/drivers/net/bonding/bond_alb.c @@ -354,15 +354,14 @@ static void rlb_update_entry_from_arp(struct bonding *bond, struct arp_pkt *arp) _unlock_rx_hashtbl(bond); } -static int rlb_arp_recv(struct sk_buff *skb, struct net_device *bond_dev, struct packet_type *ptype) +static int rlb_arp_recv(struct sk_buff *skb, struct net_device *bond_dev, struct packet_type *ptype, struct net_device *orig_dev) { struct bonding *bond = bond_dev->priv; struct arp_pkt *arp = (struct arp_pkt *)skb->data; int res = NET_RX_DROP; - if (!(bond_dev->flags & IFF_MASTER)) { + if (!(bond_dev->flags & IFF_MASTER)) goto out; - } if (!arp) { dprintk("Packet has no ARP data\n"); diff --git a/drivers/net/hamradio/bpqether.c b/drivers/net/hamradio/bpqether.c index ba9f0580e1f9..2946e037a9b1 100644 --- a/drivers/net/hamradio/bpqether.c +++ b/drivers/net/hamradio/bpqether.c @@ -98,7 +98,7 @@ static char bcast_addr[6]={0xFF,0xFF,0xFF,0xFF,0xFF,0xFF}; static char bpq_eth_addr[6]; -static int bpq_rcv(struct sk_buff *, struct net_device *, struct packet_type *); +static int bpq_rcv(struct sk_buff *, struct net_device *, struct packet_type *, struct net_device *); static int bpq_device_event(struct notifier_block *, unsigned long, void *); static const char *bpq_print_ethaddr(const unsigned char *); @@ -165,7 +165,7 @@ static inline int dev_is_ethdev(struct net_device *dev) /* * Receive an AX.25 frame via an ethernet interface. 
*/ -static int bpq_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *ptype) +static int bpq_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *ptype, struct net_device *orig_dev) { int len; char * ptr; diff --git a/drivers/net/pppoe.c b/drivers/net/pppoe.c index ce1a9bf7b9a7..82f236cc3b9b 100644 --- a/drivers/net/pppoe.c +++ b/drivers/net/pppoe.c @@ -377,7 +377,8 @@ abort_kfree: ***********************************************************************/ static int pppoe_rcv(struct sk_buff *skb, struct net_device *dev, - struct packet_type *pt) + struct packet_type *pt, + struct net_device *orig_dev) { struct pppoe_hdr *ph; @@ -426,7 +427,8 @@ out: ***********************************************************************/ static int pppoe_disc_rcv(struct sk_buff *skb, struct net_device *dev, - struct packet_type *pt) + struct packet_type *pt, + struct net_device *orig_dev) { struct pppoe_hdr *ph; diff --git a/drivers/net/wan/hdlc_generic.c b/drivers/net/wan/hdlc_generic.c index a63f6a2cc4f7..cdd4c09c2d90 100644 --- a/drivers/net/wan/hdlc_generic.c +++ b/drivers/net/wan/hdlc_generic.c @@ -61,7 +61,7 @@ static struct net_device_stats *hdlc_get_stats(struct net_device *dev) static int hdlc_rcv(struct sk_buff *skb, struct net_device *dev, - struct packet_type *p) + struct packet_type *p, struct net_device *orig_dev) { hdlc_device *hdlc = dev_to_hdlc(dev); if (hdlc->proto.netif_rx) diff --git a/drivers/net/wan/lapbether.c b/drivers/net/wan/lapbether.c index 7f2e3653c5e5..6c302e9dbca2 100644 --- a/drivers/net/wan/lapbether.c +++ b/drivers/net/wan/lapbether.c @@ -86,7 +86,7 @@ static __inline__ int dev_is_ethdev(struct net_device *dev) /* * Receive a LAPB frame via an ethernet interface. 
*/ -static int lapbeth_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *ptype) +static int lapbeth_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *ptype, struct net_device *orig_dev) { int len, err; struct lapbethdev *lapbeth; diff --git a/drivers/net/wan/syncppp.c b/drivers/net/wan/syncppp.c index 84b65c60c799..f58c794a963a 100644 --- a/drivers/net/wan/syncppp.c +++ b/drivers/net/wan/syncppp.c @@ -1447,7 +1447,7 @@ static void sppp_print_bytes (u_char *p, u16 len) * after interrupt servicing to process frames queued via netif_rx. */ -static int sppp_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *p) +static int sppp_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *p, struct net_device *orig_dev) { if ((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL) return NET_RX_DROP; diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h index 62a9d89dfbe2..17d0c0d40b0e 100644 --- a/include/linux/if_vlan.h +++ b/include/linux/if_vlan.h @@ -155,7 +155,6 @@ static inline int __vlan_hwaccel_rx(struct sk_buff *skb, { struct net_device_stats *stats; - skb->real_dev = skb->dev; skb->dev = grp->vlan_devices[vlan_tag & VLAN_VID_MASK]; if (skb->dev == NULL) { dev_kfree_skb_any(skb); diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 3a0ed7f9e801..296cf93a65e0 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -497,10 +497,12 @@ static inline void *netdev_priv(struct net_device *dev) #define SET_NETDEV_DEV(net, pdev) ((net)->class_dev.dev = (pdev)) struct packet_type { - __be16 type; /* This is really htons(ether_type). */ - struct net_device *dev; /* NULL is wildcarded here */ - int (*func) (struct sk_buff *, struct net_device *, - struct packet_type *); + __be16 type; /* This is really htons(ether_type). 
*/ + struct net_device *dev; /* NULL is wildcarded here */ + int (*func) (struct sk_buff *, + struct net_device *, + struct packet_type *, + struct net_device *); void *af_packet_priv; struct list_head list; }; diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index af4f02e98243..60b32151f76a 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -164,7 +164,6 @@ struct skb_shared_info { * @stamp: Time we arrived * @dev: Device we arrived on/are leaving by * @input_dev: Device we arrived on - * @real_dev: The real device we are using * @h: Transport layer header * @nh: Network layer header * @mac: Link layer header @@ -206,7 +205,6 @@ struct sk_buff { struct timeval stamp; struct net_device *dev; struct net_device *input_dev; - struct net_device *real_dev; union { struct tcphdr *th; diff --git a/include/net/arp.h b/include/net/arp.h index a1f09fad6a52..a13e30c35f42 100644 --- a/include/net/arp.h +++ b/include/net/arp.h @@ -11,7 +11,7 @@ extern struct neigh_table arp_tbl; extern void arp_init(void); extern int arp_rcv(struct sk_buff *skb, struct net_device *dev, - struct packet_type *pt); + struct packet_type *pt, struct net_device *orig_dev); extern int arp_find(unsigned char *haddr, struct sk_buff *skb); extern int arp_ioctl(unsigned int cmd, void __user *arg); extern void arp_send(int type, int ptype, u32 dest_ip, diff --git a/include/net/ax25.h b/include/net/ax25.h index 3696f988a9f1..926eed543023 100644 --- a/include/net/ax25.h +++ b/include/net/ax25.h @@ -316,7 +316,7 @@ extern int ax25_protocol_is_registered(unsigned int); /* ax25_in.c */ extern int ax25_rx_iframe(ax25_cb *, struct sk_buff *); -extern int ax25_kiss_rcv(struct sk_buff *, struct net_device *, struct packet_type *); +extern int ax25_kiss_rcv(struct sk_buff *, struct net_device *, struct packet_type *, struct net_device *); /* ax25_ip.c */ extern int ax25_encapsulate(struct sk_buff *, struct net_device *, unsigned short, void *, void *, unsigned int); diff --git 
a/include/net/datalink.h b/include/net/datalink.h index 5797ba3d2eb5..deb7ca75db48 100644 --- a/include/net/datalink.h +++ b/include/net/datalink.h @@ -9,7 +9,7 @@ struct datalink_proto { unsigned short header_length; int (*rcvfunc)(struct sk_buff *, struct net_device *, - struct packet_type *); + struct packet_type *, struct net_device *); int (*request)(struct datalink_proto *, struct sk_buff *, unsigned char *); struct list_head node; diff --git a/include/net/ip.h b/include/net/ip.h index 32360bbe143f..2570b536c8f4 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -86,7 +86,7 @@ extern int ip_build_and_send_pkt(struct sk_buff *skb, struct sock *sk, u32 saddr, u32 daddr, struct ip_options *opt); extern int ip_rcv(struct sk_buff *skb, struct net_device *dev, - struct packet_type *pt); + struct packet_type *pt, struct net_device *orig_dev); extern int ip_local_deliver(struct sk_buff *skb); extern int ip_mr_input(struct sk_buff *skb); extern int ip_output(struct sk_buff *skb); diff --git a/include/net/ipv6.h b/include/net/ipv6.h index 69324465e8b3..533fc074ed90 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -346,7 +346,8 @@ static inline int ipv6_addr_any(const struct in6_addr *a) extern int ipv6_rcv(struct sk_buff *skb, struct net_device *dev, - struct packet_type *pt); + struct packet_type *pt, + struct net_device *orig_dev); /* * upper-layer output functions diff --git a/include/net/llc.h b/include/net/llc.h index c9aed2a8b4e2..71769a5aeef3 100644 --- a/include/net/llc.h +++ b/include/net/llc.h @@ -46,7 +46,8 @@ struct llc_sap { unsigned char f_bit; int (*rcv_func)(struct sk_buff *skb, struct net_device *dev, - struct packet_type *pt); + struct packet_type *pt, + struct net_device *orig_dev); struct llc_addr laddr; struct list_head node; struct { @@ -64,7 +65,7 @@ extern rwlock_t llc_sap_list_lock; extern unsigned char llc_station_mac_sa[ETH_ALEN]; extern int llc_rcv(struct sk_buff *skb, struct net_device *dev, - struct packet_type *pt); + 
struct packet_type *pt, struct net_device *orig_dev); extern int llc_mac_hdr_init(struct sk_buff *skb, unsigned char *sa, unsigned char *da); @@ -78,7 +79,8 @@ extern void llc_set_station_handler(void (*handler)(struct sk_buff *skb)); extern struct llc_sap *llc_sap_open(unsigned char lsap, int (*rcv)(struct sk_buff *skb, struct net_device *dev, - struct packet_type *pt)); + struct packet_type *pt, + struct net_device *orig_dev)); extern void llc_sap_close(struct llc_sap *sap); extern struct llc_sap *llc_sap_find(unsigned char sap_value); diff --git a/include/net/p8022.h b/include/net/p8022.h index 3c99a86c3581..223f8fa9ffca 100644 --- a/include/net/p8022.h +++ b/include/net/p8022.h @@ -4,7 +4,8 @@ extern struct datalink_proto * register_8022_client(unsigned char type, int (*func)(struct sk_buff *skb, struct net_device *dev, - struct packet_type *pt)); + struct packet_type *pt, + struct net_device *orig_dev)); extern void unregister_8022_client(struct datalink_proto *proto); #endif diff --git a/include/net/psnap.h b/include/net/psnap.h index 9c94e8f98b36..b2e01cc3fc8a 100644 --- a/include/net/psnap.h +++ b/include/net/psnap.h @@ -1,7 +1,7 @@ #ifndef _NET_PSNAP_H #define _NET_PSNAP_H -extern struct datalink_proto *register_snap_client(unsigned char *desc, int (*rcvfunc)(struct sk_buff *, struct net_device *, struct packet_type *)); +extern struct datalink_proto *register_snap_client(unsigned char *desc, int (*rcvfunc)(struct sk_buff *, struct net_device *, struct packet_type *, struct net_device *orig_dev)); extern void unregister_snap_client(struct datalink_proto *proto); #endif diff --git a/include/net/x25.h b/include/net/x25.h index 8b39b98876e8..fee62ff8c194 100644 --- a/include/net/x25.h +++ b/include/net/x25.h @@ -175,7 +175,7 @@ extern void x25_kill_by_neigh(struct x25_neigh *); /* x25_dev.c */ extern void x25_send_frame(struct sk_buff *, struct x25_neigh *); -extern int x25_lapb_receive_frame(struct sk_buff *, struct net_device *, struct packet_type *); 
+extern int x25_lapb_receive_frame(struct sk_buff *, struct net_device *, struct packet_type *, struct net_device *); extern void x25_establish_link(struct x25_neigh *); extern void x25_terminate_link(struct x25_neigh *); diff --git a/net/802/p8022.c b/net/802/p8022.c index 5ae63416df6d..b24817c63ca8 100644 --- a/net/802/p8022.c +++ b/net/802/p8022.c @@ -35,7 +35,8 @@ static int p8022_request(struct datalink_proto *dl, struct sk_buff *skb, struct datalink_proto *register_8022_client(unsigned char type, int (*func)(struct sk_buff *skb, struct net_device *dev, - struct packet_type *pt)) + struct packet_type *pt, + struct net_device *orig_dev)) { struct datalink_proto *proto; diff --git a/net/802/psnap.c b/net/802/psnap.c index 1053821ddf93..ab80b1fab53c 100644 --- a/net/802/psnap.c +++ b/net/802/psnap.c @@ -47,7 +47,7 @@ static struct datalink_proto *find_snap_client(unsigned char *desc) * A SNAP packet has arrived */ static int snap_rcv(struct sk_buff *skb, struct net_device *dev, - struct packet_type *pt) + struct packet_type *pt, struct net_device *orig_dev) { int rc = 1; struct datalink_proto *proto; @@ -61,7 +61,7 @@ static int snap_rcv(struct sk_buff *skb, struct net_device *dev, /* Pass the frame on. 
*/ skb->h.raw += 5; skb_pull(skb, 5); - rc = proto->rcvfunc(skb, dev, &snap_packet_type); + rc = proto->rcvfunc(skb, dev, &snap_packet_type, orig_dev); } else { skb->sk = NULL; kfree_skb(skb); @@ -118,7 +118,8 @@ module_exit(snap_exit); struct datalink_proto *register_snap_client(unsigned char *desc, int (*rcvfunc)(struct sk_buff *, struct net_device *, - struct packet_type *)) + struct packet_type *, + struct net_device *)) { struct datalink_proto *proto = NULL; diff --git a/net/8021q/vlan.h b/net/8021q/vlan.h index 508b1fa14546..9ae3a14dd016 100644 --- a/net/8021q/vlan.h +++ b/net/8021q/vlan.h @@ -51,7 +51,7 @@ struct net_device *__find_vlan_dev(struct net_device* real_dev, /* found in vlan_dev.c */ int vlan_dev_rebuild_header(struct sk_buff *skb); int vlan_skb_recv(struct sk_buff *skb, struct net_device *dev, - struct packet_type* ptype); + struct packet_type *ptype, struct net_device *orig_dev); int vlan_dev_hard_header(struct sk_buff *skb, struct net_device *dev, unsigned short type, void *daddr, void *saddr, unsigned len); diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c index 49c487413518..145f5cde96cf 100644 --- a/net/8021q/vlan_dev.c +++ b/net/8021q/vlan_dev.c @@ -113,7 +113,7 @@ static inline struct sk_buff *vlan_check_reorder_header(struct sk_buff *skb) * */ int vlan_skb_recv(struct sk_buff *skb, struct net_device *dev, - struct packet_type* ptype) + struct packet_type* ptype, struct net_device *orig_dev) { unsigned char *rawp = NULL; struct vlan_hdr *vhdr = (struct vlan_hdr *)(skb->data); diff --git a/net/appletalk/aarp.c b/net/appletalk/aarp.c index c34614ea5fce..7076097debc2 100644 --- a/net/appletalk/aarp.c +++ b/net/appletalk/aarp.c @@ -698,7 +698,7 @@ static void __aarp_resolved(struct aarp_entry **list, struct aarp_entry *a, * frame. We currently only support Ethernet. 
*/ static int aarp_rcv(struct sk_buff *skb, struct net_device *dev, - struct packet_type *pt) + struct packet_type *pt, struct net_device *orig_dev) { struct elapaarp *ea = aarp_hdr(skb); int hash, ret = 0; diff --git a/net/appletalk/ddp.c b/net/appletalk/ddp.c index 192b529f86a4..ffde33cd09ba 100644 --- a/net/appletalk/ddp.c +++ b/net/appletalk/ddp.c @@ -1390,7 +1390,7 @@ free_it: * [ie ARPHRD_ETHERTALK] */ static int atalk_rcv(struct sk_buff *skb, struct net_device *dev, - struct packet_type *pt) + struct packet_type *pt, struct net_device *orig_dev) { struct ddpehdr *ddp; struct sock *sock; @@ -1482,7 +1482,7 @@ freeit: * header and append a long one. */ static int ltalk_rcv(struct sk_buff *skb, struct net_device *dev, - struct packet_type *pt) + struct packet_type *pt, struct net_device *orig_dev) { /* Expand any short form frames */ if (skb->mac.raw[2] == 1) { @@ -1528,7 +1528,7 @@ static int ltalk_rcv(struct sk_buff *skb, struct net_device *dev, } skb->h.raw = skb->data; - return atalk_rcv(skb, dev, pt); + return atalk_rcv(skb, dev, pt, orig_dev); freeit: kfree_skb(skb); return 0; diff --git a/net/ax25/ax25_in.c b/net/ax25/ax25_in.c index 3dc808fde33f..124eec8216d7 100644 --- a/net/ax25/ax25_in.c +++ b/net/ax25/ax25_in.c @@ -132,7 +132,7 @@ int ax25_rx_iframe(ax25_cb *ax25, struct sk_buff *skb) skb->dev = ax25->ax25_dev->dev; skb->pkt_type = PACKET_HOST; skb->protocol = htons(ETH_P_IP); - ip_rcv(skb, skb->dev, NULL); /* Wrong ptype */ + ip_rcv(skb, skb->dev, NULL, skb->dev); /* Wrong ptype */ return 1; } #endif @@ -258,7 +258,7 @@ static int ax25_rcv(struct sk_buff *skb, struct net_device *dev, skb->dev = dev; skb->pkt_type = PACKET_HOST; skb->protocol = htons(ETH_P_IP); - ip_rcv(skb, dev, ptype); /* Note ptype here is the wrong one, fix me later */ + ip_rcv(skb, dev, ptype, dev); /* Note ptype here is the wrong one, fix me later */ break; case AX25_P_ARP: @@ -268,7 +268,7 @@ static int ax25_rcv(struct sk_buff *skb, struct net_device *dev, skb->dev = dev; 
skb->pkt_type = PACKET_HOST; skb->protocol = htons(ETH_P_ARP); - arp_rcv(skb, dev, ptype); /* Note ptype here is wrong... */ + arp_rcv(skb, dev, ptype, dev); /* Note ptype here is wrong... */ break; #endif case AX25_P_TEXT: @@ -454,7 +454,7 @@ static int ax25_rcv(struct sk_buff *skb, struct net_device *dev, * Receive an AX.25 frame via a SLIP interface. */ int ax25_kiss_rcv(struct sk_buff *skb, struct net_device *dev, - struct packet_type *ptype) + struct packet_type *ptype, struct net_device *orig_dev) { skb->sk = NULL; /* Initially we don't know who it's for */ skb->destructor = NULL; /* Who initializes this, dammit?! */ diff --git a/net/core/dev.c b/net/core/dev.c index faf59b02c4bf..e1cc162bf295 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1058,7 +1058,7 @@ void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev) skb2->h.raw = skb2->nh.raw; skb2->pkt_type = PACKET_OUTGOING; - ptype->func(skb2, skb->dev, ptype); + ptype->func(skb2, skb->dev, ptype, skb->dev); } } rcu_read_unlock(); @@ -1425,14 +1425,14 @@ int netif_rx_ni(struct sk_buff *skb) EXPORT_SYMBOL(netif_rx_ni); -static __inline__ void skb_bond(struct sk_buff *skb) +static inline struct net_device *skb_bond(struct sk_buff *skb) { struct net_device *dev = skb->dev; - if (dev->master) { - skb->real_dev = skb->dev; + if (dev->master) skb->dev = dev->master; - } + + return dev; } static void net_tx_action(struct softirq_action *h) @@ -1482,10 +1482,11 @@ static void net_tx_action(struct softirq_action *h) } static __inline__ int deliver_skb(struct sk_buff *skb, - struct packet_type *pt_prev) + struct packet_type *pt_prev, + struct net_device *orig_dev) { atomic_inc(&skb->users); - return pt_prev->func(skb, skb->dev, pt_prev); + return pt_prev->func(skb, skb->dev, pt_prev, orig_dev); } #if defined(CONFIG_BRIDGE) || defined (CONFIG_BRIDGE_MODULE) @@ -1496,7 +1497,8 @@ struct net_bridge_fdb_entry *(*br_fdb_get_hook)(struct net_bridge *br, void (*br_fdb_put_hook)(struct net_bridge_fdb_entry 
*ent); static __inline__ int handle_bridge(struct sk_buff **pskb, - struct packet_type **pt_prev, int *ret) + struct packet_type **pt_prev, int *ret, + struct net_device *orig_dev) { struct net_bridge_port *port; @@ -1505,14 +1507,14 @@ static __inline__ int handle_bridge(struct sk_buff **pskb, return 0; if (*pt_prev) { - *ret = deliver_skb(*pskb, *pt_prev); + *ret = deliver_skb(*pskb, *pt_prev, orig_dev); *pt_prev = NULL; } return br_handle_frame_hook(port, pskb); } #else -#define handle_bridge(skb, pt_prev, ret) (0) +#define handle_bridge(skb, pt_prev, ret, orig_dev) (0) #endif #ifdef CONFIG_NET_CLS_ACT @@ -1559,6 +1561,7 @@ static int ing_filter(struct sk_buff *skb) int netif_receive_skb(struct sk_buff *skb) { struct packet_type *ptype, *pt_prev; + struct net_device *orig_dev; int ret = NET_RX_DROP; unsigned short type; @@ -1569,7 +1572,7 @@ int netif_receive_skb(struct sk_buff *skb) if (!skb->stamp.tv_sec) net_timestamp(&skb->stamp); - skb_bond(skb); + orig_dev = skb_bond(skb); __get_cpu_var(netdev_rx_stat).total++; @@ -1590,14 +1593,14 @@ int netif_receive_skb(struct sk_buff *skb) list_for_each_entry_rcu(ptype, &ptype_all, list) { if (!ptype->dev || ptype->dev == skb->dev) { if (pt_prev) - ret = deliver_skb(skb, pt_prev); + ret = deliver_skb(skb, pt_prev, orig_dev); pt_prev = ptype; } } #ifdef CONFIG_NET_CLS_ACT if (pt_prev) { - ret = deliver_skb(skb, pt_prev); + ret = deliver_skb(skb, pt_prev, orig_dev); pt_prev = NULL; /* noone else should process this after*/ } else { skb->tc_verd = SET_TC_OK2MUNGE(skb->tc_verd); @@ -1616,7 +1619,7 @@ ncls: handle_diverter(skb); - if (handle_bridge(&skb, &pt_prev, &ret)) + if (handle_bridge(&skb, &pt_prev, &ret, orig_dev)) goto out; type = skb->protocol; @@ -1624,13 +1627,13 @@ ncls: if (ptype->type == type && (!ptype->dev || ptype->dev == skb->dev)) { if (pt_prev) - ret = deliver_skb(skb, pt_prev); + ret = deliver_skb(skb, pt_prev, orig_dev); pt_prev = ptype; } } if (pt_prev) { - ret = pt_prev->func(skb, skb->dev, 
pt_prev); + ret = pt_prev->func(skb, skb->dev, pt_prev, orig_dev); } else { kfree_skb(skb); /* Jamal, now you will not able to escape explaining diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 16df7bd77e78..ef498cb9f786 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -333,7 +333,6 @@ struct sk_buff *skb_clone(struct sk_buff *skb, unsigned int __nocast gfp_mask) n->sk = NULL; C(stamp); C(dev); - C(real_dev); C(h); C(nh); C(mac); @@ -397,7 +396,6 @@ static void copy_skb_header(struct sk_buff *new, const struct sk_buff *old) new->sk = NULL; new->dev = old->dev; - new->real_dev = old->real_dev; new->priority = old->priority; new->protocol = old->protocol; new->dst = dst_clone(old->dst); diff --git a/net/decnet/af_decnet.c b/net/decnet/af_decnet.c index 0c30409fe9e5..bd49dd97a09c 100644 --- a/net/decnet/af_decnet.c +++ b/net/decnet/af_decnet.c @@ -2064,7 +2064,7 @@ static struct notifier_block dn_dev_notifier = { .notifier_call = dn_device_event, }; -extern int dn_route_rcv(struct sk_buff *, struct net_device *, struct packet_type *); +extern int dn_route_rcv(struct sk_buff *, struct net_device *, struct packet_type *, struct net_device *); static struct packet_type dn_dix_packet_type = { .type = __constant_htons(ETH_P_DNA_RT), diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c index 2399fa8a3f86..2c915f305be3 100644 --- a/net/decnet/dn_route.c +++ b/net/decnet/dn_route.c @@ -572,7 +572,7 @@ static int dn_route_ptp_hello(struct sk_buff *skb) return NET_RX_SUCCESS; } -int dn_route_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt) +int dn_route_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev) { struct dn_skb_cb *cb; unsigned char flags = 0; diff --git a/net/econet/af_econet.c b/net/econet/af_econet.c index b807a314269e..8f0639905558 100644 --- a/net/econet/af_econet.c +++ b/net/econet/af_econet.c @@ -1009,7 +1009,7 @@ release: * Receive an Econet frame from a device. 
*/ -static int econet_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt) +static int econet_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev) { struct ec_framehdr *hdr; struct sock *sk; diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c index a642fd612853..6eb9c549d643 100644 --- a/net/ipv4/arp.c +++ b/net/ipv4/arp.c @@ -700,7 +700,7 @@ void arp_send(int type, int ptype, u32 dest_ip, static void parp_redo(struct sk_buff *skb) { nf_reset(skb); - arp_rcv(skb, skb->dev, NULL); + arp_rcv(skb, skb->dev, NULL, skb->dev); } /* @@ -927,7 +927,7 @@ out: * Receive an arp request from the device layer. */ -int arp_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt) +int arp_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev) { struct arphdr *arp; diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c index c703528e0bcd..d603247bdfe9 100644 --- a/net/ipv4/ip_input.c +++ b/net/ipv4/ip_input.c @@ -358,7 +358,7 @@ drop: /* * Main IP Receive routine. */ -int ip_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt) +int ip_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev) { struct iphdr *iph; diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c index d2bf8e1930a3..63e106605f28 100644 --- a/net/ipv4/ipconfig.c +++ b/net/ipv4/ipconfig.c @@ -393,7 +393,7 @@ static int __init ic_defaults(void) #ifdef IPCONFIG_RARP -static int ic_rarp_recv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt); +static int ic_rarp_recv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev); static struct packet_type rarp_packet_type __initdata = { .type = __constant_htons(ETH_P_RARP), @@ -414,7 +414,7 @@ static inline void ic_rarp_cleanup(void) * Process received RARP packet. 
*/ static int __init -ic_rarp_recv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt) +ic_rarp_recv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev) { struct arphdr *rarp; unsigned char *rarp_ptr; @@ -555,7 +555,7 @@ struct bootp_pkt { /* BOOTP packet format */ #define DHCPRELEASE 7 #define DHCPINFORM 8 -static int ic_bootp_recv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt); +static int ic_bootp_recv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev); static struct packet_type bootp_packet_type __initdata = { .type = __constant_htons(ETH_P_IP), @@ -823,7 +823,7 @@ static void __init ic_do_bootp_ext(u8 *ext) /* * Receive BOOTP reply. */ -static int __init ic_bootp_recv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt) +static int __init ic_bootp_recv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev) { struct bootp_pkt *b; struct iphdr *h; diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c index 10fbb50daea4..ab51c0369e15 100644 --- a/net/ipv6/ip6_input.c +++ b/net/ipv6/ip6_input.c @@ -56,7 +56,7 @@ static inline int ip6_rcv_finish( struct sk_buff *skb) return dst_input(skb); } -int ipv6_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt) +int ipv6_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev) { struct ipv6hdr *hdr; u32 pkt_len; diff --git a/net/ipx/af_ipx.c b/net/ipx/af_ipx.c index 5a27e5df5886..3a13c5d1d4d2 100644 --- a/net/ipx/af_ipx.c +++ b/net/ipx/af_ipx.c @@ -1627,7 +1627,7 @@ out: return rc; } -static int ipx_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt) +static int ipx_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev) { /* NULL here for pt means the packet was looped back */ struct ipx_interface 
*intrfc; diff --git a/net/irda/irlap_frame.c b/net/irda/irlap_frame.c index eb65b4925b51..3e9a06abbdd0 100644 --- a/net/irda/irlap_frame.c +++ b/net/irda/irlap_frame.c @@ -1303,7 +1303,7 @@ static void irlap_recv_test_frame(struct irlap_cb *self, struct sk_buff *skb, * Jean II */ int irlap_driver_rcv(struct sk_buff *skb, struct net_device *dev, - struct packet_type *ptype) + struct packet_type *ptype, struct net_device *orig_dev) { struct irlap_info info; struct irlap_cb *self; diff --git a/net/irda/irmod.c b/net/irda/irmod.c index 6ffaed4544e9..634901dd156f 100644 --- a/net/irda/irmod.c +++ b/net/irda/irmod.c @@ -54,7 +54,7 @@ extern int irsock_init(void); extern void irsock_cleanup(void); /* irlap_frame.c */ extern int irlap_driver_rcv(struct sk_buff *, struct net_device *, - struct packet_type *); + struct packet_type *, struct net_device *); /* * Module parameters diff --git a/net/llc/llc_core.c b/net/llc/llc_core.c index 5ff02c080a0b..9727455bf0e7 100644 --- a/net/llc/llc_core.c +++ b/net/llc/llc_core.c @@ -103,7 +103,8 @@ out: struct llc_sap *llc_sap_open(unsigned char lsap, int (*func)(struct sk_buff *skb, struct net_device *dev, - struct packet_type *pt)) + struct packet_type *pt, + struct net_device *orig_dev)) { struct llc_sap *sap = llc_sap_find(lsap); diff --git a/net/llc/llc_input.c b/net/llc/llc_input.c index 4da6976efc9c..13b46240b7a1 100644 --- a/net/llc/llc_input.c +++ b/net/llc/llc_input.c @@ -132,7 +132,7 @@ static inline int llc_fixup_skb(struct sk_buff *skb) * data now), it queues this frame in the connection's backlog. 
*/ int llc_rcv(struct sk_buff *skb, struct net_device *dev, - struct packet_type *pt) + struct packet_type *pt, struct net_device *orig_dev) { struct llc_sap *sap; struct llc_pdu_sn *pdu; @@ -165,7 +165,7 @@ int llc_rcv(struct sk_buff *skb, struct net_device *dev, * LLC functionality */ if (sap->rcv_func) { - sap->rcv_func(skb, dev, pt); + sap->rcv_func(skb, dev, pt, orig_dev); goto out; } dest = llc_pdu_type(skb); diff --git a/net/netrom/nr_dev.c b/net/netrom/nr_dev.c index 220bf7494f71..83eb41d9b937 100644 --- a/net/netrom/nr_dev.c +++ b/net/netrom/nr_dev.c @@ -64,7 +64,7 @@ int nr_rx_ip(struct sk_buff *skb, struct net_device *dev) skb->nh.raw = skb->data; skb->pkt_type = PACKET_HOST; - ip_rcv(skb, skb->dev, NULL); + ip_rcv(skb, skb->dev, NULL, skb->dev); return 1; } diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index c9d5980aa4de..deb5f6f7f858 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -241,7 +241,7 @@ static struct proto_ops packet_ops; #ifdef CONFIG_SOCK_PACKET static struct proto_ops packet_ops_spkt; -static int packet_rcv_spkt(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt) +static int packet_rcv_spkt(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev) { struct sock *sk; struct sockaddr_pkt *spkt; @@ -441,7 +441,7 @@ static inline unsigned run_filter(struct sk_buff *skb, struct sock *sk, unsigned we will not harm anyone. 
*/ -static int packet_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt) +static int packet_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev) { struct sock *sk; struct sockaddr_ll *sll; @@ -546,7 +546,7 @@ drop: } #ifdef CONFIG_PACKET_MMAP -static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt) +static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev) { struct sock *sk; struct packet_sock *po; diff --git a/net/x25/x25_dev.c b/net/x25/x25_dev.c index 36fc3bf6d882..adfe7b8df355 100644 --- a/net/x25/x25_dev.c +++ b/net/x25/x25_dev.c @@ -81,7 +81,7 @@ static int x25_receive_data(struct sk_buff *skb, struct x25_neigh *nb) } int x25_lapb_receive_frame(struct sk_buff *skb, struct net_device *dev, - struct packet_type *ptype) + struct packet_type *ptype, struct net_device *orig_dev) { struct sk_buff *nskb; struct x25_neigh *nb; From 0742fd53a3774781255bd1e471e7aa2e4a82d5f7 Mon Sep 17 00:00:00 2001 From: Adrian Bunk Date: Tue, 9 Aug 2005 19:35:47 -0700 Subject: [PATCH 279/584] [IPV4]: possible cleanups This patch contains the following possible cleanups: - make needlessly global code static - #if 0 the following unused global function: - xfrm4_state.c: xfrm4_state_fini - remove the following unneeded EXPORT_SYMBOL's: - ip_output.c: ip_finish_output - ip_output.c: sysctl_ip_default_ttl - fib_frontend.c: ip_dev_find - inetpeer.c: inet_peer_idlock - ip_options.c: ip_options_compile - ip_options.c: ip_options_undo - net/core/request_sock.c: sysctl_max_syn_backlog Signed-off-by: Adrian Bunk Signed-off-by: David S. 
Miller --- include/net/ip.h | 2 -- include/net/route.h | 4 ---- include/net/xfrm.h | 1 - net/core/request_sock.c | 1 - net/ipv4/fib_frontend.c | 1 - net/ipv4/inetpeer.c | 2 -- net/ipv4/ip_options.c | 3 --- net/ipv4/ip_output.c | 7 +------ net/ipv4/multipath_drr.c | 2 +- net/ipv4/route.c | 4 +++- net/ipv4/xfrm4_state.c | 2 ++ 11 files changed, 7 insertions(+), 22 deletions(-) diff --git a/include/net/ip.h b/include/net/ip.h index 2570b536c8f4..c16fb6ac3446 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -140,8 +140,6 @@ struct ip_reply_arg { void ip_send_reply(struct sock *sk, struct sk_buff *skb, struct ip_reply_arg *arg, unsigned int len); -extern int ip_finish_output(struct sk_buff *skb); - struct ipv4_config { int log_martians; diff --git a/include/net/route.h b/include/net/route.h index c3cd069a9aca..63c94558236d 100644 --- a/include/net/route.h +++ b/include/net/route.h @@ -105,10 +105,6 @@ struct rt_cache_stat unsigned int out_hlist_search; }; -extern struct rt_cache_stat *rt_cache_stat; -#define RT_CACHE_STAT_INC(field) \ - (per_cpu_ptr(rt_cache_stat, raw_smp_processor_id())->field++) - extern struct ip_rt_acct *ip_rt_acct; struct in_device; diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 868ef88ef971..a9d0d8c5dfbf 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -818,7 +818,6 @@ extern void xfrm6_init(void); extern void xfrm6_fini(void); extern void xfrm_state_init(void); extern void xfrm4_state_init(void); -extern void xfrm4_state_fini(void); extern void xfrm6_state_init(void); extern void xfrm6_state_fini(void); diff --git a/net/core/request_sock.c b/net/core/request_sock.c index 4e99ce5c08f2..98f0fc923f91 100644 --- a/net/core/request_sock.c +++ b/net/core/request_sock.c @@ -32,7 +32,6 @@ * Further increasing requires to change hash table size. 
*/ int sysctl_max_syn_backlog = 256; -EXPORT_SYMBOL(sysctl_max_syn_backlog); int reqsk_queue_alloc(struct request_sock_queue *queue, const int nr_table_entries) diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index cd8e45ab9580..e5722084239b 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -662,5 +662,4 @@ void __init ip_fib_init(void) } EXPORT_SYMBOL(inet_addr_type); -EXPORT_SYMBOL(ip_dev_find); EXPORT_SYMBOL(ip_rt_ioctl); diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c index ab18a853d7ce..3c513ceaca76 100644 --- a/net/ipv4/inetpeer.c +++ b/net/ipv4/inetpeer.c @@ -459,5 +459,3 @@ static void peer_check_expire(unsigned long dummy) peer_total / inet_peer_threshold * HZ; add_timer(&peer_periodic_timer); } - -EXPORT_SYMBOL(inet_peer_idlock); diff --git a/net/ipv4/ip_options.c b/net/ipv4/ip_options.c index 6d89f3f3e701..7e02ba584079 100644 --- a/net/ipv4/ip_options.c +++ b/net/ipv4/ip_options.c @@ -620,6 +620,3 @@ int ip_options_rcv_srr(struct sk_buff *skb) } return 0; } - -EXPORT_SYMBOL(ip_options_compile); -EXPORT_SYMBOL(ip_options_undo); diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 766564cb4207..c934f5316c3b 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -205,7 +205,7 @@ static inline int ip_finish_output2(struct sk_buff *skb) return -EINVAL; } -int ip_finish_output(struct sk_buff *skb) +static int ip_finish_output(struct sk_buff *skb) { struct net_device *dev = skb->dst->dev; @@ -1328,12 +1328,7 @@ void __init ip_init(void) #endif } -EXPORT_SYMBOL(ip_finish_output); EXPORT_SYMBOL(ip_fragment); EXPORT_SYMBOL(ip_generic_getfrag); EXPORT_SYMBOL(ip_queue_xmit); EXPORT_SYMBOL(ip_send_check); - -#ifdef CONFIG_SYSCTL -EXPORT_SYMBOL(sysctl_ip_default_ttl); -#endif diff --git a/net/ipv4/multipath_drr.c b/net/ipv4/multipath_drr.c index c9cf8726051d..db67373f9b34 100644 --- a/net/ipv4/multipath_drr.c +++ b/net/ipv4/multipath_drr.c @@ -107,7 +107,7 @@ static int drr_dev_event(struct notifier_block 
*this, return NOTIFY_DONE; } -struct notifier_block drr_dev_notifier = { +static struct notifier_block drr_dev_notifier = { .notifier_call = drr_dev_event, }; diff --git a/net/ipv4/route.c b/net/ipv4/route.c index d675ff80b04d..3aef0e15460f 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -240,7 +240,9 @@ static unsigned rt_hash_mask; static int rt_hash_log; static unsigned int rt_hash_rnd; -struct rt_cache_stat *rt_cache_stat; +static struct rt_cache_stat *rt_cache_stat; +#define RT_CACHE_STAT_INC(field) \ + (per_cpu_ptr(rt_cache_stat, raw_smp_processor_id())->field++) static int rt_intern_hash(unsigned hash, struct rtable *rth, struct rtable **res); diff --git a/net/ipv4/xfrm4_state.c b/net/ipv4/xfrm4_state.c index 050611d7a967..d23e07fc81fa 100644 --- a/net/ipv4/xfrm4_state.c +++ b/net/ipv4/xfrm4_state.c @@ -128,8 +128,10 @@ void __init xfrm4_state_init(void) xfrm_state_register_afinfo(&xfrm4_state_afinfo); } +#if 0 void __exit xfrm4_state_fini(void) { xfrm_state_unregister_afinfo(&xfrm4_state_afinfo); } +#endif /* 0 */ From 86e65da9c1fc6fb421b9f796b597b3eced6b55ab Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 9 Aug 2005 19:36:29 -0700 Subject: [PATCH 280/584] [NET]: Remove explicit initializations of skb->input_dev Instead, set it in one place, namely the beginning of netif_receive_skb(). Based upon suggestions from Jamal Hadi Salim. Signed-off-by: David S. 
Miller --- drivers/isdn/i4l/isdn_net.c | 1 - drivers/isdn/i4l/isdn_ppp.c | 1 - drivers/net/ppp_generic.c | 1 - include/net/pkt_cls.h | 6 +++--- include/net/x25device.h | 1 - net/core/dev.c | 10 +++++----- net/ethernet/eth.c | 1 - 7 files changed, 8 insertions(+), 13 deletions(-) diff --git a/drivers/isdn/i4l/isdn_net.c b/drivers/isdn/i4l/isdn_net.c index f30e8e63ae0d..96c115e13389 100644 --- a/drivers/isdn/i4l/isdn_net.c +++ b/drivers/isdn/i4l/isdn_net.c @@ -1786,7 +1786,6 @@ isdn_net_receive(struct net_device *ndev, struct sk_buff *skb) lp->stats.rx_bytes += skb->len; } skb->dev = ndev; - skb->input_dev = ndev; skb->pkt_type = PACKET_HOST; skb->mac.raw = skb->data; #ifdef ISDN_DEBUG_NET_DUMP diff --git a/drivers/isdn/i4l/isdn_ppp.c b/drivers/isdn/i4l/isdn_ppp.c index 260a323a96d3..d97a9be5469c 100644 --- a/drivers/isdn/i4l/isdn_ppp.c +++ b/drivers/isdn/i4l/isdn_ppp.c @@ -1177,7 +1177,6 @@ isdn_ppp_push_higher(isdn_net_dev * net_dev, isdn_net_local * lp, struct sk_buff mlp->huptimer = 0; #endif /* CONFIG_IPPP_FILTER */ skb->dev = dev; - skb->input_dev = dev; skb->mac.raw = skb->data; netif_rx(skb); /* net_dev->local->stats.rx_packets++; done in isdn_net.c */ diff --git a/drivers/net/ppp_generic.c b/drivers/net/ppp_generic.c index a32668e88e09..bb71638a7c44 100644 --- a/drivers/net/ppp_generic.c +++ b/drivers/net/ppp_generic.c @@ -1657,7 +1657,6 @@ ppp_receive_nonmp_frame(struct ppp *ppp, struct sk_buff *skb) skb->dev = ppp->dev; skb->protocol = htons(npindex_to_ethertype[npi]); skb->mac.raw = skb->data; - skb->input_dev = ppp->dev; netif_rx(skb); ppp->dev->last_rx = jiffies; } diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h index 4abda6aec05a..b902d24a3256 100644 --- a/include/net/pkt_cls.h +++ b/include/net/pkt_cls.h @@ -352,10 +352,10 @@ tcf_change_indev(struct tcf_proto *tp, char *indev, struct rtattr *indev_tlv) static inline int tcf_match_indev(struct sk_buff *skb, char *indev) { - if (0 != indev[0]) { - if (NULL == skb->input_dev) + if (indev[0]) { 
+ if (!skb->input_dev) return 0; - else if (0 != strcmp(indev, skb->input_dev->name)) + if (strcmp(indev, skb->input_dev->name)) return 0; } diff --git a/include/net/x25device.h b/include/net/x25device.h index d45ae883bd1d..1a318374faef 100644 --- a/include/net/x25device.h +++ b/include/net/x25device.h @@ -8,7 +8,6 @@ static inline __be16 x25_type_trans(struct sk_buff *skb, struct net_device *dev) { skb->mac.raw = skb->data; - skb->input_dev = skb->dev = dev; skb->pkt_type = PACKET_HOST; return htons(ETH_P_X25); diff --git a/net/core/dev.c b/net/core/dev.c index e1cc162bf295..9d153eb1e8cf 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1536,17 +1536,14 @@ static int ing_filter(struct sk_buff *skb) __u32 ttl = (__u32) G_TC_RTTL(skb->tc_verd); if (MAX_RED_LOOP < ttl++) { printk("Redir loop detected Dropping packet (%s->%s)\n", - skb->input_dev?skb->input_dev->name:"??",skb->dev->name); + skb->input_dev->name, skb->dev->name); return TC_ACT_SHOT; } skb->tc_verd = SET_TC_RTTL(skb->tc_verd,ttl); skb->tc_verd = SET_TC_AT(skb->tc_verd,AT_INGRESS); - if (NULL == skb->input_dev) { - skb->input_dev = skb->dev; - printk("ing_filter: fixed %s out %s\n",skb->input_dev->name,skb->dev->name); - } + spin_lock(&dev->ingress_lock); if ((q = dev->qdisc_ingress) != NULL) result = q->enqueue(skb, q); @@ -1572,6 +1569,9 @@ int netif_receive_skb(struct sk_buff *skb) if (!skb->stamp.tv_sec) net_timestamp(&skb->stamp); + if (!skb->input_dev) + skb->input_dev = skb->dev; + orig_dev = skb_bond(skb); __get_cpu_var(netdev_rx_stat).total++; diff --git a/net/ethernet/eth.c b/net/ethernet/eth.c index f6dbfb99b14d..f444a2f2675f 100644 --- a/net/ethernet/eth.c +++ b/net/ethernet/eth.c @@ -163,7 +163,6 @@ __be16 eth_type_trans(struct sk_buff *skb, struct net_device *dev) skb->mac.raw=skb->data; skb_pull(skb,ETH_HLEN); eth = eth_hdr(skb); - skb->input_dev = dev; if(*eth->h_dest&1) { From 373ac73595491b7c1f2f10cb37e9b7bae6901227 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Tue, 9 Aug 2005 
19:36:53 -0700 Subject: [PATCH 281/584] [NETFILTER]: C99 initializers for NAT protocols Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- net/ipv4/netfilter/ip_nat_proto_icmp.c | 20 +++++++++++--------- net/ipv4/netfilter/ip_nat_proto_tcp.c | 20 +++++++++++--------- net/ipv4/netfilter/ip_nat_proto_udp.c | 20 +++++++++++--------- net/ipv4/netfilter/ip_nat_proto_unknown.c | 13 +++++++------ 4 files changed, 40 insertions(+), 33 deletions(-) diff --git a/net/ipv4/netfilter/ip_nat_proto_icmp.c b/net/ipv4/netfilter/ip_nat_proto_icmp.c index 38fdfc2093c4..7ed2fdb53457 100644 --- a/net/ipv4/netfilter/ip_nat_proto_icmp.c +++ b/net/ipv4/netfilter/ip_nat_proto_icmp.c @@ -106,16 +106,18 @@ icmp_print_range(char *buffer, const struct ip_nat_range *range) else return 0; } -struct ip_nat_protocol ip_nat_protocol_icmp -= { "ICMP", IPPROTO_ICMP, THIS_MODULE, - icmp_manip_pkt, - icmp_in_range, - icmp_unique_tuple, - icmp_print, - icmp_print_range, +struct ip_nat_protocol ip_nat_protocol_icmp = { + .name = "ICMP", + .protonum = IPPROTO_ICMP, + .me = THIS_MODULE, + .manip_pkt = icmp_manip_pkt, + .in_range = icmp_in_range, + .unique_tuple = icmp_unique_tuple, + .print = icmp_print, + .print_range = icmp_print_range, #if defined(CONFIG_IP_NF_CONNTRACK_NETLINK) || \ defined(CONFIG_IP_NF_CONNTRACK_NETLINK_MODULE) - ip_nat_port_range_to_nfattr, - ip_nat_port_nfattr_to_range, + .range_to_nfattr = ip_nat_port_range_to_nfattr, + .nfattr_to_range = ip_nat_port_nfattr_to_range, #endif }; diff --git a/net/ipv4/netfilter/ip_nat_proto_tcp.c b/net/ipv4/netfilter/ip_nat_proto_tcp.c index f03cd0f0c2bf..6113a16af867 100644 --- a/net/ipv4/netfilter/ip_nat_proto_tcp.c +++ b/net/ipv4/netfilter/ip_nat_proto_tcp.c @@ -170,16 +170,18 @@ tcp_print_range(char *buffer, const struct ip_nat_range *range) else return 0; } -struct ip_nat_protocol ip_nat_protocol_tcp -= { "TCP", IPPROTO_TCP, THIS_MODULE, - tcp_manip_pkt, - tcp_in_range, - tcp_unique_tuple, - tcp_print, - tcp_print_range, +struct
ip_nat_protocol ip_nat_protocol_tcp = { + .name = "TCP", + .protonum = IPPROTO_TCP, + .me = THIS_MODULE, + .manip_pkt = tcp_manip_pkt, + .in_range = tcp_in_range, + .unique_tuple = tcp_unique_tuple, + .print = tcp_print, + .print_range = tcp_print_range, #if defined(CONFIG_IP_NF_CONNTRACK_NETLINK) || \ defined(CONFIG_IP_NF_CONNTRACK_NETLINK_MODULE) - ip_nat_port_range_to_nfattr, - ip_nat_port_nfattr_to_range, + .range_to_nfattr = ip_nat_port_range_to_nfattr, + .nfattr_to_range = ip_nat_port_nfattr_to_range, #endif }; diff --git a/net/ipv4/netfilter/ip_nat_proto_udp.c b/net/ipv4/netfilter/ip_nat_proto_udp.c index 7a4e66ecbc0a..689478e637a7 100644 --- a/net/ipv4/netfilter/ip_nat_proto_udp.c +++ b/net/ipv4/netfilter/ip_nat_proto_udp.c @@ -156,16 +156,18 @@ udp_print_range(char *buffer, const struct ip_nat_range *range) else return 0; } -struct ip_nat_protocol ip_nat_protocol_udp -= { "UDP", IPPROTO_UDP, THIS_MODULE, - udp_manip_pkt, - udp_in_range, - udp_unique_tuple, - udp_print, - udp_print_range, +struct ip_nat_protocol ip_nat_protocol_udp = { + .name = "UDP", + .protonum = IPPROTO_UDP, + .me = THIS_MODULE, + .manip_pkt = udp_manip_pkt, + .in_range = udp_in_range, + .unique_tuple = udp_unique_tuple, + .print = udp_print, + .print_range = udp_print_range, #if defined(CONFIG_IP_NF_CONNTRACK_NETLINK) || \ defined(CONFIG_IP_NF_CONNTRACK_NETLINK_MODULE) - ip_nat_port_range_to_nfattr, - ip_nat_port_nfattr_to_range, + .range_to_nfattr = ip_nat_port_range_to_nfattr, + .nfattr_to_range = ip_nat_port_nfattr_to_range, #endif }; diff --git a/net/ipv4/netfilter/ip_nat_proto_unknown.c b/net/ipv4/netfilter/ip_nat_proto_unknown.c index 512d8f2fb824..99bbef56f84e 100644 --- a/net/ipv4/netfilter/ip_nat_proto_unknown.c +++ b/net/ipv4/netfilter/ip_nat_proto_unknown.c @@ -61,10 +61,11 @@ unknown_print_range(char *buffer, const struct ip_nat_range *range) } struct ip_nat_protocol ip_nat_unknown_protocol = { - "unknown", 0, THIS_MODULE, - unknown_manip_pkt, - unknown_in_range, - 
unknown_unique_tuple, - unknown_print, - unknown_print_range + .name = "unknown", + .me = THIS_MODULE, + .manip_pkt = unknown_manip_pkt, + .in_range = unknown_in_range, + .unique_tuple = unknown_unique_tuple, + .print = unknown_print, + .print_range = unknown_print_range }; From 089af26c706d1473f641c909fee7c878d29c1f1a Mon Sep 17 00:00:00 2001 From: Harald Welte Date: Tue, 9 Aug 2005 19:37:23 -0700 Subject: [PATCH 282/584] [NETFILTER]: Rename skb_ip_make_writable() to skb_make_writable() There is nothing IPv4-specific in it. In fact, it was already used by IPv6, too... Upcoming nfnetlink_queue code will use it for any kind of packet. Signed-off-by: Harald Welte Signed-off-by: David S. Miller --- include/linux/netfilter.h | 5 +++++ include/linux/netfilter_ipv4.h | 5 ----- net/core/netfilter.c | 6 +++--- net/ipv4/netfilter/ip_nat_core.c | 4 ++-- net/ipv4/netfilter/ip_nat_helper.c | 8 ++++---- net/ipv4/netfilter/ip_nat_proto_icmp.c | 2 +- net/ipv4/netfilter/ip_nat_proto_tcp.c | 2 +- net/ipv4/netfilter/ip_nat_proto_udp.c | 2 +- net/ipv4/netfilter/ip_nat_snmp_basic.c | 2 +- net/ipv4/netfilter/ip_queue.c | 2 +- net/ipv4/netfilter/ipt_DSCP.c | 2 +- net/ipv4/netfilter/ipt_ECN.c | 4 ++-- net/ipv4/netfilter/ipt_TCPMSS.c | 2 +- net/ipv4/netfilter/ipt_TOS.c | 2 +- net/ipv6/netfilter/ip6_queue.c | 2 +- 15 files changed, 25 insertions(+), 25 deletions(-) diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h index ec60856408fd..54b97a1baba5 100644 --- a/include/linux/netfilter.h +++ b/include/linux/netfilter.h @@ -193,6 +193,11 @@ extern void nf_ct_attach(struct sk_buff *, struct sk_buff *); /* FIXME: Before cache is ever used, this must be implemented for real. */ extern void nf_invalidate_cache(int pf); +/* Call this before modifying an existing packet: ensures it is + modifiable and linear to the point you care about (writable_len). + Returns true or false. 
*/ +extern int skb_make_writable(struct sk_buff **pskb, unsigned int writable_len); + #else /* !CONFIG_NETFILTER */ #define NF_HOOK(pf, hook, skb, indev, outdev, okfn) (okfn)(skb) static inline void nf_ct_attach(struct sk_buff *new, struct sk_buff *skb) {} diff --git a/include/linux/netfilter_ipv4.h b/include/linux/netfilter_ipv4.h index 552815b8193e..fdc4a9527343 100644 --- a/include/linux/netfilter_ipv4.h +++ b/include/linux/netfilter_ipv4.h @@ -80,11 +80,6 @@ enum nf_ip_hook_priorities { #ifdef __KERNEL__ extern int ip_route_me_harder(struct sk_buff **pskb); -/* Call this before modifying an existing IP packet: ensures it is - modifiable and linear to the point you care about (writable_len). - Returns true or false. */ -extern int skb_ip_make_writable(struct sk_buff **pskb, - unsigned int writable_len); #endif /*__KERNEL__*/ #endif /*__LINUX_IP_NETFILTER_H*/ diff --git a/net/core/netfilter.c b/net/core/netfilter.c index 076c156d5eda..bbf9081a6804 100644 --- a/net/core/netfilter.c +++ b/net/core/netfilter.c @@ -512,8 +512,9 @@ int ip_route_me_harder(struct sk_buff **pskb) return 0; } EXPORT_SYMBOL(ip_route_me_harder); +#endif /*CONFIG_INET*/ -int skb_ip_make_writable(struct sk_buff **pskb, unsigned int writable_len) +int skb_make_writable(struct sk_buff **pskb, unsigned int writable_len) { struct sk_buff *nskb; @@ -540,8 +541,7 @@ copy_skb: *pskb = nskb; return 1; } -EXPORT_SYMBOL(skb_ip_make_writable); -#endif /*CONFIG_INET*/ +EXPORT_SYMBOL(skb_make_writable); /* Internal logging interface, which relies on the real LOG target modules */ diff --git a/net/ipv4/netfilter/ip_nat_core.c b/net/ipv4/netfilter/ip_nat_core.c index 567c802fecf0..1adedb743f60 100644 --- a/net/ipv4/netfilter/ip_nat_core.c +++ b/net/ipv4/netfilter/ip_nat_core.c @@ -359,7 +359,7 @@ manip_pkt(u_int16_t proto, struct iphdr *iph; struct ip_nat_protocol *p; - if (!skb_ip_make_writable(pskb, iphdroff + sizeof(*iph))) + if (!skb_make_writable(pskb, iphdroff + sizeof(*iph))) return 0; iph = (void 
*)(*pskb)->data + iphdroff; @@ -431,7 +431,7 @@ int icmp_reply_translation(struct sk_buff **pskb, struct ip_conntrack_tuple inner, target; int hdrlen = (*pskb)->nh.iph->ihl * 4; - if (!skb_ip_make_writable(pskb, hdrlen + sizeof(*inside))) + if (!skb_make_writable(pskb, hdrlen + sizeof(*inside))) return 0; inside = (void *)(*pskb)->data + (*pskb)->nh.iph->ihl*4; diff --git a/net/ipv4/netfilter/ip_nat_helper.c b/net/ipv4/netfilter/ip_nat_helper.c index 158f34f32c04..d2dd5d313556 100644 --- a/net/ipv4/netfilter/ip_nat_helper.c +++ b/net/ipv4/netfilter/ip_nat_helper.c @@ -168,7 +168,7 @@ ip_nat_mangle_tcp_packet(struct sk_buff **pskb, struct tcphdr *tcph; int datalen; - if (!skb_ip_make_writable(pskb, (*pskb)->len)) + if (!skb_make_writable(pskb, (*pskb)->len)) return 0; if (rep_len > match_len @@ -228,7 +228,7 @@ ip_nat_mangle_udp_packet(struct sk_buff **pskb, match_offset + match_len) return 0; - if (!skb_ip_make_writable(pskb, (*pskb)->len)) + if (!skb_make_writable(pskb, (*pskb)->len)) return 0; if (rep_len > match_len @@ -315,7 +315,7 @@ ip_nat_sack_adjust(struct sk_buff **pskb, optoff = (*pskb)->nh.iph->ihl*4 + sizeof(struct tcphdr); optend = (*pskb)->nh.iph->ihl*4 + tcph->doff*4; - if (!skb_ip_make_writable(pskb, optend)) + if (!skb_make_writable(pskb, optend)) return 0; dir = CTINFO2DIR(ctinfo); @@ -363,7 +363,7 @@ ip_nat_seq_adjust(struct sk_buff **pskb, this_way = &ct->nat.info.seq[dir]; other_way = &ct->nat.info.seq[!dir]; - if (!skb_ip_make_writable(pskb, (*pskb)->nh.iph->ihl*4+sizeof(*tcph))) + if (!skb_make_writable(pskb, (*pskb)->nh.iph->ihl*4+sizeof(*tcph))) return 0; tcph = (void *)(*pskb)->data + (*pskb)->nh.iph->ihl*4; diff --git a/net/ipv4/netfilter/ip_nat_proto_icmp.c b/net/ipv4/netfilter/ip_nat_proto_icmp.c index 7ed2fdb53457..938719043999 100644 --- a/net/ipv4/netfilter/ip_nat_proto_icmp.c +++ b/net/ipv4/netfilter/ip_nat_proto_icmp.c @@ -62,7 +62,7 @@ icmp_manip_pkt(struct sk_buff **pskb, struct icmphdr *hdr; unsigned int hdroff = iphdroff + 
iph->ihl*4; - if (!skb_ip_make_writable(pskb, hdroff + sizeof(*hdr))) + if (!skb_make_writable(pskb, hdroff + sizeof(*hdr))) return 0; hdr = (struct icmphdr *)((*pskb)->data + hdroff); diff --git a/net/ipv4/netfilter/ip_nat_proto_tcp.c b/net/ipv4/netfilter/ip_nat_proto_tcp.c index 6113a16af867..1d381bf68574 100644 --- a/net/ipv4/netfilter/ip_nat_proto_tcp.c +++ b/net/ipv4/netfilter/ip_nat_proto_tcp.c @@ -103,7 +103,7 @@ tcp_manip_pkt(struct sk_buff **pskb, if ((*pskb)->len >= hdroff + sizeof(struct tcphdr)) hdrsize = sizeof(struct tcphdr); - if (!skb_ip_make_writable(pskb, hdroff + hdrsize)) + if (!skb_make_writable(pskb, hdroff + hdrsize)) return 0; iph = (struct iphdr *)((*pskb)->data + iphdroff); diff --git a/net/ipv4/netfilter/ip_nat_proto_udp.c b/net/ipv4/netfilter/ip_nat_proto_udp.c index 689478e637a7..c4906e1aa24a 100644 --- a/net/ipv4/netfilter/ip_nat_proto_udp.c +++ b/net/ipv4/netfilter/ip_nat_proto_udp.c @@ -94,7 +94,7 @@ udp_manip_pkt(struct sk_buff **pskb, u32 oldip, newip; u16 *portptr, newport; - if (!skb_ip_make_writable(pskb, hdroff + sizeof(*hdr))) + if (!skb_make_writable(pskb, hdroff + sizeof(*hdr))) return 0; iph = (struct iphdr *)((*pskb)->data + iphdroff); diff --git a/net/ipv4/netfilter/ip_nat_snmp_basic.c b/net/ipv4/netfilter/ip_nat_snmp_basic.c index 2a48b6e635ae..93b2c5111bb2 100644 --- a/net/ipv4/netfilter/ip_nat_snmp_basic.c +++ b/net/ipv4/netfilter/ip_nat_snmp_basic.c @@ -1275,7 +1275,7 @@ static int help(struct sk_buff **pskb, return NF_DROP; } - if (!skb_ip_make_writable(pskb, (*pskb)->len)) + if (!skb_make_writable(pskb, (*pskb)->len)) return NF_DROP; spin_lock_bh(&snmp_lock); diff --git a/net/ipv4/netfilter/ip_queue.c b/net/ipv4/netfilter/ip_queue.c index bc0af8d8e910..ae975ac59c6a 100644 --- a/net/ipv4/netfilter/ip_queue.c +++ b/net/ipv4/netfilter/ip_queue.c @@ -388,7 +388,7 @@ ipq_mangle_ipv4(ipq_verdict_msg_t *v, struct ipq_queue_entry *e) } skb_put(e->skb, diff); } - if (!skb_ip_make_writable(&e->skb, v->data_len)) + if 
(!skb_make_writable(&e->skb, v->data_len)) return -ENOMEM; memcpy(e->skb->data, v->payload, v->data_len); e->skb->ip_summed = CHECKSUM_NONE; diff --git a/net/ipv4/netfilter/ipt_DSCP.c b/net/ipv4/netfilter/ipt_DSCP.c index 975476fef27a..6e319570a28c 100644 --- a/net/ipv4/netfilter/ipt_DSCP.c +++ b/net/ipv4/netfilter/ipt_DSCP.c @@ -39,7 +39,7 @@ target(struct sk_buff **pskb, if (((*pskb)->nh.iph->tos & IPT_DSCP_MASK) != sh_dscp) { u_int16_t diffs[2]; - if (!skb_ip_make_writable(pskb, sizeof(struct iphdr))) + if (!skb_make_writable(pskb, sizeof(struct iphdr))) return NF_DROP; diffs[0] = htons((*pskb)->nh.iph->tos) ^ 0xFFFF; diff --git a/net/ipv4/netfilter/ipt_ECN.c b/net/ipv4/netfilter/ipt_ECN.c index f63a9bc0e4d2..a1319693f648 100644 --- a/net/ipv4/netfilter/ipt_ECN.c +++ b/net/ipv4/netfilter/ipt_ECN.c @@ -31,7 +31,7 @@ set_ect_ip(struct sk_buff **pskb, const struct ipt_ECN_info *einfo) != (einfo->ip_ect & IPT_ECN_IP_MASK)) { u_int16_t diffs[2]; - if (!skb_ip_make_writable(pskb, sizeof(struct iphdr))) + if (!skb_make_writable(pskb, sizeof(struct iphdr))) return 0; diffs[0] = htons((*pskb)->nh.iph->tos) ^ 0xFFFF; @@ -66,7 +66,7 @@ set_ect_tcp(struct sk_buff **pskb, const struct ipt_ECN_info *einfo, int inward) tcph->cwr == einfo->proto.tcp.cwr))) return 1; - if (!skb_ip_make_writable(pskb, (*pskb)->nh.iph->ihl*4+sizeof(*tcph))) + if (!skb_make_writable(pskb, (*pskb)->nh.iph->ihl*4+sizeof(*tcph))) return 0; tcph = (void *)(*pskb)->nh.iph + (*pskb)->nh.iph->ihl*4; diff --git a/net/ipv4/netfilter/ipt_TCPMSS.c b/net/ipv4/netfilter/ipt_TCPMSS.c index 949288319ca8..8db70d6908c3 100644 --- a/net/ipv4/netfilter/ipt_TCPMSS.c +++ b/net/ipv4/netfilter/ipt_TCPMSS.c @@ -58,7 +58,7 @@ ipt_tcpmss_target(struct sk_buff **pskb, unsigned int i; u_int8_t *opt; - if (!skb_ip_make_writable(pskb, (*pskb)->len)) + if (!skb_make_writable(pskb, (*pskb)->len)) return NF_DROP; if ((*pskb)->ip_summed == CHECKSUM_HW && diff --git a/net/ipv4/netfilter/ipt_TOS.c b/net/ipv4/netfilter/ipt_TOS.c index 
49abb7eef0a4..deadb36d4428 100644 --- a/net/ipv4/netfilter/ipt_TOS.c +++ b/net/ipv4/netfilter/ipt_TOS.c @@ -33,7 +33,7 @@ target(struct sk_buff **pskb, if (((*pskb)->nh.iph->tos & IPTOS_TOS_MASK) != tosinfo->tos) { u_int16_t diffs[2]; - if (!skb_ip_make_writable(pskb, sizeof(struct iphdr))) + if (!skb_make_writable(pskb, sizeof(struct iphdr))) return NF_DROP; diffs[0] = htons((*pskb)->nh.iph->tos) ^ 0xFFFF; diff --git a/net/ipv6/netfilter/ip6_queue.c b/net/ipv6/netfilter/ip6_queue.c index 83ccedceed17..7130603a32c5 100644 --- a/net/ipv6/netfilter/ip6_queue.c +++ b/net/ipv6/netfilter/ip6_queue.c @@ -384,7 +384,7 @@ ipq_mangle_ipv6(ipq_verdict_msg_t *v, struct ipq_queue_entry *e) } skb_put(e->skb, diff); } - if (!skb_ip_make_writable(&e->skb, v->data_len)) + if (!skb_make_writable(&e->skb, v->data_len)) return -ENOMEM; memcpy(e->skb->data, v->payload, v->data_len); e->skb->ip_summed = CHECKSUM_NONE; From 020b4c12dbe3868d792a01d7c1470cd837abe10f Mon Sep 17 00:00:00 2001 From: Harald Welte Date: Tue, 9 Aug 2005 19:39:00 -0700 Subject: [PATCH 283/584] [NETFILTER]: Move ipv4 specific code from net/core/netfilter.c to net/ipv4/netfilter.c Netfilter cleanup - Move ipv4 code from net/core/netfilter.c to net/ipv4/netfilter.c - Move ipv6 netfilter code from net/ipv6/ip6_output.c to net/ipv6/netfilter.c Signed-off-by: Harald Welte Signed-off-by: David S. 
Miller --- net/core/netfilter.c | 72 --------------------------------------- net/ipv4/Makefile | 2 +- net/ipv4/netfilter.c | 79 +++++++++++++++++++++++++++++++++++++++++++ net/ipv6/Makefile | 2 +- net/ipv6/ip6_output.c | 32 ------------------ net/ipv6/ipv6_syms.c | 3 -- net/ipv6/netfilter.c | 43 +++++++++++++++++++++++ 7 files changed, 124 insertions(+), 109 deletions(-) create mode 100644 net/ipv4/netfilter.c create mode 100644 net/ipv6/netfilter.c diff --git a/net/core/netfilter.c b/net/core/netfilter.c index bbf9081a6804..9849357f6129 100644 --- a/net/core/netfilter.c +++ b/net/core/netfilter.c @@ -22,12 +22,7 @@ #include #include #include -#include -#include -#include #include -#include -#include /* In this code, we can be waiting indefinitely for userspace to * service a packet if a hook returns NF_QUEUE. We could keep a count @@ -447,73 +442,6 @@ void nf_reinject(struct sk_buff *skb, struct nf_info *info, return; } -#ifdef CONFIG_INET -/* route_me_harder function, used by iptable_nat, iptable_mangle + ip_queue */ -int ip_route_me_harder(struct sk_buff **pskb) -{ - struct iphdr *iph = (*pskb)->nh.iph; - struct rtable *rt; - struct flowi fl = {}; - struct dst_entry *odst; - unsigned int hh_len; - - /* some non-standard hacks like ipt_REJECT.c:send_reset() can cause - * packets with foreign saddr to appear on the NF_IP_LOCAL_OUT hook. - */ - if (inet_addr_type(iph->saddr) == RTN_LOCAL) { - fl.nl_u.ip4_u.daddr = iph->daddr; - fl.nl_u.ip4_u.saddr = iph->saddr; - fl.nl_u.ip4_u.tos = RT_TOS(iph->tos); - fl.oif = (*pskb)->sk ? (*pskb)->sk->sk_bound_dev_if : 0; -#ifdef CONFIG_IP_ROUTE_FWMARK - fl.nl_u.ip4_u.fwmark = (*pskb)->nfmark; -#endif - fl.proto = iph->protocol; - if (ip_route_output_key(&rt, &fl) != 0) - return -1; - - /* Drop old route. */ - dst_release((*pskb)->dst); - (*pskb)->dst = &rt->u.dst; - } else { - /* non-local src, find valid iif to satisfy - * rp-filter when calling ip_route_input. 
*/ - fl.nl_u.ip4_u.daddr = iph->saddr; - if (ip_route_output_key(&rt, &fl) != 0) - return -1; - - odst = (*pskb)->dst; - if (ip_route_input(*pskb, iph->daddr, iph->saddr, - RT_TOS(iph->tos), rt->u.dst.dev) != 0) { - dst_release(&rt->u.dst); - return -1; - } - dst_release(&rt->u.dst); - dst_release(odst); - } - - if ((*pskb)->dst->error) - return -1; - - /* Change in oif may mean change in hh_len. */ - hh_len = (*pskb)->dst->dev->hard_header_len; - if (skb_headroom(*pskb) < hh_len) { - struct sk_buff *nskb; - - nskb = skb_realloc_headroom(*pskb, hh_len); - if (!nskb) - return -1; - if ((*pskb)->sk) - skb_set_owner_w(nskb, (*pskb)->sk); - kfree_skb(*pskb); - *pskb = nskb; - } - - return 0; -} -EXPORT_SYMBOL(ip_route_me_harder); -#endif /*CONFIG_INET*/ - int skb_make_writable(struct sk_buff **pskb, unsigned int writable_len) { struct sk_buff *nskb; diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile index 55dc6cca1e7b..61c7386bcd2e 100644 --- a/net/ipv4/Makefile +++ b/net/ipv4/Makefile @@ -8,7 +8,7 @@ obj-y := route.o inetpeer.o protocol.o \ tcp.o tcp_input.o tcp_output.o tcp_timer.o tcp_ipv4.o \ tcp_minisocks.o tcp_cong.o \ datagram.o raw.o udp.o arp.o icmp.o devinet.o af_inet.o igmp.o \ - sysctl_net_ipv4.o fib_frontend.o fib_semantics.o + sysctl_net_ipv4.o fib_frontend.o fib_semantics.o netfilter.o obj-$(CONFIG_IP_FIB_HASH) += fib_hash.o obj-$(CONFIG_IP_FIB_TRIE) += fib_trie.o diff --git a/net/ipv4/netfilter.c b/net/ipv4/netfilter.c new file mode 100644 index 000000000000..6594d1c9697e --- /dev/null +++ b/net/ipv4/netfilter.c @@ -0,0 +1,79 @@ +#include + +#ifdef CONFIG_NETFILTER + +/* IPv4 specific functions of netfilter core */ +#include +#include + +#include +#include +#include +#include +#include + +/* route_me_harder function, used by iptable_nat, iptable_mangle + ip_queue */ +int ip_route_me_harder(struct sk_buff **pskb) +{ + struct iphdr *iph = (*pskb)->nh.iph; + struct rtable *rt; + struct flowi fl = {}; + struct dst_entry *odst; + unsigned int hh_len; + + /* 
some non-standard hacks like ipt_REJECT.c:send_reset() can cause + * packets with foreign saddr to appear on the NF_IP_LOCAL_OUT hook. + */ + if (inet_addr_type(iph->saddr) == RTN_LOCAL) { + fl.nl_u.ip4_u.daddr = iph->daddr; + fl.nl_u.ip4_u.saddr = iph->saddr; + fl.nl_u.ip4_u.tos = RT_TOS(iph->tos); + fl.oif = (*pskb)->sk ? (*pskb)->sk->sk_bound_dev_if : 0; +#ifdef CONFIG_IP_ROUTE_FWMARK + fl.nl_u.ip4_u.fwmark = (*pskb)->nfmark; +#endif + fl.proto = iph->protocol; + if (ip_route_output_key(&rt, &fl) != 0) + return -1; + + /* Drop old route. */ + dst_release((*pskb)->dst); + (*pskb)->dst = &rt->u.dst; + } else { + /* non-local src, find valid iif to satisfy + * rp-filter when calling ip_route_input. */ + fl.nl_u.ip4_u.daddr = iph->saddr; + if (ip_route_output_key(&rt, &fl) != 0) + return -1; + + odst = (*pskb)->dst; + if (ip_route_input(*pskb, iph->daddr, iph->saddr, + RT_TOS(iph->tos), rt->u.dst.dev) != 0) { + dst_release(&rt->u.dst); + return -1; + } + dst_release(&rt->u.dst); + dst_release(odst); + } + + if ((*pskb)->dst->error) + return -1; + + /* Change in oif may mean change in hh_len. 
*/ + hh_len = (*pskb)->dst->dev->hard_header_len; + if (skb_headroom(*pskb) < hh_len) { + struct sk_buff *nskb; + + nskb = skb_realloc_headroom(*pskb, hh_len); + if (!nskb) + return -1; + if ((*pskb)->sk) + skb_set_owner_w(nskb, (*pskb)->sk); + kfree_skb(*pskb); + *pskb = nskb; + } + + return 0; +} +EXPORT_SYMBOL(ip_route_me_harder); +#endif /* CONFIG_NETFILTER */ diff --git a/net/ipv6/Makefile b/net/ipv6/Makefile index b39e04940590..5bccea2d81b4 100644 --- a/net/ipv6/Makefile +++ b/net/ipv6/Makefile @@ -8,7 +8,7 @@ ipv6-objs := af_inet6.o anycast.o ip6_output.o ip6_input.o addrconf.o sit.o \ route.o ip6_fib.o ipv6_sockglue.o ndisc.o udp.o raw.o \ protocol.o icmp.o mcast.o reassembly.o tcp_ipv6.o \ exthdrs.o sysctl_net_ipv6.o datagram.o proc.o \ - ip6_flowlabel.o ipv6_syms.o + ip6_flowlabel.o ipv6_syms.o netfilter.o ipv6-$(CONFIG_XFRM) += xfrm6_policy.o xfrm6_state.o xfrm6_input.o \ xfrm6_output.o diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 590d2b797197..a7fcbcc83576 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -153,38 +153,6 @@ int ip6_output(struct sk_buff *skb) return ip6_output2(skb); } -#ifdef CONFIG_NETFILTER -int ip6_route_me_harder(struct sk_buff *skb) -{ - struct ipv6hdr *iph = skb->nh.ipv6h; - struct dst_entry *dst; - struct flowi fl = { - .oif = skb->sk ? skb->sk->sk_bound_dev_if : 0, - .nl_u = - { .ip6_u = - { .daddr = iph->daddr, - .saddr = iph->saddr, } }, - .proto = iph->nexthdr, - }; - - dst = ip6_route_output(skb->sk, &fl); - - if (dst->error) { - IP6_INC_STATS(IPSTATS_MIB_OUTNOROUTES); - LIMIT_NETDEBUG( - printk(KERN_DEBUG "ip6_route_me_harder: No more route.\n")); - dst_release(dst); - return -EINVAL; - } - - /* Drop old route. 
*/ - dst_release(skb->dst); - - skb->dst = dst; - return 0; -} -#endif - /* * xmit an sk_buff (used by TCP) */ diff --git a/net/ipv6/ipv6_syms.c b/net/ipv6/ipv6_syms.c index 5ade5a5d1990..37a4a99c9fe9 100644 --- a/net/ipv6/ipv6_syms.c +++ b/net/ipv6/ipv6_syms.c @@ -15,9 +15,6 @@ EXPORT_SYMBOL(ndisc_mc_map); EXPORT_SYMBOL(register_inet6addr_notifier); EXPORT_SYMBOL(unregister_inet6addr_notifier); EXPORT_SYMBOL(ip6_route_output); -#ifdef CONFIG_NETFILTER -EXPORT_SYMBOL(ip6_route_me_harder); -#endif EXPORT_SYMBOL(addrconf_lock); EXPORT_SYMBOL(ipv6_setsockopt); EXPORT_SYMBOL(ipv6_getsockopt); diff --git a/net/ipv6/netfilter.c b/net/ipv6/netfilter.c new file mode 100644 index 000000000000..5656d0959aba --- /dev/null +++ b/net/ipv6/netfilter.c @@ -0,0 +1,43 @@ +#include +#include + +#ifdef CONFIG_NETFILTER + +#include +#include +#include +#include +#include + +int ip6_route_me_harder(struct sk_buff *skb) +{ + struct ipv6hdr *iph = skb->nh.ipv6h; + struct dst_entry *dst; + struct flowi fl = { + .oif = skb->sk ? skb->sk->sk_bound_dev_if : 0, + .nl_u = + { .ip6_u = + { .daddr = iph->daddr, + .saddr = iph->saddr, } }, + .proto = iph->nexthdr, + }; + + dst = ip6_route_output(skb->sk, &fl); + + if (dst->error) { + IP6_INC_STATS(IPSTATS_MIB_OUTNOROUTES); + LIMIT_NETDEBUG( + printk(KERN_DEBUG "ip6_route_me_harder: No more route.\n")); + dst_release(dst); + return -EINVAL; + } + + /* Drop old route. */ + dst_release(skb->dst); + + skb->dst = dst; + return 0; +} +EXPORT_SYMBOL(ip6_route_me_harder); + +#endif /* CONFIG_NETFILTER */ From 4fdb3bb723db469717c6d38fda667d8b0fa86ebd Mon Sep 17 00:00:00 2001 From: Harald Welte Date: Tue, 9 Aug 2005 19:40:55 -0700 Subject: [PATCH 284/584] [NETLINK]: Add proper module refcounting for kernel netlink sockets.
- Remove bogus code for compiling netlink as module - Add module refcounting support for modules implementing a netlink protocol - Add support for autoloading modules that implement a netlink protocol as soon as someone opens a socket for that protocol Signed-off-by: Harald Welte Signed-off-by: David S. Miller --- drivers/w1/w1_int.c | 4 +- include/linux/net.h | 3 + include/linux/netlink.h | 2 +- kernel/audit.c | 3 +- lib/kobject_uevent.c | 3 +- net/bridge/netfilter/ebt_ulog.c | 2 +- net/core/rtnetlink.c | 2 +- net/decnet/netfilter/dn_rtmsg.c | 4 +- net/ipv4/fib_frontend.c | 2 +- net/ipv4/netfilter/ip_queue.c | 3 +- net/ipv4/netfilter/ipt_ULOG.c | 3 +- net/ipv4/tcp_diag.c | 3 +- net/ipv6/netfilter/ip6_queue.c | 2 +- net/netfilter/nfnetlink.c | 5 +- net/netlink/af_netlink.c | 108 +++++++++++++++++++++++++------- net/xfrm/xfrm_user.c | 4 +- security/selinux/netlink.c | 2 +- 17 files changed, 119 insertions(+), 36 deletions(-) diff --git a/drivers/w1/w1_int.c b/drivers/w1/w1_int.c index b5a5e04b6d37..8809788dac26 100644 --- a/drivers/w1/w1_int.c +++ b/drivers/w1/w1_int.c @@ -88,7 +88,7 @@ static struct w1_master * w1_alloc_dev(u32 id, int slave_count, int slave_ttl, dev->groups = 23; dev->seq = 1; - dev->nls = netlink_kernel_create(NETLINK_W1, NULL); + dev->nls = netlink_kernel_create(NETLINK_W1, NULL, THIS_MODULE); if (!dev->nls) { printk(KERN_ERR "Failed to create new netlink socket(%u) for w1 master %s.\n", NETLINK_NFLOG, dev->dev.bus_id); @@ -225,3 +225,5 @@ void w1_remove_master_device(struct w1_bus_master *bm) EXPORT_SYMBOL(w1_add_master_device); EXPORT_SYMBOL(w1_remove_master_device); + +MODULE_ALIAS_NET_PF_PROTO(PF_NETLINK, NETLINK_W1); diff --git a/include/linux/net.h b/include/linux/net.h index 20cb226b2268..39906619b9d7 100644 --- a/include/linux/net.h +++ b/include/linux/net.h @@ -282,5 +282,8 @@ static struct proto_ops name##_ops = { \ #define MODULE_ALIAS_NETPROTO(proto) \ MODULE_ALIAS("net-pf-" __stringify(proto)) +#define MODULE_ALIAS_NET_PF_PROTO(pf, 
proto) \ + MODULE_ALIAS("net-pf-" __stringify(pf) "-proto-" __stringify(proto)) + #endif /* __KERNEL__ */ #endif /* _LINUX_NET_H */ diff --git a/include/linux/netlink.h b/include/linux/netlink.h index 6552b71bfa73..1c50fea8995b 100644 --- a/include/linux/netlink.h +++ b/include/linux/netlink.h @@ -117,7 +117,7 @@ struct netlink_skb_parms #define NETLINK_CREDS(skb) (&NETLINK_CB((skb)).creds) -extern struct sock *netlink_kernel_create(int unit, void (*input)(struct sock *sk, int len)); +extern struct sock *netlink_kernel_create(int unit, void (*input)(struct sock *sk, int len), struct module *module); extern void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err); extern int netlink_unicast(struct sock *ssk, struct sk_buff *skb, __u32 pid, int nonblock); extern int netlink_broadcast(struct sock *ssk, struct sk_buff *skb, __u32 pid, diff --git a/kernel/audit.c b/kernel/audit.c index ef35166fdc29..ed4019563d56 100644 --- a/kernel/audit.c +++ b/kernel/audit.c @@ -514,7 +514,8 @@ static int __init audit_init(void) { printk(KERN_INFO "audit: initializing netlink socket (%s)\n", audit_default ? 
"enabled" : "disabled"); - audit_sock = netlink_kernel_create(NETLINK_AUDIT, audit_receive); + audit_sock = netlink_kernel_create(NETLINK_AUDIT, audit_receive, + THIS_MODULE); if (!audit_sock) audit_panic("cannot initialize netlink socket"); diff --git a/lib/kobject_uevent.c b/lib/kobject_uevent.c index 8e49d21057e4..88f4d746aa05 100644 --- a/lib/kobject_uevent.c +++ b/lib/kobject_uevent.c @@ -153,7 +153,8 @@ EXPORT_SYMBOL_GPL(kobject_uevent_atomic); static int __init kobject_uevent_init(void) { - uevent_sock = netlink_kernel_create(NETLINK_KOBJECT_UEVENT, NULL); + uevent_sock = netlink_kernel_create(NETLINK_KOBJECT_UEVENT, NULL, + THIS_MODULE); if (!uevent_sock) { printk(KERN_ERR diff --git a/net/bridge/netfilter/ebt_ulog.c b/net/bridge/netfilter/ebt_ulog.c index 01af4fcef26d..561d75c8ed5a 100644 --- a/net/bridge/netfilter/ebt_ulog.c +++ b/net/bridge/netfilter/ebt_ulog.c @@ -258,7 +258,7 @@ static int __init init(void) spin_lock_init(&ulog_buffers[i].lock); } - ebtulognl = netlink_kernel_create(NETLINK_NFLOG, NULL); + ebtulognl = netlink_kernel_create(NETLINK_NFLOG, NULL, THIS_MODULE); if (!ebtulognl) ret = -ENOMEM; else if ((ret = ebt_register_watcher(&ulog))) diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 4b1bb30e6381..9b3c61f1a37d 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -708,7 +708,7 @@ void __init rtnetlink_init(void) if (!rta_buf) panic("rtnetlink_init: cannot allocate rta_buf\n"); - rtnl = netlink_kernel_create(NETLINK_ROUTE, rtnetlink_rcv); + rtnl = netlink_kernel_create(NETLINK_ROUTE, rtnetlink_rcv, THIS_MODULE); if (rtnl == NULL) panic("rtnetlink_init: cannot initialize rtnetlink\n"); netlink_set_nonroot(NETLINK_ROUTE, NL_NONROOT_RECV); diff --git a/net/decnet/netfilter/dn_rtmsg.c b/net/decnet/netfilter/dn_rtmsg.c index 284a9998e53d..3068fddb2da3 100644 --- a/net/decnet/netfilter/dn_rtmsg.c +++ b/net/decnet/netfilter/dn_rtmsg.c @@ -138,7 +138,8 @@ static int __init init(void) { int rv = 0; - dnrmg = 
netlink_kernel_create(NETLINK_DNRTMSG, dnrmg_receive_user_sk); + dnrmg = netlink_kernel_create(NETLINK_DNRTMSG, dnrmg_receive_user_sk, + THIS_MODULE); if (dnrmg == NULL) { printk(KERN_ERR "dn_rtmsg: Cannot create netlink socket"); return -ENOMEM; @@ -162,6 +163,7 @@ static void __exit fini(void) MODULE_DESCRIPTION("DECnet Routing Message Grabulator"); MODULE_AUTHOR("Steven Whitehouse "); MODULE_LICENSE("GPL"); +MODULE_ALIAS_NET_PF_PROTO(PF_NETLINK, NETLINK_DNRTMSG); module_init(init); module_exit(fini); diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index e5722084239b..b5e2f1550c91 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -567,7 +567,7 @@ static void nl_fib_input(struct sock *sk, int len) static void nl_fib_lookup_init(void) { - netlink_kernel_create(NETLINK_FIB_LOOKUP, nl_fib_input); + netlink_kernel_create(NETLINK_FIB_LOOKUP, nl_fib_input, THIS_MODULE); } static void fib_disable_ip(struct net_device *dev, int force) diff --git a/net/ipv4/netfilter/ip_queue.c b/net/ipv4/netfilter/ip_queue.c index ae975ac59c6a..b237f7fcad92 100644 --- a/net/ipv4/netfilter/ip_queue.c +++ b/net/ipv4/netfilter/ip_queue.c @@ -692,7 +692,8 @@ init_or_cleanup(int init) goto cleanup; netlink_register_notifier(&ipq_nl_notifier); - ipqnl = netlink_kernel_create(NETLINK_FIREWALL, ipq_rcv_sk); + ipqnl = netlink_kernel_create(NETLINK_FIREWALL, ipq_rcv_sk, + THIS_MODULE); if (ipqnl == NULL) { printk(KERN_ERR "ip_queue: failed to create netlink socket\n"); goto cleanup_netlink_notifier; diff --git a/net/ipv4/netfilter/ipt_ULOG.c b/net/ipv4/netfilter/ipt_ULOG.c index 52a0076302a7..4ea8371ab270 100644 --- a/net/ipv4/netfilter/ipt_ULOG.c +++ b/net/ipv4/netfilter/ipt_ULOG.c @@ -62,6 +62,7 @@ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Harald Welte "); MODULE_DESCRIPTION("iptables userspace logging module"); +MODULE_ALIAS_NET_PF_PROTO(PF_NETLINK, NETLINK_NFLOG); #define ULOG_NL_EVENT 111 /* Harald's favorite number */ #define ULOG_MAXNLGROUPS 32 /* numer of 
nlgroups */ @@ -372,7 +373,7 @@ static int __init init(void) ulog_buffers[i].timer.data = i; } - nflognl = netlink_kernel_create(NETLINK_NFLOG, NULL); + nflognl = netlink_kernel_create(NETLINK_NFLOG, NULL, THIS_MODULE); if (!nflognl) return -ENOMEM; diff --git a/net/ipv4/tcp_diag.c b/net/ipv4/tcp_diag.c index f66945cb158f..f79bd11a4701 100644 --- a/net/ipv4/tcp_diag.c +++ b/net/ipv4/tcp_diag.c @@ -774,7 +774,8 @@ static void tcpdiag_rcv(struct sock *sk, int len) static int __init tcpdiag_init(void) { - tcpnl = netlink_kernel_create(NETLINK_TCPDIAG, tcpdiag_rcv); + tcpnl = netlink_kernel_create(NETLINK_TCPDIAG, tcpdiag_rcv, + THIS_MODULE); if (tcpnl == NULL) return -ENOMEM; return 0; diff --git a/net/ipv6/netfilter/ip6_queue.c b/net/ipv6/netfilter/ip6_queue.c index 7130603a32c5..1c3d247a22cc 100644 --- a/net/ipv6/netfilter/ip6_queue.c +++ b/net/ipv6/netfilter/ip6_queue.c @@ -685,7 +685,7 @@ init_or_cleanup(int init) goto cleanup; netlink_register_notifier(&ipq_nl_notifier); - ipqnl = netlink_kernel_create(NETLINK_IP6_FW, ipq_rcv_sk); + ipqnl = netlink_kernel_create(NETLINK_IP6_FW, ipq_rcv_sk, THIS_MODULE); if (ipqnl == NULL) { printk(KERN_ERR "ip6_queue: failed to create netlink socket\n"); goto cleanup_netlink_notifier; diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c index b0ed57981847..6210ca42166c 100644 --- a/net/netfilter/nfnetlink.c +++ b/net/netfilter/nfnetlink.c @@ -38,6 +38,8 @@ #include MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Harald Welte "); +MODULE_ALIAS_NET_PF_PROTO(PF_NETLINK, NETLINK_NETFILTER); static char __initdata nfversion[] = "0.30"; @@ -324,7 +326,8 @@ int __init nfnetlink_init(void) { printk("Netfilter messages via NETLINK v%s.\n", nfversion); - nfnl = netlink_kernel_create(NETLINK_NETFILTER, nfnetlink_rcv); + nfnl = netlink_kernel_create(NETLINK_NETFILTER, nfnetlink_rcv, + THIS_MODULE); if (!nfnl) { printk(KERN_ERR "cannot initialize nfnetlink!\n"); return -1; diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c 
index ff774a06c89d..5d487cd69c8c 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -13,7 +13,12 @@ * added netlink_proto_exit * Tue Jan 22 18:32:44 BRST 2002 Arnaldo C. de Melo * use nlk_sk, as sk->protinfo is on a diet 8) - * + * Fri Jul 22 19:51:12 MEST 2005 Harald Welte + * - inc module use count of module that owns + * the kernel socket in case userspace opens + * socket of same protocol + * - remove all module support, since netlink is + * mandatory if CONFIG_NET=y these days */ #include @@ -92,6 +97,7 @@ struct netlink_table { struct nl_pid_hash hash; struct hlist_head mc_list; unsigned int nl_nonroot; + struct proto_ops *p_ops; }; static struct netlink_table *nl_table; @@ -341,7 +347,21 @@ static int netlink_create(struct socket *sock, int protocol) if (protocol<0 || protocol >= MAX_LINKS) return -EPROTONOSUPPORT; - sock->ops = &netlink_ops; + netlink_table_grab(); + if (!nl_table[protocol].hash.entries) { +#ifdef CONFIG_KMOD + /* We do 'best effort'. If we find a matching module, + * it is loaded. If not, we don't return an error to + * allow pure userspace<->userspace communication. 
-HW + */ + netlink_table_ungrab(); + request_module("net-pf-%d-proto-%d", PF_NETLINK, protocol); + netlink_table_grab(); +#endif + } + netlink_table_ungrab(); + + sock->ops = nl_table[protocol].p_ops; sk = sk_alloc(PF_NETLINK, GFP_KERNEL, &netlink_proto, 1); if (!sk) @@ -394,6 +414,22 @@ static int netlink_release(struct socket *sock) }; notifier_call_chain(&netlink_chain, NETLINK_URELEASE, &n); } + + /* When this is a kernel socket, we need to remove the owner pointer, + * since we don't know whether the module will be dying at any given + * point - HW + */ + if (!nlk->pid) { + struct proto_ops *p_tmp; + + netlink_table_grab(); + p_tmp = nl_table[sk->sk_protocol].p_ops; + if (p_tmp != &netlink_ops) { + nl_table[sk->sk_protocol].p_ops = &netlink_ops; + kfree(p_tmp); + } + netlink_table_ungrab(); + } sock_put(sk); return 0; @@ -1023,8 +1059,9 @@ static void netlink_data_ready(struct sock *sk, int len) */ struct sock * -netlink_kernel_create(int unit, void (*input)(struct sock *sk, int len)) +netlink_kernel_create(int unit, void (*input)(struct sock *sk, int len), struct module *module) { + struct proto_ops *p_ops; struct socket *sock; struct sock *sk; @@ -1034,22 +1071,63 @@ netlink_kernel_create(int unit, void (*input)(struct sock *sk, int len)) if (unit<0 || unit>=MAX_LINKS) return NULL; + /* Do a quick check, to make us not go down to netlink_insert() + * if protocol already has kernel socket. + */ + sk = netlink_lookup(unit, 0); + if (unlikely(sk)) { + sock_put(sk); + return NULL; + } + if (sock_create_lite(PF_NETLINK, SOCK_DGRAM, unit, &sock)) return NULL; + sk = NULL; + if (module) { + /* Every registering protocol implemented in a module needs + * it's own p_ops, since the socket code cannot deal with + * module refcounting otherwise. 
-HW + */ + p_ops = kmalloc(sizeof(*p_ops), GFP_KERNEL); + if (!p_ops) + goto out_sock_release; + + memcpy(p_ops, &netlink_ops, sizeof(*p_ops)); + p_ops->owner = module; + } else + p_ops = &netlink_ops; + + netlink_table_grab(); + nl_table[unit].p_ops = p_ops; + netlink_table_ungrab(); + if (netlink_create(sock, unit) < 0) { - sock_release(sock); - return NULL; + sk = NULL; + goto out_kfree_p_ops; } + sk = sock->sk; sk->sk_data_ready = netlink_data_ready; if (input) nlk_sk(sk)->data_ready = input; if (netlink_insert(sk, 0)) { - sock_release(sock); - return NULL; + sk = NULL; + goto out_kfree_p_ops; } + + return sk; + +out_kfree_p_ops: + netlink_table_grab(); + if (nl_table[unit].p_ops != &netlink_ops) { + kfree(nl_table[unit].p_ops); + nl_table[unit].p_ops = &netlink_ops; + } + netlink_table_ungrab(); +out_sock_release: + sock_release(sock); return sk; } @@ -1413,6 +1491,8 @@ enomem: for (i = 0; i < MAX_LINKS; i++) { struct nl_pid_hash *hash = &nl_table[i].hash; + nl_table[i].p_ops = &netlink_ops; + hash->table = nl_pid_hash_alloc(1 * sizeof(*hash->table)); if (!hash->table) { while (i-- > 0) @@ -1438,21 +1518,7 @@ out: return err; } -static void __exit netlink_proto_exit(void) -{ - sock_unregister(PF_NETLINK); - proc_net_remove("netlink"); - kfree(nl_table); - nl_table = NULL; - proto_unregister(&netlink_proto); -} - core_initcall(netlink_proto_init); -module_exit(netlink_proto_exit); - -MODULE_LICENSE("GPL"); - -MODULE_ALIAS_NETPROTO(PF_NETLINK); EXPORT_SYMBOL(netlink_ack); EXPORT_SYMBOL(netlink_broadcast); diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index 8da3e25b2c4c..33ceeea783b1 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -1519,7 +1519,8 @@ static int __init xfrm_user_init(void) { printk(KERN_INFO "Initializing IPsec netlink socket\n"); - xfrm_nl = netlink_kernel_create(NETLINK_XFRM, xfrm_netlink_rcv); + xfrm_nl = netlink_kernel_create(NETLINK_XFRM, xfrm_netlink_rcv, + THIS_MODULE); if (xfrm_nl == NULL) return -ENOMEM; @@ 
-1537,3 +1538,4 @@ static void __exit xfrm_user_exit(void) module_init(xfrm_user_init); module_exit(xfrm_user_exit); MODULE_LICENSE("GPL"); +MODULE_ALIAS_NET_PF_PROTO(PF_NETLINK, NETLINK_XFRM); diff --git a/security/selinux/netlink.c b/security/selinux/netlink.c index 18d08acafa78..341dbe2579be 100644 --- a/security/selinux/netlink.c +++ b/security/selinux/netlink.c @@ -103,7 +103,7 @@ void selnl_notify_policyload(u32 seqno) static int __init selnl_init(void) { - selnl = netlink_kernel_create(NETLINK_SELINUX, NULL); + selnl = netlink_kernel_create(NETLINK_SELINUX, NULL, THIS_MODULE); if (selnl == NULL) panic("SELinux: Cannot create netlink socket."); netlink_set_nonroot(NETLINK_SELINUX, NL_NONROOT_RECV); From 2cc7d5730957c4a3f3659d17d2ba5e06d5581c1f Mon Sep 17 00:00:00 2001 From: Harald Welte Date: Tue, 9 Aug 2005 19:42:34 -0700 Subject: [PATCH 285/584] [NETFILTER]: Move reroute-after-queue code up to the nf_queue layer. The rerouting functionality is required by the core, therefore it has to be implemented by the core and not in individual queue handlers. Signed-off-by: Harald Welte Signed-off-by: David S. Miller --- include/linux/netfilter.h | 11 ++++++ include/linux/netfilter_ipv6.h | 3 ++ net/core/netfilter.c | 66 +++++++++++++++++++++++++++------- net/ipv4/netfilter.c | 64 +++++++++++++++++++++++++++++++-- net/ipv4/netfilter/ip_queue.c | 27 -------------- net/ipv6/af_inet6.c | 7 ++++ net/ipv6/netfilter.c | 62 ++++++++++++++++++++++++++++++++ net/ipv6/netfilter/ip6_queue.c | 24 ------------- 8 files changed, 199 insertions(+), 65 deletions(-) diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h index 54b97a1baba5..d163e20ca8d9 100644 --- a/include/linux/netfilter.h +++ b/include/linux/netfilter.h @@ -198,6 +198,17 @@ extern void nf_invalidate_cache(int pf); Returns true or false. 
*/ extern int skb_make_writable(struct sk_buff **pskb, unsigned int writable_len); +struct nf_queue_rerouter { + void (*save)(const struct sk_buff *skb, struct nf_info *info); + int (*reroute)(struct sk_buff **skb, const struct nf_info *info); + int rer_size; +}; + +#define nf_info_reroute(x) ((void *)x + sizeof(struct nf_info)) + +extern int nf_register_queue_rerouter(int pf, struct nf_queue_rerouter *rer); +extern int nf_unregister_queue_rerouter(int pf); + #else /* !CONFIG_NETFILTER */ #define NF_HOOK(pf, hook, skb, indev, outdev, okfn) (okfn)(skb) static inline void nf_ct_attach(struct sk_buff *new, struct sk_buff *skb) {} diff --git a/include/linux/netfilter_ipv6.h b/include/linux/netfilter_ipv6.h index 20c069a5e4ac..5d204ee7a312 100644 --- a/include/linux/netfilter_ipv6.h +++ b/include/linux/netfilter_ipv6.h @@ -71,4 +71,7 @@ enum nf_ip6_hook_priorities { NF_IP6_PRI_LAST = INT_MAX, }; +int ipv6_netfilter_init(void); +void ipv6_netfilter_fini(void); + #endif /*__LINUX_IP6_NETFILTER_H*/ diff --git a/net/core/netfilter.c b/net/core/netfilter.c index 9849357f6129..1ed4f3110421 100644 --- a/net/core/netfilter.c +++ b/net/core/netfilter.c @@ -53,6 +53,9 @@ static struct nf_queue_handler_t { nf_queue_outfn_t outfn; void *data; } queue_handler[NPROTO]; + +static struct nf_queue_rerouter *queue_rerouter; + static DEFINE_RWLOCK(queue_handler_lock); int nf_register_hook(struct nf_hook_ops *reg) @@ -260,11 +263,34 @@ int nf_unregister_queue_handler(int pf) return 0; } +int nf_register_queue_rerouter(int pf, struct nf_queue_rerouter *rer) +{ + if (pf >= NPROTO) + return -EINVAL; + + write_lock_bh(&queue_handler_lock); + memcpy(&queue_rerouter[pf], rer, sizeof(queue_rerouter[pf])); + write_unlock_bh(&queue_handler_lock); + + return 0; +} + +int nf_unregister_queue_rerouter(int pf) +{ + if (pf >= NPROTO) + return -EINVAL; + + write_lock_bh(&queue_handler_lock); + memset(&queue_rerouter[pf], 0, sizeof(queue_rerouter[pf])); + write_unlock_bh(&queue_handler_lock); + return 0; 
+} + /* * Any packet that leaves via this function must come back * through nf_reinject(). */ -static int nf_queue(struct sk_buff *skb, +static int nf_queue(struct sk_buff **skb, struct list_head *elem, int pf, unsigned int hook, struct net_device *indev, @@ -282,17 +308,17 @@ static int nf_queue(struct sk_buff *skb, read_lock(&queue_handler_lock); if (!queue_handler[pf].outfn) { read_unlock(&queue_handler_lock); - kfree_skb(skb); + kfree_skb(*skb); return 1; } - info = kmalloc(sizeof(*info), GFP_ATOMIC); + info = kmalloc(sizeof(*info)+queue_rerouter[pf].rer_size, GFP_ATOMIC); if (!info) { if (net_ratelimit()) printk(KERN_ERR "OOM queueing packet %p\n", - skb); + *skb); read_unlock(&queue_handler_lock); - kfree_skb(skb); + kfree_skb(*skb); return 1; } @@ -311,15 +337,21 @@ static int nf_queue(struct sk_buff *skb, if (outdev) dev_hold(outdev); #ifdef CONFIG_BRIDGE_NETFILTER - if (skb->nf_bridge) { - physindev = skb->nf_bridge->physindev; + if ((*skb)->nf_bridge) { + physindev = (*skb)->nf_bridge->physindev; if (physindev) dev_hold(physindev); - physoutdev = skb->nf_bridge->physoutdev; + physoutdev = (*skb)->nf_bridge->physoutdev; if (physoutdev) dev_hold(physoutdev); } #endif + if (queue_rerouter[pf].save) + queue_rerouter[pf].save(*skb, info); + + status = queue_handler[pf].outfn(*skb, info, queue_handler[pf].data); + + if (status >= 0 && queue_rerouter[pf].reroute) + status = queue_rerouter[pf].reroute(skb, info); - status = queue_handler[pf].outfn(skb, info, queue_handler[pf].data); read_unlock(&queue_handler_lock); if (status < 0) { @@ -332,9 +364,11 @@ static int nf_queue(struct sk_buff *skb, #endif module_put(info->elem->owner); kfree(info); - kfree_skb(skb); + kfree_skb(*skb); + return 1; } + return 1; } @@ -365,7 +399,7 @@ next_hook: ret = -EPERM; } else if (verdict == NF_QUEUE) { NFDEBUG("nf_hook: Verdict = QUEUE.\n"); - if (!nf_queue(*pskb, elem, pf, hook, indev, outdev, okfn)) + if (!nf_queue(pskb, elem, pf, hook, indev, outdev, okfn)) goto next_hook; } 
unlock: @@ -428,7 +462,7 @@ void nf_reinject(struct sk_buff *skb, struct nf_info *info, break; case NF_QUEUE: - if (!nf_queue(skb, elem, info->pf, info->hook, + if (!nf_queue(&skb, elem, info->pf, info->hook, info->indev, info->outdev, info->okfn)) goto next_hook; break; @@ -555,6 +589,12 @@ void __init netfilter_init(void) { int i, h; + queue_rerouter = kmalloc(NPROTO * sizeof(struct nf_queue_rerouter), + GFP_KERNEL); + if (!queue_rerouter) + panic("netfilter: cannot allocate queue rerouter array\n"); + memset(queue_rerouter, 0, NPROTO * sizeof(struct nf_queue_rerouter)); + for (i = 0; i < NPROTO; i++) { for (h = 0; h < NF_MAX_HOOKS; h++) INIT_LIST_HEAD(&nf_hooks[i][h]); @@ -573,4 +613,6 @@ EXPORT_SYMBOL(nf_reinject); EXPORT_SYMBOL(nf_setsockopt); EXPORT_SYMBOL(nf_unregister_hook); EXPORT_SYMBOL(nf_unregister_queue_handler); +EXPORT_SYMBOL_GPL(nf_register_queue_rerouter); +EXPORT_SYMBOL_GPL(nf_unregister_queue_rerouter); EXPORT_SYMBOL(nf_unregister_sockopt); diff --git a/net/ipv4/netfilter.c b/net/ipv4/netfilter.c index 6594d1c9697e..ae0779d82c5d 100644 --- a/net/ipv4/netfilter.c +++ b/net/ipv4/netfilter.c @@ -1,10 +1,11 @@ -#include +/* IPv4 specific functions of netfilter core */ +#include #ifdef CONFIG_NETFILTER -/* IPv4 specific functions of netfilter core */ #include #include +#include #include #include @@ -76,4 +77,63 @@ int ip_route_me_harder(struct sk_buff **pskb) return 0; } EXPORT_SYMBOL(ip_route_me_harder); + +/* + * Extra routing may needed on local out, as the QUEUE target never + * returns control to the table. 
+ */ + +struct ip_rt_info { + u_int32_t daddr; + u_int32_t saddr; + u_int8_t tos; +}; + +static void queue_save(const struct sk_buff *skb, struct nf_info *info) +{ + struct ip_rt_info *rt_info = nf_info_reroute(info); + + if (info->hook == NF_IP_LOCAL_OUT) { + const struct iphdr *iph = skb->nh.iph; + + rt_info->tos = iph->tos; + rt_info->daddr = iph->daddr; + rt_info->saddr = iph->saddr; + } +} + +static int queue_reroute(struct sk_buff **pskb, const struct nf_info *info) +{ + const struct ip_rt_info *rt_info = nf_info_reroute(info); + + if (info->hook == NF_IP_LOCAL_OUT) { + struct iphdr *iph = (*pskb)->nh.iph; + + if (!(iph->tos == rt_info->tos + && iph->daddr == rt_info->daddr + && iph->saddr == rt_info->saddr)) + return ip_route_me_harder(pskb); + } + return 0; +} + +static struct nf_queue_rerouter ip_reroute = { + .rer_size = sizeof(struct ip_rt_info), + .save = queue_save, + .reroute = queue_reroute, +}; + +static int init(void) +{ + return nf_register_queue_rerouter(PF_INET, &ip_reroute); +} + +static void fini(void) +{ + nf_unregister_queue_rerouter(PF_INET); +} + +module_init(init); +module_exit(fini); + #endif /* CONFIG_NETFILTER */ diff --git a/net/ipv4/netfilter/ip_queue.c b/net/ipv4/netfilter/ip_queue.c index b237f7fcad92..78892980f42c 100644 --- a/net/ipv4/netfilter/ip_queue.c +++ b/net/ipv4/netfilter/ip_queue.c @@ -43,17 +43,10 @@ #define NET_IPQ_QMAX 2088 #define NET_IPQ_QMAX_NAME "ip_queue_maxlen" -struct ipq_rt_info { - __u8 tos; - __u32 daddr; - __u32 saddr; -}; - struct ipq_queue_entry { struct list_head list; struct nf_info *info; struct sk_buff *skb; - struct ipq_rt_info rt_info; }; typedef int (*ipq_cmpfn)(struct ipq_queue_entry *, unsigned long); @@ -305,14 +298,6 @@ ipq_enqueue_packet(struct sk_buff *skb, struct nf_info *info, void *data) entry->info = info; entry->skb = skb; - if (entry->info->hook == NF_IP_LOCAL_OUT) { - struct iphdr *iph = skb->nh.iph; - - entry->rt_info.tos = iph->tos; - entry->rt_info.daddr = iph->daddr; - 
entry->rt_info.saddr = iph->saddr; - } - nskb = ipq_build_packet_message(entry, &status); if (nskb == NULL) goto err_out_free; @@ -393,18 +378,6 @@ ipq_mangle_ipv4(ipq_verdict_msg_t *v, struct ipq_queue_entry *e) memcpy(e->skb->data, v->payload, v->data_len); e->skb->ip_summed = CHECKSUM_NONE; - /* - * Extra routing may needed on local out, as the QUEUE target never - * returns control to the table. - */ - if (e->info->hook == NF_IP_LOCAL_OUT) { - struct iphdr *iph = e->skb->nh.iph; - - if (!(iph->tos == e->rt_info.tos - && iph->daddr == e->rt_info.daddr - && iph->saddr == e->rt_info.saddr)) - return ip_route_me_harder(&e->skb); - } return 0; } diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index 28d9bcab0970..574047353628 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -44,6 +44,7 @@ #include #include #include +#include #include #include @@ -757,6 +758,9 @@ static int __init inet6_init(void) err = igmp6_init(&inet6_family_ops); if (err) goto igmp_fail; + err = ipv6_netfilter_init(); + if (err) + goto netfilter_fail; /* Create /proc/foo6 entries. */ #ifdef CONFIG_PROC_FS err = -ENOMEM; @@ -813,6 +817,8 @@ proc_tcp6_fail: raw6_proc_exit(); proc_raw6_fail: #endif + ipv6_netfilter_fini(); +netfilter_fail: igmp6_cleanup(); igmp_fail: ndisc_cleanup(); @@ -852,6 +858,7 @@ static void __exit inet6_exit(void) ip6_route_cleanup(); ipv6_packet_cleanup(); igmp6_cleanup(); + ipv6_netfilter_fini(); ndisc_cleanup(); icmpv6_cleanup(); #ifdef CONFIG_SYSCTL diff --git a/net/ipv6/netfilter.c b/net/ipv6/netfilter.c index 5656d0959aba..c8daef97cf56 100644 --- a/net/ipv6/netfilter.c +++ b/net/ipv6/netfilter.c @@ -5,6 +5,8 @@ #include #include +#include +#include #include #include #include @@ -40,4 +42,64 @@ int ip6_route_me_harder(struct sk_buff *skb) } EXPORT_SYMBOL(ip6_route_me_harder); +/* + * Extra routing may needed on local out, as the QUEUE target never + * returns control to the table. 
+ */ + +struct ip6_rt_info { + struct in6_addr daddr; + struct in6_addr saddr; +}; + +static void save(const struct sk_buff *skb, struct nf_info *info) +{ + struct ip6_rt_info *rt_info = nf_info_reroute(info); + + if (info->hook == NF_IP6_LOCAL_OUT) { + struct ipv6hdr *iph = skb->nh.ipv6h; + + rt_info->daddr = iph->daddr; + rt_info->saddr = iph->saddr; + } +} + +static int reroute(struct sk_buff **pskb, const struct nf_info *info) +{ + struct ip6_rt_info *rt_info = nf_info_reroute(info); + + if (info->hook == NF_IP6_LOCAL_OUT) { + struct ipv6hdr *iph = (*pskb)->nh.ipv6h; + if (!ipv6_addr_equal(&iph->daddr, &rt_info->daddr) || + !ipv6_addr_equal(&iph->saddr, &rt_info->saddr)) + return ip6_route_me_harder(*pskb); + } + return 0; +} + +static struct nf_queue_rerouter ip6_reroute = { + .rer_size = sizeof(struct ip6_rt_info), + .save = &save, + .reroute = &reroute, +}; + +int __init ipv6_netfilter_init(void) +{ + return nf_register_queue_rerouter(PF_INET6, &ip6_reroute); +} + +void ipv6_netfilter_fini(void) +{ + nf_unregister_queue_rerouter(PF_INET6); +} + +#else /* CONFIG_NETFILTER */ +int __init ipv6_netfilter_init(void) +{ + return 0; +} + +void ipv6_netfilter_fini(void) +{ +} #endif /* CONFIG_NETFILTER */ diff --git a/net/ipv6/netfilter/ip6_queue.c b/net/ipv6/netfilter/ip6_queue.c index 1c3d247a22cc..c45d8f8815de 100644 --- a/net/ipv6/netfilter/ip6_queue.c +++ b/net/ipv6/netfilter/ip6_queue.c @@ -47,16 +47,10 @@ #define NET_IPQ_QMAX 2088 #define NET_IPQ_QMAX_NAME "ip6_queue_maxlen" -struct ipq_rt_info { - struct in6_addr daddr; - struct in6_addr saddr; -}; - struct ipq_queue_entry { struct list_head list; struct nf_info *info; struct sk_buff *skb; - struct ipq_rt_info rt_info; }; typedef int (*ipq_cmpfn)(struct ipq_queue_entry *, unsigned long); @@ -302,13 +296,6 @@ ipq_enqueue_packet(struct sk_buff *skb, struct nf_info *info, void *data) entry->info = info; entry->skb = skb; - if (entry->info->hook == NF_IP_LOCAL_OUT) { - struct ipv6hdr *iph = skb->nh.ipv6h; - - 
entry->rt_info.daddr = iph->daddr; - entry->rt_info.saddr = iph->saddr; - } - nskb = ipq_build_packet_message(entry, &status); if (nskb == NULL) goto err_out_free; @@ -389,17 +376,6 @@ ipq_mangle_ipv6(ipq_verdict_msg_t *v, struct ipq_queue_entry *e) memcpy(e->skb->data, v->payload, v->data_len); e->skb->ip_summed = CHECKSUM_NONE; - /* - * Extra routing may needed on local out, as the QUEUE target never - * returns control to the table. - * Not a nice way to cmp, but works - */ - if (e->info->hook == NF_IP_LOCAL_OUT) { - struct ipv6hdr *iph = e->skb->nh.ipv6h; - if (!ipv6_addr_equal(&iph->daddr, &e->rt_info.daddr) || - !ipv6_addr_equal(&iph->saddr, &e->rt_info.saddr)) - return ip6_route_me_harder(e->skb); - } return 0; } From 0ab43f84995f2c2fcc5cc58a9accaa1095e1317f Mon Sep 17 00:00:00 2001 From: Harald Welte Date: Tue, 9 Aug 2005 19:43:44 -0700 Subject: [PATCH 286/584] [NETFILTER]: Core changes required by upcoming nfnetlink_queue code - split netfilter verdict in 16bit verdict and 16bit queue number - add 'queuenum' argument to nf_queue_outfn_t and its users ip[6]_queue - move NFNL_SUBSYS_ definitions from enum to #define - introduce autoloading for nfnetlink subsystem modules - add MODULE_ALIAS_NFNL_SUBSYS macro - add nf_unregister_queue_handlers() to unregister all handlers for a given nf_queue_outfn_t - add more verbose DEBUGP macro definition to nfnetlink.c - make nfnetlink_subsys_register fail if subsys already exists - add some more comments and debug statements to nfnetlink.c Signed-off-by: Harald Welte Signed-off-by: David S. 
Miller --- include/linux/netfilter.h | 14 +++++++++- include/linux/netfilter/nfnetlink.h | 20 ++++++++------- net/core/netfilter.c | 40 ++++++++++++++++++++++++----- net/ipv4/netfilter/ip_queue.c | 3 ++- net/ipv6/netfilter/ip6_queue.c | 3 ++- net/netfilter/nfnetlink.c | 28 +++++++++++++++----- 6 files changed, 83 insertions(+), 25 deletions(-) diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h index d163e20ca8d9..711e05f33d68 100644 --- a/include/linux/netfilter.h +++ b/include/linux/netfilter.h @@ -21,6 +21,16 @@ #define NF_STOP 5 #define NF_MAX_VERDICT NF_STOP +/* we overload the higher bits for encoding auxiliary data such as the queue + * number. Not nice, but better than additional function arguments. */ +#define NF_VERDICT_MASK 0x0000ffff +#define NF_VERDICT_BITS 16 + +#define NF_VERDICT_QMASK 0xffff0000 +#define NF_VERDICT_QBITS 16 + +#define NF_QUEUE_NR(x) ((((x) << NF_VERDICT_QBITS) & NF_VERDICT_QMASK) | NF_QUEUE) + /* only for userspace compatibility */ #ifndef __KERNEL__ /* Generic cache responses from hook functions. 
@@ -179,10 +189,12 @@ int nf_getsockopt(struct sock *sk, int pf, int optval, char __user *opt, /* Packet queuing */ typedef int (*nf_queue_outfn_t)(struct sk_buff *skb, - struct nf_info *info, void *data); + struct nf_info *info, + unsigned int queuenum, void *data); extern int nf_register_queue_handler(int pf, nf_queue_outfn_t outfn, void *data); extern int nf_unregister_queue_handler(int pf); +extern void nf_unregister_queue_handlers(nf_queue_outfn_t outfn); extern void nf_reinject(struct sk_buff *skb, struct nf_info *info, unsigned int verdict); diff --git a/include/linux/netfilter/nfnetlink.h b/include/linux/netfilter/nfnetlink.h index ace7a7be0742..561f9df28808 100644 --- a/include/linux/netfilter/nfnetlink.h +++ b/include/linux/netfilter/nfnetlink.h @@ -69,15 +69,14 @@ struct nfgenmsg { #define NFNL_SUBSYS_ID(x) ((x & 0xff00) >> 8) #define NFNL_MSG_TYPE(x) (x & 0x00ff) -enum nfnl_subsys_id { - NFNL_SUBSYS_NONE = 0, - NFNL_SUBSYS_CTNETLINK, - NFNL_SUBSYS_CTNETLINK_EXP, - NFNL_SUBSYS_IPTNETLINK, - NFNL_SUBSYS_QUEUE, - NFNL_SUBSYS_ULOG, - NFNL_SUBSYS_COUNT, -}; +/* No enum here, otherwise __stringify() trick of MODULE_ALIAS_NFNL_SUBSYS() + * won't work anymore */ +#define NFNL_SUBSYS_NONE 0 +#define NFNL_SUBSYS_CTNETLINK 1 +#define NFNL_SUBSYS_CTNETLINK_EXP 2 +#define NFNL_SUBSYS_QUEUE 3 +#define NFNL_SUBSYS_ULOG 4 +#define NFNL_SUBSYS_COUNT 5 #ifdef __KERNEL__ @@ -142,5 +141,8 @@ extern int nfnetlink_send(struct sk_buff *skb, u32 pid, unsigned group, int echo); extern int nfnetlink_unicast(struct sk_buff *skb, u_int32_t pid, int flags); +#define MODULE_ALIAS_NFNL_SUBSYS(subsys) \ + MODULE_ALIAS("nfnetlink-subsys-" __stringify(subsys)) + #endif /* __KERNEL__ */ #endif /* _NFNETLINK_H */ diff --git a/net/core/netfilter.c b/net/core/netfilter.c index 1ed4f3110421..3e38084ac2bd 100644 --- a/net/core/netfilter.c +++ b/net/core/netfilter.c @@ -221,7 +221,8 @@ static unsigned int nf_iterate(struct list_head *head, verdict = elem->hook(hook, skb, indev, outdev, okfn); 
if (verdict != NF_ACCEPT) { #ifdef CONFIG_NETFILTER_DEBUG - if (unlikely(verdict > NF_MAX_VERDICT)) { + if (unlikely((verdict & NF_VERDICT_MASK) + > NF_MAX_VERDICT)) { NFDEBUG("Evil return from %p(%u).\n", elem->hook, hook); continue; @@ -239,6 +240,9 @@ int nf_register_queue_handler(int pf, nf_queue_outfn_t outfn, void *data) { int ret; + if (pf >= NPROTO) + return -EINVAL; + write_lock_bh(&queue_handler_lock); if (queue_handler[pf].outfn) ret = -EBUSY; @@ -255,6 +259,9 @@ int nf_register_queue_handler(int pf, nf_queue_outfn_t outfn, void *data) /* The caller must flush their queue before this */ int nf_unregister_queue_handler(int pf) { + if (pf >= NPROTO) + return -EINVAL; + write_lock_bh(&queue_handler_lock); queue_handler[pf].outfn = NULL; queue_handler[pf].data = NULL; @@ -286,6 +293,20 @@ int nf_unregister_queue_rerouter(int pf) return 0; } +void nf_unregister_queue_handlers(nf_queue_outfn_t outfn) +{ + int pf; + + write_lock_bh(&queue_handler_lock); + for (pf = 0; pf < NPROTO; pf++) { + if (queue_handler[pf].outfn == outfn) { + queue_handler[pf].outfn = NULL; + queue_handler[pf].data = NULL; + } + } + write_unlock_bh(&queue_handler_lock); +} + /* * Any packet that leaves via this function must come back * through nf_reinject(). 
@@ -295,7 +316,8 @@ static int nf_queue(struct sk_buff **skb, int pf, unsigned int hook, struct net_device *indev, struct net_device *outdev, - int (*okfn)(struct sk_buff *)) + int (*okfn)(struct sk_buff *), + unsigned int queuenum) { int status; struct nf_info *info; @@ -347,7 +369,8 @@ static int nf_queue(struct sk_buff **skb, if (queue_rerouter[pf].save) queue_rerouter[pf].save(*skb, info); - status = queue_handler[pf].outfn(*skb, info, queue_handler[pf].data); + status = queue_handler[pf].outfn(*skb, info, queuenum, + queue_handler[pf].data); if (status >= 0 && queue_rerouter[pf].reroute) status = queue_rerouter[pf].reroute(skb, info); @@ -397,9 +420,10 @@ next_hook: } else if (verdict == NF_DROP) { kfree_skb(*pskb); ret = -EPERM; - } else if (verdict == NF_QUEUE) { + } else if ((verdict & NF_VERDICT_MASK) == NF_QUEUE) { NFDEBUG("nf_hook: Verdict = QUEUE.\n"); - if (!nf_queue(pskb, elem, pf, hook, indev, outdev, okfn)) + if (!nf_queue(pskb, elem, pf, hook, indev, outdev, okfn, + verdict >> NF_VERDICT_BITS)) goto next_hook; } unlock: @@ -456,14 +480,15 @@ void nf_reinject(struct sk_buff *skb, struct nf_info *info, info->okfn, INT_MIN); } - switch (verdict) { + switch (verdict & NF_VERDICT_MASK) { case NF_ACCEPT: info->okfn(skb); break; case NF_QUEUE: if (!nf_queue(&skb, elem, info->pf, info->hook, - info->indev, info->outdev, info->okfn)) + info->indev, info->outdev, info->okfn, + verdict >> NF_VERDICT_BITS)) goto next_hook; break; } @@ -613,6 +638,7 @@ EXPORT_SYMBOL(nf_reinject); EXPORT_SYMBOL(nf_setsockopt); EXPORT_SYMBOL(nf_unregister_hook); EXPORT_SYMBOL(nf_unregister_queue_handler); +EXPORT_SYMBOL_GPL(nf_unregister_queue_handlers); EXPORT_SYMBOL_GPL(nf_register_queue_rerouter); EXPORT_SYMBOL_GPL(nf_unregister_queue_rerouter); EXPORT_SYMBOL(nf_unregister_sockopt); diff --git a/net/ipv4/netfilter/ip_queue.c b/net/ipv4/netfilter/ip_queue.c index 78892980f42c..cfc886f382ac 100644 --- a/net/ipv4/netfilter/ip_queue.c +++ b/net/ipv4/netfilter/ip_queue.c @@ -280,7 
+280,8 @@ nlmsg_failure: } static int -ipq_enqueue_packet(struct sk_buff *skb, struct nf_info *info, void *data) +ipq_enqueue_packet(struct sk_buff *skb, struct nf_info *info, + unsigned int queuenum, void *data) { int status = -EINVAL; struct sk_buff *nskb; diff --git a/net/ipv6/netfilter/ip6_queue.c b/net/ipv6/netfilter/ip6_queue.c index c45d8f8815de..5af4cee93d9b 100644 --- a/net/ipv6/netfilter/ip6_queue.c +++ b/net/ipv6/netfilter/ip6_queue.c @@ -278,7 +278,8 @@ nlmsg_failure: } static int -ipq_enqueue_packet(struct sk_buff *skb, struct nf_info *info, void *data) +ipq_enqueue_packet(struct sk_buff *skb, struct nf_info *info, + unsigned int queuenum, void *data) { int status = -EINVAL; struct sk_buff *nskb; diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c index 6210ca42166c..30b25f47f7cc 100644 --- a/net/netfilter/nfnetlink.c +++ b/net/netfilter/nfnetlink.c @@ -44,7 +44,9 @@ MODULE_ALIAS_NET_PF_PROTO(PF_NETLINK, NETLINK_NETFILTER); static char __initdata nfversion[] = "0.30"; #if 0 -#define DEBUGP printk +#define DEBUGP(format, args...) \ + printk(KERN_DEBUG "%s(%d):%s(): " format, __FILE__, \ + __LINE__, __FUNCTION__, ## args) #else #define DEBUGP(format, args...) 
#endif @@ -67,11 +69,11 @@ int nfnetlink_subsys_register(struct nfnetlink_subsystem *n) { DEBUGP("registering subsystem ID %u\n", n->subsys_id); - /* If the netlink socket wasn't created, then fail */ - if (!nfnl) - return -1; - nfnl_lock(); + if (subsys_table[n->subsys_id]) { + nfnl_unlock(); + return -EBUSY; + } subsys_table[n->subsys_id] = n; nfnl_unlock(); @@ -227,8 +229,18 @@ static inline int nfnetlink_rcv_msg(struct sk_buff *skb, type = nlh->nlmsg_type; ss = nfnetlink_get_subsys(type); - if (!ss) + if (!ss) { +#ifdef CONFIG_KMOD + /* don't call nfnl_shunlock, since it would reenter + * with further packet processing */ + up(&nfnl_sem); + request_module("nfnetlink-subsys-%d", NFNL_SUBSYS_ID(type)); + nfnl_shlock(); + ss = nfnetlink_get_subsys(type); + if (!ss) +#endif goto err_inval; + } nc = nfnetlink_find_client(type, ss); if (!nc) { @@ -252,12 +264,14 @@ static inline int nfnetlink_rcv_msg(struct sk_buff *skb, if (err < 0) goto err_inval; + DEBUGP("calling handler\n"); err = nc->call(nfnl, skb, nlh, cda, errp); *errp = err; return err; } err_inval: + DEBUGP("returning -EINVAL\n"); *errp = -EINVAL; return -1; } @@ -311,6 +325,8 @@ static void nfnetlink_rcv(struct sock *sk, int len) kfree_skb(skb); } + /* don't call nfnl_shunlock, since it would reenter + * with further packet processing */ up(&nfnl_sem); } while(nfnl && nfnl->sk_receive_queue.qlen); } From 7af4cc3fa158ff1dda6e7451c7e6afa6b0bb85cb Mon Sep 17 00:00:00 2001 From: Harald Welte Date: Tue, 9 Aug 2005 19:44:15 -0700 Subject: [PATCH 287/584] [NETFILTER]: Add "nfnetlink_queue" netfilter queue handler over nfnetlink - Add new nfnetlink_queue module - Add new ipt_NFQUEUE and ip6t_NFQUEUE modules to access queue numbers 1-65535 - Mark ip_queue and ip6_queue Kconfig options as OBSOLETE - Update feature-removal-schedule to remove ip[6]_queue in December Signed-off-by: Harald Welte Signed-off-by: David S. 
Miller --- Documentation/feature-removal-schedule.txt | 12 + include/linux/netfilter/nfnetlink_queue.h | 85 ++ include/linux/netfilter_ipv4/ipt_NFQUEUE.h | 16 + net/ipv4/netfilter/Kconfig | 6 +- net/ipv4/netfilter/Makefile | 1 + net/ipv4/netfilter/ipt_NFQUEUE.c | 70 ++ net/ipv6/netfilter/Kconfig | 11 +- net/ipv6/netfilter/Makefile | 1 + net/ipv6/netfilter/ip6t_NFQUEUE.c | 70 ++ net/netfilter/Kconfig | 8 + net/netfilter/Makefile | 1 + net/netfilter/nfnetlink_queue.c | 877 +++++++++++++++++++++ 12 files changed, 1153 insertions(+), 5 deletions(-) create mode 100644 include/linux/netfilter/nfnetlink_queue.h create mode 100644 include/linux/netfilter_ipv4/ipt_NFQUEUE.h create mode 100644 net/ipv4/netfilter/ipt_NFQUEUE.c create mode 100644 net/ipv6/netfilter/ip6t_NFQUEUE.c create mode 100644 net/netfilter/nfnetlink_queue.c diff --git a/Documentation/feature-removal-schedule.txt b/Documentation/feature-removal-schedule.txt index 8b1430b46655..0665cb12bd66 100644 --- a/Documentation/feature-removal-schedule.txt +++ b/Documentation/feature-removal-schedule.txt @@ -135,3 +135,15 @@ Why: With the 16-bit PCMCIA subsystem now behaving (almost) like a pcmciautils package available at http://kernel.org/pub/linux/utils/kernel/pcmcia/ Who: Dominik Brodowski + +--------------------------- + +What: ip_queue and ip6_queue (old ipv4-only and ipv6-only netfilter queue) +When: December 2005 +Why: This interface has been obsoleted by the new layer3-independent + "nfnetlink_queue". The Kernel interface is compatible, so the old + ip[6]tables "QUEUE" targets still work and will transparently handle + all packets into nfnetlink queue number 0. Userspace users will have + to link against API-compatible library on top of libnfnetlink_queue + instead of the current 'libipq'. 
+Who: Harald Welte diff --git a/include/linux/netfilter/nfnetlink_queue.h b/include/linux/netfilter/nfnetlink_queue.h new file mode 100644 index 000000000000..edb463a87eb4 --- /dev/null +++ b/include/linux/netfilter/nfnetlink_queue.h @@ -0,0 +1,85 @@ +#ifndef _NFNETLINK_QUEUE_H +#define _NFNETLINK_QUEUE_H + +#include + +enum nfqnl_msg_types { + NFQNL_MSG_PACKET, /* packet from kernel to userspace */ + NFQNL_MSG_VERDICT, /* verdict from userspace to kernel */ + NFQNL_MSG_CONFIG, /* connect to a particular queue */ + + NFQNL_MSG_MAX +}; + +struct nfqnl_msg_packet_hdr { + u_int32_t packet_id; /* unique ID of packet in queue */ + u_int16_t hw_protocol; /* hw protocol (network order) */ + u_int8_t hook; /* netfilter hook */ +} __attribute__ ((packed)); + +struct nfqnl_msg_packet_hw { + u_int16_t hw_addrlen; + u_int16_t _pad; + u_int8_t hw_addr[8]; +} __attribute__ ((packed)); + +struct nfqnl_msg_packet_timestamp { + u_int64_t sec; + u_int64_t usec; +} __attribute__ ((packed)); + +enum nfqnl_attr_type { + NFQA_UNSPEC, + NFQA_PACKET_HDR, + NFQA_VERDICT_HDR, /* nfqnl_msg_verdict_hdr */ + NFQA_MARK, /* u_int32_t nfmark */ + NFQA_TIMESTAMP, /* nfqnl_msg_packet_timestamp */ + NFQA_IFINDEX_INDEV, /* u_int32_t ifindex */ + NFQA_IFINDEX_OUTDEV, /* u_int32_t ifindex */ + NFQA_HWADDR, /* nfqnl_msg_packet_hw */ + NFQA_PAYLOAD, /* opaque data payload */ + + __NFQA_MAX +}; +#define NFQA_MAX (__NFQA_MAX - 1) + +struct nfqnl_msg_verdict_hdr { + u_int32_t verdict; + u_int32_t id; +} __attribute__ ((packed)); + + +enum nfqnl_msg_config_cmds { + NFQNL_CFG_CMD_NONE, + NFQNL_CFG_CMD_BIND, + NFQNL_CFG_CMD_UNBIND, + NFQNL_CFG_CMD_PF_BIND, + NFQNL_CFG_CMD_PF_UNBIND, +}; + +struct nfqnl_msg_config_cmd { + u_int8_t command; /* nfqnl_msg_config_cmds */ + u_int8_t _pad; + u_int16_t pf; /* AF_xxx for PF_[UN]BIND */ +} __attribute__ ((packed)); + +enum nfqnl_config_mode { + NFQNL_COPY_NONE, + NFQNL_COPY_META, + NFQNL_COPY_PACKET, +}; + +struct nfqnl_msg_config_params { + u_int32_t copy_range; + 
u_int8_t copy_mode; /* enum nfqnl_config_mode */ +} __attribute__ ((packed)); + + +enum nfqnl_attr_config { + NFQA_CFG_UNSPEC, + NFQA_CFG_CMD, /* nfqnl_msg_config_cmd */ + NFQA_CFG_PARAMS, /* nfqnl_msg_config_params */ + __NFQA_CFG_MAX +}; + +#endif /* _NFNETLINK_QUEUE_H */ diff --git a/include/linux/netfilter_ipv4/ipt_NFQUEUE.h b/include/linux/netfilter_ipv4/ipt_NFQUEUE.h new file mode 100644 index 000000000000..b5b2943b0c66 --- /dev/null +++ b/include/linux/netfilter_ipv4/ipt_NFQUEUE.h @@ -0,0 +1,16 @@ +/* iptables module for using NFQUEUE mechanism + * + * (C) 2005 Harald Welte + * + * This software is distributed under GNU GPL v2, 1991 + * +*/ +#ifndef _IPT_NFQ_TARGET_H +#define _IPT_NFQ_TARGET_H + +/* target info */ +struct ipt_NFQ_info { + u_int16_t queuenum; +}; + +#endif /* _IPT_NFQ_TARGET_H */ diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig index e47ba39eb657..2fa26a41fa47 100644 --- a/net/ipv4/netfilter/Kconfig +++ b/net/ipv4/netfilter/Kconfig @@ -110,11 +110,15 @@ config IP_NF_AMANDA To compile it as a module, choose M here. If unsure, say Y. config IP_NF_QUEUE - tristate "Userspace queueing via NETLINK" + tristate "IP Userspace queueing via NETLINK (OBSOLETE)" help Netfilter has the ability to queue packets to user space: the netlink device can be used to access them using this driver. + This option enables the old IPv4-only "ip_queue" implementation + which has been obsoleted by the new "nfnetlink_queue" code (see + CONFIG_NETFILTER_NETLINK_QUEUE). + To compile it as a module, choose M here. If unsure, say N. 
config IP_NF_IPTABLES diff --git a/net/ipv4/netfilter/Makefile b/net/ipv4/netfilter/Makefile index abf2a7d1a584..c2ae663b723f 100644 --- a/net/ipv4/netfilter/Makefile +++ b/net/ipv4/netfilter/Makefile @@ -91,3 +91,4 @@ obj-$(CONFIG_IP_NF_ARP_MANGLE) += arpt_mangle.o obj-$(CONFIG_IP_NF_ARPFILTER) += arptable_filter.o obj-$(CONFIG_IP_NF_QUEUE) += ip_queue.o +obj-$(CONFIG_NETFILTER_NETLINK_QUEUE) += ipt_NFQUEUE.o diff --git a/net/ipv4/netfilter/ipt_NFQUEUE.c b/net/ipv4/netfilter/ipt_NFQUEUE.c new file mode 100644 index 000000000000..3cedc9be8807 --- /dev/null +++ b/net/ipv4/netfilter/ipt_NFQUEUE.c @@ -0,0 +1,70 @@ +/* iptables module for using new netfilter netlink queue + * + * (C) 2005 by Harald Welte + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + */ + +#include +#include + +#include +#include +#include + +MODULE_AUTHOR("Harald Welte "); +MODULE_DESCRIPTION("iptables NFQUEUE target"); +MODULE_LICENSE("GPL"); + +static unsigned int +target(struct sk_buff **pskb, + const struct net_device *in, + const struct net_device *out, + unsigned int hooknum, + const void *targinfo, + void *userinfo) +{ + const struct ipt_NFQ_info *tinfo = targinfo; + + return NF_QUEUE_NR(tinfo->queuenum); +} + +static int +checkentry(const char *tablename, + const struct ipt_entry *e, + void *targinfo, + unsigned int targinfosize, + unsigned int hook_mask) +{ + if (targinfosize != IPT_ALIGN(sizeof(struct ipt_NFQ_info))) { + printk(KERN_WARNING "NFQUEUE: targinfosize %u != %Zu\n", + targinfosize, + IPT_ALIGN(sizeof(struct ipt_NFQ_info))); + return 0; + } + + return 1; +} + +static struct ipt_target ipt_NFQ_reg = { + .name = "NFQUEUE", + .target = target, + .checkentry = checkentry, + .me = THIS_MODULE, +}; + +static int __init init(void) +{ + return ipt_register_target(&ipt_NFQ_reg); +} + +static void __exit fini(void) +{ + 
ipt_unregister_target(&ipt_NFQ_reg); +} + +module_init(init); +module_exit(fini); diff --git a/net/ipv6/netfilter/Kconfig b/net/ipv6/netfilter/Kconfig index 77ec704c9ee3..cd1551983c63 100644 --- a/net/ipv6/netfilter/Kconfig +++ b/net/ipv6/netfilter/Kconfig @@ -10,13 +10,16 @@ menu "IPv6: Netfilter Configuration (EXPERIMENTAL)" # dep_tristate ' FTP protocol support' CONFIG_IP6_NF_FTP $CONFIG_IP6_NF_CONNTRACK #fi config IP6_NF_QUEUE - tristate "Userspace queueing via NETLINK" + tristate "IP6 Userspace queueing via NETLINK (OBSOLETE)" ---help--- This option adds a queue handler to the kernel for IPv6 - packets which lets us to receive the filtered packets - with QUEUE target using libiptc as we can do with - the IPv4 now. + packets which enables users to receive the filtered packets + with QUEUE target using libipq. + + This option enables the old IPv6-only "ip6_queue" implementation + which has been obsoleted by the new "nfnetlink_queue" code (see + CONFIG_NETFILTER_NETLINK_QUEUE). (C) Fernando Anton 2001 IPv64 Project - Work based in IPv64 draft by Arturo Azcorra.
diff --git a/net/ipv6/netfilter/Makefile b/net/ipv6/netfilter/Makefile index 2e51714953b6..847651dbcd2a 100644 --- a/net/ipv6/netfilter/Makefile +++ b/net/ipv6/netfilter/Makefile @@ -24,3 +24,4 @@ obj-$(CONFIG_IP6_NF_QUEUE) += ip6_queue.o obj-$(CONFIG_IP6_NF_TARGET_LOG) += ip6t_LOG.o obj-$(CONFIG_IP6_NF_RAW) += ip6table_raw.o obj-$(CONFIG_IP6_NF_MATCH_HL) += ip6t_hl.o +obj-$(CONFIG_NETFILTER_NETLINK_QUEUE) += ip6t_NFQUEUE.o diff --git a/net/ipv6/netfilter/ip6t_NFQUEUE.c b/net/ipv6/netfilter/ip6t_NFQUEUE.c new file mode 100644 index 000000000000..c6e3730e7409 --- /dev/null +++ b/net/ipv6/netfilter/ip6t_NFQUEUE.c @@ -0,0 +1,70 @@ +/* ip6tables module for using new netfilter netlink queue + * + * (C) 2005 by Harald Welte + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + */ + +#include +#include + +#include +#include +#include + +MODULE_AUTHOR("Harald Welte "); +MODULE_DESCRIPTION("ip6tables NFQUEUE target"); +MODULE_LICENSE("GPL"); + +static unsigned int +target(struct sk_buff **pskb, + const struct net_device *in, + const struct net_device *out, + unsigned int hooknum, + const void *targinfo, + void *userinfo) +{ + const struct ipt_NFQ_info *tinfo = targinfo; + + return NF_QUEUE_NR(tinfo->queuenum); +} + +static int +checkentry(const char *tablename, + const struct ip6t_entry *e, + void *targinfo, + unsigned int targinfosize, + unsigned int hook_mask) +{ + if (targinfosize != IP6T_ALIGN(sizeof(struct ipt_NFQ_info))) { + printk(KERN_WARNING "NFQUEUE: targinfosize %u != %Zu\n", + targinfosize, + IP6T_ALIGN(sizeof(struct ipt_NFQ_info))); + return 0; + } + + return 1; +} + +static struct ip6t_target ipt_NFQ_reg = { + .name = "NFQUEUE", + .target = target, + .checkentry = checkentry, + .me = THIS_MODULE, +}; + +static int __init init(void) +{ + return ip6t_register_target(&ipt_NFQ_reg); +} + +static void __exit fini(void) +{ 
+ ip6t_unregister_target(&ipt_NFQ_reg); +} + +module_init(init); +module_exit(fini); diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index 3629d3d1776d..f0eb23e5c5f1 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -3,3 +3,11 @@ config NETFILTER_NETLINK help If this option is enabled, the kernel will include support for the new netfilter netlink interface. + +config NETFILTER_NETLINK_QUEUE + tristate "Netfilter NFQUEUE over NFNETLINK interface" + depends on NETFILTER_NETLINK + help + If this option is enabled, the kernel will include support + for queueing packets via NFNETLINK. + diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile index 02e67d371941..14a0b187e75e 100644 --- a/net/netfilter/Makefile +++ b/net/netfilter/Makefile @@ -1 +1,2 @@ obj-$(CONFIG_NETFILTER_NETLINK) += nfnetlink.o +obj-$(CONFIG_NETFILTER_NETLINK_QUEUE) += nfnetlink_queue.o diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c new file mode 100644 index 000000000000..24032610c425 --- /dev/null +++ b/net/netfilter/nfnetlink_queue.c @@ -0,0 +1,877 @@ +/* + * This is a module which is used for queueing packets and communicating with + * userspace via nfnetlink. + * + * (C) 2005 by Harald Welte + * + * Based on the old ipv4-only ip_queue.c: + * (C) 2000-2002 James Morris + * (C) 2003-2005 Netfilter Core Team + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#define NFQNL_QMAX_DEFAULT 1024 + +#if 0 +#define QDEBUG(x, args ...) printk(KERN_DEBUG "%s(%d):%s(): " x, \ + __FILE__, __LINE__, __FUNCTION__, \ + ## args) +#else +#define QDEBUG(x, ...)
+#endif + +struct nfqnl_queue_entry { + struct list_head list; + struct nf_info *info; + struct sk_buff *skb; + unsigned int id; +}; + +struct nfqnl_instance { + struct hlist_node hlist; /* global list of queues */ + + int peer_pid; + unsigned int queue_maxlen; + unsigned int copy_range; + unsigned int queue_total; + unsigned int queue_dropped; + unsigned int queue_user_dropped; + + atomic_t id_sequence; /* 'sequence' of pkt ids */ + + u_int16_t queue_num; /* number of this queue */ + u_int8_t copy_mode; + + spinlock_t lock; + + struct list_head queue_list; /* packets in queue */ +}; + +typedef int (*nfqnl_cmpfn)(struct nfqnl_queue_entry *, unsigned long); + +static DEFINE_RWLOCK(instances_lock); + +/* + * Convert a 64-bit value from host to network (big-endian) byte order. + * + * cpu_to_be64() swaps the bytes on little-endian hosts and is a no-op + * on big-endian ones, which an unconditional byte-reversal loop is not. + */ +u_int64_t htonll(u_int64_t in) +{ + return cpu_to_be64(in); +} + +#define INSTANCE_BUCKETS 16 +static struct hlist_head instance_table[INSTANCE_BUCKETS]; + +static inline u_int8_t instance_hashfn(u_int16_t queue_num) +{ + return ((queue_num >> 8) | queue_num) % INSTANCE_BUCKETS; +} + +static struct nfqnl_instance * +__instance_lookup(u_int16_t queue_num) +{ + struct hlist_head *head; + struct hlist_node *pos; + struct nfqnl_instance *inst; + + head = &instance_table[instance_hashfn(queue_num)]; + hlist_for_each_entry(inst, pos, head, hlist) { + if (inst->queue_num == queue_num) + return inst; + } + return NULL; +} + +static struct nfqnl_instance * +instance_lookup(u_int16_t queue_num) +{ + struct nfqnl_instance *inst; + + read_lock_bh(&instances_lock); + inst = __instance_lookup(queue_num); + read_unlock_bh(&instances_lock); + + return inst; +} + +static struct nfqnl_instance * +instance_create(u_int16_t queue_num, int pid) +{ + struct nfqnl_instance *inst; + + QDEBUG("entering for queue_num=%u, pid=%d\n", queue_num, pid); + + write_lock_bh(&instances_lock); + if (__instance_lookup(queue_num)) { + inst = NULL; + QDEBUG("aborting, instance already
exists\n"); + goto out_unlock; + } + + inst = kmalloc(sizeof(*inst), GFP_ATOMIC); + if (!inst) + goto out_unlock; + + memset(inst, 0, sizeof(*inst)); + inst->queue_num = queue_num; + inst->peer_pid = pid; + inst->queue_maxlen = NFQNL_QMAX_DEFAULT; + inst->copy_range = 0xfffff; + inst->copy_mode = NFQNL_COPY_NONE; + atomic_set(&inst->id_sequence, 0); + inst->lock = SPIN_LOCK_UNLOCKED; + INIT_LIST_HEAD(&inst->queue_list); + + if (!try_module_get(THIS_MODULE)) + goto out_free; + + hlist_add_head(&inst->hlist, + &instance_table[instance_hashfn(queue_num)]); + + write_unlock_bh(&instances_lock); + + QDEBUG("successfully created new instance\n"); + + return inst; + +out_free: + kfree(inst); +out_unlock: + write_unlock_bh(&instances_lock); + return NULL; +} + +static void nfqnl_flush(struct nfqnl_instance *queue, int verdict); + +static void +_instance_destroy2(struct nfqnl_instance *inst, int lock) +{ + /* first pull it out of the global list */ + if (lock) + write_lock_bh(&instances_lock); + + QDEBUG("removing instance %p (queuenum=%u) from hash\n", + inst, inst->queue_num); + hlist_del(&inst->hlist); + + if (lock) + write_unlock_bh(&instances_lock); + + /* then flush all pending skbs from the queue */ + nfqnl_flush(inst, NF_DROP); + + /* and finally free the data structure */ + kfree(inst); + + module_put(THIS_MODULE); +} + +static inline void +__instance_destroy(struct nfqnl_instance *inst) +{ + _instance_destroy2(inst, 0); +} + +static inline void +instance_destroy(struct nfqnl_instance *inst) +{ + _instance_destroy2(inst, 1); +} + + + +static void +issue_verdict(struct nfqnl_queue_entry *entry, int verdict) +{ + QDEBUG("entering for entry %p, verdict %u\n", entry, verdict); + + /* TCP input path (and probably other bits) assume to be called + * from softirq context, not from syscall, like issue_verdict is + * called. TCP input path deadlocks with locks taken from timer + * softirq, e.g. 
We therefore emulate this by local_bh_disable() */ + + local_bh_disable(); + nf_reinject(entry->skb, entry->info, verdict); + local_bh_enable(); + + kfree(entry); +} + +static inline void +__enqueue_entry(struct nfqnl_instance *queue, + struct nfqnl_queue_entry *entry) +{ + list_add(&entry->list, &queue->queue_list); + queue->queue_total++; +} + +/* + * Find and return a queued entry matched by cmpfn, or return the last + * entry if cmpfn is NULL. + */ +static inline struct nfqnl_queue_entry * +__find_entry(struct nfqnl_instance *queue, nfqnl_cmpfn cmpfn, + unsigned long data) +{ + struct list_head *p; + + list_for_each_prev(p, &queue->queue_list) { + struct nfqnl_queue_entry *entry = (struct nfqnl_queue_entry *)p; + + if (!cmpfn || cmpfn(entry, data)) + return entry; + } + return NULL; +} + +static inline void +__dequeue_entry(struct nfqnl_instance *q, struct nfqnl_queue_entry *entry) +{ + list_del(&entry->list); + q->queue_total--; +} + +static inline struct nfqnl_queue_entry * +__find_dequeue_entry(struct nfqnl_instance *queue, + nfqnl_cmpfn cmpfn, unsigned long data) +{ + struct nfqnl_queue_entry *entry; + + entry = __find_entry(queue, cmpfn, data); + if (entry == NULL) + return NULL; + + __dequeue_entry(queue, entry); + return entry; +} + + +static inline void +__nfqnl_flush(struct nfqnl_instance *queue, int verdict) +{ + struct nfqnl_queue_entry *entry; + + while ((entry = __find_dequeue_entry(queue, NULL, 0))) + issue_verdict(entry, verdict); +} + +static inline int +__nfqnl_set_mode(struct nfqnl_instance *queue, + unsigned char mode, unsigned int range) +{ + int status = 0; + + switch (mode) { + case NFQNL_COPY_NONE: + case NFQNL_COPY_META: + queue->copy_mode = mode; + queue->copy_range = 0; + break; + + case NFQNL_COPY_PACKET: + queue->copy_mode = mode; + /* we're using struct nfattr which has 16bit nfa_len */ + if (range > 0xffff) + queue->copy_range = 0xffff; + else + queue->copy_range = range; + break; + + default: + status = -EINVAL; + + } + return 
status; +} + +static struct nfqnl_queue_entry * +find_dequeue_entry(struct nfqnl_instance *queue, + nfqnl_cmpfn cmpfn, unsigned long data) +{ + struct nfqnl_queue_entry *entry; + + spin_lock_bh(&queue->lock); + entry = __find_dequeue_entry(queue, cmpfn, data); + spin_unlock_bh(&queue->lock); + + return entry; +} + +static void +nfqnl_flush(struct nfqnl_instance *queue, int verdict) +{ + spin_lock_bh(&queue->lock); + __nfqnl_flush(queue, verdict); + spin_unlock_bh(&queue->lock); +} + +static struct sk_buff * +nfqnl_build_packet_message(struct nfqnl_instance *queue, + struct nfqnl_queue_entry *entry, int *errp) +{ + unsigned char *old_tail; + size_t size; + size_t data_len = 0; + struct sk_buff *skb; + struct nfqnl_msg_packet_hdr pmsg; + struct nlmsghdr *nlh; + struct nfgenmsg *nfmsg; + unsigned int tmp_uint; + + QDEBUG("entered\n"); + + /* all macros expand to constant values at compile time */ + size = NLMSG_SPACE(sizeof(struct nfqnl_msg_packet_hdr)) + + NLMSG_SPACE(sizeof(u_int32_t)) /* ifindex */ + + NLMSG_SPACE(sizeof(u_int32_t)) /* ifindex */ + + NLMSG_SPACE(sizeof(u_int32_t)) /* mark */ + + NLMSG_SPACE(sizeof(struct nfqnl_msg_packet_hw)) + + NLMSG_SPACE(sizeof(struct nfqnl_msg_packet_timestamp)); + + spin_lock_bh(&queue->lock); + + switch (queue->copy_mode) { + case NFQNL_COPY_META: + case NFQNL_COPY_NONE: + data_len = 0; + break; + + case NFQNL_COPY_PACKET: + if (queue->copy_range == 0 + || queue->copy_range > entry->skb->len) + data_len = entry->skb->len; + else + data_len = queue->copy_range; + + size += NLMSG_SPACE(data_len); + break; + + default: + *errp = -EINVAL; + spin_unlock_bh(&queue->lock); + return NULL; + } + + spin_unlock_bh(&queue->lock); + + skb = alloc_skb(size, GFP_ATOMIC); + if (!skb) + goto nlmsg_failure; + + old_tail= skb->tail; + nlh = NLMSG_PUT(skb, 0, 0, + NFNL_SUBSYS_QUEUE << 8 | NFQNL_MSG_PACKET, + sizeof(struct nfgenmsg)); + nfmsg = NLMSG_DATA(nlh); + nfmsg->nfgen_family = entry->info->pf; + nfmsg->version = NFNETLINK_V0; + 
nfmsg->res_id = htons(queue->queue_num); + + pmsg.packet_id = htonl(entry->id); + pmsg.hw_protocol = entry->skb->protocol; /* already in network byte order, must not be swapped again */ + pmsg.hook = entry->info->hook; + + NFA_PUT(skb, NFQA_PACKET_HDR, sizeof(pmsg), &pmsg); + + if (entry->info->indev) { + tmp_uint = htonl(entry->info->indev->ifindex); + NFA_PUT(skb, NFQA_IFINDEX_INDEV, sizeof(tmp_uint), &tmp_uint); + } + + if (entry->info->outdev) { + tmp_uint = htonl(entry->info->outdev->ifindex); + NFA_PUT(skb, NFQA_IFINDEX_OUTDEV, sizeof(tmp_uint), &tmp_uint); + } + + if (entry->skb->nfmark) { + tmp_uint = htonl(entry->skb->nfmark); + NFA_PUT(skb, NFQA_MARK, sizeof(u_int32_t), &tmp_uint); + } + + if (entry->info->indev && entry->skb->dev + && entry->skb->dev->hard_header_parse) { + struct nfqnl_msg_packet_hw phw; + + phw.hw_addrlen = + entry->skb->dev->hard_header_parse(entry->skb, + phw.hw_addr); + phw.hw_addrlen = htons(phw.hw_addrlen); + NFA_PUT(skb, NFQA_HWADDR, sizeof(phw), &phw); + } + + if (entry->skb->stamp.tv_sec) { + struct nfqnl_msg_packet_timestamp ts; + + ts.sec = htonll(entry->skb->stamp.tv_sec); + ts.usec = htonll(entry->skb->stamp.tv_usec); + + NFA_PUT(skb, NFQA_TIMESTAMP, sizeof(ts), &ts); + } + + if (data_len) { + struct nfattr *nfa; + int size = NFA_LENGTH(data_len); + + if (skb_tailroom(skb) < (int)NFA_SPACE(data_len)) { + printk(KERN_WARNING "nf_queue: no tailroom!\n"); + goto nlmsg_failure; + } + + nfa = (struct nfattr *)skb_put(skb, NFA_ALIGN(size)); + nfa->nfa_type = NFQA_PAYLOAD; + nfa->nfa_len = size; + + if (skb_copy_bits(entry->skb, 0, NFA_DATA(nfa), data_len)) + BUG(); + } + + nlh->nlmsg_len = skb->tail - old_tail; + return skb; + +nlmsg_failure: +nfattr_failure: + if (skb) + kfree_skb(skb); + *errp = -EINVAL; + if (net_ratelimit()) + printk(KERN_ERR "nf_queue: error creating packet message\n"); + return NULL; +} + +static int +nfqnl_enqueue_packet(struct sk_buff *skb, struct nf_info *info, + unsigned int queuenum, void *data) +{ + int status = -EINVAL; + struct sk_buff *nskb; + struct
nfqnl_instance *queue; + struct nfqnl_queue_entry *entry; + + QDEBUG("entered\n"); + + queue = instance_lookup(queuenum); + if (!queue) { + QDEBUG("no queue instance matching\n"); + return -EINVAL; + } + + if (queue->copy_mode == NFQNL_COPY_NONE) { + QDEBUG("mode COPY_NONE, aborting\n"); + return -EAGAIN; + } + + entry = kmalloc(sizeof(*entry), GFP_ATOMIC); + if (entry == NULL) { + if (net_ratelimit()) + printk(KERN_ERR + "nf_queue: OOM in nfqnl_enqueue_packet()\n"); + return -ENOMEM; + } + + entry->info = info; + entry->skb = skb; + entry->id = atomic_inc_return(&queue->id_sequence); + + nskb = nfqnl_build_packet_message(queue, entry, &status); + if (nskb == NULL) + goto err_out_free; + + spin_lock_bh(&queue->lock); + + if (!queue->peer_pid) + goto err_out_free_nskb; + + if (queue->queue_total >= queue->queue_maxlen) { + queue->queue_dropped++; + status = -ENOSPC; + if (net_ratelimit()) + printk(KERN_WARNING "ip_queue: full at %d entries, " + "dropping packets(s). Dropped: %d\n", + queue->queue_total, queue->queue_dropped); + goto err_out_free_nskb; + } + + /* nfnetlink_unicast will either free the nskb or add it to a socket */ + status = nfnetlink_unicast(nskb, queue->peer_pid, MSG_DONTWAIT); + if (status < 0) { + queue->queue_user_dropped++; + goto err_out_unlock; + } + + __enqueue_entry(queue, entry); + + spin_unlock_bh(&queue->lock); + return status; + +err_out_free_nskb: + kfree_skb(nskb); + +err_out_unlock: + spin_unlock_bh(&queue->lock); + +err_out_free: + kfree(entry); + return status; +} + +static int +nfqnl_mangle(void *data, int data_len, struct nfqnl_queue_entry *e) +{ + int diff; + + diff = data_len - e->skb->len; + if (diff < 0) + skb_trim(e->skb, data_len); + else if (diff > 0) { + if (data_len > 0xFFFF) + return -EINVAL; + if (diff > skb_tailroom(e->skb)) { + struct sk_buff *newskb; + + newskb = skb_copy_expand(e->skb, + skb_headroom(e->skb), + diff, + GFP_ATOMIC); + if (newskb == NULL) { + printk(KERN_WARNING "ip_queue: OOM " + "in mangle, 
dropping packet\n"); + return -ENOMEM; + } + if (e->skb->sk) + skb_set_owner_w(newskb, e->skb->sk); + kfree_skb(e->skb); + e->skb = newskb; + } + skb_put(e->skb, diff); + } + if (!skb_make_writable(&e->skb, data_len)) + return -ENOMEM; + memcpy(e->skb->data, data, data_len); + + return 0; +} + +static inline int +id_cmp(struct nfqnl_queue_entry *e, unsigned long id) +{ + return (id == e->id); +} + +static int +nfqnl_set_mode(struct nfqnl_instance *queue, + unsigned char mode, unsigned int range) +{ + int status; + + spin_lock_bh(&queue->lock); + status = __nfqnl_set_mode(queue, mode, range); + spin_unlock_bh(&queue->lock); + + return status; +} + +static int +dev_cmp(struct nfqnl_queue_entry *entry, unsigned long ifindex) +{ + if (entry->info->indev) + if (entry->info->indev->ifindex == ifindex) + return 1; + + if (entry->info->outdev) + if (entry->info->outdev->ifindex == ifindex) + return 1; + + return 0; +} + +/* drop all packets with either indev or outdev == ifindex from all queue + * instances */ +static void +nfqnl_dev_drop(int ifindex) +{ + int i; + + QDEBUG("entering for ifindex %u\n", ifindex); + + /* this only looks like we have to hold the readlock for a way too long + * time, issue_verdict(), nf_reinject(), ... 
- but we always only + * issue NF_DROP, which is processed directly in nf_reinject() */ + read_lock_bh(&instances_lock); + + for (i = 0; i < INSTANCE_BUCKETS; i++) { + struct hlist_node *tmp; + struct nfqnl_instance *inst; + struct hlist_head *head = &instance_table[i]; + + hlist_for_each_entry(inst, tmp, head, hlist) { + struct nfqnl_queue_entry *entry; + while ((entry = find_dequeue_entry(inst, dev_cmp, + ifindex)) != NULL) + issue_verdict(entry, NF_DROP); + } + } + + read_unlock_bh(&instances_lock); +} + +#define RCV_SKB_FAIL(err) do { netlink_ack(skb, nlh, (err)); return; } while (0) + +static int +nfqnl_rcv_dev_event(struct notifier_block *this, + unsigned long event, void *ptr) +{ + struct net_device *dev = ptr; + + /* Drop any packets associated with the downed device */ + if (event == NETDEV_DOWN) + nfqnl_dev_drop(dev->ifindex); + return NOTIFY_DONE; +} + +static struct notifier_block nfqnl_dev_notifier = { + .notifier_call = nfqnl_rcv_dev_event, +}; + +static int +nfqnl_rcv_nl_event(struct notifier_block *this, + unsigned long event, void *ptr) +{ + struct netlink_notify *n = ptr; + + if (event == NETLINK_URELEASE && + n->protocol == NETLINK_NETFILTER && n->pid) { + int i; + + /* destroy all instances for this pid */ + write_lock_bh(&instances_lock); + for (i = 0; i < INSTANCE_BUCKETS; i++) { + struct hlist_node *tmp, *t2; + struct nfqnl_instance *inst; + struct hlist_head *head = &instance_table[i]; + + hlist_for_each_entry_safe(inst, tmp, t2, head, hlist) { + if (n->pid == inst->peer_pid) + __instance_destroy(inst); + } + } + write_unlock_bh(&instances_lock); + } + return NOTIFY_DONE; +} + +static struct notifier_block nfqnl_rtnl_notifier = { + .notifier_call = nfqnl_rcv_nl_event, +}; + +static int +nfqnl_recv_verdict(struct sock *ctnl, struct sk_buff *skb, + struct nlmsghdr *nlh, struct nfattr *nfqa[], int *errp) +{ + struct nfgenmsg *nfmsg = NLMSG_DATA(nlh); + u_int16_t queue_num = ntohs(nfmsg->res_id); + + struct nfqnl_msg_verdict_hdr *vhdr; + struct 
nfqnl_instance *queue; + unsigned int verdict; + struct nfqnl_queue_entry *entry; + + queue = instance_lookup(queue_num); + if (!queue) + return -ENODEV; + + if (queue->peer_pid != NETLINK_CB(skb).pid) + return -EPERM; + + if (!nfqa[NFQA_VERDICT_HDR-1]) + return -EINVAL; + + vhdr = NFA_DATA(nfqa[NFQA_VERDICT_HDR-1]); + verdict = ntohl(vhdr->verdict); + + if ((verdict & NF_VERDICT_MASK) > NF_MAX_VERDICT) + return -EINVAL; + + entry = find_dequeue_entry(queue, id_cmp, ntohl(vhdr->id)); + if (entry == NULL) + return -ENOENT; + + if (nfqa[NFQA_PAYLOAD-1]) { + if (nfqnl_mangle(NFA_DATA(nfqa[NFQA_PAYLOAD-1]), + NFA_PAYLOAD(nfqa[NFQA_PAYLOAD-1]), entry) < 0) + verdict = NF_DROP; + } + + if (nfqa[NFQA_MARK-1]) + entry->skb->nfmark = ntohl(*(u_int32_t *)NFA_DATA(nfqa[NFQA_MARK-1])); /* mark the queued packet, not the netlink request skb */ + + issue_verdict(entry, verdict); + return 0; +} + +static int +nfqnl_recv_unsupp(struct sock *ctnl, struct sk_buff *skb, + struct nlmsghdr *nlh, struct nfattr *nfqa[], int *errp) +{ + return -ENOTSUPP; +} + +static int +nfqnl_recv_config(struct sock *ctnl, struct sk_buff *skb, + struct nlmsghdr *nlh, struct nfattr *nfqa[], int *errp) +{ + struct nfgenmsg *nfmsg = NLMSG_DATA(nlh); + u_int16_t queue_num = ntohs(nfmsg->res_id); + struct nfqnl_instance *queue; + + QDEBUG("entering for msg %u\n", NFNL_MSG_TYPE(nlh->nlmsg_type)); + + queue = instance_lookup(queue_num); + if (nfqa[NFQA_CFG_CMD-1]) { + struct nfqnl_msg_config_cmd *cmd; + cmd = NFA_DATA(nfqa[NFQA_CFG_CMD-1]); + QDEBUG("found CFG_CMD\n"); + + switch (cmd->command) { + case NFQNL_CFG_CMD_BIND: + if (queue) + return -EBUSY; + + queue = instance_create(queue_num, NETLINK_CB(skb).pid); + if (!queue) + return -EINVAL; + break; + case NFQNL_CFG_CMD_UNBIND: + if (!queue) + return -ENODEV; + + if (queue->peer_pid != NETLINK_CB(skb).pid) + return -EPERM; + + instance_destroy(queue); + return 0; /* queue has been freed; it must not reach the NFQA_CFG_PARAMS handling */ + case NFQNL_CFG_CMD_PF_BIND: + QDEBUG("registering queue handler for pf=%u\n", + ntohs(cmd->pf)); + return nf_register_queue_handler(ntohs(cmd->pf),
nfqnl_enqueue_packet, + NULL); + + break; + case NFQNL_CFG_CMD_PF_UNBIND: + QDEBUG("unregistering queue handler for pf=%u\n", + ntohs(cmd->pf)); + /* This is a bug and a feature. We can unregister + * other handlers(!) */ + return nf_unregister_queue_handler(ntohs(cmd->pf)); + break; + default: + return -EINVAL; + } + } else { + if (!queue) { + QDEBUG("no config command, and no instance ENOENT\n"); + return -ENOENT; + } + + if (queue->peer_pid != NETLINK_CB(skb).pid) { + QDEBUG("no config command, and wrong pid\n"); + return -EPERM; + } + } + + if (nfqa[NFQA_CFG_PARAMS-1]) { + struct nfqnl_msg_config_params *params; + params = NFA_DATA(nfqa[NFQA_CFG_PARAMS-1]); + + nfqnl_set_mode(queue, params->copy_mode, + ntohl(params->copy_range)); + } + + return 0; +} + +static struct nfnl_callback nfqnl_cb[NFQNL_MSG_MAX] = { + [NFQNL_MSG_PACKET] = { .call = nfqnl_recv_unsupp, + .cap_required = CAP_NET_ADMIN }, + [NFQNL_MSG_VERDICT] = { .call = nfqnl_recv_verdict, + .cap_required = CAP_NET_ADMIN }, + [NFQNL_MSG_CONFIG] = { .call = nfqnl_recv_config, + .cap_required = CAP_NET_ADMIN }, +}; + +static struct nfnetlink_subsystem nfqnl_subsys = { + .name = "nf_queue", + .subsys_id = NFNL_SUBSYS_QUEUE, + .cb_count = NFQNL_MSG_MAX, + .attr_count = NFQA_MAX, + .cb = nfqnl_cb, +}; + +static int +init_or_cleanup(int init) +{ + int status = -ENOMEM; + + if (!init) + goto cleanup; + + netlink_register_notifier(&nfqnl_rtnl_notifier); + status = nfnetlink_subsys_register(&nfqnl_subsys); + if (status < 0) { + printk(KERN_ERR "nf_queue: failed to create netlink socket\n"); + goto cleanup_netlink_notifier; + } + + register_netdevice_notifier(&nfqnl_dev_notifier); + return status; + +cleanup: + nf_unregister_queue_handlers(nfqnl_enqueue_packet); + unregister_netdevice_notifier(&nfqnl_dev_notifier); + nfnetlink_subsys_unregister(&nfqnl_subsys); + +cleanup_netlink_notifier: + netlink_unregister_notifier(&nfqnl_rtnl_notifier); + return status; +} + +static int __init init(void) +{ + + return 
init_or_cleanup(1); +} + +static void __exit fini(void) +{ + init_or_cleanup(0); +} + +MODULE_DESCRIPTION("netfilter packet queue handler"); +MODULE_AUTHOR("Harald Welte "); +MODULE_LICENSE("GPL"); +MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_QUEUE); + +module_init(init); +module_exit(fini); From 0bd1b59b15e4057101c89d4db15a3683c0d897f7 Mon Sep 17 00:00:00 2001 From: Andrew McDonald Date: Tue, 9 Aug 2005 19:44:42 -0700 Subject: [PATCH 288/584] [IPV6]: Check interface bindings on IPv6 raw socket reception Take account of whether a socket is bound to a particular device when selecting an IPv6 raw socket to receive a packet. Also perform this check when receiving IPv6 packets with router alert options. Signed-off-by: Andrew McDonald Signed-off-by: David S. Miller --- include/net/rawv6.h | 3 ++- net/ipv6/icmp.c | 3 ++- net/ipv6/ip6_output.c | 4 +++- net/ipv6/raw.c | 11 ++++++++--- 4 files changed, 15 insertions(+), 6 deletions(-) diff --git a/include/net/rawv6.h b/include/net/rawv6.h index 23fd9a6a221a..887009aa1f88 100644 --- a/include/net/rawv6.h +++ b/include/net/rawv6.h @@ -10,7 +10,8 @@ extern rwlock_t raw_v6_lock; extern void ipv6_raw_deliver(struct sk_buff *skb, int nexthdr); extern struct sock *__raw_v6_lookup(struct sock *sk, unsigned short num, - struct in6_addr *loc_addr, struct in6_addr *rmt_addr); + struct in6_addr *loc_addr, struct in6_addr *rmt_addr, + int dif); extern int rawv6_rcv(struct sock *sk, struct sk_buff *skb); diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index ff3ec9822e36..ee9f1d36346c 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -551,7 +551,8 @@ static void icmpv6_notify(struct sk_buff *skb, int type, int code, u32 info) read_lock(&raw_v6_lock); if ((sk = sk_head(&raw_v6_htable[hash])) != NULL) { - while((sk = __raw_v6_lookup(sk, nexthdr, daddr, saddr))) { + while((sk = __raw_v6_lookup(sk, nexthdr, daddr, saddr, + skb->dev->ifindex))) { rawv6_err(sk, skb, NULL, type, code, inner_offset, info); sk = sk_next(sk); } diff --git 
a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index a7fcbcc83576..00f85148b85f 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -277,7 +277,9 @@ static int ip6_call_ra_chain(struct sk_buff *skb, int sel) read_lock(&ip6_ra_lock); for (ra = ip6_ra_chain; ra; ra = ra->next) { struct sock *sk = ra->sk; - if (sk && ra->sel == sel) { + if (sk && ra->sel == sel && + (!sk->sk_bound_dev_if || + sk->sk_bound_dev_if == skb->dev->ifindex)) { if (last) { struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC); if (skb2) diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index 1d4d75b34d32..9db0de81f074 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -81,7 +81,8 @@ static void raw_v6_unhash(struct sock *sk) /* Grumble... icmp and ip_input want to get at this... */ struct sock *__raw_v6_lookup(struct sock *sk, unsigned short num, - struct in6_addr *loc_addr, struct in6_addr *rmt_addr) + struct in6_addr *loc_addr, struct in6_addr *rmt_addr, + int dif) { struct hlist_node *node; int is_multicast = ipv6_addr_is_multicast(loc_addr); @@ -94,6 +95,9 @@ struct sock *__raw_v6_lookup(struct sock *sk, unsigned short num, !ipv6_addr_equal(&np->daddr, rmt_addr)) continue; + if (sk->sk_bound_dev_if && sk->sk_bound_dev_if != dif) + continue; + if (!ipv6_addr_any(&np->rcv_saddr)) { if (ipv6_addr_equal(&np->rcv_saddr, loc_addr)) goto found; @@ -160,7 +164,7 @@ void ipv6_raw_deliver(struct sk_buff *skb, int nexthdr) if (sk == NULL) goto out; - sk = __raw_v6_lookup(sk, nexthdr, daddr, saddr); + sk = __raw_v6_lookup(sk, nexthdr, daddr, saddr, skb->dev->ifindex); while (sk) { if (nexthdr != IPPROTO_ICMPV6 || !icmpv6_filter(sk, skb)) { @@ -170,7 +174,8 @@ void ipv6_raw_deliver(struct sk_buff *skb, int nexthdr) if (clone) rawv6_rcv(sk, clone); } - sk = __raw_v6_lookup(sk_next(sk), nexthdr, daddr, saddr); + sk = __raw_v6_lookup(sk_next(sk), nexthdr, daddr, saddr, + skb->dev->ifindex); } out: read_unlock(&raw_v6_lock); From d13964f4490157b8a290903362bfbc54f750a6bc Mon Sep 17 00:00:00 2001 
From: Patrick McHardy Date: Tue, 9 Aug 2005 19:45:02 -0700 Subject: [PATCH 289/584] [IPV4/6]: Check if packet was actually delivered to a raw socket to decide whether to send an ICMP unreachable Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/net/raw.h | 2 +- include/net/rawv6.h | 2 +- net/ipv4/ip_input.c | 4 ++-- net/ipv4/raw.c | 5 ++++- net/ipv6/ip6_input.c | 4 ++-- net/ipv6/raw.c | 5 ++++- 6 files changed, 14 insertions(+), 8 deletions(-) diff --git a/include/net/raw.h b/include/net/raw.h index 1c411c45587a..1c4bc3e6809f 100644 --- a/include/net/raw.h +++ b/include/net/raw.h @@ -37,6 +37,6 @@ extern struct sock *__raw_v4_lookup(struct sock *sk, unsigned short num, unsigned long raddr, unsigned long laddr, int dif); -extern void raw_v4_input(struct sk_buff *skb, struct iphdr *iph, int hash); +extern int raw_v4_input(struct sk_buff *skb, struct iphdr *iph, int hash); #endif /* _RAW_H */ diff --git a/include/net/rawv6.h b/include/net/rawv6.h index 887009aa1f88..14476a71725e 100644 --- a/include/net/rawv6.h +++ b/include/net/rawv6.h @@ -7,7 +7,7 @@ extern struct hlist_head raw_v6_htable[RAWV6_HTABLE_SIZE]; extern rwlock_t raw_v6_lock; -extern void ipv6_raw_deliver(struct sk_buff *skb, int nexthdr); +extern int ipv6_raw_deliver(struct sk_buff *skb, int nexthdr); extern struct sock *__raw_v6_lookup(struct sock *sk, unsigned short num, struct in6_addr *loc_addr, struct in6_addr *rmt_addr, diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c index d603247bdfe9..81e18023dc19 100644 --- a/net/ipv4/ip_input.c +++ b/net/ipv4/ip_input.c @@ -225,8 +225,8 @@ static inline int ip_local_deliver_finish(struct sk_buff *skb) /* If there maybe a raw socket we must check - if not we * don't care less */ - if (raw_sk) - raw_v4_input(skb, skb->nh.iph, hash); + if (raw_sk && !raw_v4_input(skb, skb->nh.iph, hash)) + raw_sk = NULL; if ((ipprot = rcu_dereference(inet_protos[hash])) != NULL) { int ret; diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index 
d1835b1bc8c4..e222c5c26b32 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -150,10 +150,11 @@ static __inline__ int icmp_filter(struct sock *sk, struct sk_buff *skb) * RFC 1122: SHOULD pass TOS value up to the transport layer. * -> It does. And not only TOS, but all IP header. */ -void raw_v4_input(struct sk_buff *skb, struct iphdr *iph, int hash) +int raw_v4_input(struct sk_buff *skb, struct iphdr *iph, int hash) { struct sock *sk; struct hlist_head *head; + int delivered = 0; read_lock(&raw_v4_lock); head = &raw_v4_htable[hash]; @@ -164,6 +165,7 @@ void raw_v4_input(struct sk_buff *skb, struct iphdr *iph, int hash) skb->dev->ifindex); while (sk) { + delivered = 1; if (iph->protocol != IPPROTO_ICMP || !icmp_filter(sk, skb)) { struct sk_buff *clone = skb_clone(skb, GFP_ATOMIC); @@ -177,6 +179,7 @@ void raw_v4_input(struct sk_buff *skb, struct iphdr *iph, int hash) } out: read_unlock(&raw_v4_lock); + return delivered; } void raw_err (struct sock *sk, struct sk_buff *skb, u32 info) diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c index ab51c0369e15..6e3480426939 100644 --- a/net/ipv6/ip6_input.c +++ b/net/ipv6/ip6_input.c @@ -166,8 +166,8 @@ resubmit: nexthdr = skb->nh.raw[nhoff]; raw_sk = sk_head(&raw_v6_htable[nexthdr & (MAX_INET_PROTOS - 1)]); - if (raw_sk) - ipv6_raw_deliver(skb, nexthdr); + if (raw_sk && !ipv6_raw_deliver(skb, nexthdr)) + raw_sk = NULL; hash = nexthdr & (MAX_INET_PROTOS - 1); if ((ipprot = rcu_dereference(inet6_protos[hash])) != NULL) { diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index 9db0de81f074..a082646e6f16 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -141,11 +141,12 @@ static __inline__ int icmpv6_filter(struct sock *sk, struct sk_buff *skb) * * Caller owns SKB so we must make clones. 
*/ -void ipv6_raw_deliver(struct sk_buff *skb, int nexthdr) +int ipv6_raw_deliver(struct sk_buff *skb, int nexthdr) { struct in6_addr *saddr; struct in6_addr *daddr; struct sock *sk; + int delivered = 0; __u8 hash; saddr = &skb->nh.ipv6h->saddr; @@ -167,6 +168,7 @@ void ipv6_raw_deliver(struct sk_buff *skb, int nexthdr) sk = __raw_v6_lookup(sk, nexthdr, daddr, saddr, skb->dev->ifindex); while (sk) { + delivered = 1; if (nexthdr != IPPROTO_ICMPV6 || !icmpv6_filter(sk, skb)) { struct sk_buff *clone = skb_clone(skb, GFP_ATOMIC); @@ -179,6 +181,7 @@ void ipv6_raw_deliver(struct sk_buff *skb, int nexthdr) } out: read_unlock(&raw_v6_lock); + return delivered; } /* This cleans up af_inet6 a bit. -DaveM */ From e6848976b721eeb5551cd94673faafeef78d9f35 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 9 Aug 2005 19:45:38 -0700 Subject: [PATCH 290/584] [NET]: Cleanup INET_REFCNT_DEBUG code Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. Miller --- include/net/inet_common.h | 1 - include/net/ipv6.h | 1 - include/net/sock.h | 32 +++++++++++++++++++++++++++++++- include/net/tcp.h | 2 +- net/core/sock.c | 6 +----- net/ipv4/af_inet.c | 18 ++---------------- net/ipv4/tcp.c | 7 +------ net/ipv4/tcp_minisocks.c | 20 ++++++++++++++++---- net/ipv6/af_inet6.c | 31 +++++++++++-------------------- net/ipv6/ipv6_sockglue.c | 15 ++++++++++++--- net/ipv6/tcp_ipv6.c | 18 +++++++++--------- net/sctp/ipv6.c | 5 +---- net/sctp/protocol.c | 4 +--- 13 files changed, 86 insertions(+), 74 deletions(-) diff --git a/include/net/inet_common.h b/include/net/inet_common.h index fbc1f4d140d8..1fbd94d8a316 100644 --- a/include/net/inet_common.h +++ b/include/net/inet_common.h @@ -29,7 +29,6 @@ extern unsigned int inet_poll(struct file * file, struct socket *sock, struct p extern int inet_listen(struct socket *sock, int backlog); extern void inet_sock_destruct(struct sock *sk); -extern atomic_t inet_sock_nr; extern int inet_bind(struct socket *sock, struct sockaddr *uaddr, 
int addr_len); diff --git a/include/net/ipv6.h b/include/net/ipv6.h index 533fc074ed90..c5a02ddc594a 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -145,7 +145,6 @@ DECLARE_SNMP_STAT(struct udp_mib, udp_stats_in6); #define UDP6_INC_STATS(field) SNMP_INC_STATS(udp_stats_in6, field) #define UDP6_INC_STATS_BH(field) SNMP_INC_STATS_BH(udp_stats_in6, field) #define UDP6_INC_STATS_USER(field) SNMP_INC_STATS_USER(udp_stats_in6, field) -extern atomic_t inet6_sock_nr; int snmp6_register_dev(struct inet6_dev *idev); int snmp6_unregister_dev(struct inet6_dev *idev); diff --git a/include/net/sock.h b/include/net/sock.h index e9b1dbab90d0..11b81551041e 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -491,6 +491,9 @@ extern int sk_wait_data(struct sock *sk, long *timeo); struct request_sock_ops; +/* Here is the right place to enable sock refcounting debugging */ +#define SOCK_REFCNT_DEBUG + /* Networking protocol blocks we attach to sockets. * socket layer -> transport layer interface * transport -> network interface is defined by struct inet_proto @@ -561,7 +564,9 @@ struct proto { char name[32]; struct list_head node; - +#ifdef SOCK_REFCNT_DEBUG + atomic_t socks; +#endif struct { int inuse; u8 __pad[SMP_CACHE_BYTES - sizeof(int)]; @@ -571,6 +576,31 @@ struct proto { extern int proto_register(struct proto *prot, int alloc_slab); extern void proto_unregister(struct proto *prot); +#ifdef SOCK_REFCNT_DEBUG +static inline void sk_refcnt_debug_inc(struct sock *sk) +{ + atomic_inc(&sk->sk_prot->socks); +} + +static inline void sk_refcnt_debug_dec(struct sock *sk) +{ + atomic_dec(&sk->sk_prot->socks); + printk(KERN_DEBUG "%s socket %p released, %d are still alive\n", + sk->sk_prot->name, sk, atomic_read(&sk->sk_prot->socks)); +} + +static inline void sk_refcnt_debug_release(const struct sock *sk) +{ + if (atomic_read(&sk->sk_refcnt) != 1) + printk(KERN_DEBUG "Destruction of the %s socket %p delayed, refcnt=%d\n", + sk->sk_prot->name, sk, 
atomic_read(&sk->sk_refcnt)); +} +#else /* SOCK_REFCNT_DEBUG */ +#define sk_refcnt_debug_inc(sk) do { } while (0) +#define sk_refcnt_debug_dec(sk) do { } while (0) +#define sk_refcnt_debug_release(sk) do { } while (0) +#endif /* SOCK_REFCNT_DEBUG */ + /* Called with local bh disabled */ static __inline__ void sock_prot_inc_use(struct proto *prot) { diff --git a/include/net/tcp.h b/include/net/tcp.h index 5010f0c5a56e..31984733777b 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -306,7 +306,7 @@ extern kmem_cache_t *tcp_timewait_cachep; static inline void tcp_tw_put(struct tcp_tw_bucket *tw) { if (atomic_dec_and_test(&tw->tw_refcnt)) { -#ifdef INET_REFCNT_DEBUG +#ifdef SOCK_REFCNT_DEBUG printk(KERN_DEBUG "tw_bucket %p released\n", tw); #endif kmem_cache_free(tcp_timewait_cachep, tw); diff --git a/net/core/sock.c b/net/core/sock.c index 51a5e7ddee85..a1a23be10aa3 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -1367,11 +1367,7 @@ void sk_common_release(struct sock *sk) xfrm_sk_free_policy(sk); -#ifdef INET_REFCNT_DEBUG - if (atomic_read(&sk->sk_refcnt) != 1) - printk(KERN_DEBUG "Destruction of the socket %p delayed, c=%d\n", - sk, atomic_read(&sk->sk_refcnt)); -#endif + sk_refcnt_debug_release(sk); sock_put(sk); } diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 163ae4068b5f..9e83d7773d8f 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -114,10 +114,6 @@ DEFINE_SNMP_STAT(struct linux_mib, net_statistics); -#ifdef INET_REFCNT_DEBUG -atomic_t inet_sock_nr; -#endif - extern void ip_mc_drop_socket(struct sock *sk); /* The inetsw table contains everything that inet_create needs to @@ -153,11 +149,7 @@ void inet_sock_destruct(struct sock *sk) if (inet->opt) kfree(inet->opt); dst_release(sk->sk_dst_cache); -#ifdef INET_REFCNT_DEBUG - atomic_dec(&inet_sock_nr); - printk(KERN_DEBUG "INET socket %p released, %d are still alive\n", - sk, atomic_read(&inet_sock_nr)); -#endif + sk_refcnt_debug_dec(sk); } /* @@ -317,9 +309,7 @@ static int 
inet_create(struct socket *sock, int protocol) inet->mc_index = 0; inet->mc_list = NULL; -#ifdef INET_REFCNT_DEBUG - atomic_inc(&inet_sock_nr); -#endif + sk_refcnt_debug_inc(sk); if (inet->num) { /* It assumes that any protocol which allows @@ -1205,7 +1195,3 @@ EXPORT_SYMBOL(inet_stream_ops); EXPORT_SYMBOL(inet_unregister_protosw); EXPORT_SYMBOL(net_statistics); EXPORT_SYMBOL(sysctl_ip_nonlocal_bind); - -#ifdef INET_REFCNT_DEBUG -EXPORT_SYMBOL(inet_sock_nr); -#endif diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 42a2e2ccd430..20159a3dafb3 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -1580,12 +1580,7 @@ void tcp_destroy_sock(struct sock *sk) xfrm_sk_free_policy(sk); -#ifdef INET_REFCNT_DEBUG - if (atomic_read(&sk->sk_refcnt) != 1) { - printk(KERN_DEBUG "Destruction TCP %p delayed, c=%d\n", - sk, atomic_read(&sk->sk_refcnt)); - } -#endif + sk_refcnt_debug_release(sk); atomic_dec(&tcp_orphan_count); sock_put(sk); diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index f42a284164b7..f8e288c8d693 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -84,7 +84,7 @@ static void tcp_timewait_kill(struct tcp_tw_bucket *tw) tcp_bucket_destroy(tb); spin_unlock(&bhead->lock); -#ifdef INET_REFCNT_DEBUG +#ifdef SOCK_REFCNT_DEBUG if (atomic_read(&tw->tw_refcnt) != 1) { printk(KERN_DEBUG "tw_bucket %p refcnt=%d\n", tw, atomic_read(&tw->tw_refcnt)); @@ -799,9 +799,21 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req, newsk->sk_err = 0; newsk->sk_priority = 0; atomic_set(&newsk->sk_refcnt, 2); -#ifdef INET_REFCNT_DEBUG - atomic_inc(&inet_sock_nr); -#endif + + /* + * Increment the counter in the same struct proto as the master + * sock (sk_refcnt_debug_inc uses newsk->sk_prot->socks, that + * is the same as sk->sk_prot->socks, as this field was copied + * with memcpy), same rationale as the first comment in this + * function. 
+ * + * This _changes_ the previous behaviour, where + * tcp_create_openreq_child always was incrementing the + * equivalent to tcp_prot->socks (inet_sock_nr), so this have + * to be taken into account in all callers. -acme + */ + sk_refcnt_debug_inc(newsk); + atomic_inc(&tcp_sockets_allocated); if (sock_flag(newsk, SOCK_KEEPOPEN)) diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index 574047353628..7df2ccb380d9 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -86,26 +86,12 @@ extern void if6_proc_exit(void); int sysctl_ipv6_bindv6only; -#ifdef INET_REFCNT_DEBUG -atomic_t inet6_sock_nr; -EXPORT_SYMBOL(inet6_sock_nr); -#endif - /* The inetsw table contains everything that inet_create needs to * build a new socket. */ static struct list_head inetsw6[SOCK_MAX]; static DEFINE_SPINLOCK(inetsw6_lock); -static void inet6_sock_destruct(struct sock *sk) -{ - inet_sock_destruct(sk); - -#ifdef INET_REFCNT_DEBUG - atomic_dec(&inet6_sock_nr); -#endif -} - static __inline__ struct ipv6_pinfo *inet6_sk_generic(struct sock *sk) { const int offset = sk->sk_prot->obj_size - sizeof(struct ipv6_pinfo); @@ -186,7 +172,7 @@ static int inet6_create(struct socket *sock, int protocol) inet->hdrincl = 1; } - sk->sk_destruct = inet6_sock_destruct; + sk->sk_destruct = inet_sock_destruct; sk->sk_family = PF_INET6; sk->sk_protocol = protocol; @@ -213,12 +199,17 @@ static int inet6_create(struct socket *sock, int protocol) inet->pmtudisc = IP_PMTUDISC_DONT; else inet->pmtudisc = IP_PMTUDISC_WANT; + /* + * Increment only the relevant sk_prot->socks debug field, this changes + * the previous behaviour of incrementing both the equivalent to + * answer->prot->socks (inet6_sock_nr) and inet_sock_nr. + * + * This allows better debug granularity as we'll know exactly how many + * UDPv6, TCPv6, etc socks were allocated, not the sum of all IPv6 + * transport protocol socks. 
-acme + */ + sk_refcnt_debug_inc(sk); - -#ifdef INET_REFCNT_DEBUG - atomic_inc(&inet6_sock_nr); - atomic_inc(&inet_sock_nr); -#endif if (inet->num) { /* It assumes that any protocol which allows * the user to assign a number at socket diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index 3bc144a79fa5..76fe23925d77 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c @@ -163,6 +163,13 @@ int ipv6_setsockopt(struct sock *sk, int level, int optname, fl6_free_socklist(sk); ipv6_sock_mc_close(sk); + /* + * Sock is moving from IPv6 to IPv4 (sk_prot), so + * remove it from the refcnt debug socks count in the + * original family... + */ + sk_refcnt_debug_dec(sk); + if (sk->sk_protocol == IPPROTO_TCP) { struct tcp_sock *tp = tcp_sk(sk); @@ -192,9 +199,11 @@ int ipv6_setsockopt(struct sock *sk, int level, int optname, kfree_skb(pktopt); sk->sk_destruct = inet_sock_destruct; -#ifdef INET_REFCNT_DEBUG - atomic_dec(&inet6_sock_nr); -#endif + /* + * ... and add it to the refcnt debug socks count + * in the new family. -acme + */ + sk_refcnt_debug_inc(sk); module_put(THIS_MODULE); retv = 0; break; diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index ef29cfd936d3..885e05bd99f6 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1407,12 +1407,11 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb, newnp->mcast_oif = tcp_v6_iif(skb); newnp->mcast_hops = skb->nh.ipv6h->hop_limit; - /* Charge newly allocated IPv6 socket. Though it is mapped, - * it is IPv6 yet. + /* + * No need to charge this sock to the relevant IPv6 refcnt debug socks count + * here, tcp_create_openreq_child now does this for us, see the comment in + * that function for the gory details. -acme */ -#ifdef INET_REFCNT_DEBUG - atomic_inc(&inet6_sock_nr); -#endif /* It is tricky place. Until this moment IPv4 tcp worked with IPv6 af_tcp.af_specific. 
@@ -1467,10 +1466,11 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb, if (newsk == NULL) goto out; - /* Charge newly allocated IPv6 socket */ -#ifdef INET_REFCNT_DEBUG - atomic_inc(&inet6_sock_nr); -#endif + /* + * No need to charge this sock to the relevant IPv6 refcnt debug socks + * count here, tcp_create_openreq_child now does this for us, see the + * comment in that function for the gory details. -acme + */ ip6_dst_store(newsk, dst, NULL); newsk->sk_route_caps = dst->dev->features & diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c index e9b2fd480d61..4a6421a9fcab 100644 --- a/net/sctp/ipv6.c +++ b/net/sctp/ipv6.c @@ -641,10 +641,7 @@ static struct sock *sctp_v6_create_accept_sk(struct sock *sk, else newinet->pmtudisc = IP_PMTUDISC_WANT; -#ifdef INET_REFCNT_DEBUG - atomic_inc(&inet6_sock_nr); - atomic_inc(&inet_sock_nr); -#endif + sk_refcnt_debug_inc(newsk); if (newsk->sk_prot->init(newsk)) { sk_common_release(newsk); diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index ce9245e71fca..8d3f8096b873 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -593,9 +593,7 @@ static struct sock *sctp_v4_create_accept_sk(struct sock *sk, newinet->mc_index = 0; newinet->mc_list = NULL; -#ifdef INET_REFCNT_DEBUG - atomic_inc(&inet_sock_nr); -#endif + sk_refcnt_debug_inc(newsk); if (newsk->sk_prot->init(newsk)) { sk_common_release(newsk); From 614c6cb4f225a7da9f13e5dd0fac3b531078eb9f Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 9 Aug 2005 19:47:37 -0700 Subject: [PATCH 291/584] [SOCK]: Rename __tcp_v4_rehash to __sk_prot_rehash This operation was already generic and DCCP will use it. Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. 
Miller --- include/net/sock.h | 9 +++++++++ net/ipv4/tcp_ipv4.c | 11 +---------- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/include/net/sock.h b/include/net/sock.h index 11b81551041e..f91ee82522ff 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -612,6 +612,15 @@ static __inline__ void sock_prot_dec_use(struct proto *prot) prot->stats[smp_processor_id()].inuse--; } +/* With per-bucket locks this operation is not-atomic, so that + * this version is not worse. + */ +static inline void __sk_prot_rehash(struct sock *sk) +{ + sk->sk_prot->unhash(sk); + sk->sk_prot->hash(sk); +} + /* About 10 seconds */ #define SOCK_DESTROY_TIME (10*HZ) diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 67c670886c1f..c7c99d336368 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1834,15 +1834,6 @@ do_time_wait: goto discard_it; } -/* With per-bucket locks this operation is not-atomic, so that - * this version is not worse. - */ -static void __tcp_v4_rehash(struct sock *sk) -{ - sk->sk_prot->unhash(sk); - sk->sk_prot->hash(sk); -} - static int tcp_v4_reselect_saddr(struct sock *sk) { struct inet_sock *inet = inet_sk(sk); @@ -1889,7 +1880,7 @@ static int tcp_v4_reselect_saddr(struct sock *sk) * Besides that, it does not check for connection * uniqueness. Wait for troubles. */ - __tcp_v4_rehash(sk); + __sk_prot_rehash(sk); return 0; } From 6cbb0df788b90777a7ed0f9d8261260353f48076 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 9 Aug 2005 19:49:02 -0700 Subject: [PATCH 292/584] [SOCK]: Introduce sk_setup_caps From tcp_v4_setup_caps, that always is preceded by a call to __sk_dst_set, so coalesce this sequence into sk_setup_caps, removing one call to a TCP function in the IP layer. Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. 
Miller --- include/net/sock.h | 10 ++++++++++ include/net/tcp.h | 9 --------- net/ipv4/ip_output.c | 7 ++----- net/ipv4/tcp_ipv4.c | 12 ++++-------- net/ipv4/tcp_minisocks.c | 1 + 5 files changed, 17 insertions(+), 22 deletions(-) diff --git a/include/net/sock.h b/include/net/sock.h index f91ee82522ff..69d869e41c35 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -1025,6 +1025,16 @@ sk_dst_check(struct sock *sk, u32 cookie) return dst; } +static inline void sk_setup_caps(struct sock *sk, struct dst_entry *dst) +{ + __sk_dst_set(sk, dst); + sk->sk_route_caps = dst->dev->features; + if (sk->sk_route_caps & NETIF_F_TSO) { + if (sock_flag(sk, SOCK_NO_LARGESEND) || dst->header_len) + sk->sk_route_caps &= ~NETIF_F_TSO; + } +} + static inline void sk_charge_skb(struct sock *sk, struct sk_buff *skb) { sk->sk_wmem_queued += skb->truesize; diff --git a/include/net/tcp.h b/include/net/tcp.h index 31984733777b..d95661a3aeeb 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1658,15 +1658,6 @@ static inline int tcp_paws_check(const struct tcp_options_received *rx_opt, int return 1; } -static inline void tcp_v4_setup_caps(struct sock *sk, struct dst_entry *dst) -{ - sk->sk_route_caps = dst->dev->features; - if (sk->sk_route_caps & NETIF_F_TSO) { - if (sock_flag(sk, SOCK_NO_LARGESEND) || dst->header_len) - sk->sk_route_caps &= ~NETIF_F_TSO; - } -} - #define TCP_CHECK_TIMER(sk) do { } while (0) static inline int tcp_use_frto(const struct sock *sk) diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index c934f5316c3b..c72fc878f06d 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -69,13 +69,10 @@ #include #include #include -#include -#include #include #include #include #include -#include #include #include #include @@ -84,6 +81,7 @@ #include #include #include +#include /* * Shall we try to damage output packets if routing dev changes? 
@@ -329,8 +327,7 @@ int ip_queue_xmit(struct sk_buff *skb, int ipfragok) if (ip_route_output_flow(&rt, &fl, sk, 0)) goto no_route; } - __sk_dst_set(sk, &rt->u.dst); - tcp_v4_setup_caps(sk, &rt->u.dst); + sk_setup_caps(sk, &rt->u.dst); } skb->dst = dst_clone(&rt->u.dst); diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index c7c99d336368..4a5daecbd2ac 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -837,8 +837,7 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) goto failure; /* OK, now commit destination to socket. */ - __sk_dst_set(sk, &rt->u.dst); - tcp_v4_setup_caps(sk, &rt->u.dst); + sk_setup_caps(sk, &rt->u.dst); if (!tp->write_seq) tp->write_seq = secure_tcp_sequence_number(inet->saddr, @@ -1553,8 +1552,7 @@ struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb, if (!newsk) goto exit; - newsk->sk_dst_cache = dst; - tcp_v4_setup_caps(newsk, dst); + sk_setup_caps(newsk, dst); newtp = tcp_sk(newsk); newinet = inet_sk(newsk); @@ -1855,8 +1853,7 @@ static int tcp_v4_reselect_saddr(struct sock *sk) if (err) return err; - __sk_dst_set(sk, &rt->u.dst); - tcp_v4_setup_caps(sk, &rt->u.dst); + sk_setup_caps(sk, &rt->u.dst); new_saddr = rt->rt_src; @@ -1914,8 +1911,7 @@ int tcp_v4_rebuild_header(struct sock *sk) err = ip_route_output_flow(&rt, &fl, sk, 0); } if (!err) { - __sk_dst_set(sk, &rt->u.dst); - tcp_v4_setup_caps(sk, &rt->u.dst); + sk_setup_caps(sk, &rt->u.dst); return 0; } diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index f8e288c8d693..7c46a553c4af 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -711,6 +711,7 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req, bh_lock_sock(newsk); rwlock_init(&newsk->sk_dst_lock); + newsk->sk_dst_cache = NULL; atomic_set(&newsk->sk_rmem_alloc, 0); skb_queue_head_init(&newsk->sk_receive_queue); atomic_set(&newsk->sk_wmem_alloc, 0); From 32519f11d38ea8f4f60896763bacec7db1760f9c Mon Sep 17 
00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 9 Aug 2005 19:50:02 -0700 Subject: [PATCH 293/584] [INET]: Introduce inet_sk_rebuild_header From tcp_v4_rebuild_header, that already was pretty generic, I only needed to use sk->sk_protocol instead of the hardcoded IPPROTO_TCP and establish the requirement that INET transport layer protocols that want to use this function map TCP_SYN_SENT to its equivalent state. Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. Miller --- include/linux/ip.h | 2 + include/net/tcp.h | 2 - net/ipv4/af_inet.c | 113 +++++++++++++++++++++++++++++++++++++++++++ net/ipv4/ip_output.c | 5 -- net/ipv4/tcp_ipv4.c | 98 +------------------------------------ net/ipv6/tcp_ipv6.c | 2 +- 6 files changed, 117 insertions(+), 105 deletions(-) diff --git a/include/linux/ip.h b/include/linux/ip.h index 31e7cedd9f84..33e8a19a1a0f 100644 --- a/include/linux/ip.h +++ b/include/linux/ip.h @@ -196,6 +196,8 @@ static inline void inet_sk_copy_descendant(struct sock *sk_to, #endif #endif +extern int inet_sk_rebuild_header(struct sock *sk); + struct iphdr { #if defined(__LITTLE_ENDIAN_BITFIELD) __u8 ihl:4, diff --git a/include/net/tcp.h b/include/net/tcp.h index d95661a3aeeb..0c769adb0463 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -799,8 +799,6 @@ extern void tcp_parse_options(struct sk_buff *skb, * TCP v4 functions exported for the inet6 API */ -extern int tcp_v4_rebuild_header(struct sock *sk); - extern int tcp_v4_build_header(struct sock *sk, struct sk_buff *skb); diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 9e83d7773d8f..7137e6420d66 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -951,6 +951,119 @@ void inet_unregister_protosw(struct inet_protosw *p) } } +/* + * Shall we try to damage output packets if routing dev changes? 
+ */ + +int sysctl_ip_dynaddr; + +static int inet_sk_reselect_saddr(struct sock *sk) +{ + struct inet_sock *inet = inet_sk(sk); + int err; + struct rtable *rt; + __u32 old_saddr = inet->saddr; + __u32 new_saddr; + __u32 daddr = inet->daddr; + + if (inet->opt && inet->opt->srr) + daddr = inet->opt->faddr; + + /* Query new route. */ + err = ip_route_connect(&rt, daddr, 0, + RT_CONN_FLAGS(sk), + sk->sk_bound_dev_if, + sk->sk_protocol, + inet->sport, inet->dport, sk); + if (err) + return err; + + sk_setup_caps(sk, &rt->u.dst); + + new_saddr = rt->rt_src; + + if (new_saddr == old_saddr) + return 0; + + if (sysctl_ip_dynaddr > 1) { + printk(KERN_INFO "%s(): shifting inet->" + "saddr from %d.%d.%d.%d to %d.%d.%d.%d\n", + __FUNCTION__, + NIPQUAD(old_saddr), + NIPQUAD(new_saddr)); + } + + inet->saddr = inet->rcv_saddr = new_saddr; + + /* + * XXX The only one ugly spot where we need to + * XXX really change the sockets identity after + * XXX it has entered the hashes. -DaveM + * + * Besides that, it does not check for connection + * uniqueness. Wait for troubles. + */ + __sk_prot_rehash(sk); + return 0; +} + +int inet_sk_rebuild_header(struct sock *sk) +{ + struct inet_sock *inet = inet_sk(sk); + struct rtable *rt = (struct rtable *)__sk_dst_check(sk, 0); + u32 daddr; + int err; + + /* Route is OK, nothing to do. */ + if (rt) + return 0; + + /* Reroute. */ + daddr = inet->daddr; + if (inet->opt && inet->opt->srr) + daddr = inet->opt->faddr; +{ + struct flowi fl = { + .oif = sk->sk_bound_dev_if, + .nl_u = { + .ip4_u = { + .daddr = daddr, + .saddr = inet->saddr, + .tos = RT_CONN_FLAGS(sk), + }, + }, + .proto = sk->sk_protocol, + .uli_u = { + .ports = { + .sport = inet->sport, + .dport = inet->dport, + }, + }, + }; + + err = ip_route_output_flow(&rt, &fl, sk, 0); +} + if (!err) + sk_setup_caps(sk, &rt->u.dst); + else { + /* Routing failed... */ + sk->sk_route_caps = 0; + /* + * Other protocols have to map its equivalent state to TCP_SYN_SENT. 
+ * DCCP maps its DCCP_REQUESTING state to TCP_SYN_SENT. -acme + */ + if (!sysctl_ip_dynaddr || + sk->sk_state != TCP_SYN_SENT || + (sk->sk_userlocks & SOCK_BINDADDR_LOCK) || + (err = inet_sk_reselect_saddr(sk)) != 0) + sk->sk_err_soft = -err; + } + + return err; +} + +EXPORT_SYMBOL(inet_sk_rebuild_header); + #ifdef CONFIG_IP_MULTICAST static struct net_protocol igmp_protocol = { .handler = igmp_rcv, diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index c72fc878f06d..dd568b0b7062 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -83,11 +83,6 @@ #include #include -/* - * Shall we try to damage output packets if routing dev changes? - */ - -int sysctl_ip_dynaddr; int sysctl_ip_default_ttl = IPDEFTTL; /* Generate a checksum for an outgoing IP datagram. */ diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 4a5daecbd2ac..ae6fad99a9a9 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1832,101 +1832,6 @@ do_time_wait: goto discard_it; } -static int tcp_v4_reselect_saddr(struct sock *sk) -{ - struct inet_sock *inet = inet_sk(sk); - int err; - struct rtable *rt; - __u32 old_saddr = inet->saddr; - __u32 new_saddr; - __u32 daddr = inet->daddr; - - if (inet->opt && inet->opt->srr) - daddr = inet->opt->faddr; - - /* Query new route. */ - err = ip_route_connect(&rt, daddr, 0, - RT_CONN_FLAGS(sk), - sk->sk_bound_dev_if, - IPPROTO_TCP, - inet->sport, inet->dport, sk); - if (err) - return err; - - sk_setup_caps(sk, &rt->u.dst); - - new_saddr = rt->rt_src; - - if (new_saddr == old_saddr) - return 0; - - if (sysctl_ip_dynaddr > 1) { - printk(KERN_INFO "tcp_v4_rebuild_header(): shifting inet->" - "saddr from %d.%d.%d.%d to %d.%d.%d.%d\n", - NIPQUAD(old_saddr), - NIPQUAD(new_saddr)); - } - - inet->saddr = new_saddr; - inet->rcv_saddr = new_saddr; - - /* XXX The only one ugly spot where we need to - * XXX really change the sockets identity after - * XXX it has entered the hashes. 
-DaveM - * - * Besides that, it does not check for connection - * uniqueness. Wait for troubles. - */ - __sk_prot_rehash(sk); - return 0; -} - -int tcp_v4_rebuild_header(struct sock *sk) -{ - struct inet_sock *inet = inet_sk(sk); - struct rtable *rt = (struct rtable *)__sk_dst_check(sk, 0); - u32 daddr; - int err; - - /* Route is OK, nothing to do. */ - if (rt) - return 0; - - /* Reroute. */ - daddr = inet->daddr; - if (inet->opt && inet->opt->srr) - daddr = inet->opt->faddr; - - { - struct flowi fl = { .oif = sk->sk_bound_dev_if, - .nl_u = { .ip4_u = - { .daddr = daddr, - .saddr = inet->saddr, - .tos = RT_CONN_FLAGS(sk) } }, - .proto = IPPROTO_TCP, - .uli_u = { .ports = - { .sport = inet->sport, - .dport = inet->dport } } }; - - err = ip_route_output_flow(&rt, &fl, sk, 0); - } - if (!err) { - sk_setup_caps(sk, &rt->u.dst); - return 0; - } - - /* Routing failed... */ - sk->sk_route_caps = 0; - - if (!sysctl_ip_dynaddr || - sk->sk_state != TCP_SYN_SENT || - (sk->sk_userlocks & SOCK_BINDADDR_LOCK) || - (err = tcp_v4_reselect_saddr(sk)) != 0) - sk->sk_err_soft = -err; - - return err; -} - static void v4_addr2sockaddr(struct sock *sk, struct sockaddr * uaddr) { struct sockaddr_in *sin = (struct sockaddr_in *) uaddr; @@ -1998,7 +1903,7 @@ int tcp_v4_tw_remember_stamp(struct tcp_tw_bucket *tw) struct tcp_func ipv4_specific = { .queue_xmit = ip_queue_xmit, .send_check = tcp_v4_send_check, - .rebuild_header = tcp_v4_rebuild_header, + .rebuild_header = inet_sk_rebuild_header, .conn_request = tcp_v4_conn_request, .syn_recv_sock = tcp_v4_syn_recv_sock, .remember_stamp = tcp_v4_remember_stamp, @@ -2630,7 +2535,6 @@ EXPORT_SYMBOL(tcp_unhash); EXPORT_SYMBOL(tcp_v4_conn_request); EXPORT_SYMBOL(tcp_v4_connect); EXPORT_SYMBOL(tcp_v4_do_rcv); -EXPORT_SYMBOL(tcp_v4_rebuild_header); EXPORT_SYMBOL(tcp_v4_remember_stamp); EXPORT_SYMBOL(tcp_v4_send_check); EXPORT_SYMBOL(tcp_v4_syn_recv_sock); diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 885e05bd99f6..4e32a8496be3 100644 
--- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1983,7 +1983,7 @@ static struct tcp_func ipv6_specific = { static struct tcp_func ipv6_mapped = { .queue_xmit = ip_queue_xmit, .send_check = tcp_v4_send_check, - .rebuild_header = tcp_v4_rebuild_header, + .rebuild_header = inet_sk_rebuild_header, .conn_request = tcp_v6_conn_request, .syn_recv_sock = tcp_v6_syn_recv_sock, .remember_stamp = tcp_v4_remember_stamp, From 838ab6364956d9bdcefe84712de1621cf20a40b3 Mon Sep 17 00:00:00 2001 From: Harald Welte Date: Tue, 9 Aug 2005 19:50:45 -0700 Subject: [PATCH 294/584] [NETFILTER]: Add refcounting and /proc/net/netfilter interface to nfnetlink_queue Signed-off-by: Harald Welte Signed-off-by: David S. Miller --- include/linux/netfilter/nfnetlink_queue.h | 1 + net/netfilter/nfnetlink_queue.c | 248 +++++++++++++++++++--- 2 files changed, 221 insertions(+), 28 deletions(-) diff --git a/include/linux/netfilter/nfnetlink_queue.h b/include/linux/netfilter/nfnetlink_queue.h index edb463a87eb4..e142b0ff7c08 100644 --- a/include/linux/netfilter/nfnetlink_queue.h +++ b/include/linux/netfilter/nfnetlink_queue.h @@ -81,5 +81,6 @@ enum nfqnl_attr_config { NFQA_CFG_PARAMS, /* nfqnl_msg_config_params */ __NFQA_CFG_MAX }; +#define NFQA_CFG_MAX (__NFQA_CFG_MAX-1) #endif /* _NFNETLINK_QUEUE_H */ diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c index 24032610c425..eab309e3d42e 100644 --- a/net/netfilter/nfnetlink_queue.c +++ b/net/netfilter/nfnetlink_queue.c @@ -20,6 +20,7 @@ #include #include #include +#include #include #include #include @@ -48,6 +49,7 @@ struct nfqnl_queue_entry { struct nfqnl_instance { struct hlist_node hlist; /* global list of queues */ + atomic_t use; int peer_pid; unsigned int queue_maxlen; @@ -105,17 +107,28 @@ __instance_lookup(u_int16_t queue_num) } static struct nfqnl_instance * -instance_lookup(u_int16_t queue_num) +instance_lookup_get(u_int16_t queue_num) { struct nfqnl_instance *inst; read_lock_bh(&instances_lock); inst = 
__instance_lookup(queue_num); + if (inst) + atomic_inc(&inst->use); read_unlock_bh(&instances_lock); return inst; } +static void +instance_put(struct nfqnl_instance *inst) +{ + if (inst && atomic_dec_and_test(&inst->use)) { + QDEBUG("kfree(inst=%p)\n", inst); + kfree(inst); + } +} + static struct nfqnl_instance * instance_create(u_int16_t queue_num, int pid) { @@ -141,6 +154,8 @@ instance_create(u_int16_t queue_num, int pid) inst->copy_range = 0xfffff; inst->copy_mode = NFQNL_COPY_NONE; atomic_set(&inst->id_sequence, 0); + /* needs to be two, since we _put() after creation */ + atomic_set(&inst->use, 2); inst->lock = SPIN_LOCK_UNLOCKED; INIT_LIST_HEAD(&inst->queue_list); @@ -182,8 +197,8 @@ _instance_destroy2(struct nfqnl_instance *inst, int lock) /* then flush all pending skbs from the queue */ nfqnl_flush(inst, NF_DROP); - /* and finally free the data structure */ - kfree(inst); + /* and finally put the refcount */ + instance_put(inst); module_put(THIS_MODULE); } @@ -471,7 +486,7 @@ nfqnl_enqueue_packet(struct sk_buff *skb, struct nf_info *info, QDEBUG("entered\n"); - queue = instance_lookup(queuenum); + queue = instance_lookup_get(queuenum); if (!queue) { QDEBUG("no queue instance matching\n"); return -EINVAL; @@ -479,7 +494,8 @@ nfqnl_enqueue_packet(struct sk_buff *skb, struct nf_info *info, if (queue->copy_mode == NFQNL_COPY_NONE) { QDEBUG("mode COPY_NONE, aborting\n"); - return -EAGAIN; + status = -EAGAIN; + goto err_out_put; } entry = kmalloc(sizeof(*entry), GFP_ATOMIC); @@ -487,7 +503,8 @@ nfqnl_enqueue_packet(struct sk_buff *skb, struct nf_info *info, if (net_ratelimit()) printk(KERN_ERR "nf_queue: OOM in nfqnl_enqueue_packet()\n"); - return -ENOMEM; + status = -ENOMEM; + goto err_out_put; } entry->info = info; @@ -523,6 +540,7 @@ nfqnl_enqueue_packet(struct sk_buff *skb, struct nf_info *info, __enqueue_entry(queue, entry); spin_unlock_bh(&queue->lock); + instance_put(queue); return status; err_out_free_nskb: @@ -533,6 +551,8 @@ err_out_unlock: 
err_out_free: kfree(entry); +err_out_put: + instance_put(queue); return status; } @@ -685,6 +705,12 @@ static struct notifier_block nfqnl_rtnl_notifier = { .notifier_call = nfqnl_rcv_nl_event, }; +static const int nfqa_verdict_min[NFQA_MAX] = { + [NFQA_VERDICT_HDR-1] = sizeof(struct nfqnl_msg_verdict_hdr), + [NFQA_MARK-1] = sizeof(u_int32_t), + [NFQA_PAYLOAD-1] = 0, +}; + static int nfqnl_recv_verdict(struct sock *ctnl, struct sk_buff *skb, struct nlmsghdr *nlh, struct nfattr *nfqa[], int *errp) @@ -696,26 +722,40 @@ nfqnl_recv_verdict(struct sock *ctnl, struct sk_buff *skb, struct nfqnl_instance *queue; unsigned int verdict; struct nfqnl_queue_entry *entry; + int err; - queue = instance_lookup(queue_num); + if (nfattr_bad_size(nfqa, NFQA_MAX, nfqa_verdict_min)) { + QDEBUG("bad attribute size\n"); + return -EINVAL; + } + + queue = instance_lookup_get(queue_num); if (!queue) return -ENODEV; - if (queue->peer_pid != NETLINK_CB(skb).pid) - return -EPERM; + if (queue->peer_pid != NETLINK_CB(skb).pid) { + err = -EPERM; + goto err_out_put; + } - if (!nfqa[NFQA_VERDICT_HDR-1]) - return -EINVAL; + if (!nfqa[NFQA_VERDICT_HDR-1]) { + err = -EINVAL; + goto err_out_put; + } vhdr = NFA_DATA(nfqa[NFQA_VERDICT_HDR-1]); verdict = ntohl(vhdr->verdict); - if ((verdict & NF_VERDICT_MASK) > NF_MAX_VERDICT) - return -EINVAL; + if ((verdict & NF_VERDICT_MASK) > NF_MAX_VERDICT) { + err = -EINVAL; + goto err_out_put; + } entry = find_dequeue_entry(queue, id_cmp, ntohl(vhdr->id)); - if (entry == NULL) - return -ENOENT; + if (entry == NULL) { + err = -ENOENT; + goto err_out_put; + } if (nfqa[NFQA_PAYLOAD-1]) { if (nfqnl_mangle(NFA_DATA(nfqa[NFQA_PAYLOAD-1]), @@ -727,7 +767,12 @@ nfqnl_recv_verdict(struct sock *ctnl, struct sk_buff *skb, skb->nfmark = ntohl(*(u_int32_t *)NFA_DATA(nfqa[NFQA_MARK-1])); issue_verdict(entry, verdict); + instance_put(queue); return 0; + +err_out_put: + instance_put(queue); + return err; } static int @@ -737,6 +782,11 @@ nfqnl_recv_unsupp(struct sock *ctnl, struct 
sk_buff *skb, return -ENOTSUPP; } +static const int nfqa_cfg_min[NFQA_CFG_MAX] = { + [NFQA_CFG_CMD-1] = sizeof(struct nfqnl_msg_config_cmd), + [NFQA_CFG_PARAMS-1] = sizeof(struct nfqnl_msg_config_params), +}; + static int nfqnl_recv_config(struct sock *ctnl, struct sk_buff *skb, struct nlmsghdr *nlh, struct nfattr *nfqa[], int *errp) @@ -744,10 +794,16 @@ nfqnl_recv_config(struct sock *ctnl, struct sk_buff *skb, struct nfgenmsg *nfmsg = NLMSG_DATA(nlh); u_int16_t queue_num = ntohs(nfmsg->res_id); struct nfqnl_instance *queue; + int ret = 0; QDEBUG("entering for msg %u\n", NFNL_MSG_TYPE(nlh->nlmsg_type)); - queue = instance_lookup(queue_num); + if (nfattr_bad_size(nfqa, NFQA_CFG_MAX, nfqa_cfg_min)) { + QDEBUG("bad attribute size\n"); + return -EINVAL; + } + + queue = instance_lookup_get(queue_num); if (nfqa[NFQA_CFG_CMD-1]) { struct nfqnl_msg_config_cmd *cmd; cmd = NFA_DATA(nfqa[NFQA_CFG_CMD-1]); @@ -766,17 +822,19 @@ nfqnl_recv_config(struct sock *ctnl, struct sk_buff *skb, if (!queue) return -ENODEV; - if (queue->peer_pid != NETLINK_CB(skb).pid) - return -EPERM; + if (queue->peer_pid != NETLINK_CB(skb).pid) { + ret = -EPERM; + goto out_put; + } instance_destroy(queue); break; case NFQNL_CFG_CMD_PF_BIND: QDEBUG("registering queue handler for pf=%u\n", ntohs(cmd->pf)); - return nf_register_queue_handler(ntohs(cmd->pf), - nfqnl_enqueue_packet, - NULL); + ret = nf_register_queue_handler(ntohs(cmd->pf), + nfqnl_enqueue_packet, + NULL); break; case NFQNL_CFG_CMD_PF_UNBIND: @@ -784,20 +842,23 @@ nfqnl_recv_config(struct sock *ctnl, struct sk_buff *skb, ntohs(cmd->pf)); /* This is a bug and a feature. We can unregister * other handlers(!) 
*/ - return nf_unregister_queue_handler(ntohs(cmd->pf)); + ret = nf_unregister_queue_handler(ntohs(cmd->pf)); break; default: - return -EINVAL; + ret = -EINVAL; + break; } } else { if (!queue) { QDEBUG("no config command, and no instance ENOENT\n"); - return -ENOENT; + ret = -ENOENT; + goto out_put; } if (queue->peer_pid != NETLINK_CB(skb).pid) { QDEBUG("no config command, and wrong pid\n"); - return -EPERM; + ret = -EPERM; + goto out_put; } } @@ -809,7 +870,9 @@ nfqnl_recv_config(struct sock *ctnl, struct sk_buff *skb, ntohl(params->copy_range)); } - return 0; +out_put: + instance_put(queue); + return ret; } static struct nfnl_callback nfqnl_cb[NFQNL_MSG_MAX] = { @@ -829,14 +892,132 @@ static struct nfnetlink_subsystem nfqnl_subsys = { .cb = nfqnl_cb, }; +#ifdef CONFIG_PROC_FS +struct iter_state { + unsigned int bucket; +}; + +static struct hlist_node *get_first(struct seq_file *seq) +{ + struct iter_state *st = seq->private; + + if (!st) + return NULL; + + for (st->bucket = 0; st->bucket < INSTANCE_BUCKETS; st->bucket++) { + if (!hlist_empty(&instance_table[st->bucket])) + return instance_table[st->bucket].first; + } + return NULL; +} + +static struct hlist_node *get_next(struct seq_file *seq, struct hlist_node *h) +{ + struct iter_state *st = seq->private; + + h = h->next; + while (!h) { + if (++st->bucket >= INSTANCE_BUCKETS) + return NULL; + + h = instance_table[st->bucket].first; + } + return h; +} + +static struct hlist_node *get_idx(struct seq_file *seq, loff_t pos) +{ + struct hlist_node *head; + head = get_first(seq); + + if (head) + while (pos && (head = get_next(seq, head))) + pos--; + return pos ? 
NULL : head; +} + +static void *seq_start(struct seq_file *seq, loff_t *pos) +{ + read_lock_bh(&instances_lock); + return get_idx(seq, *pos); +} + +static void *seq_next(struct seq_file *s, void *v, loff_t *pos) +{ + (*pos)++; + return get_next(s, v); +} + +static void seq_stop(struct seq_file *s, void *v) +{ + read_unlock_bh(&instances_lock); +} + +static int seq_show(struct seq_file *s, void *v) +{ + const struct nfqnl_instance *inst = v; + + return seq_printf(s, "%5d %6d %5d %1d %5d %5d %5d %8d %2d\n", + inst->queue_num, + inst->peer_pid, inst->queue_total, + inst->copy_mode, inst->copy_range, + inst->queue_dropped, inst->queue_user_dropped, + atomic_read(&inst->id_sequence), + atomic_read(&inst->use)); +} + +static struct seq_operations nfqnl_seq_ops = { + .start = seq_start, + .next = seq_next, + .stop = seq_stop, + .show = seq_show, +}; + +static int nfqnl_open(struct inode *inode, struct file *file) +{ + struct seq_file *seq; + struct iter_state *is; + int ret; + + is = kmalloc(sizeof(*is), GFP_KERNEL); + if (!is) + return -ENOMEM; + memset(is, 0, sizeof(*is)); + ret = seq_open(file, &nfqnl_seq_ops); + if (ret < 0) + goto out_free; + seq = file->private_data; + seq->private = is; + return ret; +out_free: + kfree(is); + return ret; +} + +static struct file_operations nfqnl_file_ops = { + .owner = THIS_MODULE, + .open = nfqnl_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release_private, +}; + +#endif /* PROC_FS */ + static int init_or_cleanup(int init) { - int status = -ENOMEM; + int i, status = -ENOMEM; +#ifdef CONFIG_PROC_FS + struct proc_dir_entry *proc_nfqueue; +#endif if (!init) goto cleanup; + for (i = 0; i < INSTANCE_BUCKETS; i++) + INIT_HLIST_HEAD(&instance_table[i]); + netlink_register_notifier(&nfqnl_rtnl_notifier); status = nfnetlink_subsys_register(&nfqnl_subsys); if (status < 0) { @@ -844,14 +1025,25 @@ init_or_cleanup(int init) goto cleanup_netlink_notifier; } +#ifdef CONFIG_PROC_FS + proc_nfqueue = 
create_proc_entry("nfnetlink_queue", 0440, + proc_net_netfilter); + if (!proc_nfqueue) + goto cleanup_subsys; + proc_nfqueue->proc_fops = &nfqnl_file_ops; +#endif + register_netdevice_notifier(&nfqnl_dev_notifier); + return status; cleanup: nf_unregister_queue_handlers(nfqnl_enqueue_packet); unregister_netdevice_notifier(&nfqnl_dev_notifier); +#ifdef CONFIG_PROC_FS +cleanup_subsys: +#endif nfnetlink_subsys_unregister(&nfqnl_subsys); - cleanup_netlink_notifier: netlink_unregister_notifier(&nfqnl_rtnl_notifier); return status; From 608c8e4f7b6e61cc783283e9dff8a465a5ad59bb Mon Sep 17 00:00:00 2001 From: Harald Welte Date: Tue, 9 Aug 2005 19:58:27 -0700 Subject: [PATCH 295/584] [NETFILTER]: Extend netfilter logging API This patch is in preparation to nfnetlink_log: - loggers now have to register struct nf_logger instead of nf_logfn - nf_log_unregister() replaced by nf_log_unregister_pf() and nf_log_unregister_logger() - add comment to ip[6]t_LOG.h to assure nobody redefines flags - add /proc/net/netfilter/nf_log to tell user which logger is currently registered for which address family - if user has configured logging, but no logging backend (logger) is available, always spit a message to syslog, not just the first time. - split ip[6]t_LOG.c into two parts: Backend: Always try to register as logger for the respective address family Frontend: Always log via nf_log_packet() API - modify all users of nf_log_packet() to accommodate additional argument Signed-off-by: Harald Welte Signed-off-by: David S. 
Miller --- include/linux/netfilter.h | 48 ++++++- include/linux/netfilter_ipv4/ipt_LOG.h | 1 + include/linux/netfilter_ipv6/ip6t_LOG.h | 1 + net/core/netfilter.c | 127 ++++++++++++++++--- net/ipv4/netfilter/ip_conntrack_proto_icmp.c | 8 +- net/ipv4/netfilter/ip_conntrack_proto_tcp.c | 21 +-- net/ipv4/netfilter/ip_conntrack_proto_udp.c | 6 +- net/ipv4/netfilter/ipt_LOG.c | 86 +++++++------ net/ipv4/netfilter/ipt_ULOG.c | 33 +++-- net/ipv6/netfilter/ip6t_LOG.c | 93 ++++++++------ 10 files changed, 299 insertions(+), 125 deletions(-) diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h index 711e05f33d68..815583af06c2 100644 --- a/include/linux/netfilter.h +++ b/include/linux/netfilter.h @@ -114,15 +114,51 @@ void nf_unregister_sockopt(struct nf_sockopt_ops *reg); extern struct list_head nf_hooks[NPROTO][NF_MAX_HOOKS]; -typedef void nf_logfn(unsigned int hooknum, +/* those NF_LOG_* defines and struct nf_loginfo are legacy definitions that will + * disappear once iptables is replaced with pkttables. Please DO NOT use them + * for any new code! */ +#define NF_LOG_TCPSEQ 0x01 /* Log TCP sequence numbers */ +#define NF_LOG_TCPOPT 0x02 /* Log TCP options */ +#define NF_LOG_IPOPT 0x04 /* Log IP options */ +#define NF_LOG_UID 0x08 /* Log UID owning local socket */ +#define NF_LOG_MASK 0x0f + +#define NF_LOG_TYPE_LOG 0x01 +#define NF_LOG_TYPE_ULOG 0x02 + +struct nf_loginfo { + u_int8_t type; + union { + struct { + u_int32_t copy_len; + u_int16_t group; + u_int16_t qthreshold; + } ulog; + struct { + u_int8_t level; + u_int8_t logflags; + } log; + } u; +}; + +typedef void nf_logfn(unsigned int pf, + unsigned int hooknum, const struct sk_buff *skb, const struct net_device *in, const struct net_device *out, + const struct nf_loginfo *li, const char *prefix); +struct nf_logger { + struct module *me; + nf_logfn *logfn; + char *name; +}; + /* Function to register/unregister log function. 
*/ -int nf_log_register(int pf, nf_logfn *logfn); -void nf_log_unregister(int pf, nf_logfn *logfn); +int nf_log_register(int pf, struct nf_logger *logger); +void nf_log_unregister_pf(int pf); +void nf_log_unregister_logger(struct nf_logger *logger); /* Calls the registered backend logging function */ void nf_log_packet(int pf, @@ -130,6 +166,7 @@ void nf_log_packet(int pf, const struct sk_buff *skb, const struct net_device *in, const struct net_device *out, + struct nf_loginfo *li, const char *fmt, ...); /* Activate hook; either okfn or kfree_skb called, unless a hook @@ -221,6 +258,11 @@ struct nf_queue_rerouter { extern int nf_register_queue_rerouter(int pf, struct nf_queue_rerouter *rer); extern int nf_unregister_queue_rerouter(int pf); +#ifdef CONFIG_PROC_FS +#include +extern struct proc_dir_entry *proc_net_netfilter; +#endif + #else /* !CONFIG_NETFILTER */ #define NF_HOOK(pf, hook, skb, indev, outdev, okfn) (okfn)(skb) static inline void nf_ct_attach(struct sk_buff *new, struct sk_buff *skb) {} diff --git a/include/linux/netfilter_ipv4/ipt_LOG.h b/include/linux/netfilter_ipv4/ipt_LOG.h index d25f782e57d1..22d16177319b 100644 --- a/include/linux/netfilter_ipv4/ipt_LOG.h +++ b/include/linux/netfilter_ipv4/ipt_LOG.h @@ -1,6 +1,7 @@ #ifndef _IPT_LOG_H #define _IPT_LOG_H +/* make sure not to change this without changing netfilter.h:NF_LOG_* (!) */ #define IPT_LOG_TCPSEQ 0x01 /* Log TCP sequence numbers */ #define IPT_LOG_TCPOPT 0x02 /* Log TCP options */ #define IPT_LOG_IPOPT 0x04 /* Log IP options */ diff --git a/include/linux/netfilter_ipv6/ip6t_LOG.h b/include/linux/netfilter_ipv6/ip6t_LOG.h index 42996a43bb39..9008ff5c40ae 100644 --- a/include/linux/netfilter_ipv6/ip6t_LOG.h +++ b/include/linux/netfilter_ipv6/ip6t_LOG.h @@ -1,6 +1,7 @@ #ifndef _IP6T_LOG_H #define _IP6T_LOG_H +/* make sure not to change this without changing netfilter.h:NF_LOG_* (!) 
*/ #define IP6T_LOG_TCPSEQ 0x01 /* Log TCP sequence numbers */ #define IP6T_LOG_TCPOPT 0x02 /* Log TCP options */ #define IP6T_LOG_IPOPT 0x04 /* Log IP options */ diff --git a/net/core/netfilter.c b/net/core/netfilter.c index 3e38084ac2bd..98cc61e79fea 100644 --- a/net/core/netfilter.c +++ b/net/core/netfilter.c @@ -22,6 +22,7 @@ #include #include #include +#include #include /* In this code, we can be waiting indefinitely for userspace to @@ -535,11 +536,10 @@ EXPORT_SYMBOL(skb_make_writable); #define NF_LOG_PREFIXLEN 128 -static nf_logfn *nf_logging[NPROTO]; /* = NULL */ -static int reported = 0; +static struct nf_logger *nf_logging[NPROTO]; /* = NULL */ static DEFINE_SPINLOCK(nf_log_lock); -int nf_log_register(int pf, nf_logfn *logfn) +int nf_log_register(int pf, struct nf_logger *logger) { int ret = -EBUSY; @@ -547,54 +547,134 @@ int nf_log_register(int pf, nf_logfn *logfn) * substituting pointer. */ spin_lock(&nf_log_lock); if (!nf_logging[pf]) { - rcu_assign_pointer(nf_logging[pf], logfn); + rcu_assign_pointer(nf_logging[pf], logger); ret = 0; } spin_unlock(&nf_log_lock); return ret; } -void nf_log_unregister(int pf, nf_logfn *logfn) +void nf_log_unregister_pf(int pf) { spin_lock(&nf_log_lock); - if (nf_logging[pf] == logfn) - nf_logging[pf] = NULL; + nf_logging[pf] = NULL; spin_unlock(&nf_log_lock); /* Give time to concurrent readers. */ synchronize_net(); -} +} + +void nf_log_unregister_logger(struct nf_logger *logger) +{ + int i; + + spin_lock(&nf_log_lock); + for (i = 0; i < NPROTO; i++) { + if (nf_logging[i] == logger) + nf_logging[i] = NULL; + } + spin_unlock(&nf_log_lock); + + synchronize_net(); +} void nf_log_packet(int pf, unsigned int hooknum, const struct sk_buff *skb, const struct net_device *in, const struct net_device *out, + struct nf_loginfo *loginfo, const char *fmt, ...) 
{ va_list args; char prefix[NF_LOG_PREFIXLEN]; - nf_logfn *logfn; + struct nf_logger *logger; rcu_read_lock(); - logfn = rcu_dereference(nf_logging[pf]); - if (logfn) { + logger = rcu_dereference(nf_logging[pf]); + if (logger) { va_start(args, fmt); vsnprintf(prefix, sizeof(prefix), fmt, args); va_end(args); /* We must read logging before nf_logfn[pf] */ - logfn(hooknum, skb, in, out, prefix); - } else if (!reported) { - printk(KERN_WARNING "nf_log_packet: can\'t log yet, " - "no backend logging module loaded in!\n"); - reported++; + logger->logfn(pf, hooknum, skb, in, out, loginfo, prefix); + } else if (net_ratelimit()) { + printk(KERN_WARNING "nf_log_packet: can\'t log since " + "no backend logging module loaded in! Please either " + "load one, or disable logging explicitly\n"); } rcu_read_unlock(); } EXPORT_SYMBOL(nf_log_register); -EXPORT_SYMBOL(nf_log_unregister); +EXPORT_SYMBOL(nf_log_unregister_pf); +EXPORT_SYMBOL(nf_log_unregister_logger); EXPORT_SYMBOL(nf_log_packet); +#ifdef CONFIG_PROC_FS +struct proc_dir_entry *proc_net_netfilter; +EXPORT_SYMBOL(proc_net_netfilter); + +static void *seq_start(struct seq_file *seq, loff_t *pos) +{ + rcu_read_lock(); + + if (*pos >= NPROTO) + return NULL; + + return pos; +} + +static void *seq_next(struct seq_file *s, void *v, loff_t *pos) +{ + (*pos)++; + + if (*pos >= NPROTO) + return NULL; + + return pos; +} + +static void seq_stop(struct seq_file *s, void *v) +{ + rcu_read_unlock(); +} + +static int seq_show(struct seq_file *s, void *v) +{ + loff_t *pos = v; + const struct nf_logger *logger; + + logger = rcu_dereference(nf_logging[*pos]); + + if (!logger) + return seq_printf(s, "%2lld NONE\n", *pos); + + return seq_printf(s, "%2lld %s\n", *pos, logger->name); +} + +static struct seq_operations nflog_seq_ops = { + .start = seq_start, + .next = seq_next, + .stop = seq_stop, + .show = seq_show, +}; + +static int nflog_open(struct inode *inode, struct file *file) +{ + return seq_open(file, &nflog_seq_ops); +} + +static 
struct file_operations nflog_file_ops = { + .owner = THIS_MODULE, + .open = nflog_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release, +}; + +#endif /* PROC_FS */ + + /* This does not belong here, but locally generated errors need it if connection tracking in use: without this, connection may not be in hash table, and hence manufactured ICMP or RST packets will not be associated with it. */ @@ -613,6 +693,9 @@ void nf_ct_attach(struct sk_buff *new, struct sk_buff *skb) void __init netfilter_init(void) { int i, h; +#ifdef CONFIG_PROC_FS + struct proc_dir_entry *pde; +#endif queue_rerouter = kmalloc(NPROTO * sizeof(struct nf_queue_rerouter), GFP_KERNEL); @@ -624,6 +707,16 @@ void __init netfilter_init(void) for (h = 0; h < NF_MAX_HOOKS; h++) INIT_LIST_HEAD(&nf_hooks[i][h]); } + +#ifdef CONFIG_PROC_FS + proc_net_netfilter = proc_mkdir("netfilter", proc_net); + if (!proc_net_netfilter) + panic("cannot create netfilter proc entry"); + pde = create_proc_entry("nf_log", S_IRUGO, proc_net_netfilter); + if (!pde) + panic("cannot create /proc/net/netfilter/nf_log"); + pde->proc_fops = &nflog_file_ops; +#endif } EXPORT_SYMBOL(ip_ct_attach); diff --git a/net/ipv4/netfilter/ip_conntrack_proto_icmp.c b/net/ipv4/netfilter/ip_conntrack_proto_icmp.c index 3f90cb9979ac..838d1d69b36e 100644 --- a/net/ipv4/netfilter/ip_conntrack_proto_icmp.c +++ b/net/ipv4/netfilter/ip_conntrack_proto_icmp.c @@ -217,7 +217,7 @@ icmp_error(struct sk_buff *skb, enum ip_conntrack_info *ctinfo, icmph = skb_header_pointer(skb, skb->nh.iph->ihl*4, sizeof(_ih), &_ih); if (icmph == NULL) { if (LOG_INVALID(IPPROTO_ICMP)) - nf_log_packet(PF_INET, 0, skb, NULL, NULL, + nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL, "ip_ct_icmp: short packet "); return -NF_ACCEPT; } @@ -231,13 +231,13 @@ icmp_error(struct sk_buff *skb, enum ip_conntrack_info *ctinfo, if (!(u16)csum_fold(skb->csum)) break; if (LOG_INVALID(IPPROTO_ICMP)) - nf_log_packet(PF_INET, 0, skb, NULL, NULL, + nf_log_packet(PF_INET, 
0, skb, NULL, NULL, NULL, "ip_ct_icmp: bad HW ICMP checksum "); return -NF_ACCEPT; case CHECKSUM_NONE: if ((u16)csum_fold(skb_checksum(skb, 0, skb->len, 0))) { if (LOG_INVALID(IPPROTO_ICMP)) - nf_log_packet(PF_INET, 0, skb, NULL, NULL, + nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL, "ip_ct_icmp: bad ICMP checksum "); return -NF_ACCEPT; } @@ -254,7 +254,7 @@ checksum_skipped: */ if (icmph->type > NR_ICMP_TYPES) { if (LOG_INVALID(IPPROTO_ICMP)) - nf_log_packet(PF_INET, 0, skb, NULL, NULL, + nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL, "ip_ct_icmp: invalid ICMP type "); return -NF_ACCEPT; } diff --git a/net/ipv4/netfilter/ip_conntrack_proto_tcp.c b/net/ipv4/netfilter/ip_conntrack_proto_tcp.c index c2bce22d4031..f23ef1f88c46 100644 --- a/net/ipv4/netfilter/ip_conntrack_proto_tcp.c +++ b/net/ipv4/netfilter/ip_conntrack_proto_tcp.c @@ -716,7 +716,7 @@ static int tcp_in_window(struct ip_ct_tcp *state, res = 1; } else { if (LOG_INVALID(IPPROTO_TCP)) - nf_log_packet(PF_INET, 0, skb, NULL, NULL, + nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL, "ip_ct_tcp: %s ", before(seq, sender->td_maxend + 1) ? after(end, sender->td_end - receiver->td_maxwin - 1) ? @@ -815,7 +815,7 @@ static int tcp_error(struct sk_buff *skb, sizeof(_tcph), &_tcph); if (th == NULL) { if (LOG_INVALID(IPPROTO_TCP)) - nf_log_packet(PF_INET, 0, skb, NULL, NULL, + nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL, "ip_ct_tcp: short packet "); return -NF_ACCEPT; } @@ -823,7 +823,7 @@ static int tcp_error(struct sk_buff *skb, /* Not whole TCP header or malformed packet */ if (th->doff*4 < sizeof(struct tcphdr) || tcplen < th->doff*4) { if (LOG_INVALID(IPPROTO_TCP)) - nf_log_packet(PF_INET, 0, skb, NULL, NULL, + nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL, "ip_ct_tcp: truncated/malformed packet "); return -NF_ACCEPT; } @@ -840,7 +840,7 @@ static int tcp_error(struct sk_buff *skb, skb->ip_summed == CHECKSUM_HW ? 
skb->csum : skb_checksum(skb, iph->ihl*4, tcplen, 0))) { if (LOG_INVALID(IPPROTO_TCP)) - nf_log_packet(PF_INET, 0, skb, NULL, NULL, + nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL, "ip_ct_tcp: bad TCP checksum "); return -NF_ACCEPT; } @@ -849,7 +849,7 @@ static int tcp_error(struct sk_buff *skb, tcpflags = (((u_int8_t *)th)[13] & ~(TH_ECE|TH_CWR)); if (!tcp_valid_flags[tcpflags]) { if (LOG_INVALID(IPPROTO_TCP)) - nf_log_packet(PF_INET, 0, skb, NULL, NULL, + nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL, "ip_ct_tcp: invalid TCP flag combination "); return -NF_ACCEPT; } @@ -897,8 +897,9 @@ static int tcp_packet(struct ip_conntrack *conntrack, */ write_unlock_bh(&tcp_lock); if (LOG_INVALID(IPPROTO_TCP)) - nf_log_packet(PF_INET, 0, skb, NULL, NULL, - "ip_ct_tcp: killing out of sync session "); + nf_log_packet(PF_INET, 0, skb, NULL, NULL, + NULL, "ip_ct_tcp: " + "killing out of sync session "); if (del_timer(&conntrack->timeout)) conntrack->timeout.function((unsigned long) conntrack); @@ -912,7 +913,7 @@ static int tcp_packet(struct ip_conntrack *conntrack, write_unlock_bh(&tcp_lock); if (LOG_INVALID(IPPROTO_TCP)) - nf_log_packet(PF_INET, 0, skb, NULL, NULL, + nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL, "ip_ct_tcp: invalid packet ignored "); return NF_ACCEPT; case TCP_CONNTRACK_MAX: @@ -922,7 +923,7 @@ static int tcp_packet(struct ip_conntrack *conntrack, old_state); write_unlock_bh(&tcp_lock); if (LOG_INVALID(IPPROTO_TCP)) - nf_log_packet(PF_INET, 0, skb, NULL, NULL, + nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL, "ip_ct_tcp: invalid state "); return -NF_ACCEPT; case TCP_CONNTRACK_SYN_SENT: @@ -943,7 +944,7 @@ static int tcp_packet(struct ip_conntrack *conntrack, write_unlock_bh(&tcp_lock); if (LOG_INVALID(IPPROTO_TCP)) nf_log_packet(PF_INET, 0, skb, NULL, NULL, - "ip_ct_tcp: invalid SYN"); + NULL, "ip_ct_tcp: invalid SYN"); return -NF_ACCEPT; } case TCP_CONNTRACK_CLOSE: diff --git a/net/ipv4/netfilter/ip_conntrack_proto_udp.c 
b/net/ipv4/netfilter/ip_conntrack_proto_udp.c index 14130169cbfd..f2dcac7c7660 100644 --- a/net/ipv4/netfilter/ip_conntrack_proto_udp.c +++ b/net/ipv4/netfilter/ip_conntrack_proto_udp.c @@ -98,7 +98,7 @@ static int udp_error(struct sk_buff *skb, enum ip_conntrack_info *ctinfo, hdr = skb_header_pointer(skb, iph->ihl*4, sizeof(_hdr), &_hdr); if (hdr == NULL) { if (LOG_INVALID(IPPROTO_UDP)) - nf_log_packet(PF_INET, 0, skb, NULL, NULL, + nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL, "ip_ct_udp: short packet "); return -NF_ACCEPT; } @@ -106,7 +106,7 @@ static int udp_error(struct sk_buff *skb, enum ip_conntrack_info *ctinfo, /* Truncated/malformed packets */ if (ntohs(hdr->len) > udplen || ntohs(hdr->len) < sizeof(*hdr)) { if (LOG_INVALID(IPPROTO_UDP)) - nf_log_packet(PF_INET, 0, skb, NULL, NULL, + nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL, "ip_ct_udp: truncated/malformed packet "); return -NF_ACCEPT; } @@ -126,7 +126,7 @@ static int udp_error(struct sk_buff *skb, enum ip_conntrack_info *ctinfo, skb->ip_summed == CHECKSUM_HW ? 
skb->csum : skb_checksum(skb, iph->ihl*4, udplen, 0))) { if (LOG_INVALID(IPPROTO_UDP)) - nf_log_packet(PF_INET, 0, skb, NULL, NULL, + nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL, "ip_ct_udp: bad UDP checksum "); return -NF_ACCEPT; } diff --git a/net/ipv4/netfilter/ipt_LOG.c b/net/ipv4/netfilter/ipt_LOG.c index ef08733d26da..92ed050fac69 100644 --- a/net/ipv4/netfilter/ipt_LOG.c +++ b/net/ipv4/netfilter/ipt_LOG.c @@ -27,10 +27,6 @@ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Netfilter Core Team "); MODULE_DESCRIPTION("iptables syslog logging module"); -static unsigned int nflog = 1; -module_param(nflog, int, 0400); -MODULE_PARM_DESC(nflog, "register as internal netfilter logging module"); - #if 0 #define DEBUGP printk #else @@ -41,11 +37,17 @@ MODULE_PARM_DESC(nflog, "register as internal netfilter logging module"); static DEFINE_SPINLOCK(log_lock); /* One level of recursion won't kill us */ -static void dump_packet(const struct ipt_log_info *info, +static void dump_packet(const struct nf_loginfo *info, const struct sk_buff *skb, unsigned int iphoff) { struct iphdr _iph, *ih; + unsigned int logflags; + + if (info->type == NF_LOG_TYPE_LOG) + logflags = info->u.log.logflags; + else + logflags = NF_LOG_MASK; ih = skb_header_pointer(skb, iphoff, sizeof(_iph), &_iph); if (ih == NULL) { @@ -76,7 +78,7 @@ static void dump_packet(const struct ipt_log_info *info, if (ntohs(ih->frag_off) & IP_OFFSET) printk("FRAG:%u ", ntohs(ih->frag_off) & IP_OFFSET); - if ((info->logflags & IPT_LOG_IPOPT) + if ((logflags & IPT_LOG_IPOPT) && ih->ihl * 4 > sizeof(struct iphdr)) { unsigned char _opt[4 * 15 - sizeof(struct iphdr)], *op; unsigned int i, optsize; @@ -119,7 +121,7 @@ static void dump_packet(const struct ipt_log_info *info, printk("SPT=%u DPT=%u ", ntohs(th->source), ntohs(th->dest)); /* Max length: 30 "SEQ=4294967295 ACK=4294967295 " */ - if (info->logflags & IPT_LOG_TCPSEQ) + if (logflags & IPT_LOG_TCPSEQ) printk("SEQ=%u ACK=%u ", ntohl(th->seq), ntohl(th->ack_seq)); /* Max length: 
13 "WINDOW=65535 " */ @@ -146,7 +148,7 @@ static void dump_packet(const struct ipt_log_info *info, /* Max length: 11 "URGP=65535 " */ printk("URGP=%u ", ntohs(th->urg_ptr)); - if ((info->logflags & IPT_LOG_TCPOPT) + if ((logflags & IPT_LOG_TCPOPT) && th->doff * 4 > sizeof(struct tcphdr)) { unsigned char _opt[4 * 15 - sizeof(struct tcphdr)]; unsigned char *op; @@ -328,7 +330,7 @@ static void dump_packet(const struct ipt_log_info *info, } /* Max length: 15 "UID=4294967295 " */ - if ((info->logflags & IPT_LOG_UID) && !iphoff && skb->sk) { + if ((logflags & IPT_LOG_UID) && !iphoff && skb->sk) { read_lock_bh(&skb->sk->sk_callback_lock); if (skb->sk->sk_socket && skb->sk->sk_socket->file) printk("UID=%u ", skb->sk->sk_socket->file->f_uid); @@ -349,19 +351,31 @@ static void dump_packet(const struct ipt_log_info *info, /* maxlen = 230+ 91 + 230 + 252 = 803 */ } +struct nf_loginfo default_loginfo = { + .type = NF_LOG_TYPE_LOG, + .u = { + .log = { + .level = 0, + .logflags = NF_LOG_MASK, + }, + }, +}; + static void -ipt_log_packet(unsigned int hooknum, +ipt_log_packet(unsigned int pf, + unsigned int hooknum, const struct sk_buff *skb, const struct net_device *in, const struct net_device *out, - const struct ipt_log_info *loginfo, - const char *level_string, + const struct nf_loginfo *loginfo, const char *prefix) { + if (!loginfo) + loginfo = &default_loginfo; + spin_lock_bh(&log_lock); - printk(level_string); - printk("%sIN=%s OUT=%s ", - prefix == NULL ? loginfo->prefix : prefix, + printk("<%d>%sIN=%s OUT=%s ", loginfo->u.log.level, + prefix, in ? in->name : "", out ? 
out->name : ""); #ifdef CONFIG_BRIDGE_NETFILTER @@ -405,30 +419,17 @@ ipt_log_target(struct sk_buff **pskb, void *userinfo) { const struct ipt_log_info *loginfo = targinfo; - char level_string[4] = "< >"; + struct nf_loginfo li; - level_string[1] = '0' + (loginfo->level % 8); - ipt_log_packet(hooknum, *pskb, in, out, loginfo, level_string, NULL); + li.type = NF_LOG_TYPE_LOG; + li.u.log.level = loginfo->level; + li.u.log.logflags = loginfo->logflags; + + nf_log_packet(PF_INET, hooknum, *pskb, in, out, &li, loginfo->prefix); return IPT_CONTINUE; } -static void -ipt_logfn(unsigned int hooknum, - const struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - const char *prefix) -{ - struct ipt_log_info loginfo = { - .level = 0, - .logflags = IPT_LOG_MASK, - .prefix = "" - }; - - ipt_log_packet(hooknum, skb, in, out, &loginfo, KERN_WARNING, prefix); -} - static int ipt_log_checkentry(const char *tablename, const struct ipt_entry *e, void *targinfo, @@ -464,20 +465,29 @@ static struct ipt_target ipt_log_reg = { .me = THIS_MODULE, }; +static struct nf_logger ipt_log_logger ={ + .name = "ipt_LOG", + .logfn = &ipt_log_packet, + .me = THIS_MODULE, +}; + static int __init init(void) { if (ipt_register_target(&ipt_log_reg)) return -EINVAL; - if (nflog) - nf_log_register(PF_INET, &ipt_logfn); + if (nf_log_register(PF_INET, &ipt_log_logger) < 0) { + printk(KERN_WARNING "ipt_LOG: not logging via system console " + "since somebody else already registered for PF_INET\n"); + /* we cannot make module load fail here, since otherwise + * iptables userspace would abort */ + } return 0; } static void __exit fini(void) { - if (nflog) - nf_log_unregister(PF_INET, &ipt_logfn); + nf_log_unregister_logger(&ipt_log_logger); ipt_unregister_target(&ipt_log_reg); } diff --git a/net/ipv4/netfilter/ipt_ULOG.c b/net/ipv4/netfilter/ipt_ULOG.c index 4ea8371ab270..b86f06ec9762 100644 --- a/net/ipv4/netfilter/ipt_ULOG.c +++ b/net/ipv4/netfilter/ipt_ULOG.c @@ -304,18 +304,27 
@@ static unsigned int ipt_ulog_target(struct sk_buff **pskb, return IPT_CONTINUE; } -static void ipt_logfn(unsigned int hooknum, +static void ipt_logfn(unsigned int pf, + unsigned int hooknum, const struct sk_buff *skb, const struct net_device *in, const struct net_device *out, + const struct nf_loginfo *li, const char *prefix) { - struct ipt_ulog_info loginfo = { - .nl_group = ULOG_DEFAULT_NLGROUP, - .copy_range = 0, - .qthreshold = ULOG_DEFAULT_QTHRESHOLD, - .prefix = "" - }; + struct ipt_ulog_info loginfo; + + if (!li || li->type != NF_LOG_TYPE_ULOG) { + loginfo.nl_group = ULOG_DEFAULT_NLGROUP; + loginfo.copy_range = 0; + loginfo.qthreshold = ULOG_DEFAULT_QTHRESHOLD; + loginfo.prefix[0] = '\0'; + } else { + loginfo.nl_group = li->u.ulog.group; + loginfo.copy_range = li->u.ulog.copy_len; + loginfo.qthreshold = li->u.ulog.qthreshold; + strlcpy(loginfo.prefix, prefix, sizeof(loginfo.prefix)); + } ipt_ulog_packet(hooknum, skb, in, out, &loginfo, prefix); } @@ -355,6 +364,12 @@ static struct ipt_target ipt_ulog_reg = { .me = THIS_MODULE, }; +static struct nf_logger ipt_ulog_logger = { + .name = "ipt_ULOG", + .logfn = &ipt_logfn, + .me = THIS_MODULE, +}; + static int __init init(void) { int i; @@ -382,7 +397,7 @@ static int __init init(void) return -EINVAL; } if (nflog) - nf_log_register(PF_INET, &ipt_logfn); + nf_log_register(PF_INET, &ipt_ulog_logger); return 0; } @@ -395,7 +410,7 @@ static void __exit fini(void) DEBUGP("ipt_ULOG: cleanup_module\n"); if (nflog) - nf_log_unregister(PF_INET, &ipt_logfn); + nf_log_unregister_logger(&ipt_ulog_logger); ipt_unregister_target(&ipt_ulog_reg); sock_release(nflognl->sk_socket); diff --git a/net/ipv6/netfilter/ip6t_LOG.c b/net/ipv6/netfilter/ip6t_LOG.c index a692e26a4fa3..0cd1d1bd9033 100644 --- a/net/ipv6/netfilter/ip6t_LOG.c +++ b/net/ipv6/netfilter/ip6t_LOG.c @@ -26,10 +26,6 @@ MODULE_AUTHOR("Jan Rekorajski "); MODULE_DESCRIPTION("IP6 tables LOG target module"); MODULE_LICENSE("GPL"); -static unsigned int nflog = 1; 
-module_param(nflog, int, 0400); -MODULE_PARM_DESC(nflog, "register as internal netfilter logging module"); - struct in_device; #include #include @@ -44,7 +40,7 @@ struct in_device; static DEFINE_SPINLOCK(log_lock); /* One level of recursion won't kill us */ -static void dump_packet(const struct ip6t_log_info *info, +static void dump_packet(const struct nf_loginfo *info, const struct sk_buff *skb, unsigned int ip6hoff, int recurse) { @@ -53,6 +49,12 @@ static void dump_packet(const struct ip6t_log_info *info, struct ipv6hdr _ip6h, *ih; unsigned int ptr; unsigned int hdrlen = 0; + unsigned int logflags; + + if (info->type == NF_LOG_TYPE_LOG) + logflags = info->u.log.logflags; + else + logflags = NF_LOG_MASK; ih = skb_header_pointer(skb, ip6hoff, sizeof(_ip6h), &_ip6h); if (ih == NULL) { @@ -84,7 +86,7 @@ static void dump_packet(const struct ip6t_log_info *info, } /* Max length: 48 "OPT (...) " */ - if (info->logflags & IP6T_LOG_IPOPT) + if (logflags & IP6T_LOG_IPOPT) printk("OPT ( "); switch (currenthdr) { @@ -119,7 +121,7 @@ static void dump_packet(const struct ip6t_log_info *info, case IPPROTO_ROUTING: case IPPROTO_HOPOPTS: if (fragment) { - if (info->logflags & IP6T_LOG_IPOPT) + if (logflags & IP6T_LOG_IPOPT) printk(")"); return; } @@ -127,7 +129,7 @@ static void dump_packet(const struct ip6t_log_info *info, break; /* Max Length */ case IPPROTO_AH: - if (info->logflags & IP6T_LOG_IPOPT) { + if (logflags & IP6T_LOG_IPOPT) { struct ip_auth_hdr _ahdr, *ah; /* Max length: 3 "AH " */ @@ -158,7 +160,7 @@ static void dump_packet(const struct ip6t_log_info *info, hdrlen = (hp->hdrlen+2)<<2; break; case IPPROTO_ESP: - if (info->logflags & IP6T_LOG_IPOPT) { + if (logflags & IP6T_LOG_IPOPT) { struct ip_esp_hdr _esph, *eh; /* Max length: 4 "ESP " */ @@ -190,7 +192,7 @@ static void dump_packet(const struct ip6t_log_info *info, printk("Unknown Ext Hdr %u", currenthdr); return; } - if (info->logflags & IP6T_LOG_IPOPT) + if (logflags & IP6T_LOG_IPOPT) printk(") "); currenthdr = 
hp->nexthdr; @@ -218,7 +220,7 @@ static void dump_packet(const struct ip6t_log_info *info, printk("SPT=%u DPT=%u ", ntohs(th->source), ntohs(th->dest)); /* Max length: 30 "SEQ=4294967295 ACK=4294967295 " */ - if (info->logflags & IP6T_LOG_TCPSEQ) + if (logflags & IP6T_LOG_TCPSEQ) printk("SEQ=%u ACK=%u ", ntohl(th->seq), ntohl(th->ack_seq)); /* Max length: 13 "WINDOW=65535 " */ @@ -245,7 +247,7 @@ static void dump_packet(const struct ip6t_log_info *info, /* Max length: 11 "URGP=65535 " */ printk("URGP=%u ", ntohs(th->urg_ptr)); - if ((info->logflags & IP6T_LOG_TCPOPT) + if ((logflags & IP6T_LOG_TCPOPT) && th->doff * 4 > sizeof(struct tcphdr)) { u_int8_t _opt[60 - sizeof(struct tcphdr)], *op; unsigned int i; @@ -349,7 +351,7 @@ static void dump_packet(const struct ip6t_log_info *info, } /* Max length: 15 "UID=4294967295 " */ - if ((info->logflags & IP6T_LOG_UID) && recurse && skb->sk) { + if ((logflags & IP6T_LOG_UID) && recurse && skb->sk) { read_lock_bh(&skb->sk->sk_callback_lock); if (skb->sk->sk_socket && skb->sk->sk_socket->file) printk("UID=%u ", skb->sk->sk_socket->file->f_uid); @@ -357,19 +359,31 @@ static void dump_packet(const struct ip6t_log_info *info, } } +static struct nf_loginfo default_loginfo = { + .type = NF_LOG_TYPE_LOG, + .u = { + .log = { + .level = 0, + .logflags = NF_LOG_MASK, + }, + }, +}; + static void -ip6t_log_packet(unsigned int hooknum, +ip6t_log_packet(unsigned int pf, + unsigned int hooknum, const struct sk_buff *skb, const struct net_device *in, const struct net_device *out, - const struct ip6t_log_info *loginfo, - const char *level_string, + const struct nf_loginfo *loginfo, const char *prefix) { + if (!loginfo) + loginfo = &default_loginfo; + spin_lock_bh(&log_lock); - printk(level_string); - printk("%sIN=%s OUT=%s ", - prefix == NULL ? loginfo->prefix : prefix, + printk("<%d>%sIN=%s OUT=%s ", loginfo->u.log.level, + prefix, in ? in->name : "", out ? 
out->name : ""); if (in && !out) { @@ -416,29 +430,17 @@ ip6t_log_target(struct sk_buff **pskb, void *userinfo) { const struct ip6t_log_info *loginfo = targinfo; - char level_string[4] = "< >"; + struct nf_loginfo li; - level_string[1] = '0' + (loginfo->level % 8); - ip6t_log_packet(hooknum, *pskb, in, out, loginfo, level_string, NULL); + li.type = NF_LOG_TYPE_LOG; + li.u.log.level = loginfo->level; + li.u.log.logflags = loginfo->logflags; + + nf_log_packet(PF_INET6, hooknum, *pskb, in, out, &li, loginfo->prefix); return IP6T_CONTINUE; } -static void -ip6t_logfn(unsigned int hooknum, - const struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - const char *prefix) -{ - struct ip6t_log_info loginfo = { - .level = 0, - .logflags = IP6T_LOG_MASK, - .prefix = "" - }; - - ip6t_log_packet(hooknum, skb, in, out, &loginfo, KERN_WARNING, prefix); -} static int ip6t_log_checkentry(const char *tablename, const struct ip6t_entry *e, @@ -475,20 +477,29 @@ static struct ip6t_target ip6t_log_reg = { .me = THIS_MODULE, }; +static struct nf_logger ip6t_logger = { + .name = "ip6t_LOG", + .logfn = &ip6t_log_packet, + .me = THIS_MODULE, +}; + static int __init init(void) { if (ip6t_register_target(&ip6t_log_reg)) return -EINVAL; - if (nflog) - nf_log_register(PF_INET6, &ip6t_logfn); + if (nf_log_register(PF_INET6, &ip6t_logger) < 0) { + printk(KERN_WARNING "ip6t_LOG: not logging via system console " + "since somebody else already registered for PF_INET6\n"); + /* we cannot make module load fail here, since otherwise + * ip6tables userspace would abort */ + } return 0; } static void __exit fini(void) { - if (nflog) - nf_log_unregister(PF_INET6, &ip6t_logfn); + nf_log_unregister_logger(&ip6t_logger); ip6t_unregister_target(&ip6t_log_reg); } From 0597f2680d666a3bcf101ac0c771ba7e50016bbd Mon Sep 17 00:00:00 2001 From: Harald Welte Date: Tue, 9 Aug 2005 19:58:39 -0700 Subject: [PATCH 296/584] [NETFILTER]: Add new "nfnetlink_log" userspace packet logging 
facility This is a generic (layer3 independent) version of what ipt_ULOG is already doing for IPv4 today. ipt_ULOG, ebt_ulog and finally also ip[6]t_LOG will be deprecated by this mechanism in the long term. Signed-off-by: Harald Welte Signed-off-by: David S. Miller --- include/linux/netfilter/nfnetlink_log.h | 85 ++ net/netfilter/Kconfig | 11 + net/netfilter/Makefile | 1 + net/netfilter/nfnetlink_log.c | 995 ++++++++++++++++++++++++ net/netfilter/nfnetlink_queue.c | 1 + 5 files changed, 1093 insertions(+) create mode 100644 include/linux/netfilter/nfnetlink_log.h create mode 100644 net/netfilter/nfnetlink_log.c diff --git a/include/linux/netfilter/nfnetlink_log.h b/include/linux/netfilter/nfnetlink_log.h new file mode 100644 index 000000000000..420ff4625cbf --- /dev/null +++ b/include/linux/netfilter/nfnetlink_log.h @@ -0,0 +1,85 @@ +#ifndef _NFNETLINK_LOG_H +#define _NFNETLINK_LOG_H + +/* This file describes the netlink messages (i.e. 'protocol packets'), + * and not any kind of function definitions. It is shared between kernel and + * userspace. 
Don't put kernel specific stuff in here */ + +#include + +enum nfulnl_msg_types { + NFULNL_MSG_PACKET, /* packet from kernel to userspace */ + NFULNL_MSG_CONFIG, /* connect to a particular queue */ + + NFULNL_MSG_MAX +}; + +struct nfulnl_msg_packet_hdr { + u_int16_t hw_protocol; /* hw protocol (network order) */ + u_int8_t hook; /* netfilter hook */ + u_int8_t _pad; +} __attribute__ ((packed)); + +struct nfulnl_msg_packet_hw { + u_int16_t hw_addrlen; + u_int16_t _pad; + u_int8_t hw_addr[8]; +} __attribute__ ((packed)); + +struct nfulnl_msg_packet_timestamp { + u_int64_t sec; + u_int64_t usec; +} __attribute__ ((packed)); + +#define NFULNL_PREFIXLEN 30 /* just like old log target */ + +enum nfulnl_attr_type { + NFULA_UNSPEC, + NFULA_PACKET_HDR, + NFULA_MARK, /* u_int32_t nfmark */ + NFULA_TIMESTAMP, /* nfulnl_msg_packet_timestamp */ + NFULA_IFINDEX_INDEV, /* u_int32_t ifindex */ + NFULA_IFINDEX_OUTDEV, /* u_int32_t ifindex */ + NFULA_HWADDR, /* nfulnl_msg_packet_hw */ + NFULA_PAYLOAD, /* opaque data payload */ + NFULA_PREFIX, /* string prefix */ + NFULA_UID, /* user id of socket */ + + __NFULA_MAX +}; +#define NFULA_MAX (__NFULA_MAX - 1) + +enum nfulnl_msg_config_cmds { + NFULNL_CFG_CMD_NONE, + NFULNL_CFG_CMD_BIND, + NFULNL_CFG_CMD_UNBIND, + NFULNL_CFG_CMD_PF_BIND, + NFULNL_CFG_CMD_PF_UNBIND, +}; + +struct nfulnl_msg_config_cmd { + u_int8_t command; /* nfulnl_msg_config_cmds */ +} __attribute__ ((packed)); + +struct nfulnl_msg_config_mode { + u_int32_t copy_range; + u_int8_t copy_mode; + u_int8_t _pad; +} __attribute__ ((packed)); + +enum nfulnl_attr_config { + NFULA_CFG_UNSPEC, + NFULA_CFG_CMD, /* nfulnl_msg_config_cmd */ + NFULA_CFG_MODE, /* nfulnl_msg_config_mode */ + NFULA_CFG_NLBUFSIZ, /* u_int32_t buffer size */ + NFULA_CFG_TIMEOUT, /* u_int32_t in 1/100 s */ + NFULA_CFG_QTHRESH, /* u_int32_t */ + __NFULA_CFG_MAX +}; +#define NFULA_CFG_MAX (__NFULA_CFG_MAX -1) + +#define NFULNL_COPY_NONE 0x00 +#define NFULNL_COPY_META 0x01 +#define NFULNL_COPY_PACKET 0x02 + 
+#endif /* _NFNETLINK_LOG_H */ diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index f0eb23e5c5f1..8296b38bf270 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -11,3 +11,14 @@ config NETFILTER_NETLINK_QUEUE If this option isenabled, the kernel will include support for queueing packets via NFNETLINK. +config NETFILTER_NETLINK_LOG + tristate "Netfilter LOG over NFNETLINK interface" + depends on NETFILTER_NETLINK + help + If this option is enabled, the kernel will include support + for logging packets via NFNETLINK. + + This obsoletes the existing ipt_ULOG and ebt_ulog mechanisms, + and is also scheduled to replace the old syslog-based ipt_LOG + and ip6t_LOG modules. + diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile index 14a0b187e75e..c41caebc4a7c 100644 --- a/net/netfilter/Makefile +++ b/net/netfilter/Makefile @@ -1,2 +1,3 @@ obj-$(CONFIG_NETFILTER_NETLINK) += nfnetlink.o obj-$(CONFIG_NETFILTER_NETLINK_QUEUE) += nfnetlink_queue.o +obj-$(CONFIG_NETFILTER_NETLINK_LOG) += nfnetlink_log.o diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c new file mode 100644 index 000000000000..f41045e385ae --- /dev/null +++ b/net/netfilter/nfnetlink_log.c @@ -0,0 +1,995 @@ +/* + * This is a module which is used for logging packets to userspace via + * nfnetlink. + * + * (C) 2005 by Harald Welte + * + * Based on the old ipv4-only ipt_ULOG.c: + * (C) 2000-2004 by Harald Welte + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation.
+ * + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#define NFULNL_NLBUFSIZ_DEFAULT 4096 +#define NFULNL_TIMEOUT_DEFAULT 100 /* every second */ +#define NFULNL_QTHRESH_DEFAULT 100 /* 100 packets */ + +#define PRINTR(x, args...) do { if (net_ratelimit()) \ + printk(x, ## args); } while (0); + +#if 0 +#define UDEBUG(x, args ...) printk(KERN_DEBUG "%s(%d):%s(): " x, \ + __FILE__, __LINE__, __FUNCTION__, \ + ## args) +#else +#define UDEBUG(x, ...) +#endif + +struct nfulnl_instance { + struct hlist_node hlist; /* global list of instances */ + spinlock_t lock; + atomic_t use; /* use count */ + + unsigned int qlen; /* number of nlmsgs in skb */ + struct sk_buff *skb; /* pre-allocatd skb */ + struct nlmsghdr *lastnlh; /* netlink header of last msg in skb */ + struct timer_list timer; + int peer_pid; /* PID of the peer process */ + + /* configurable parameters */ + unsigned int flushtimeout; /* timeout until queue flush */ + unsigned int nlbufsiz; /* netlink buffer allocation size */ + unsigned int qthreshold; /* threshold of the queue */ + u_int32_t copy_range; + u_int16_t group_num; /* number of this queue */ + u_int8_t copy_mode; +}; + +static DEFINE_RWLOCK(instances_lock); + +#define INSTANCE_BUCKETS 16 +static struct hlist_head instance_table[INSTANCE_BUCKETS]; +static unsigned int hash_init; + +static inline u_int8_t instance_hashfn(u_int16_t group_num) +{ + return ((group_num & 0xff) % INSTANCE_BUCKETS); +} + +static struct nfulnl_instance * +__instance_lookup(u_int16_t group_num) +{ + struct hlist_head *head; + struct hlist_node *pos; + struct nfulnl_instance *inst; + + UDEBUG("entering (group_num=%u)\n", group_num); + + head = &instance_table[instance_hashfn(group_num)]; + hlist_for_each_entry(inst, pos, head, hlist) { + if (inst->group_num == group_num) + return inst; + } + return NULL; +} + +static inline void 
+instance_get(struct nfulnl_instance *inst) +{ + atomic_inc(&inst->use); +} + +static struct nfulnl_instance * +instance_lookup_get(u_int16_t group_num) +{ + struct nfulnl_instance *inst; + + read_lock_bh(&instances_lock); + inst = __instance_lookup(group_num); + if (inst) + instance_get(inst); + read_unlock_bh(&instances_lock); + + return inst; +} + +static void +instance_put(struct nfulnl_instance *inst) +{ + if (inst && atomic_dec_and_test(&inst->use)) { + UDEBUG("kfree(inst=%p)\n", inst); + kfree(inst); + } +} + +static void nfulnl_timer(unsigned long data); + +static struct nfulnl_instance * +instance_create(u_int16_t group_num, int pid) +{ + struct nfulnl_instance *inst; + + UDEBUG("entering (group_num=%u, pid=%d)\n", group_num, + pid); + + write_lock_bh(&instances_lock); + if (__instance_lookup(group_num)) { + inst = NULL; + UDEBUG("aborting, instance already exists\n"); + goto out_unlock; + } + + inst = kmalloc(sizeof(*inst), GFP_ATOMIC); + if (!inst) + goto out_unlock; + + memset(inst, 0, sizeof(*inst)); + INIT_HLIST_NODE(&inst->hlist); + inst->lock = SPIN_LOCK_UNLOCKED; + /* needs to be two, since we _put() after creation */ + atomic_set(&inst->use, 2); + + init_timer(&inst->timer); + inst->timer.function = nfulnl_timer; + inst->timer.data = (unsigned long)inst; + /* don't start timer yet. 
(re)start it with every packet */ + + inst->peer_pid = pid; + inst->group_num = group_num; + + inst->qthreshold = NFULNL_QTHRESH_DEFAULT; + inst->flushtimeout = NFULNL_TIMEOUT_DEFAULT; + inst->nlbufsiz = NFULNL_NLBUFSIZ_DEFAULT; + inst->copy_mode = NFULNL_COPY_PACKET; + inst->copy_range = 0xffff; + + if (!try_module_get(THIS_MODULE)) + goto out_free; + + hlist_add_head(&inst->hlist, + &instance_table[instance_hashfn(group_num)]); + + UDEBUG("newly added node: %p, next=%p\n", &inst->hlist, + inst->hlist.next); + + write_unlock_bh(&instances_lock); + + return inst; + +out_free: + instance_put(inst); +out_unlock: + write_unlock_bh(&instances_lock); + return NULL; +} + +static int __nfulnl_send(struct nfulnl_instance *inst); + +static void +_instance_destroy2(struct nfulnl_instance *inst, int lock) +{ + /* first pull it out of the global list */ + if (lock) + write_lock_bh(&instances_lock); + + UDEBUG("removing instance %p (queuenum=%u) from hash\n", + inst, inst->group_num); + + hlist_del(&inst->hlist); + + if (lock) + write_unlock_bh(&instances_lock); + + /* then flush all pending packets from skb */ + + spin_lock_bh(&inst->lock); + if (inst->skb) { + if (inst->qlen) + __nfulnl_send(inst); + if (inst->skb) { + kfree_skb(inst->skb); + inst->skb = NULL; + } + } + spin_unlock_bh(&inst->lock); + + /* and finally put the refcount */ + instance_put(inst); + + module_put(THIS_MODULE); +} + +static inline void +__instance_destroy(struct nfulnl_instance *inst) +{ + _instance_destroy2(inst, 0); +} + +static inline void +instance_destroy(struct nfulnl_instance *inst) +{ + _instance_destroy2(inst, 1); +} + +static int +nfulnl_set_mode(struct nfulnl_instance *inst, u_int8_t mode, + unsigned int range) +{ + int status = 0; + + spin_lock_bh(&inst->lock); + + switch (mode) { + case NFULNL_COPY_NONE: + case NFULNL_COPY_META: + inst->copy_mode = mode; + inst->copy_range = 0; + break; + + case NFULNL_COPY_PACKET: + inst->copy_mode = mode; + /* we're using struct nfattr which has 16bit 
nfa_len */ + if (range > 0xffff) + inst->copy_range = 0xffff; + else + inst->copy_range = range; + break; + + default: + status = -EINVAL; + break; + } + + spin_unlock_bh(&inst->lock); + + return status; +} + +static int +nfulnl_set_nlbufsiz(struct nfulnl_instance *inst, u_int32_t nlbufsiz) +{ + int status; + + spin_lock_bh(&inst->lock); + if (nlbufsiz < NFULNL_NLBUFSIZ_DEFAULT) + status = -ERANGE; + else if (nlbufsiz > 131072) + status = -ERANGE; + else { + inst->nlbufsiz = nlbufsiz; + status = 0; + } + spin_unlock_bh(&inst->lock); + + return status; +} + +static int +nfulnl_set_timeout(struct nfulnl_instance *inst, u_int32_t timeout) +{ + spin_lock_bh(&inst->lock); + inst->flushtimeout = timeout; + spin_unlock_bh(&inst->lock); + + return 0; +} + +static int +nfulnl_set_qthresh(struct nfulnl_instance *inst, u_int32_t qthresh) +{ + spin_lock_bh(&inst->lock); + inst->qthreshold = qthresh; + spin_unlock_bh(&inst->lock); + + return 0; +} + +static struct sk_buff *nfulnl_alloc_skb(unsigned int inst_size, + unsigned int pkt_size) +{ + struct sk_buff *skb; + + UDEBUG("entered (%u, %u)\n", inst_size, pkt_size); + + /* alloc skb which should be big enough for a whole multipart + * message. 
WARNING: has to be <= 128k due to slab restrictions */ + + skb = alloc_skb(inst_size, GFP_ATOMIC); + if (!skb) { + PRINTR("nfnetlink_log: can't alloc whole buffer (%u bytes)\n", + inst_size); + + /* try to allocate only as much as we need for current + * packet */ + + skb = alloc_skb(pkt_size, GFP_ATOMIC); + if (!skb) + PRINTR("nfnetlink_log: can't even alloc %u bytes\n", + pkt_size); + } + + return skb; +} + +static int +__nfulnl_send(struct nfulnl_instance *inst) +{ + int status; + + if (timer_pending(&inst->timer)) + del_timer(&inst->timer); + + if (inst->qlen > 1) + inst->lastnlh->nlmsg_type = NLMSG_DONE; + + status = nfnetlink_unicast(inst->skb, inst->peer_pid, MSG_DONTWAIT); + if (status < 0) { + UDEBUG("netlink_unicast() failed\n"); + /* FIXME: statistics */ + } + + inst->qlen = 0; + inst->skb = NULL; + inst->lastnlh = NULL; + + return status; +} + +static void nfulnl_timer(unsigned long data) +{ + struct nfulnl_instance *inst = (struct nfulnl_instance *)data; + + UDEBUG("timer function called, flushing buffer\n"); + + spin_lock_bh(&inst->lock); + __nfulnl_send(inst); + instance_put(inst); + spin_unlock_bh(&inst->lock); +} + +static inline int +__build_packet_message(struct nfulnl_instance *inst, + const struct sk_buff *skb, + unsigned int data_len, + unsigned int pf, + unsigned int hooknum, + const struct net_device *indev, + const struct net_device *outdev, + const struct nf_loginfo *li, + const char *prefix) +{ + unsigned char *old_tail; + struct nfulnl_msg_packet_hdr pmsg; + struct nlmsghdr *nlh; + struct nfgenmsg *nfmsg; + u_int32_t tmp_uint; + + UDEBUG("entered\n"); + + old_tail = inst->skb->tail; + nlh = NLMSG_PUT(inst->skb, 0, 0, + NFNL_SUBSYS_ULOG << 8 | NFULNL_MSG_PACKET, + sizeof(struct nfgenmsg)); + nfmsg = NLMSG_DATA(nlh); + nfmsg->nfgen_family = pf; + nfmsg->version = NFNETLINK_V0; + nfmsg->res_id = htons(inst->group_num); + + pmsg.hw_protocol = htons(skb->protocol); + pmsg.hook = hooknum; + + NFA_PUT(inst->skb, NFULA_PACKET_HDR, sizeof(pmsg), 
&pmsg); + + if (prefix) { + int slen = strlen(prefix); + if (slen > NFULNL_PREFIXLEN) + slen = NFULNL_PREFIXLEN; + NFA_PUT(inst->skb, NFULA_PREFIX, slen, prefix); + } + + if (indev) { + tmp_uint = htonl(indev->ifindex); + NFA_PUT(inst->skb, NFULA_IFINDEX_INDEV, sizeof(tmp_uint), + &tmp_uint); + } + + if (outdev) { + tmp_uint = htonl(outdev->ifindex); + NFA_PUT(inst->skb, NFULA_IFINDEX_OUTDEV, sizeof(tmp_uint), + &tmp_uint); + } + + if (skb->nfmark) { + tmp_uint = htonl(skb->nfmark); + NFA_PUT(inst->skb, NFULA_MARK, sizeof(tmp_uint), &tmp_uint); + } + + if (indev && skb->dev && skb->dev->hard_header_parse) { + struct nfulnl_msg_packet_hw phw; + + phw.hw_addrlen = + skb->dev->hard_header_parse((struct sk_buff *)skb, + phw.hw_addr); + phw.hw_addrlen = htons(phw.hw_addrlen); + NFA_PUT(inst->skb, NFULA_HWADDR, sizeof(phw), &phw); + } + + if (skb->stamp.tv_sec) { + struct nfulnl_msg_packet_timestamp ts; + + ts.sec = cpu_to_be64(skb->stamp.tv_sec); + ts.usec = cpu_to_be64(skb->stamp.tv_usec); + + NFA_PUT(inst->skb, NFULA_TIMESTAMP, sizeof(ts), &ts); + } + + /* UID */ + if (skb->sk) { + read_lock_bh(&skb->sk->sk_callback_lock); + if (skb->sk->sk_socket && skb->sk->sk_socket->file) { + u_int32_t uid = htonl(skb->sk->sk_socket->file->f_uid); + /* need to unlock here since NFA_PUT may goto */ + read_unlock_bh(&skb->sk->sk_callback_lock); + NFA_PUT(inst->skb, NFULA_UID, sizeof(uid), &uid); + } else + read_unlock_bh(&skb->sk->sk_callback_lock); + } + + if (data_len) { + struct nfattr *nfa; + int size = NFA_LENGTH(data_len); + + if (skb_tailroom(inst->skb) < (int)NFA_SPACE(data_len)) { + printk(KERN_WARNING "nfnetlink_log: no tailroom!\n"); + goto nlmsg_failure; + } + + nfa = (struct nfattr *)skb_put(inst->skb, NFA_ALIGN(size)); + nfa->nfa_type = NFULA_PAYLOAD; + nfa->nfa_len = size; + + if (skb_copy_bits(skb, 0, NFA_DATA(nfa), data_len)) + BUG(); + } + + nlh->nlmsg_len = inst->skb->tail - old_tail; + return 0; + +nlmsg_failure: + UDEBUG("nlmsg_failure\n"); +nfattr_failure: + 
PRINTR(KERN_ERR "nfnetlink_log: error creating log nlmsg\n"); + return -1; +} + +#define RCV_SKB_FAIL(err) do { netlink_ack(skb, nlh, (err)); return; } while (0) + +static struct nf_loginfo default_loginfo = { + .type = NF_LOG_TYPE_ULOG, + .u = { + .ulog = { + .copy_len = 0xffff, + .group = 0, + .qthreshold = 1, + }, + }, +}; + +/* log handler for internal netfilter logging api */ +static void +nfulnl_log_packet(unsigned int pf, + unsigned int hooknum, + const struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + const struct nf_loginfo *li_user, + const char *prefix) +{ + unsigned int size, data_len; + struct nfulnl_instance *inst; + const struct nf_loginfo *li; + unsigned int qthreshold; + unsigned int nlbufsiz; + + if (li_user && li_user->type == NF_LOG_TYPE_ULOG) + li = li_user; + else + li = &default_loginfo; + + inst = instance_lookup_get(li->u.ulog.group); + if (!inst) + inst = instance_lookup_get(0); + if (!inst) { + PRINTR("nfnetlink_log: trying to log packet, " + "but no instance for group %u\n", li->u.ulog.group); + return; + } + + /* all macros expand to constant values at compile time */ + /* FIXME: do we want to make the size calculation conditional based on + * what is actually present? way more branches and checks, but more + * memory efficient... 
*/ + size = NLMSG_SPACE(sizeof(struct nfgenmsg)) + + NFA_SPACE(sizeof(struct nfulnl_msg_packet_hdr)) + + NFA_SPACE(sizeof(u_int32_t)) /* ifindex */ + + NFA_SPACE(sizeof(u_int32_t)) /* ifindex */ + + NFA_SPACE(sizeof(u_int32_t)) /* mark */ + + NFA_SPACE(sizeof(u_int32_t)) /* uid */ + + NFA_SPACE(NFULNL_PREFIXLEN) /* prefix */ + + NFA_SPACE(sizeof(struct nfulnl_msg_packet_hw)) + + NFA_SPACE(sizeof(struct nfulnl_msg_packet_timestamp)); + + UDEBUG("initial size=%u\n", size); + + spin_lock_bh(&inst->lock); + + qthreshold = inst->qthreshold; + /* per-rule qthreshold overrides per-instance */ + if (qthreshold > li->u.ulog.qthreshold) + qthreshold = li->u.ulog.qthreshold; + + switch (inst->copy_mode) { + case NFULNL_COPY_META: + case NFULNL_COPY_NONE: + data_len = 0; + break; + + case NFULNL_COPY_PACKET: + if (inst->copy_range == 0 + || inst->copy_range > skb->len) + data_len = skb->len; + else + data_len = inst->copy_range; + + size += NFA_SPACE(data_len); + UDEBUG("copy_packet, therefore size now %u\n", size); + break; + + default: + spin_unlock_bh(&inst->lock); + instance_put(inst); + return; + } + + if (size > inst->nlbufsiz) + nlbufsiz = size; + else + nlbufsiz = inst->nlbufsiz; + + if (!inst->skb) { + if (!(inst->skb = nfulnl_alloc_skb(nlbufsiz, size))) { + UDEBUG("error in nfulnl_alloc_skb(%u, %u)\n", + inst->nlbufsiz, size); + goto alloc_failure; + } + } else if (inst->qlen >= qthreshold || + size > skb_tailroom(inst->skb)) { + /* either the queue len is too high or we don't have + * enough room in the skb left. flush to userspace. 
*/ + UDEBUG("flushing old skb\n"); + + __nfulnl_send(inst); + + if (!(inst->skb = nfulnl_alloc_skb(nlbufsiz, size))) { + UDEBUG("error in nfulnl_alloc_skb(%u, %u)\n", + inst->nlbufsiz, size); + goto alloc_failure; + } + } + + UDEBUG("qlen %d, qthreshold %d\n", inst->qlen, qthreshold); + inst->qlen++; + + __build_packet_message(inst, skb, data_len, pf, + hooknum, in, out, li, prefix); + + /* timer_pending always called within inst->lock, so there + * is no chance of a race here */ + if (!timer_pending(&inst->timer)) { + instance_get(inst); + inst->timer.expires = jiffies + (inst->flushtimeout*HZ/100); + add_timer(&inst->timer); + } + spin_unlock_bh(&inst->lock); + + return; + +alloc_failure: + spin_unlock_bh(&inst->lock); + instance_put(inst); + UDEBUG("error allocating skb\n"); + /* FIXME: statistics */ +} + +static int +nfulnl_rcv_nl_event(struct notifier_block *this, + unsigned long event, void *ptr) +{ + struct netlink_notify *n = ptr; + + if (event == NETLINK_URELEASE && + n->protocol == NETLINK_NETFILTER && n->pid) { + int i; + + /* destroy all instances for this pid */ + write_lock_bh(&instances_lock); + for (i = 0; i < INSTANCE_BUCKETS; i++) { + struct hlist_node *tmp, *t2; + struct nfulnl_instance *inst; + struct hlist_head *head = &instance_table[i]; + + hlist_for_each_entry_safe(inst, tmp, t2, head, hlist) { + UDEBUG("node = %p\n", inst); + if (n->pid == inst->peer_pid) + __instance_destroy(inst); + } + } + write_unlock_bh(&instances_lock); + } + return NOTIFY_DONE; +} + +static struct notifier_block nfulnl_rtnl_notifier = { + .notifier_call = nfulnl_rcv_nl_event, +}; + +static int +nfulnl_recv_unsupp(struct sock *ctnl, struct sk_buff *skb, + struct nlmsghdr *nlh, struct nfattr *nfqa[], int *errp) +{ + return -ENOTSUPP; +} + +static struct nf_logger nfulnl_logger = { + .name = "nfnetlink_log", + .logfn = &nfulnl_log_packet, + .me = THIS_MODULE, +}; + +static const int nfula_min[NFULA_MAX] = { + [NFULA_PACKET_HDR-1] = sizeof(struct nfulnl_msg_packet_hdr), 
+ [NFULA_MARK-1] = sizeof(u_int32_t), + [NFULA_TIMESTAMP-1] = sizeof(struct nfulnl_msg_packet_timestamp), + [NFULA_IFINDEX_INDEV-1] = sizeof(u_int32_t), + [NFULA_IFINDEX_OUTDEV-1]= sizeof(u_int32_t), + [NFULA_HWADDR-1] = sizeof(struct nfulnl_msg_packet_hw), + [NFULA_PAYLOAD-1] = 0, + [NFULA_PREFIX-1] = 0, + [NFULA_UID-1] = sizeof(u_int32_t), +}; + +static const int nfula_cfg_min[NFULA_CFG_MAX] = { + [NFULA_CFG_CMD-1] = sizeof(struct nfulnl_msg_config_cmd), + [NFULA_CFG_MODE-1] = sizeof(struct nfulnl_msg_config_mode), + [NFULA_CFG_TIMEOUT-1] = sizeof(u_int32_t), + [NFULA_CFG_QTHRESH-1] = sizeof(u_int32_t), + [NFULA_CFG_NLBUFSIZ-1] = sizeof(u_int32_t), +}; + +static int +nfulnl_recv_config(struct sock *ctnl, struct sk_buff *skb, + struct nlmsghdr *nlh, struct nfattr *nfula[], int *errp) +{ + struct nfgenmsg *nfmsg = NLMSG_DATA(nlh); + u_int16_t group_num = ntohs(nfmsg->res_id); + struct nfulnl_instance *inst; + int ret = 0; + + UDEBUG("entering for msg %u\n", NFNL_MSG_TYPE(nlh->nlmsg_type)); + + if (nfattr_bad_size(nfula, NFULA_CFG_MAX, nfula_cfg_min)) { + UDEBUG("bad attribute size\n"); + return -EINVAL; + } + + inst = instance_lookup_get(group_num); + if (nfula[NFULA_CFG_CMD-1]) { + u_int8_t pf = nfmsg->nfgen_family; + struct nfulnl_msg_config_cmd *cmd; + cmd = NFA_DATA(nfula[NFULA_CFG_CMD-1]); + UDEBUG("found CFG_CMD for\n"); + + switch (cmd->command) { + case NFULNL_CFG_CMD_BIND: + if (inst) { + ret = -EBUSY; + goto out_put; + } + + inst = instance_create(group_num, + NETLINK_CB(skb).pid); + if (!inst) { + ret = -EINVAL; + goto out_put; + } + break; + case NFULNL_CFG_CMD_UNBIND: + if (!inst) { + ret = -ENODEV; + goto out_put; + } + + if (inst->peer_pid != NETLINK_CB(skb).pid) { + ret = -EPERM; + goto out_put; + } + + instance_destroy(inst); + break; + case NFULNL_CFG_CMD_PF_BIND: + UDEBUG("registering log handler for pf=%u\n", pf); + ret = nf_log_register(pf, &nfulnl_logger); + break; + case NFULNL_CFG_CMD_PF_UNBIND: + UDEBUG("unregistering log handler for 
pf=%u\n", pf); + /* This is a bug and a feature. We cannot unregister + * other handlers, like nfnetlink_inst can */ + nf_log_unregister_pf(pf); + break; + default: + ret = -EINVAL; + break; + } + } else { + if (!inst) { + UDEBUG("no config command, and no instance for " + "group=%u pid=%u =>ENOENT\n", + group_num, NETLINK_CB(skb).pid); + ret = -ENOENT; + goto out_put; + } + + if (inst->peer_pid != NETLINK_CB(skb).pid) { + UDEBUG("no config command, and wrong pid\n"); + ret = -EPERM; + goto out_put; + } + } + + if (nfula[NFULA_CFG_MODE-1]) { + struct nfulnl_msg_config_mode *params; + params = NFA_DATA(nfula[NFULA_CFG_MODE-1]); + + nfulnl_set_mode(inst, params->copy_mode, + ntohs(params->copy_range)); + } + + if (nfula[NFULA_CFG_TIMEOUT-1]) { + u_int32_t timeout = + *(u_int32_t *)NFA_DATA(nfula[NFULA_CFG_TIMEOUT-1]); + + nfulnl_set_timeout(inst, ntohl(timeout)); + } + + if (nfula[NFULA_CFG_NLBUFSIZ-1]) { + u_int32_t nlbufsiz = + *(u_int32_t *)NFA_DATA(nfula[NFULA_CFG_NLBUFSIZ-1]); + + nfulnl_set_nlbufsiz(inst, ntohl(nlbufsiz)); + } + + if (nfula[NFULA_CFG_QTHRESH-1]) { + u_int32_t qthresh = + *(u_int16_t *)NFA_DATA(nfula[NFULA_CFG_QTHRESH-1]); + + nfulnl_set_qthresh(inst, ntohl(qthresh)); + } + +out_put: + instance_put(inst); + return ret; +} + +static struct nfnl_callback nfulnl_cb[NFULNL_MSG_MAX] = { + [NFULNL_MSG_PACKET] = { .call = nfulnl_recv_unsupp, + .cap_required = CAP_NET_ADMIN }, + [NFULNL_MSG_CONFIG] = { .call = nfulnl_recv_config, + .cap_required = CAP_NET_ADMIN }, +}; + +static struct nfnetlink_subsystem nfulnl_subsys = { + .name = "log", + .subsys_id = NFNL_SUBSYS_ULOG, + .cb_count = NFULNL_MSG_MAX, + .attr_count = NFULA_MAX, + .cb = nfulnl_cb, +}; + +#ifdef CONFIG_PROC_FS +struct iter_state { + unsigned int bucket; +}; + +static struct hlist_node *get_first(struct seq_file *seq) +{ + struct iter_state *st = seq->private; + + if (!st) + return NULL; + + for (st->bucket = 0; st->bucket < INSTANCE_BUCKETS; st->bucket++) { + if 
(!hlist_empty(&instance_table[st->bucket])) + return instance_table[st->bucket].first; + } + return NULL; +} + +static struct hlist_node *get_next(struct seq_file *seq, struct hlist_node *h) +{ + struct iter_state *st = seq->private; + + h = h->next; + while (!h) { + if (++st->bucket >= INSTANCE_BUCKETS) + return NULL; + + h = instance_table[st->bucket].first; + } + return h; +} + +static struct hlist_node *get_idx(struct seq_file *seq, loff_t pos) +{ + struct hlist_node *head; + head = get_first(seq); + + if (head) + while (pos && (head = get_next(seq, head))) + pos--; + return pos ? NULL : head; +} + +static void *seq_start(struct seq_file *seq, loff_t *pos) +{ + read_lock_bh(&instances_lock); + return get_idx(seq, *pos); +} + +static void *seq_next(struct seq_file *s, void *v, loff_t *pos) +{ + (*pos)++; + return get_next(s, v); +} + +static void seq_stop(struct seq_file *s, void *v) +{ + read_unlock_bh(&instances_lock); +} + +static int seq_show(struct seq_file *s, void *v) +{ + const struct nfulnl_instance *inst = v; + + return seq_printf(s, "%5d %6d %5d %1d %5d %6d %2d\n", + inst->group_num, + inst->peer_pid, inst->qlen, + inst->copy_mode, inst->copy_range, + inst->flushtimeout, atomic_read(&inst->use)); +} + +static struct seq_operations nful_seq_ops = { + .start = seq_start, + .next = seq_next, + .stop = seq_stop, + .show = seq_show, +}; + +static int nful_open(struct inode *inode, struct file *file) +{ + struct seq_file *seq; + struct iter_state *is; + int ret; + + is = kmalloc(sizeof(*is), GFP_KERNEL); + if (!is) + return -ENOMEM; + memset(is, 0, sizeof(*is)); + ret = seq_open(file, &nful_seq_ops); + if (ret < 0) + goto out_free; + seq = file->private_data; + seq->private = is; + return ret; +out_free: + kfree(is); + return ret; +} + +static struct file_operations nful_file_ops = { + .owner = THIS_MODULE, + .open = nful_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release_private, +}; + +#endif /* PROC_FS */ + +static int 
+init_or_cleanup(int init) +{ + int i, status = -ENOMEM; +#ifdef CONFIG_PROC_FS + struct proc_dir_entry *proc_nful; +#endif + + if (!init) + goto cleanup; + + for (i = 0; i < INSTANCE_BUCKETS; i++) + INIT_HLIST_HEAD(&instance_table[i]); + + /* it's not really all that important to have a random value, so + * we can do this from the init function, even if there hasn't + * been that much entropy yet */ + get_random_bytes(&hash_init, sizeof(hash_init)); + + netlink_register_notifier(&nfulnl_rtnl_notifier); + status = nfnetlink_subsys_register(&nfulnl_subsys); + if (status < 0) { + printk(KERN_ERR "log: failed to create netlink socket\n"); + goto cleanup_netlink_notifier; + } + +#ifdef CONFIG_PROC_FS + proc_nful = create_proc_entry("nfnetlink_log", 0440, + proc_net_netfilter); + if (!proc_nful) + goto cleanup_subsys; + proc_nful->proc_fops = &nful_file_ops; +#endif + + return status; + +cleanup: + nf_log_unregister_logger(&nfulnl_logger); +#ifdef CONFIG_PROC_FS + remove_proc_entry("nfnetlink_log", proc_net_netfilter); +cleanup_subsys: +#endif + nfnetlink_subsys_unregister(&nfulnl_subsys); +cleanup_netlink_notifier: + netlink_unregister_notifier(&nfulnl_rtnl_notifier); + return status; +} + +static int __init init(void) +{ + + return init_or_cleanup(1); +} + +static void __exit fini(void) +{ + init_or_cleanup(0); +} + +MODULE_DESCRIPTION("netfilter userspace logging"); +MODULE_AUTHOR("Harald Welte "); +MODULE_LICENSE("GPL"); + +module_init(init); +module_exit(fini); diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c index eab309e3d42e..d7b0330d64b4 100644 --- a/net/netfilter/nfnetlink_queue.c +++ b/net/netfilter/nfnetlink_queue.c @@ -1041,6 +1041,7 @@ cleanup: nf_unregister_queue_handlers(nfqnl_enqueue_packet); unregister_netdevice_notifier(&nfqnl_dev_notifier); #ifdef CONFIG_PROC_FS + remove_proc_entry("nfnetlink_queue", proc_net_netfilter); cleanup_subsys: #endif nfnetlink_subsys_unregister(&nfqnl_subsys); From 
304a16180fb6d2b153b45f6fbbcec1fa814496e5 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 9 Aug 2005 19:59:20 -0700 Subject: [PATCH 297/584] [INET]: Move the TCP ehash functions to include/net/inet_hashtables.h To be shared with DCCP (and others), this is the start of a series of patches that will expose the already generic TCP hash table routines. The few changes noticed when calling gcc -S before/after on a pentium4 were of this type: movl 40(%esp), %edx cmpl %esi, 472(%edx) je .L168 - pushl $291 + pushl $272 pushl $.LC0 pushl $.LC1 pushl $.LC2 [acme@toy net-2.6.14]$ size net/ipv4/tcp_ipv4.before.o net/ipv4/tcp_ipv4.after.o text data bss dec hex filename 17804 516 140 18460 481c net/ipv4/tcp_ipv4.before.o 17804 516 140 18460 481c net/ipv4/tcp_ipv4.after.o Holler if some weird architecture has issues with things like this 8) Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. Miller --- include/net/inet_hashtables.h | 40 +++++++++++++++++++++++++++++++++++ net/ipv4/tcp_ipv4.c | 28 +++++------------------- 2 files changed, 45 insertions(+), 23 deletions(-) create mode 100644 include/net/inet_hashtables.h diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h new file mode 100644 index 000000000000..c4c9e39f4505 --- /dev/null +++ b/include/net/inet_hashtables.h @@ -0,0 +1,40 @@ +/* + * INET An implementation of the TCP/IP protocol suite for the LINUX + * operating system. INET is implemented using the BSD Socket + * interface as the means of communication with the user level. + * + * Authors: Lotsa people, from code originally in tcp + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
+ */ + +#ifndef _INET_HASHTABLES_H +#define _INET_HASHTABLES_H + +#include + +static inline int inet_ehashfn(const __u32 laddr, const __u16 lport, + const __u32 faddr, const __u16 fport, + const int ehash_size) +{ + int h = (laddr ^ lport) ^ (faddr ^ fport); + h ^= h >> 16; + h ^= h >> 8; + return h & (ehash_size - 1); +} + +static inline int inet_sk_ehashfn(const struct sock *sk, const int ehash_size) +{ + const struct inet_sock *inet = inet_sk(sk); + const __u32 laddr = inet->rcv_saddr; + const __u16 lport = inet->num; + const __u32 faddr = inet->daddr; + const __u16 fport = inet->dport; + + return inet_ehashfn(laddr, lport, faddr, fport, ehash_size); +} + +#endif /* _INET_HASHTABLES_H */ diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index ae6fad99a9a9..c03d7e9688c8 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -64,6 +64,7 @@ #include #include +#include #include #include #include @@ -104,26 +105,6 @@ struct tcp_hashinfo __cacheline_aligned tcp_hashinfo = { int sysctl_local_port_range[2] = { 1024, 4999 }; int tcp_port_rover = 1024 - 1; -static __inline__ int tcp_hashfn(__u32 laddr, __u16 lport, - __u32 faddr, __u16 fport) -{ - int h = (laddr ^ lport) ^ (faddr ^ fport); - h ^= h >> 16; - h ^= h >> 8; - return h & (tcp_ehash_size - 1); -} - -static __inline__ int tcp_sk_hashfn(struct sock *sk) -{ - struct inet_sock *inet = inet_sk(sk); - __u32 laddr = inet->rcv_saddr; - __u16 lport = inet->num; - __u32 faddr = inet->daddr; - __u16 fport = inet->dport; - - return tcp_hashfn(laddr, lport, faddr, fport); -} - /* Allocate and initialize a new TCP local port bind bucket. * The bindhash mutex for snum's hash chain must be held here. 
*/ @@ -367,7 +348,8 @@ static __inline__ void __tcp_v4_hash(struct sock *sk, const int listen_possible) lock = &tcp_lhash_lock; tcp_listen_wlock(); } else { - list = &tcp_ehash[(sk->sk_hashent = tcp_sk_hashfn(sk))].chain; + sk->sk_hashent = inet_sk_ehashfn(sk, tcp_ehash_size); + list = &tcp_ehash[sk->sk_hashent].chain; lock = &tcp_ehash[sk->sk_hashent].lock; write_lock(lock); } @@ -500,7 +482,7 @@ static inline struct sock *__tcp_v4_lookup_established(u32 saddr, u16 sport, /* Optimize here for direct hit, only listening connections can * have wildcards anyways. */ - int hash = tcp_hashfn(daddr, hnum, saddr, sport); + const int hash = inet_ehashfn(daddr, hnum, saddr, sport, tcp_ehash_size); head = &tcp_ehash[hash]; read_lock(&head->lock); sk_for_each(sk, node, &head->chain) { @@ -563,7 +545,7 @@ static int __tcp_v4_check_established(struct sock *sk, __u16 lport, int dif = sk->sk_bound_dev_if; TCP_V4_ADDR_COOKIE(acookie, saddr, daddr) __u32 ports = TCP_COMBINED_PORTS(inet->dport, lport); - int hash = tcp_hashfn(daddr, lport, saddr, inet->dport); + const int hash = inet_ehashfn(daddr, lport, saddr, inet->dport, tcp_ehash_size); struct tcp_ehash_bucket *head = &tcp_ehash[hash]; struct sock *sk2; struct hlist_node *node; From 0f7ff9274e72fd254fbd1ab117bbc1db6e7cdb34 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 9 Aug 2005 19:59:44 -0700 Subject: [PATCH 298/584] [INET]: Just rename the TCP hashtable functions/structs to inet_ This is to break down the complexity of the series of patches, making it very clear that this one just does: 1. renames tcp_ prefixed hashtable functions and data structures that were already mostly generic to inet_ to share it with DCCP and other INET transport protocols. 2. Removes not used functions (__tb_head & tb_head) 3. 
Removes some leftover prototypes in the headers (tcp_bucket_unlock & tcp_v4_build_header) Next changesets will move tcp_sk(sk)->bind_hash to inet_sock so that we can make functions such as tcp_inherit_port, __tcp_inherit_port, tcp_v4_get_port, __tcp_put_port, generic and get others like tcp_destroy_sock closer to generic (tcp_orphan_count will go to sk->sk_prot to allow this). Eventually most of these functions will be used passing the transport protocol inet_hashinfo structure. Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. Miller --- include/linux/tcp.h | 2 +- include/net/tcp.h | 91 +++++++++++++++------------------ net/ipv4/tcp.c | 15 +++--- net/ipv4/tcp_diag.c | 4 +- net/ipv4/tcp_ipv4.c | 106 +++++++++++++++++++++------------------ net/ipv4/tcp_minisocks.c | 16 +++--- net/ipv6/tcp_ipv6.c | 42 ++++++++-------- 7 files changed, 138 insertions(+), 138 deletions(-) diff --git a/include/linux/tcp.h b/include/linux/tcp.h index e4fd82e42104..ec580a560e8c 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -258,7 +258,7 @@ struct tcp_sock { __u32 snd_sml; /* Last byte of the most recently transmitted small packet */ __u32 rcv_tstamp; /* timestamp of last received ACK (for keepalives) */ __u32 lsndtime; /* timestamp of last sent data packet (for restart window) */ - struct tcp_bind_bucket *bind_hash; + struct inet_bind_bucket *bind_hash; /* Delayed ACK control data */ struct { __u8 pending; /* ACK is pending */ diff --git a/include/net/tcp.h b/include/net/tcp.h index 0c769adb0463..6c9f6f7cab5c 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -44,13 +44,13 @@ * New scheme, half the table is for TIME_WAIT, the other half is * for the rest. I'll experiment with dynamic table growth later. */ -struct tcp_ehash_bucket { +struct inet_ehash_bucket { rwlock_t lock; struct hlist_head chain; } __attribute__((__aligned__(8))); /* This is for listening sockets, thus all sockets which possess wildcards. 
*/ -#define TCP_LHTABLE_SIZE 32 /* Yes, really, this is all you need. */ +#define INET_LHTABLE_SIZE 32 /* Yes, really, this is all you need. */ /* There are a few simple rules, which allow for local port reuse by * an application. In essence: @@ -83,31 +83,22 @@ struct tcp_ehash_bucket { * users logged onto your box, isn't it nice to know that new data * ports are created in O(1) time? I thought so. ;-) -DaveM */ -struct tcp_bind_bucket { +struct inet_bind_bucket { unsigned short port; signed short fastreuse; struct hlist_node node; struct hlist_head owners; }; -#define tb_for_each(tb, node, head) hlist_for_each_entry(tb, node, head, node) +#define inet_bind_bucket_for_each(tb, node, head) \ + hlist_for_each_entry(tb, node, head, node) -struct tcp_bind_hashbucket { +struct inet_bind_hashbucket { spinlock_t lock; struct hlist_head chain; }; -static inline struct tcp_bind_bucket *__tb_head(struct tcp_bind_hashbucket *head) -{ - return hlist_entry(head->chain.first, struct tcp_bind_bucket, node); -} - -static inline struct tcp_bind_bucket *tb_head(struct tcp_bind_hashbucket *head) -{ - return hlist_empty(&head->chain) ? NULL : __tb_head(head); -} - -extern struct tcp_hashinfo { +struct inet_hashinfo { /* This is for sockets with full identity only. Sockets here will * always be without wildcards and will have the following invariant: * @@ -116,21 +107,21 @@ extern struct tcp_hashinfo { * First half of the table is for sockets not in TIME_WAIT, second half * is for TIME_WAIT sockets only. */ - struct tcp_ehash_bucket *__tcp_ehash; + struct inet_ehash_bucket *ehash; /* Ok, let's try this, I give up, we do need a local binding * TCP hash as well as the others for fast bind/connect. */ - struct tcp_bind_hashbucket *__tcp_bhash; + struct inet_bind_hashbucket *bhash; - int __tcp_bhash_size; - int __tcp_ehash_size; + int bhash_size; + int ehash_size; /* All sockets in TCP_LISTEN state will be in here. This is the only * table where wildcard'd TCP sockets can exist. 
Hash function here * is just local port number. */ - struct hlist_head __tcp_listening_hash[TCP_LHTABLE_SIZE]; + struct hlist_head listening_hash[INET_LHTABLE_SIZE]; /* All the above members are written once at bootup and * never written again _or_ are predominantly read-access. @@ -138,36 +129,39 @@ extern struct tcp_hashinfo { * Now align to a new cache line as all the following members * are often dirty. */ - rwlock_t __tcp_lhash_lock ____cacheline_aligned; - atomic_t __tcp_lhash_users; - wait_queue_head_t __tcp_lhash_wait; - spinlock_t __tcp_portalloc_lock; -} tcp_hashinfo; + rwlock_t lhash_lock ____cacheline_aligned; + atomic_t lhash_users; + wait_queue_head_t lhash_wait; + spinlock_t portalloc_lock; +}; -#define tcp_ehash (tcp_hashinfo.__tcp_ehash) -#define tcp_bhash (tcp_hashinfo.__tcp_bhash) -#define tcp_ehash_size (tcp_hashinfo.__tcp_ehash_size) -#define tcp_bhash_size (tcp_hashinfo.__tcp_bhash_size) -#define tcp_listening_hash (tcp_hashinfo.__tcp_listening_hash) -#define tcp_lhash_lock (tcp_hashinfo.__tcp_lhash_lock) -#define tcp_lhash_users (tcp_hashinfo.__tcp_lhash_users) -#define tcp_lhash_wait (tcp_hashinfo.__tcp_lhash_wait) -#define tcp_portalloc_lock (tcp_hashinfo.__tcp_portalloc_lock) +extern struct inet_hashinfo tcp_hashinfo; +#define tcp_ehash (tcp_hashinfo.ehash) +#define tcp_bhash (tcp_hashinfo.bhash) +#define tcp_ehash_size (tcp_hashinfo.ehash_size) +#define tcp_bhash_size (tcp_hashinfo.bhash_size) +#define tcp_listening_hash (tcp_hashinfo.listening_hash) +#define tcp_lhash_lock (tcp_hashinfo.lhash_lock) +#define tcp_lhash_users (tcp_hashinfo.lhash_users) +#define tcp_lhash_wait (tcp_hashinfo.lhash_wait) +#define tcp_portalloc_lock (tcp_hashinfo.portalloc_lock) extern kmem_cache_t *tcp_bucket_cachep; -extern struct tcp_bind_bucket *tcp_bucket_create(struct tcp_bind_hashbucket *head, - unsigned short snum); -extern void tcp_bucket_destroy(struct tcp_bind_bucket *tb); -extern void tcp_bucket_unlock(struct sock *sk); +extern struct 
inet_bind_bucket * + inet_bind_bucket_create(kmem_cache_t *cachep, + struct inet_bind_hashbucket *head, + const unsigned short snum); +extern void inet_bind_bucket_destroy(kmem_cache_t *cachep, + struct inet_bind_bucket *tb); extern int tcp_port_rover; /* These are AF independent. */ -static __inline__ int tcp_bhashfn(__u16 lport) +static inline int inet_bhashfn(const __u16 lport, const int bhash_size) { - return (lport & (tcp_bhash_size - 1)); + return lport & (bhash_size - 1); } -extern void tcp_bind_hash(struct sock *sk, struct tcp_bind_bucket *tb, +extern void tcp_bind_hash(struct sock *sk, struct inet_bind_bucket *tb, unsigned short snum); #if (BITS_PER_LONG == 64) @@ -212,7 +206,7 @@ struct tcp_tw_bucket { __u32 tw_ts_recent; long tw_ts_recent_stamp; unsigned long tw_ttd; - struct tcp_bind_bucket *tw_tb; + struct inet_bind_bucket *tw_tb; struct hlist_node tw_death_node; #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) struct in6_addr tw_v6_daddr; @@ -366,14 +360,14 @@ extern void tcp_tw_deschedule(struct tcp_tw_bucket *tw); (!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif)))) /* These can have wildcards, don't try too hard. 
*/ -static __inline__ int tcp_lhashfn(unsigned short num) +static inline int inet_lhashfn(const unsigned short num) { - return num & (TCP_LHTABLE_SIZE - 1); + return num & (INET_LHTABLE_SIZE - 1); } -static __inline__ int tcp_sk_listen_hashfn(struct sock *sk) +static inline int inet_sk_listen_hashfn(const struct sock *sk) { - return tcp_lhashfn(inet_sk(sk)->num); + return inet_lhashfn(inet_sk(sk)->num); } #define MAX_TCP_HEADER (128 + MAX_HEADER) @@ -799,9 +793,6 @@ extern void tcp_parse_options(struct sk_buff *skb, * TCP v4 functions exported for the inet6 API */ -extern int tcp_v4_build_header(struct sock *sk, - struct sk_buff *skb); - extern void tcp_v4_send_check(struct sock *sk, struct tcphdr *th, int len, struct sk_buff *skb); diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 20159a3dafb3..1ec03db7dcd9 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -272,6 +272,9 @@ int sysctl_tcp_fin_timeout = TCP_FIN_TIMEOUT; DEFINE_SNMP_STAT(struct tcp_mib, tcp_statistics); kmem_cache_t *tcp_bucket_cachep; + +EXPORT_SYMBOL_GPL(tcp_bucket_cachep); + kmem_cache_t *tcp_timewait_cachep; atomic_t tcp_orphan_count = ATOMIC_INIT(0); @@ -2259,7 +2262,7 @@ void __init tcp_init(void) sizeof(skb->cb)); tcp_bucket_cachep = kmem_cache_create("tcp_bind_bucket", - sizeof(struct tcp_bind_bucket), + sizeof(struct inet_bind_bucket), 0, SLAB_HWCACHE_ALIGN, NULL, NULL); if (!tcp_bucket_cachep) @@ -2277,9 +2280,9 @@ void __init tcp_init(void) * * The methodology is similar to that of the buffer cache. */ - tcp_ehash = (struct tcp_ehash_bucket *) + tcp_ehash = alloc_large_system_hash("TCP established", - sizeof(struct tcp_ehash_bucket), + sizeof(struct inet_ehash_bucket), thash_entries, (num_physpages >= 128 * 1024) ? 
(25 - PAGE_SHIFT) : @@ -2294,9 +2297,9 @@ void __init tcp_init(void) INIT_HLIST_HEAD(&tcp_ehash[i].chain); } - tcp_bhash = (struct tcp_bind_hashbucket *) + tcp_bhash = alloc_large_system_hash("TCP bind", - sizeof(struct tcp_bind_hashbucket), + sizeof(struct inet_bind_hashbucket), tcp_ehash_size, (num_physpages >= 128 * 1024) ? (25 - PAGE_SHIFT) : @@ -2315,7 +2318,7 @@ void __init tcp_init(void) * on available memory. */ for (order = 0; ((1 << order) << PAGE_SHIFT) < - (tcp_bhash_size * sizeof(struct tcp_bind_hashbucket)); + (tcp_bhash_size * sizeof(struct inet_bind_hashbucket)); order++) ; if (order >= 4) { diff --git a/net/ipv4/tcp_diag.c b/net/ipv4/tcp_diag.c index f79bd11a4701..5bb6a0f1c77b 100644 --- a/net/ipv4/tcp_diag.c +++ b/net/ipv4/tcp_diag.c @@ -590,7 +590,7 @@ static int tcpdiag_dump(struct sk_buff *skb, struct netlink_callback *cb) if (!(r->tcpdiag_states&(TCPF_LISTEN|TCPF_SYN_RECV))) goto skip_listen_ht; tcp_listen_lock(); - for (i = s_i; i < TCP_LHTABLE_SIZE; i++) { + for (i = s_i; i < INET_LHTABLE_SIZE; i++) { struct sock *sk; struct hlist_node *node; @@ -646,7 +646,7 @@ skip_listen_ht: return skb->len; for (i = s_i; i < tcp_ehash_size; i++) { - struct tcp_ehash_bucket *head = &tcp_ehash[i]; + struct inet_ehash_bucket *head = &tcp_ehash[i]; struct sock *sk; struct hlist_node *node; diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index c03d7e9688c8..4138630556e3 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -89,12 +89,11 @@ static struct socket *tcp_socket; void tcp_v4_send_check(struct sock *sk, struct tcphdr *th, int len, struct sk_buff *skb); -struct tcp_hashinfo __cacheline_aligned tcp_hashinfo = { - .__tcp_lhash_lock = RW_LOCK_UNLOCKED, - .__tcp_lhash_users = ATOMIC_INIT(0), - .__tcp_lhash_wait - = __WAIT_QUEUE_HEAD_INITIALIZER(tcp_hashinfo.__tcp_lhash_wait), - .__tcp_portalloc_lock = SPIN_LOCK_UNLOCKED +struct inet_hashinfo __cacheline_aligned tcp_hashinfo = { + .lhash_lock = RW_LOCK_UNLOCKED, + .lhash_users = 
ATOMIC_INIT(0), + .lhash_wait = __WAIT_QUEUE_HEAD_INITIALIZER(tcp_hashinfo.lhash_wait), + .portalloc_lock = SPIN_LOCK_UNLOCKED, }; /* @@ -105,14 +104,14 @@ struct tcp_hashinfo __cacheline_aligned tcp_hashinfo = { int sysctl_local_port_range[2] = { 1024, 4999 }; int tcp_port_rover = 1024 - 1; -/* Allocate and initialize a new TCP local port bind bucket. +/* Allocate and initialize a new local port bind bucket. * The bindhash mutex for snum's hash chain must be held here. */ -struct tcp_bind_bucket *tcp_bucket_create(struct tcp_bind_hashbucket *head, - unsigned short snum) +struct inet_bind_bucket *inet_bind_bucket_create(kmem_cache_t *cachep, + struct inet_bind_hashbucket *head, + const unsigned short snum) { - struct tcp_bind_bucket *tb = kmem_cache_alloc(tcp_bucket_cachep, - SLAB_ATOMIC); + struct inet_bind_bucket *tb = kmem_cache_alloc(cachep, SLAB_ATOMIC); if (tb) { tb->port = snum; tb->fastreuse = 0; @@ -123,20 +122,21 @@ struct tcp_bind_bucket *tcp_bucket_create(struct tcp_bind_hashbucket *head, } /* Caller must hold hashbucket lock for this tb with local BH disabled */ -void tcp_bucket_destroy(struct tcp_bind_bucket *tb) +void inet_bind_bucket_destroy(kmem_cache_t *cachep, struct inet_bind_bucket *tb) { if (hlist_empty(&tb->owners)) { __hlist_del(&tb->node); - kmem_cache_free(tcp_bucket_cachep, tb); + kmem_cache_free(cachep, tb); } } /* Caller must disable local BH processing. 
*/ static __inline__ void __tcp_inherit_port(struct sock *sk, struct sock *child) { - struct tcp_bind_hashbucket *head = - &tcp_bhash[tcp_bhashfn(inet_sk(child)->num)]; - struct tcp_bind_bucket *tb; + struct inet_bind_hashbucket *head = + &tcp_bhash[inet_bhashfn(inet_sk(child)->num, + tcp_bhash_size)]; + struct inet_bind_bucket *tb; spin_lock(&head->lock); tb = tcp_sk(sk)->bind_hash; @@ -152,15 +152,15 @@ inline void tcp_inherit_port(struct sock *sk, struct sock *child) local_bh_enable(); } -void tcp_bind_hash(struct sock *sk, struct tcp_bind_bucket *tb, - unsigned short snum) +void tcp_bind_hash(struct sock *sk, struct inet_bind_bucket *tb, + const unsigned short snum) { inet_sk(sk)->num = snum; sk_add_bind_node(sk, &tb->owners); tcp_sk(sk)->bind_hash = tb; } -static inline int tcp_bind_conflict(struct sock *sk, struct tcp_bind_bucket *tb) +static inline int tcp_bind_conflict(struct sock *sk, struct inet_bind_bucket *tb) { const u32 sk_rcv_saddr = tcp_v4_rcv_saddr(sk); struct sock *sk2; @@ -190,9 +190,9 @@ static inline int tcp_bind_conflict(struct sock *sk, struct tcp_bind_bucket *tb) */ static int tcp_v4_get_port(struct sock *sk, unsigned short snum) { - struct tcp_bind_hashbucket *head; + struct inet_bind_hashbucket *head; struct hlist_node *node; - struct tcp_bind_bucket *tb; + struct inet_bind_bucket *tb; int ret; local_bh_disable(); @@ -211,9 +211,9 @@ static int tcp_v4_get_port(struct sock *sk, unsigned short snum) rover++; if (rover > high) rover = low; - head = &tcp_bhash[tcp_bhashfn(rover)]; + head = &tcp_bhash[inet_bhashfn(rover, tcp_bhash_size)]; spin_lock(&head->lock); - tb_for_each(tb, node, &head->chain) + inet_bind_bucket_for_each(tb, node, &head->chain) if (tb->port == rover) goto next; break; @@ -238,9 +238,9 @@ static int tcp_v4_get_port(struct sock *sk, unsigned short snum) */ snum = rover; } else { - head = &tcp_bhash[tcp_bhashfn(snum)]; + head = &tcp_bhash[inet_bhashfn(snum, tcp_bhash_size)]; spin_lock(&head->lock); - tb_for_each(tb, node, 
&head->chain) + inet_bind_bucket_for_each(tb, node, &head->chain) if (tb->port == snum) goto tb_found; } @@ -261,7 +261,7 @@ tb_found: } tb_not_found: ret = 1; - if (!tb && (tb = tcp_bucket_create(head, snum)) == NULL) + if (!tb && (tb = inet_bind_bucket_create(tcp_bucket_cachep, head, snum)) == NULL) goto fail_unlock; if (hlist_empty(&tb->owners)) { if (sk->sk_reuse && sk->sk_state != TCP_LISTEN) @@ -290,15 +290,16 @@ fail: static void __tcp_put_port(struct sock *sk) { struct inet_sock *inet = inet_sk(sk); - struct tcp_bind_hashbucket *head = &tcp_bhash[tcp_bhashfn(inet->num)]; - struct tcp_bind_bucket *tb; + struct inet_bind_hashbucket *head = &tcp_bhash[inet_bhashfn(inet->num, + tcp_bhash_size)]; + struct inet_bind_bucket *tb; spin_lock(&head->lock); tb = tcp_sk(sk)->bind_hash; __sk_del_bind_node(sk); tcp_sk(sk)->bind_hash = NULL; inet->num = 0; - tcp_bucket_destroy(tb); + inet_bind_bucket_destroy(tcp_bucket_cachep, tb); spin_unlock(&head->lock); } @@ -344,7 +345,7 @@ static __inline__ void __tcp_v4_hash(struct sock *sk, const int listen_possible) BUG_TRAP(sk_unhashed(sk)); if (listen_possible && sk->sk_state == TCP_LISTEN) { - list = &tcp_listening_hash[tcp_sk_listen_hashfn(sk)]; + list = &tcp_listening_hash[inet_sk_listen_hashfn(sk)]; lock = &tcp_lhash_lock; tcp_listen_wlock(); } else { @@ -381,7 +382,7 @@ void tcp_unhash(struct sock *sk) tcp_listen_wlock(); lock = &tcp_lhash_lock; } else { - struct tcp_ehash_bucket *head = &tcp_ehash[sk->sk_hashent]; + struct inet_ehash_bucket *head = &tcp_ehash[sk->sk_hashent]; lock = &head->lock; write_lock_bh(&head->lock); } @@ -401,8 +402,10 @@ void tcp_unhash(struct sock *sk) * connection. So always assume those are both wildcarded * during the search since they can never be otherwise. 
*/ -static struct sock *__tcp_v4_lookup_listener(struct hlist_head *head, u32 daddr, - unsigned short hnum, int dif) +static struct sock *__tcp_v4_lookup_listener(struct hlist_head *head, + const u32 daddr, + const unsigned short hnum, + const int dif) { struct sock *result = NULL, *sk; struct hlist_node *node; @@ -438,14 +441,15 @@ static struct sock *__tcp_v4_lookup_listener(struct hlist_head *head, u32 daddr, } /* Optimize the common listener case. */ -static inline struct sock *tcp_v4_lookup_listener(u32 daddr, - unsigned short hnum, int dif) +static inline struct sock *tcp_v4_lookup_listener(const u32 daddr, + const unsigned short hnum, + const int dif) { struct sock *sk = NULL; struct hlist_head *head; read_lock(&tcp_lhash_lock); - head = &tcp_listening_hash[tcp_lhashfn(hnum)]; + head = &tcp_listening_hash[inet_lhashfn(hnum)]; if (!hlist_empty(head)) { struct inet_sock *inet = inet_sk((sk = __sk_head(head))); @@ -470,11 +474,13 @@ sherry_cache: * Local BH must be disabled here. */ -static inline struct sock *__tcp_v4_lookup_established(u32 saddr, u16 sport, - u32 daddr, u16 hnum, - int dif) +static inline struct sock *__tcp_v4_lookup_established(const u32 saddr, + const u16 sport, + const u32 daddr, + const u16 hnum, + const int dif) { - struct tcp_ehash_bucket *head; + struct inet_ehash_bucket *head; TCP_V4_ADDR_COOKIE(acookie, saddr, daddr) __u32 ports = TCP_COMBINED_PORTS(sport, hnum); struct sock *sk; @@ -546,7 +552,7 @@ static int __tcp_v4_check_established(struct sock *sk, __u16 lport, TCP_V4_ADDR_COOKIE(acookie, saddr, daddr) __u32 ports = TCP_COMBINED_PORTS(inet->dport, lport); const int hash = inet_ehashfn(daddr, lport, saddr, inet->dport, tcp_ehash_size); - struct tcp_ehash_bucket *head = &tcp_ehash[hash]; + struct inet_ehash_bucket *head = &tcp_ehash[hash]; struct sock *sk2; struct hlist_node *node; struct tcp_tw_bucket *tw; @@ -639,9 +645,9 @@ static inline u32 connect_port_offset(const struct sock *sk) */ static inline int 
tcp_v4_hash_connect(struct sock *sk) { - unsigned short snum = inet_sk(sk)->num; - struct tcp_bind_hashbucket *head; - struct tcp_bind_bucket *tb; + const unsigned short snum = inet_sk(sk)->num; + struct inet_bind_hashbucket *head; + struct inet_bind_bucket *tb; int ret; if (!snum) { @@ -658,14 +664,14 @@ static inline int tcp_v4_hash_connect(struct sock *sk) local_bh_disable(); for (i = 1; i <= range; i++) { port = low + (i + offset) % range; - head = &tcp_bhash[tcp_bhashfn(port)]; + head = &tcp_bhash[inet_bhashfn(port, tcp_bhash_size)]; spin_lock(&head->lock); /* Does not bother with rcv_saddr checks, * because the established check is already * unique enough. */ - tb_for_each(tb, node, &head->chain) { + inet_bind_bucket_for_each(tb, node, &head->chain) { if (tb->port == port) { BUG_TRAP(!hlist_empty(&tb->owners)); if (tb->fastreuse >= 0) @@ -678,7 +684,7 @@ static inline int tcp_v4_hash_connect(struct sock *sk) } } - tb = tcp_bucket_create(head, port); + tb = inet_bind_bucket_create(tcp_bucket_cachep, head, port); if (!tb) { spin_unlock(&head->lock); break; @@ -713,7 +719,7 @@ ok: goto out; } - head = &tcp_bhash[tcp_bhashfn(snum)]; + head = &tcp_bhash[inet_bhashfn(snum, tcp_bhash_size)]; tb = tcp_sk(sk)->bind_hash; spin_lock_bh(&head->lock); if (sk_head(&tb->owners) == sk && !sk->sk_bind_node.next) { @@ -2055,7 +2061,7 @@ start_req: } read_unlock_bh(&tp->accept_queue.syn_wait_lock); } - if (++st->bucket < TCP_LHTABLE_SIZE) { + if (++st->bucket < INET_LHTABLE_SIZE) { sk = sk_head(&tcp_listening_hash[st->bucket]); goto get_sk; } @@ -2506,7 +2512,7 @@ void __init tcp_v4_init(struct net_proto_family *ops) EXPORT_SYMBOL(ipv4_specific); EXPORT_SYMBOL(tcp_bind_hash); -EXPORT_SYMBOL(tcp_bucket_create); +EXPORT_SYMBOL(inet_bind_bucket_create); EXPORT_SYMBOL(tcp_hashinfo); EXPORT_SYMBOL(tcp_inherit_port); EXPORT_SYMBOL(tcp_listen_wlock); diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index 7c46a553c4af..1df6cd46066b 100644 --- a/net/ipv4/tcp_minisocks.c 
+++ b/net/ipv4/tcp_minisocks.c @@ -60,9 +60,9 @@ int tcp_tw_count; /* Must be called with locally disabled BHs. */ static void tcp_timewait_kill(struct tcp_tw_bucket *tw) { - struct tcp_ehash_bucket *ehead; - struct tcp_bind_hashbucket *bhead; - struct tcp_bind_bucket *tb; + struct inet_ehash_bucket *ehead; + struct inet_bind_hashbucket *bhead; + struct inet_bind_bucket *tb; /* Unlink from established hashes. */ ehead = &tcp_ehash[tw->tw_hashent]; @@ -76,12 +76,12 @@ static void tcp_timewait_kill(struct tcp_tw_bucket *tw) write_unlock(&ehead->lock); /* Disassociate with bind bucket. */ - bhead = &tcp_bhash[tcp_bhashfn(tw->tw_num)]; + bhead = &tcp_bhash[inet_bhashfn(tw->tw_num, tcp_bhash_size)]; spin_lock(&bhead->lock); tb = tw->tw_tb; __hlist_del(&tw->tw_bind_node); tw->tw_tb = NULL; - tcp_bucket_destroy(tb); + inet_bind_bucket_destroy(tcp_bucket_cachep, tb); spin_unlock(&bhead->lock); #ifdef SOCK_REFCNT_DEBUG @@ -296,14 +296,14 @@ kill: */ static void __tcp_tw_hashdance(struct sock *sk, struct tcp_tw_bucket *tw) { - struct tcp_ehash_bucket *ehead = &tcp_ehash[sk->sk_hashent]; - struct tcp_bind_hashbucket *bhead; + struct inet_ehash_bucket *ehead = &tcp_ehash[sk->sk_hashent]; + struct inet_bind_hashbucket *bhead; /* Step 1: Put TW into bind hash. Original socket stays there too. Note, that any socket with inet_sk(sk)->num != 0 MUST be bound in binding cache, even if it is closed. 
*/ - bhead = &tcp_bhash[tcp_bhashfn(inet_sk(sk)->num)]; + bhead = &tcp_bhash[inet_bhashfn(inet_sk(sk)->num, tcp_bhash_size)]; spin_lock(&bhead->lock); tw->tw_tb = tcp_sk(sk)->bind_hash; BUG_TRAP(tcp_sk(sk)->bind_hash); diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 4e32a8496be3..31f50fb29ffb 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -98,11 +98,11 @@ static __inline__ int tcp_v6_sk_hashfn(struct sock *sk) return tcp_v6_hashfn(laddr, lport, faddr, fport); } -static inline int tcp_v6_bind_conflict(struct sock *sk, - struct tcp_bind_bucket *tb) +static inline int tcp_v6_bind_conflict(const struct sock *sk, + const struct inet_bind_bucket *tb) { - struct sock *sk2; - struct hlist_node *node; + const struct sock *sk2; + const struct hlist_node *node; /* We must walk the whole port owner list in this case. -DaveM */ sk_for_each_bound(sk2, node, &tb->owners) { @@ -126,8 +126,8 @@ static inline int tcp_v6_bind_conflict(struct sock *sk, */ static int tcp_v6_get_port(struct sock *sk, unsigned short snum) { - struct tcp_bind_hashbucket *head; - struct tcp_bind_bucket *tb; + struct inet_bind_hashbucket *head; + struct inet_bind_bucket *tb; struct hlist_node *node; int ret; @@ -146,9 +146,9 @@ static int tcp_v6_get_port(struct sock *sk, unsigned short snum) do { rover++; if (rover > high) rover = low; - head = &tcp_bhash[tcp_bhashfn(rover)]; + head = &tcp_bhash[inet_bhashfn(rover, tcp_bhash_size)]; spin_lock(&head->lock); - tb_for_each(tb, node, &head->chain) + inet_bind_bucket_for_each(tb, node, &head->chain) if (tb->port == rover) goto next; break; @@ -171,9 +171,9 @@ static int tcp_v6_get_port(struct sock *sk, unsigned short snum) /* OK, here is the one we will use. 
*/ snum = rover; } else { - head = &tcp_bhash[tcp_bhashfn(snum)]; + head = &tcp_bhash[inet_bhashfn(snum, tcp_bhash_size)]; spin_lock(&head->lock); - tb_for_each(tb, node, &head->chain) + inet_bind_bucket_for_each(tb, node, &head->chain) if (tb->port == snum) goto tb_found; } @@ -192,7 +192,7 @@ tb_found: } tb_not_found: ret = 1; - if (!tb && (tb = tcp_bucket_create(head, snum)) == NULL) + if (!tb && (tb = inet_bind_bucket_create(tcp_bucket_cachep, head, snum)) == NULL) goto fail_unlock; if (hlist_empty(&tb->owners)) { if (sk->sk_reuse && sk->sk_state != TCP_LISTEN) @@ -224,7 +224,7 @@ static __inline__ void __tcp_v6_hash(struct sock *sk) BUG_TRAP(sk_unhashed(sk)); if (sk->sk_state == TCP_LISTEN) { - list = &tcp_listening_hash[tcp_sk_listen_hashfn(sk)]; + list = &tcp_listening_hash[inet_sk_listen_hashfn(sk)]; lock = &tcp_lhash_lock; tcp_listen_wlock(); } else { @@ -264,7 +264,7 @@ static struct sock *tcp_v6_lookup_listener(struct in6_addr *daddr, unsigned shor hiscore=0; read_lock(&tcp_lhash_lock); - sk_for_each(sk, node, &tcp_listening_hash[tcp_lhashfn(hnum)]) { + sk_for_each(sk, node, &tcp_listening_hash[inet_lhashfn(hnum)]) { if (inet_sk(sk)->num == hnum && sk->sk_family == PF_INET6) { struct ipv6_pinfo *np = inet6_sk(sk); @@ -305,7 +305,7 @@ static inline struct sock *__tcp_v6_lookup_established(struct in6_addr *saddr, u struct in6_addr *daddr, u16 hnum, int dif) { - struct tcp_ehash_bucket *head; + struct inet_ehash_bucket *head; struct sock *sk; struct hlist_node *node; __u32 ports = TCP_COMBINED_PORTS(sport, hnum); @@ -461,7 +461,7 @@ static int __tcp_v6_check_established(struct sock *sk, __u16 lport, int dif = sk->sk_bound_dev_if; u32 ports = TCP_COMBINED_PORTS(inet->dport, lport); int hash = tcp_v6_hashfn(daddr, inet->num, saddr, inet->dport); - struct tcp_ehash_bucket *head = &tcp_ehash[hash]; + struct inet_ehash_bucket *head = &tcp_ehash[hash]; struct sock *sk2; struct hlist_node *node; struct tcp_tw_bucket *tw; @@ -540,8 +540,8 @@ static inline u32 
tcpv6_port_offset(const struct sock *sk) static int tcp_v6_hash_connect(struct sock *sk) { unsigned short snum = inet_sk(sk)->num; - struct tcp_bind_hashbucket *head; - struct tcp_bind_bucket *tb; + struct inet_bind_hashbucket *head; + struct inet_bind_bucket *tb; int ret; if (!snum) { @@ -558,14 +558,14 @@ static int tcp_v6_hash_connect(struct sock *sk) local_bh_disable(); for (i = 1; i <= range; i++) { port = low + (i + offset) % range; - head = &tcp_bhash[tcp_bhashfn(port)]; + head = &tcp_bhash[inet_bhashfn(port, tcp_bhash_size)]; spin_lock(&head->lock); /* Does not bother with rcv_saddr checks, * because the established check is already * unique enough. */ - tb_for_each(tb, node, &head->chain) { + inet_bind_bucket_for_each(tb, node, &head->chain) { if (tb->port == port) { BUG_TRAP(!hlist_empty(&tb->owners)); if (tb->fastreuse >= 0) @@ -578,7 +578,7 @@ static int tcp_v6_hash_connect(struct sock *sk) } } - tb = tcp_bucket_create(head, port); + tb = inet_bind_bucket_create(tcp_bucket_cachep, head, port); if (!tb) { spin_unlock(&head->lock); break; @@ -613,7 +613,7 @@ ok: goto out; } - head = &tcp_bhash[tcp_bhashfn(snum)]; + head = &tcp_bhash[inet_bhashfn(snum, tcp_bhash_size)]; tb = tcp_sk(sk)->bind_hash; spin_lock_bh(&head->lock); From 77d8bf9c6208eb535f05718168ffcc476be0ca8c Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 9 Aug 2005 20:00:51 -0700 Subject: [PATCH 299/584] [INET]: Move the TCP hashtable functions/structs to inet_hashtables.[ch] Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. 
Miller --- include/net/inet_common.h | 5 ++ include/net/inet_hashtables.h | 122 ++++++++++++++++++++++++++++++++++ include/net/tcp.h | 120 +-------------------------------- net/ipv4/Makefile | 2 +- net/ipv4/inet_hashtables.c | 51 ++++++++++++++ net/ipv4/tcp_ipv4.c | 26 -------- 6 files changed, 181 insertions(+), 145 deletions(-) create mode 100644 net/ipv4/inet_hashtables.c diff --git a/include/net/inet_common.h b/include/net/inet_common.h index 1fbd94d8a316..f943306ce5ff 100644 --- a/include/net/inet_common.h +++ b/include/net/inet_common.h @@ -8,6 +8,11 @@ extern struct proto_ops inet_dgram_ops; * INET4 prototypes used by INET6 */ +struct msghdr; +struct sock; +struct sockaddr; +struct socket; + extern void inet_remove_sock(struct sock *sk1); extern void inet_put_sock(unsigned short num, struct sock *sk); diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h index c4c9e39f4505..3a6c11ca421d 100644 --- a/include/net/inet_hashtables.h +++ b/include/net/inet_hashtables.h @@ -14,8 +14,107 @@ #ifndef _INET_HASHTABLES_H #define _INET_HASHTABLES_H +#include +#include +#include +#include #include +/* This is for all connections with a full identity, no wildcards. + * New scheme, half the table is for TIME_WAIT, the other half is + * for the rest. I'll experiment with dynamic table growth later. + */ +struct inet_ehash_bucket { + rwlock_t lock; + struct hlist_head chain; +} __attribute__((__aligned__(8))); + +/* There are a few simple rules, which allow for local port reuse by + * an application. In essence: + * + * 1) Sockets bound to different interfaces may share a local port. + * Failing that, goto test 2. + * 2) If all sockets have sk->sk_reuse set, and none of them are in + * TCP_LISTEN state, the port may be shared. + * Failing that, goto test 3. + * 3) If all sockets are bound to a specific inet_sk(sk)->rcv_saddr local + * address, and none of them are the same, the port may be + * shared. + * Failing this, the port cannot be shared. 
+ * + * The interesting point, is test #2. This is what an FTP server does + * all day. To optimize this case we use a specific flag bit defined + * below. As we add sockets to a bind bucket list, we perform a + * check of: (newsk->sk_reuse && (newsk->sk_state != TCP_LISTEN)) + * As long as all sockets added to a bind bucket pass this test, + * the flag bit will be set. + * The resulting situation is that tcp_v[46]_verify_bind() can just check + * for this flag bit, if it is set and the socket trying to bind has + * sk->sk_reuse set, we don't even have to walk the owners list at all, + * we return that it is ok to bind this socket to the requested local port. + * + * Sounds like a lot of work, but it is worth it. In a more naive + * implementation (ie. current FreeBSD etc.) the entire list of ports + * must be walked for each data port opened by an ftp server. Needless + * to say, this does not scale at all. With a couple thousand FTP + * users logged onto your box, isn't it nice to know that new data + * ports are created in O(1) time? I thought so. ;-) -DaveM + */ +struct inet_bind_bucket { + unsigned short port; + signed short fastreuse; + struct hlist_node node; + struct hlist_head owners; +}; + +#define inet_bind_bucket_for_each(tb, node, head) \ + hlist_for_each_entry(tb, node, head, node) + +struct inet_bind_hashbucket { + spinlock_t lock; + struct hlist_head chain; +}; + +/* This is for listening sockets, thus all sockets which possess wildcards. */ +#define INET_LHTABLE_SIZE 32 /* Yes, really, this is all you need. */ + +struct inet_hashinfo { + /* This is for sockets with full identity only. Sockets here will + * always be without wildcards and will have the following invariant: + * + * TCP_ESTABLISHED <= sk->sk_state < TCP_CLOSE + * + * First half of the table is for sockets not in TIME_WAIT, second half + * is for TIME_WAIT sockets only. 
+ */ + struct inet_ehash_bucket *ehash; + + /* Ok, let's try this, I give up, we do need a local binding + * TCP hash as well as the others for fast bind/connect. + */ + struct inet_bind_hashbucket *bhash; + + int bhash_size; + int ehash_size; + + /* All sockets in TCP_LISTEN state will be in here. This is the only + * table where wildcard'd TCP sockets can exist. Hash function here + * is just local port number. + */ + struct hlist_head listening_hash[INET_LHTABLE_SIZE]; + + /* All the above members are written once at bootup and + * never written again _or_ are predominantly read-access. + * + * Now align to a new cache line as all the following members + * are often dirty. + */ + rwlock_t lhash_lock ____cacheline_aligned; + atomic_t lhash_users; + wait_queue_head_t lhash_wait; + spinlock_t portalloc_lock; +}; + static inline int inet_ehashfn(const __u32 laddr, const __u16 lport, const __u32 faddr, const __u16 fport, const int ehash_size) @@ -37,4 +136,27 @@ static inline int inet_sk_ehashfn(const struct sock *sk, const int ehash_size) return inet_ehashfn(laddr, lport, faddr, fport, ehash_size); } +extern struct inet_bind_bucket * + inet_bind_bucket_create(kmem_cache_t *cachep, + struct inet_bind_hashbucket *head, + const unsigned short snum); +extern void inet_bind_bucket_destroy(kmem_cache_t *cachep, + struct inet_bind_bucket *tb); + +static inline int inet_bhashfn(const __u16 lport, const int bhash_size) +{ + return lport & (bhash_size - 1); +} + +/* These can have wildcards, don't try too hard. 
*/ +static inline int inet_lhashfn(const unsigned short num) +{ + return num & (INET_LHTABLE_SIZE - 1); +} + +static inline int inet_sk_listen_hashfn(const struct sock *sk) +{ + return inet_lhashfn(inet_sk(sk)->num); +} + #endif /* _INET_HASHTABLES_H */ diff --git a/include/net/tcp.h b/include/net/tcp.h index 6c9f6f7cab5c..ff5d30ac2b06 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -30,6 +30,7 @@ #include #include #include +#include #include #include #include @@ -40,101 +41,6 @@ #endif #include -/* This is for all connections with a full identity, no wildcards. - * New scheme, half the table is for TIME_WAIT, the other half is - * for the rest. I'll experiment with dynamic table growth later. - */ -struct inet_ehash_bucket { - rwlock_t lock; - struct hlist_head chain; -} __attribute__((__aligned__(8))); - -/* This is for listening sockets, thus all sockets which possess wildcards. */ -#define INET_LHTABLE_SIZE 32 /* Yes, really, this is all you need. */ - -/* There are a few simple rules, which allow for local port reuse by - * an application. In essence: - * - * 1) Sockets bound to different interfaces may share a local port. - * Failing that, goto test 2. - * 2) If all sockets have sk->sk_reuse set, and none of them are in - * TCP_LISTEN state, the port may be shared. - * Failing that, goto test 3. - * 3) If all sockets are bound to a specific inet_sk(sk)->rcv_saddr local - * address, and none of them are the same, the port may be - * shared. - * Failing this, the port cannot be shared. - * - * The interesting point, is test #2. This is what an FTP server does - * all day. To optimize this case we use a specific flag bit defined - * below. As we add sockets to a bind bucket list, we perform a - * check of: (newsk->sk_reuse && (newsk->sk_state != TCP_LISTEN)) - * As long as all sockets added to a bind bucket pass this test, - * the flag bit will be set. 
- * The resulting situation is that tcp_v[46]_verify_bind() can just check - * for this flag bit, if it is set and the socket trying to bind has - * sk->sk_reuse set, we don't even have to walk the owners list at all, - * we return that it is ok to bind this socket to the requested local port. - * - * Sounds like a lot of work, but it is worth it. In a more naive - * implementation (ie. current FreeBSD etc.) the entire list of ports - * must be walked for each data port opened by an ftp server. Needless - * to say, this does not scale at all. With a couple thousand FTP - * users logged onto your box, isn't it nice to know that new data - * ports are created in O(1) time? I thought so. ;-) -DaveM - */ -struct inet_bind_bucket { - unsigned short port; - signed short fastreuse; - struct hlist_node node; - struct hlist_head owners; -}; - -#define inet_bind_bucket_for_each(tb, node, head) \ - hlist_for_each_entry(tb, node, head, node) - -struct inet_bind_hashbucket { - spinlock_t lock; - struct hlist_head chain; -}; - -struct inet_hashinfo { - /* This is for sockets with full identity only. Sockets here will - * always be without wildcards and will have the following invariant: - * - * TCP_ESTABLISHED <= sk->sk_state < TCP_CLOSE - * - * First half of the table is for sockets not in TIME_WAIT, second half - * is for TIME_WAIT sockets only. - */ - struct inet_ehash_bucket *ehash; - - /* Ok, let's try this, I give up, we do need a local binding - * TCP hash as well as the others for fast bind/connect. - */ - struct inet_bind_hashbucket *bhash; - - int bhash_size; - int ehash_size; - - /* All sockets in TCP_LISTEN state will be in here. This is the only - * table where wildcard'd TCP sockets can exist. Hash function here - * is just local port number. - */ - struct hlist_head listening_hash[INET_LHTABLE_SIZE]; - - /* All the above members are written once at bootup and - * never written again _or_ are predominantly read-access. 
- * - * Now align to a new cache line as all the following members - * are often dirty. - */ - rwlock_t lhash_lock ____cacheline_aligned; - atomic_t lhash_users; - wait_queue_head_t lhash_wait; - spinlock_t portalloc_lock; -}; - extern struct inet_hashinfo tcp_hashinfo; #define tcp_ehash (tcp_hashinfo.ehash) #define tcp_bhash (tcp_hashinfo.bhash) @@ -147,19 +53,8 @@ extern struct inet_hashinfo tcp_hashinfo; #define tcp_portalloc_lock (tcp_hashinfo.portalloc_lock) extern kmem_cache_t *tcp_bucket_cachep; -extern struct inet_bind_bucket * - inet_bind_bucket_create(kmem_cache_t *cachep, - struct inet_bind_hashbucket *head, - const unsigned short snum); -extern void inet_bind_bucket_destroy(kmem_cache_t *cachep, - struct inet_bind_bucket *tb); -extern int tcp_port_rover; -/* These are AF independent. */ -static inline int inet_bhashfn(const __u16 lport, const int bhash_size) -{ - return lport & (bhash_size - 1); -} +extern int tcp_port_rover; extern void tcp_bind_hash(struct sock *sk, struct inet_bind_bucket *tb, unsigned short snum); @@ -359,17 +254,6 @@ extern void tcp_tw_deschedule(struct tcp_tw_bucket *tw); ipv6_addr_equal(&inet6_sk(__sk)->rcv_saddr, (__daddr)) && \ (!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif)))) -/* These can have wildcards, don't try too hard. 
*/ -static inline int inet_lhashfn(const unsigned short num) -{ - return num & (INET_LHTABLE_SIZE - 1); -} - -static inline int inet_sk_listen_hashfn(const struct sock *sk) -{ - return inet_lhashfn(inet_sk(sk)->num); -} - #define MAX_TCP_HEADER (128 + MAX_HEADER) /* diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile index 61c7386bcd2e..2d8d30e83eb0 100644 --- a/net/ipv4/Makefile +++ b/net/ipv4/Makefile @@ -4,7 +4,7 @@ obj-y := route.o inetpeer.o protocol.o \ ip_input.o ip_fragment.o ip_forward.o ip_options.o \ - ip_output.o ip_sockglue.o \ + ip_output.o ip_sockglue.o inet_hashtables.o \ tcp.o tcp_input.o tcp_output.o tcp_timer.o tcp_ipv4.o \ tcp_minisocks.o tcp_cong.o \ datagram.o raw.o udp.o arp.o icmp.o devinet.o af_inet.o igmp.o \ diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c new file mode 100644 index 000000000000..343a890bd617 --- /dev/null +++ b/net/ipv4/inet_hashtables.c @@ -0,0 +1,51 @@ +/* + * INET An implementation of the TCP/IP protocol suite for the LINUX + * operating system. INET is implemented using the BSD Socket + * interface as the means of communication with the user level. + * + * Generic INET transport hashtables + * + * Authors: Lotsa people, from code originally in tcp + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include +#include + +#include + +/* + * Allocate and initialize a new local port bind bucket. + * The bindhash mutex for snum's hash chain must be held here. 
+ */ +struct inet_bind_bucket *inet_bind_bucket_create(kmem_cache_t *cachep, + struct inet_bind_hashbucket *head, + const unsigned short snum) +{ + struct inet_bind_bucket *tb = kmem_cache_alloc(cachep, SLAB_ATOMIC); + + if (tb != NULL) { + tb->port = snum; + tb->fastreuse = 0; + INIT_HLIST_HEAD(&tb->owners); + hlist_add_head(&tb->node, &head->chain); + } + return tb; +} + +EXPORT_SYMBOL(inet_bind_bucket_create); + +/* + * Caller must hold hashbucket lock for this tb with local BH disabled + */ +void inet_bind_bucket_destroy(kmem_cache_t *cachep, struct inet_bind_bucket *tb) +{ + if (hlist_empty(&tb->owners)) { + __hlist_del(&tb->node); + kmem_cache_free(cachep, tb); + } +} diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 4138630556e3..58e36ed88f25 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -104,32 +104,6 @@ struct inet_hashinfo __cacheline_aligned tcp_hashinfo = { int sysctl_local_port_range[2] = { 1024, 4999 }; int tcp_port_rover = 1024 - 1; -/* Allocate and initialize a new local port bind bucket. - * The bindhash mutex for snum's hash chain must be held here. - */ -struct inet_bind_bucket *inet_bind_bucket_create(kmem_cache_t *cachep, - struct inet_bind_hashbucket *head, - const unsigned short snum) -{ - struct inet_bind_bucket *tb = kmem_cache_alloc(cachep, SLAB_ATOMIC); - if (tb) { - tb->port = snum; - tb->fastreuse = 0; - INIT_HLIST_HEAD(&tb->owners); - hlist_add_head(&tb->node, &head->chain); - } - return tb; -} - -/* Caller must hold hashbucket lock for this tb with local BH disabled */ -void inet_bind_bucket_destroy(kmem_cache_t *cachep, struct inet_bind_bucket *tb) -{ - if (hlist_empty(&tb->owners)) { - __hlist_del(&tb->node); - kmem_cache_free(cachep, tb); - } -} - /* Caller must disable local BH processing. 
*/ static __inline__ void __tcp_inherit_port(struct sock *sk, struct sock *child) { From a55ebcc4c4532107ad9eee1c9bb698ab5f12c00f Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 9 Aug 2005 20:01:14 -0700 Subject: [PATCH 300/584] [INET]: Move bind_hash from tcp_sk to inet_sk This should really be in an inet_connection_sock, but I'm leaving it for a later optimization, when some more fields common to INET transport protocols now in tcp_sk or inet_sk will be chunked out into inet_connection_sock, for now it's better to concentrate on getting the changes in the core merged to leave the DCCP tree with only DCCP specific code. Next changesets will take advantage of this move to generalise things like tcp_bind_hash, tcp_put_port, tcp_inherit_port, making the latter receive an inet_hashinfo parameter, and even __tcp_tw_hashdance, etc in the future, when tcp_tw_bucket gets transformed into the struct timewait_sock hierarchy. tcp_destroy_sock also is eligible as soon as tcp_orphan_count gets moved to sk_prot. A cascade of incremental changes will ultimately make the tcp_lookup functions be fully generic. Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S.
Miller --- include/linux/ip.h | 2 ++ include/linux/tcp.h | 1 - include/net/tcp.h | 2 +- net/ipv4/tcp.c | 4 ++-- net/ipv4/tcp_ipv4.c | 21 +++++++++++---------- net/ipv4/tcp_minisocks.c | 15 ++++++++------- net/ipv6/tcp_ipv6.c | 8 ++++---- 7 files changed, 28 insertions(+), 25 deletions(-) diff --git a/include/linux/ip.h b/include/linux/ip.h index 33e8a19a1a0f..2c54bbd3da76 100644 --- a/include/linux/ip.h +++ b/include/linux/ip.h @@ -128,6 +128,7 @@ static inline struct inet_request_sock *inet_rsk(const struct request_sock *sk) return (struct inet_request_sock *)sk; } +struct inet_bind_bucket; struct ipv6_pinfo; struct inet_sock { @@ -157,6 +158,7 @@ struct inet_sock { int mc_index; /* Multicast device index */ __u32 mc_addr; struct ip_mc_socklist *mc_list; /* Group array */ + struct inet_bind_bucket *bind_hash; /* * Following members are used to retain the infomation to build * an ip header on each ip fragmentation while the socket is corked. diff --git a/include/linux/tcp.h b/include/linux/tcp.h index ec580a560e8c..e70ab19652db 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -258,7 +258,6 @@ struct tcp_sock { __u32 snd_sml; /* Last byte of the most recently transmitted small packet */ __u32 rcv_tstamp; /* timestamp of last received ACK (for keepalives) */ __u32 lsndtime; /* timestamp of last sent data packet (for restart window) */ - struct inet_bind_bucket *bind_hash; /* Delayed ACK control data */ struct { __u8 pending; /* ACK is pending */ diff --git a/include/net/tcp.h b/include/net/tcp.h index ff5d30ac2b06..6c6c879e7e87 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1266,7 +1266,7 @@ static __inline__ void tcp_set_state(struct sock *sk, int state) TCP_INC_STATS(TCP_MIB_ESTABRESETS); sk->sk_prot->unhash(sk); - if (tcp_sk(sk)->bind_hash && + if (inet_sk(sk)->bind_hash && !(sk->sk_userlocks & SOCK_BINDPORT_LOCK)) tcp_put_port(sk); /* fall through */ diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 1ec03db7dcd9..e54a410ca701 100644 --- 
a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -1575,7 +1575,7 @@ void tcp_destroy_sock(struct sock *sk) BUG_TRAP(sk_unhashed(sk)); /* If it has not 0 inet_sk(sk)->num, it must be bound */ - BUG_TRAP(!inet_sk(sk)->num || tcp_sk(sk)->bind_hash); + BUG_TRAP(!inet_sk(sk)->num || inet_sk(sk)->bind_hash); sk->sk_prot->destroy(sk); @@ -1802,7 +1802,7 @@ int tcp_disconnect(struct sock *sk, int flags) tcp_sack_reset(&tp->rx_opt); __sk_dst_reset(sk); - BUG_TRAP(!inet->num || tp->bind_hash); + BUG_TRAP(!inet->num || inet->bind_hash); sk->sk_error_report(sk); return err; diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 58e36ed88f25..10a9b3ae3442 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -113,9 +113,9 @@ static __inline__ void __tcp_inherit_port(struct sock *sk, struct sock *child) struct inet_bind_bucket *tb; spin_lock(&head->lock); - tb = tcp_sk(sk)->bind_hash; + tb = inet_sk(sk)->bind_hash; sk_add_bind_node(child, &tb->owners); - tcp_sk(child)->bind_hash = tb; + inet_sk(child)->bind_hash = tb; spin_unlock(&head->lock); } @@ -129,9 +129,10 @@ inline void tcp_inherit_port(struct sock *sk, struct sock *child) void tcp_bind_hash(struct sock *sk, struct inet_bind_bucket *tb, const unsigned short snum) { - inet_sk(sk)->num = snum; + struct inet_sock *inet = inet_sk(sk); + inet->num = snum; sk_add_bind_node(sk, &tb->owners); - tcp_sk(sk)->bind_hash = tb; + inet->bind_hash = tb; } static inline int tcp_bind_conflict(struct sock *sk, struct inet_bind_bucket *tb) @@ -246,9 +247,9 @@ tb_not_found: (!sk->sk_reuse || sk->sk_state == TCP_LISTEN)) tb->fastreuse = 0; success: - if (!tcp_sk(sk)->bind_hash) + if (!inet_sk(sk)->bind_hash) tcp_bind_hash(sk, tb, snum); - BUG_TRAP(tcp_sk(sk)->bind_hash == tb); + BUG_TRAP(inet_sk(sk)->bind_hash == tb); ret = 0; fail_unlock: @@ -269,9 +270,9 @@ static void __tcp_put_port(struct sock *sk) struct inet_bind_bucket *tb; spin_lock(&head->lock); - tb = tcp_sk(sk)->bind_hash; + tb = inet->bind_hash; __sk_del_bind_node(sk); - 
tcp_sk(sk)->bind_hash = NULL; + inet->bind_hash = NULL; inet->num = 0; inet_bind_bucket_destroy(tcp_bucket_cachep, tb); spin_unlock(&head->lock); @@ -694,7 +695,7 @@ ok: } head = &tcp_bhash[inet_bhashfn(snum, tcp_bhash_size)]; - tb = tcp_sk(sk)->bind_hash; + tb = inet_sk(sk)->bind_hash; spin_lock_bh(&head->lock); if (sk_head(&tb->owners) == sk && !sk->sk_bind_node.next) { __tcp_v4_hash(sk, 0); @@ -1940,7 +1941,7 @@ int tcp_v4_destroy_sock(struct sock *sk) __skb_queue_purge(&tp->ucopy.prequeue); /* Clean up a referenced TCP bind bucket. */ - if (tp->bind_hash) + if (inet_sk(sk)->bind_hash) tcp_put_port(sk); /* diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index 1df6cd46066b..267cea1087e5 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -296,17 +296,17 @@ kill: */ static void __tcp_tw_hashdance(struct sock *sk, struct tcp_tw_bucket *tw) { + const struct inet_sock *inet = inet_sk(sk); struct inet_ehash_bucket *ehead = &tcp_ehash[sk->sk_hashent]; struct inet_bind_hashbucket *bhead; - /* Step 1: Put TW into bind hash. Original socket stays there too. - Note, that any socket with inet_sk(sk)->num != 0 MUST be bound in + Note, that any socket with inet->num != 0 MUST be bound in binding cache, even if it is closed. 
*/ - bhead = &tcp_bhash[inet_bhashfn(inet_sk(sk)->num, tcp_bhash_size)]; + bhead = &tcp_bhash[inet_bhashfn(inet->num, tcp_bhash_size)]; spin_lock(&bhead->lock); - tw->tw_tb = tcp_sk(sk)->bind_hash; - BUG_TRAP(tcp_sk(sk)->bind_hash); + tw->tw_tb = inet->bind_hash; + BUG_TRAP(inet->bind_hash); tw_add_bind_node(tw, &tw->tw_tb->owners); spin_unlock(&bhead->lock); @@ -694,6 +694,7 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req, if(newsk != NULL) { struct inet_request_sock *ireq = inet_rsk(req); struct tcp_request_sock *treq = tcp_rsk(req); + struct inet_sock *newinet = inet_sk(newsk); struct tcp_sock *newtp; struct sk_filter *filter; @@ -702,10 +703,10 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req, /* SANITY */ sk_node_init(&newsk->sk_node); - tcp_sk(newsk)->bind_hash = NULL; + newinet->bind_hash = NULL; /* Clone the TCP header template */ - inet_sk(newsk)->dport = ireq->rmt_port; + newinet->dport = ireq->rmt_port; sock_lock_init(newsk); bh_lock_sock(newsk); diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 31f50fb29ffb..a8ca7ba06c1c 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -204,9 +204,9 @@ tb_not_found: tb->fastreuse = 0; success: - if (!tcp_sk(sk)->bind_hash) + if (!inet_sk(sk)->bind_hash) tcp_bind_hash(sk, tb, snum); - BUG_TRAP(tcp_sk(sk)->bind_hash == tb); + BUG_TRAP(inet_sk(sk)->bind_hash == tb); ret = 0; fail_unlock: @@ -613,8 +613,8 @@ ok: goto out; } - head = &tcp_bhash[inet_bhashfn(snum, tcp_bhash_size)]; - tb = tcp_sk(sk)->bind_hash; + head = &tcp_bhash[inet_bhashfn(snum, tcp_bhash_size)]; + tb = inet_sk(sk)->bind_hash; spin_lock_bh(&head->lock); if (sk_head(&tb->owners) == sk && !sk->sk_bind_node.next) { From a86888b925299330053d20e0eba03ac4d2648c4b Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Tue, 9 Aug 2005 20:02:13 -0700 Subject: [PATCH 301/584] [NETFILTER]: Fix multiple problems with the conntrack event cache refcnt underflow: the reference 
count is decremented when a conntrack entry is removed from the hash but it is not incremented when entering new entries. missing protection of process context against softirq context: all cache operations need to locally disable softirqs to avoid races. Additionally the event cache can't be initialized when a packet enters the conntrack code but needs to be initialized whenever we cache an event and the stored conntrack entry doesn't match the current one. incorrect flushing of the event cache in ip_ct_iterate_cleanup: without real locking we can't flush the cache for different CPUs without incurring races. The cache for different CPUs can only be flushed when no packets are going through the code. ip_ct_iterate_cleanup doesn't need to drop all references, so flushing is moved to the cleanup path. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/linux/netfilter_ipv4/ip_conntrack.h | 29 +++-- .../linux/netfilter_ipv4/ip_conntrack_core.h | 14 +-- net/ipv4/netfilter/ip_conntrack_core.c | 109 +++++++----------- net/ipv4/netfilter/ip_conntrack_standalone.c | 3 +- 4 files changed, 59 insertions(+), 96 deletions(-) diff --git a/include/linux/netfilter_ipv4/ip_conntrack.h b/include/linux/netfilter_ipv4/ip_conntrack.h index ff2c1c6001f9..088742befe49 100644 --- a/include/linux/netfilter_ipv4/ip_conntrack.h +++ b/include/linux/netfilter_ipv4/ip_conntrack.h @@ -411,6 +411,7 @@ struct ip_conntrack_stat #ifdef CONFIG_IP_NF_CONNTRACK_EVENTS #include +#include struct ip_conntrack_ecache { struct ip_conntrack *ct; @@ -445,26 +446,24 @@ ip_conntrack_expect_unregister_notifier(struct notifier_block *nb) return notifier_chain_unregister(&ip_conntrack_expect_chain, nb); } +extern void ip_ct_deliver_cached_events(const struct ip_conntrack *ct); +extern void __ip_ct_event_cache_init(struct ip_conntrack *ct); + static inline void ip_conntrack_event_cache(enum ip_conntrack_events event, const struct sk_buff *skb) { - struct ip_conntrack_ecache *ecache = -
&__get_cpu_var(ip_conntrack_ecache); - - if (unlikely((struct ip_conntrack *) skb->nfct != ecache->ct)) { - if (net_ratelimit()) { - printk(KERN_ERR "ctevent: skb->ct != ecache->ct !!!\n"); - dump_stack(); - } - } + struct ip_conntrack *ct = (struct ip_conntrack *)skb->nfct; + struct ip_conntrack_ecache *ecache; + + local_bh_disable(); + ecache = &__get_cpu_var(ip_conntrack_ecache); + if (ct != ecache->ct) + __ip_ct_event_cache_init(ct); ecache->events |= event; + local_bh_enable(); } -extern void -ip_conntrack_deliver_cached_events_for(const struct ip_conntrack *ct); -extern void ip_conntrack_event_cache_init(const struct sk_buff *skb); - static inline void ip_conntrack_event(enum ip_conntrack_events event, struct ip_conntrack *ct) { @@ -483,9 +482,7 @@ static inline void ip_conntrack_event_cache(enum ip_conntrack_events event, const struct sk_buff *skb) {} static inline void ip_conntrack_event(enum ip_conntrack_events event, struct ip_conntrack *ct) {} -static inline void ip_conntrack_deliver_cached_events_for( - struct ip_conntrack *ct) {} -static inline void ip_conntrack_event_cache_init(const struct sk_buff *skb) {} +static inline void ip_ct_deliver_cached_events(const struct ip_conntrack *ct) {} static inline void ip_conntrack_expect_event(enum ip_conntrack_expect_events event, struct ip_conntrack_expect *exp) {} diff --git a/include/linux/netfilter_ipv4/ip_conntrack_core.h b/include/linux/netfilter_ipv4/ip_conntrack_core.h index fbf6c3e41647..dc4d2a0575de 100644 --- a/include/linux/netfilter_ipv4/ip_conntrack_core.h +++ b/include/linux/netfilter_ipv4/ip_conntrack_core.h @@ -44,18 +44,14 @@ static inline int ip_conntrack_confirm(struct sk_buff **pskb) struct ip_conntrack *ct = (struct ip_conntrack *)(*pskb)->nfct; int ret = NF_ACCEPT; - if (ct && !is_confirmed(ct)) - ret = __ip_conntrack_confirm(pskb); - ip_conntrack_deliver_cached_events_for(ct); - + if (ct) { + if (!is_confirmed(ct)) + ret = __ip_conntrack_confirm(pskb); + ip_ct_deliver_cached_events(ct); + 
} return ret; } -#ifdef CONFIG_IP_NF_CONNTRACK_EVENTS -struct ip_conntrack_ecache; -extern void __ip_ct_deliver_cached_events(struct ip_conntrack_ecache *ec); -#endif - extern void __ip_ct_expect_unlink_destroy(struct ip_conntrack_expect *exp); extern struct list_head *ip_conntrack_hash; diff --git a/net/ipv4/netfilter/ip_conntrack_core.c b/net/ipv4/netfilter/ip_conntrack_core.c index d9fddae8d787..5c3f16eae2d8 100644 --- a/net/ipv4/netfilter/ip_conntrack_core.c +++ b/net/ipv4/netfilter/ip_conntrack_core.c @@ -85,73 +85,62 @@ struct notifier_block *ip_conntrack_expect_chain; DEFINE_PER_CPU(struct ip_conntrack_ecache, ip_conntrack_ecache); -static inline void __deliver_cached_events(struct ip_conntrack_ecache *ecache) +/* deliver cached events and clear cache entry - must be called with locally + * disabled softirqs */ +static inline void +__ip_ct_deliver_cached_events(struct ip_conntrack_ecache *ecache) { + DEBUGP("ecache: delivering events for %p\n", ecache->ct); if (is_confirmed(ecache->ct) && !is_dying(ecache->ct) && ecache->events) notifier_call_chain(&ip_conntrack_chain, ecache->events, ecache->ct); ecache->events = 0; -} - -void __ip_ct_deliver_cached_events(struct ip_conntrack_ecache *ecache) -{ - __deliver_cached_events(ecache); + ip_conntrack_put(ecache->ct); + ecache->ct = NULL; } /* Deliver all cached events for a particular conntrack. 
This is called * by code prior to async packet handling or freeing the skb */ -void -ip_conntrack_deliver_cached_events_for(const struct ip_conntrack *ct) +void ip_ct_deliver_cached_events(const struct ip_conntrack *ct) { - struct ip_conntrack_ecache *ecache = - &__get_cpu_var(ip_conntrack_ecache); - - if (!ct) - return; - - if (ecache->ct == ct) { - DEBUGP("ecache: delivering event for %p\n", ct); - __deliver_cached_events(ecache); - } else { - if (net_ratelimit()) - printk(KERN_WARNING "ecache: want to deliver for %p, " - "but cache has %p\n", ct, ecache->ct); - } - - /* signalize that events have already been delivered */ - ecache->ct = NULL; + struct ip_conntrack_ecache *ecache; + + local_bh_disable(); + ecache = &__get_cpu_var(ip_conntrack_ecache); + if (ecache->ct == ct) + __ip_ct_deliver_cached_events(ecache); + local_bh_enable(); } -/* Deliver cached events for old pending events, if current conntrack != old */ -void ip_conntrack_event_cache_init(const struct sk_buff *skb) +void __ip_ct_event_cache_init(struct ip_conntrack *ct) { - struct ip_conntrack *ct = (struct ip_conntrack *) skb->nfct; - struct ip_conntrack_ecache *ecache = - &__get_cpu_var(ip_conntrack_ecache); + struct ip_conntrack_ecache *ecache; /* take care of delivering potentially old events */ - if (ecache->ct != ct) { - enum ip_conntrack_info ctinfo; - /* we have to check, since at startup the cache is NULL */ - if (likely(ecache->ct)) { - DEBUGP("ecache: entered for different conntrack: " - "ecache->ct=%p, skb->nfct=%p. 
delivering " - "events\n", ecache->ct, ct); - __deliver_cached_events(ecache); - ip_conntrack_put(ecache->ct); - } else { - DEBUGP("ecache: entered for conntrack %p, " - "cache was clean before\n", ct); - } - - /* initialize for this conntrack/packet */ - ecache->ct = ip_conntrack_get(skb, &ctinfo); - /* ecache->events cleared by __deliver_cached_devents() */ - } else { - DEBUGP("ecache: re-entered for conntrack %p.\n", ct); - } + ecache = &__get_cpu_var(ip_conntrack_ecache); + BUG_ON(ecache->ct == ct); + if (ecache->ct) + __ip_ct_deliver_cached_events(ecache); + /* initialize for this conntrack/packet */ + ecache->ct = ct; + nf_conntrack_get(&ct->ct_general); } +/* flush the event cache - touches other CPU's data and must not be called while + * packets are still passing through the code */ +static void ip_ct_event_cache_flush(void) +{ + struct ip_conntrack_ecache *ecache; + int cpu; + + for_each_cpu(cpu) { + ecache = &per_cpu(ip_conntrack_ecache, cpu); + if (ecache->ct) + ip_conntrack_put(ecache->ct); + } +} +#else +static inline void ip_ct_event_cache_flush(void) {} #endif /* CONFIG_IP_NF_CONNTRACK_EVENTS */ DEFINE_PER_CPU(struct ip_conntrack_stat, ip_conntrack_stat); @@ -878,8 +867,6 @@ unsigned int ip_conntrack_in(unsigned int hooknum, IP_NF_ASSERT((*pskb)->nfct); - ip_conntrack_event_cache_init(*pskb); - ret = proto->packet(ct, *pskb, ctinfo); if (ret < 0) { /* Invalid: inverse of the return code tells @@ -1278,23 +1265,6 @@ ip_ct_iterate_cleanup(int (*iter)(struct ip_conntrack *i, void *), void *data) ip_conntrack_put(ct); } - -#ifdef CONFIG_IP_NF_CONNTRACK_EVENTS - { - /* we need to deliver all cached events in order to drop - * the reference counts */ - int cpu; - for_each_cpu(cpu) { - struct ip_conntrack_ecache *ecache = - &per_cpu(ip_conntrack_ecache, cpu); - if (ecache->ct) { - __ip_ct_deliver_cached_events(ecache); - ip_conntrack_put(ecache->ct); - ecache->ct = NULL; - } - } - } -#endif } /* Fast function for those who don't want to parse /proc (and I 
don't @@ -1381,6 +1351,7 @@ void ip_conntrack_flush() delete... */ synchronize_net(); + ip_ct_event_cache_flush(); i_see_dead_people: ip_ct_iterate_cleanup(kill_all, NULL); if (atomic_read(&ip_conntrack_count) != 0) { diff --git a/net/ipv4/netfilter/ip_conntrack_standalone.c b/net/ipv4/netfilter/ip_conntrack_standalone.c index ca97c3ac2f2a..ee5895afd0c3 100644 --- a/net/ipv4/netfilter/ip_conntrack_standalone.c +++ b/net/ipv4/netfilter/ip_conntrack_standalone.c @@ -401,7 +401,6 @@ static unsigned int ip_confirm(unsigned int hooknum, const struct net_device *out, int (*okfn)(struct sk_buff *)) { - ip_conntrack_event_cache_init(*pskb); /* We've seen it coming out the other side: confirm it */ return ip_conntrack_confirm(pskb); } @@ -419,7 +418,6 @@ static unsigned int ip_conntrack_help(unsigned int hooknum, ct = ip_conntrack_get(*pskb, &ctinfo); if (ct && ct->helper) { unsigned int ret; - ip_conntrack_event_cache_init(*pskb); ret = ct->helper->help(pskb, ct, ctinfo); if (ret != NF_ACCEPT) return ret; @@ -978,6 +976,7 @@ EXPORT_SYMBOL_GPL(ip_conntrack_chain); EXPORT_SYMBOL_GPL(ip_conntrack_expect_chain); EXPORT_SYMBOL_GPL(ip_conntrack_register_notifier); EXPORT_SYMBOL_GPL(ip_conntrack_unregister_notifier); +EXPORT_SYMBOL_GPL(__ip_ct_event_cache_init); EXPORT_PER_CPU_SYMBOL_GPL(ip_conntrack_ecache); #endif EXPORT_SYMBOL(ip_conntrack_protocol_register); From 94cd2b67641e7ddc2e6ed71d76e00116957423db Mon Sep 17 00:00:00 2001 From: Pablo Neira Date: Tue, 9 Aug 2005 20:02:36 -0700 Subject: [PATCH 302/584] [NETFILTER]: remove bogus memset() calls from ip_conntrack_netlink.c nfattr_parse_nested() calls nfattr_parse() which in turn does a memset on the 'tb' array. All callers therefore don't need to memset before calling it. Signed-off-by: Pablo Neira Signed-off-by: Harald Welte Signed-off-by: David S. 
Miller --- net/ipv4/netfilter/ip_conntrack_netlink.c | 8 -------- 1 file changed, 8 deletions(-) diff --git a/net/ipv4/netfilter/ip_conntrack_netlink.c b/net/ipv4/netfilter/ip_conntrack_netlink.c index f43ec18c9166..36a046f22105 100644 --- a/net/ipv4/netfilter/ip_conntrack_netlink.c +++ b/net/ipv4/netfilter/ip_conntrack_netlink.c @@ -479,7 +479,6 @@ ctnetlink_parse_tuple_ip(struct nfattr *attr, struct ip_conntrack_tuple *tuple) DEBUGP("entered %s\n", __FUNCTION__); - memset(tb, 0, CTA_IP_MAX * sizeof(tb)); if (nfattr_parse_nested(tb, CTA_IP_MAX, attr) < 0) goto nfattr_failure; @@ -522,8 +521,6 @@ ctnetlink_parse_tuple_proto(struct nfattr *attr, DEBUGP("entered %s\n", __FUNCTION__); - memset(tb, 0, CTA_PROTO_MAX * sizeof(tb)); - if (nfattr_parse_nested(tb, CTA_PROTO_MAX, attr) < 0) goto nfattr_failure; @@ -556,7 +553,6 @@ ctnetlink_parse_tuple(struct nfattr *cda[], struct ip_conntrack_tuple *tuple, DEBUGP("entered %s\n", __FUNCTION__); - memset(tb, 0, CTA_TUPLE_MAX * sizeof(tb)); memset(tuple, 0, sizeof(*tuple)); if (nfattr_parse_nested(tb, CTA_TUPLE_MAX, cda[type-1]) < 0) @@ -607,8 +603,6 @@ static int ctnetlink_parse_nat_proto(struct nfattr *attr, DEBUGP("entered %s\n", __FUNCTION__); - memset(tb, 0, CTA_PROTONAT_MAX * sizeof(tb)); - if (nfattr_parse_nested(tb, CTA_PROTONAT_MAX, attr) < 0) goto nfattr_failure; @@ -646,7 +640,6 @@ ctnetlink_parse_nat(struct nfattr *cda[], DEBUGP("entered %s\n", __FUNCTION__); - memset(tb, 0, CTA_NAT_MAX * sizeof(tb)); memset(range, 0, sizeof(*range)); if (nfattr_parse_nested(tb, CTA_NAT_MAX, cda[CTA_NAT-1]) < 0) @@ -684,7 +677,6 @@ ctnetlink_parse_help(struct nfattr *attr, char **helper_name) struct nfattr *tb[CTA_HELP_MAX]; DEBUGP("entered %s\n", __FUNCTION__); - memset(tb, 0, CTA_HELP_MAX * sizeof(tb)); if (nfattr_parse_nested(tb, CTA_HELP_MAX, attr) < 0) goto nfattr_failure; From 88aa0429048d08c18f2772782588f953bbbd79be Mon Sep 17 00:00:00 2001 From: Pablo Neira Date: Tue, 9 Aug 2005 20:02:55 -0700 Subject: [PATCH 303/584] 
[NETFILTER]: conntrack_netlink: Fix locking during conntrack_create The current codepath allowed for ip_conntrack_lock to be unlock'ed twice. Signed-off-by: Pablo Neira Signed-off-by: Harald Welte Signed-off-by: David S. Miller --- net/ipv4/netfilter/ip_conntrack_netlink.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/net/ipv4/netfilter/ip_conntrack_netlink.c b/net/ipv4/netfilter/ip_conntrack_netlink.c index 36a046f22105..0ab2d7df6bc4 100644 --- a/net/ipv4/netfilter/ip_conntrack_netlink.c +++ b/net/ipv4/netfilter/ip_conntrack_netlink.c @@ -1052,13 +1052,14 @@ ctnetlink_new_conntrack(struct sock *ctnl, struct sk_buff *skb, err = -ENOENT; if (nlh->nlmsg_flags & NLM_F_CREATE) err = ctnetlink_create_conntrack(cda, &otuple, &rtuple); + return err; + } + /* implicit 'else' */ + + /* we only allow nat config for new conntracks */ + if (cda[CTA_NAT-1]) { + err = -EINVAL; goto out_unlock; - } else { - /* we only allow nat config for new conntracks */ - if (cda[CTA_NAT-1]) { - err = -EINVAL; - goto out_unlock; - } } /* We manipulate the conntrack inside the global conntrack table lock, From bd9a26b7f2ee7567571bb5b7acc1a256c544a0dd Mon Sep 17 00:00:00 2001 From: Harald Welte Date: Tue, 9 Aug 2005 20:03:22 -0700 Subject: [PATCH 304/584] [NETFILTER]: fix ctnetlink 'create_expect' parsing There was a stupid copy+paste mistake where we parse the MASK nfattr into the "tuple" variable instead of the "mask" variable. This patch fixes it. Thanks to Pablo Neira. Signed-off-by: Harald Welte Signed-off-by: David S. 
Miller --- net/ipv4/netfilter/ip_conntrack_netlink.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv4/netfilter/ip_conntrack_netlink.c b/net/ipv4/netfilter/ip_conntrack_netlink.c index 0ab2d7df6bc4..23f18f6a5535 100644 --- a/net/ipv4/netfilter/ip_conntrack_netlink.c +++ b/net/ipv4/netfilter/ip_conntrack_netlink.c @@ -1388,7 +1388,7 @@ ctnetlink_create_expect(struct nfattr *cda[]) err = ctnetlink_parse_tuple(cda, &tuple, CTA_EXPECT_TUPLE); if (err < 0) return err; - err = ctnetlink_parse_tuple(cda, &tuple, CTA_EXPECT_MASK); + err = ctnetlink_parse_tuple(cda, &mask, CTA_EXPECT_MASK); if (err < 0) return err; From 927ccbcc28dceee29dad876982768cca29738564 Mon Sep 17 00:00:00 2001 From: Harald Welte Date: Tue, 9 Aug 2005 20:03:40 -0700 Subject: [PATCH 305/584] [NETFILTER]: attribute count is an attribute of message type, not subsystem Prior to this patch, every nfnetlink subsystem had to specify its attribute count. However, in reality the attribute count depends on the message type within the subsystem, not the subsystem itself. This patch moves 'attr_count' from 'struct nfnetlink_subsystem' into nfnl_callback to fix this. Signed-off-by: Harald Welte Signed-off-by: David S.
Miller --- include/linux/netfilter/nfnetlink.h | 4 ++-- net/ipv4/netfilter/ip_conntrack_netlink.c | 9 +++++++-- net/netfilter/nfnetlink.c | 20 ++++++++++++++++---- net/netfilter/nfnetlink_log.c | 5 +++-- net/netfilter/nfnetlink_queue.c | 4 +++- 5 files changed, 31 insertions(+), 11 deletions(-) diff --git a/include/linux/netfilter/nfnetlink.h b/include/linux/netfilter/nfnetlink.h index 561f9df28808..b0feb2374079 100644 --- a/include/linux/netfilter/nfnetlink.h +++ b/include/linux/netfilter/nfnetlink.h @@ -85,9 +85,10 @@ struct nfgenmsg { struct nfnl_callback { - kernel_cap_t cap_required; /* capabilities required for this msg */ int (*call)(struct sock *nl, struct sk_buff *skb, struct nlmsghdr *nlh, struct nfattr *cda[], int *errp); + kernel_cap_t cap_required; /* capabilities required for this msg */ + u_int16_t attr_count; /* number of nfattr's */ }; struct nfnetlink_subsystem @@ -95,7 +96,6 @@ struct nfnetlink_subsystem const char *name; __u8 subsys_id; /* nfnetlink subsystem ID */ __u8 cb_count; /* number of callbacks */ - u_int32_t attr_count; /* number of nfattr's */ struct nfnl_callback *cb; /* callback for individual types */ }; diff --git a/net/ipv4/netfilter/ip_conntrack_netlink.c b/net/ipv4/netfilter/ip_conntrack_netlink.c index 23f18f6a5535..53d98974dcf0 100644 --- a/net/ipv4/netfilter/ip_conntrack_netlink.c +++ b/net/ipv4/netfilter/ip_conntrack_netlink.c @@ -1484,21 +1484,28 @@ static struct notifier_block ctnl_notifier_exp = { static struct nfnl_callback ctnl_cb[IPCTNL_MSG_MAX] = { [IPCTNL_MSG_CT_NEW] = { .call = ctnetlink_new_conntrack, + .attr_count = CTA_MAX, .cap_required = CAP_NET_ADMIN }, [IPCTNL_MSG_CT_GET] = { .call = ctnetlink_get_conntrack, + .attr_count = CTA_MAX, .cap_required = CAP_NET_ADMIN }, [IPCTNL_MSG_CT_DELETE] = { .call = ctnetlink_del_conntrack, + .attr_count = CTA_MAX, .cap_required = CAP_NET_ADMIN }, [IPCTNL_MSG_CT_GET_CTRZERO] = { .call = ctnetlink_get_conntrack, + .attr_count = CTA_MAX, .cap_required = CAP_NET_ADMIN }, }; 
static struct nfnl_callback ctnl_exp_cb[IPCTNL_MSG_MAX] = { [IPCTNL_MSG_EXP_GET] = { .call = ctnetlink_get_expect, + .attr_count = CTA_EXPECT_MAX, .cap_required = CAP_NET_ADMIN }, [IPCTNL_MSG_EXP_NEW] = { .call = ctnetlink_new_expect, + .attr_count = CTA_EXPECT_MAX, .cap_required = CAP_NET_ADMIN }, [IPCTNL_MSG_EXP_DELETE] = { .call = ctnetlink_del_expect, + .attr_count = CTA_EXPECT_MAX, .cap_required = CAP_NET_ADMIN }, }; @@ -1506,7 +1513,6 @@ static struct nfnetlink_subsystem ctnl_subsys = { .name = "conntrack", .subsys_id = NFNL_SUBSYS_CTNETLINK, .cb_count = IPCTNL_MSG_MAX, - .attr_count = CTA_MAX, .cb = ctnl_cb, }; @@ -1514,7 +1520,6 @@ static struct nfnetlink_subsystem ctnl_exp_subsys = { .name = "conntrack_expect", .subsys_id = NFNL_SUBSYS_CTNETLINK_EXP, .cb_count = IPCTNL_MSG_EXP_MAX, - .attr_count = CTA_MAX, .cb = ctnl_exp_cb, }; diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c index 30b25f47f7cc..578e4fe40945 100644 --- a/net/netfilter/nfnetlink.c +++ b/net/netfilter/nfnetlink.c @@ -155,8 +155,18 @@ nfnetlink_check_attributes(struct nfnetlink_subsystem *subsys, struct nlmsghdr *nlh, struct nfattr *cda[]) { int min_len; + u_int16_t attr_count; + u_int8_t cb_id = NFNL_MSG_TYPE(nlh->nlmsg_type); - memset(cda, 0, sizeof(struct nfattr *) * subsys->attr_count); + if (unlikely(cb_id >= subsys->cb_count)) { + DEBUGP("msgtype %u >= %u, returning\n", + cb_id, subsys->cb_count); + return -EINVAL; + } + + attr_count = subsys->cb[cb_id].attr_count; + + memset(cda, 0, sizeof(struct nfattr *) * attr_count); /* check attribute lengths. 
*/ min_len = NLMSG_ALIGN(sizeof(struct nfgenmsg)); @@ -170,7 +180,7 @@ nfnetlink_check_attributes(struct nfnetlink_subsystem *subsys, while (NFA_OK(attr, attrlen)) { unsigned flavor = attr->nfa_type; if (flavor) { - if (flavor > subsys->attr_count) + if (flavor > attr_count) return -EINVAL; cda[flavor - 1] = attr; } @@ -256,9 +266,11 @@ static inline int nfnetlink_rcv_msg(struct sk_buff *skb, } { - struct nfattr *cda[ss->attr_count]; + u_int16_t attr_count = + ss->cb[NFNL_MSG_TYPE(nlh->nlmsg_type)].attr_count; + struct nfattr *cda[attr_count]; - memset(cda, 0, ss->attr_count*sizeof(struct nfattr *)); + memset(cda, 0, sizeof(struct nfattr *) * attr_count); err = nfnetlink_check_attributes(ss, nlh, cda); if (err < 0) diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c index f41045e385ae..1750f0d6e4de 100644 --- a/net/netfilter/nfnetlink_log.c +++ b/net/netfilter/nfnetlink_log.c @@ -805,8 +805,10 @@ out_put: static struct nfnl_callback nfulnl_cb[NFULNL_MSG_MAX] = { [NFULNL_MSG_PACKET] = { .call = nfulnl_recv_unsupp, - .cap_required = CAP_NET_ADMIN }, + .attr_count = NFULA_MAX, + .cap_required = CAP_NET_ADMIN, }, [NFULNL_MSG_CONFIG] = { .call = nfulnl_recv_config, + .attr_count = NFULA_CFG_MAX, .cap_required = CAP_NET_ADMIN }, }; @@ -814,7 +816,6 @@ static struct nfnetlink_subsystem nfulnl_subsys = { .name = "log", .subsys_id = NFNL_SUBSYS_ULOG, .cb_count = NFULNL_MSG_MAX, - .attr_count = NFULA_MAX, .cb = nfulnl_cb, }; diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c index d7b0330d64b4..04323ee1eb8d 100644 --- a/net/netfilter/nfnetlink_queue.c +++ b/net/netfilter/nfnetlink_queue.c @@ -877,10 +877,13 @@ out_put: static struct nfnl_callback nfqnl_cb[NFQNL_MSG_MAX] = { [NFQNL_MSG_PACKET] = { .call = nfqnl_recv_unsupp, + .attr_count = NFQA_MAX, .cap_required = CAP_NET_ADMIN }, [NFQNL_MSG_VERDICT] = { .call = nfqnl_recv_verdict, + .attr_count = NFQA_MAX, .cap_required = CAP_NET_ADMIN }, [NFQNL_MSG_CONFIG] = { .call = 
nfqnl_recv_config, + .attr_count = NFQA_CFG_MAX, .cap_required = CAP_NET_ADMIN }, }; @@ -888,7 +891,6 @@ static struct nfnetlink_subsystem nfqnl_subsys = { .name = "nf_queue", .subsys_id = NFNL_SUBSYS_QUEUE, .cb_count = NFQNL_MSG_MAX, - .attr_count = NFQA_MAX, .cb = nfqnl_cb, }; From a42827b71b87fc9816d2f58626e825b0eb500efe Mon Sep 17 00:00:00 2001 From: Harald Welte Date: Tue, 9 Aug 2005 20:03:54 -0700 Subject: [PATCH 306/584] [NETFILTER]: cleanup nfnetlink_check_attributes() 1) memset return parameter 'cda' (nfattr pointer array) only on success 2) a message without attributes and just a 'struct nfgenmsg' is valid, don't return -EINVAL 3) use likely() and unlikely() where appropriate Signed-off-by: Harald Welte Signed-off-by: David S. Miller --- net/netfilter/nfnetlink.c | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c index 578e4fe40945..84efffdbade3 100644 --- a/net/netfilter/nfnetlink.c +++ b/net/netfilter/nfnetlink.c @@ -163,17 +163,16 @@ nfnetlink_check_attributes(struct nfnetlink_subsystem *subsys, cb_id, subsys->cb_count); return -EINVAL; } - - attr_count = subsys->cb[cb_id].attr_count; + min_len = NLMSG_ALIGN(sizeof(struct nfgenmsg)); + if (unlikely(nlh->nlmsg_len < min_len)) + return -EINVAL; + + attr_count = subsys->cb[cb_id].attr_count; memset(cda, 0, sizeof(struct nfattr *) * attr_count); /* check attribute lengths. */ - min_len = NLMSG_ALIGN(sizeof(struct nfgenmsg)); - if (nlh->nlmsg_len < min_len) - return -EINVAL; - - if (nlh->nlmsg_len > min_len) { + if (likely(nlh->nlmsg_len > min_len)) { struct nfattr *attr = NFM_NFA(NLMSG_DATA(nlh)); int attrlen = nlh->nlmsg_len - NLMSG_ALIGN(min_len); @@ -186,8 +185,10 @@ nfnetlink_check_attributes(struct nfnetlink_subsystem *subsys, } attr = NFA_NEXT(attr, attrlen); } - } else - return -EINVAL; + } + + /* implicit: if nlmsg_len == min_len, we return 0, and an empty + * (zeroed) cda[] array. 
The message is valid, but empty. */ return 0; } From 1444fc559b01aa5d4fedf4ee4f306a9e9cd56f95 Mon Sep 17 00:00:00 2001 From: Harald Welte Date: Tue, 9 Aug 2005 20:04:07 -0700 Subject: [PATCH 307/584] [NETFILTER]: don't use nested attributes for conntrack_expect We used to use nested nfattr structures for ip_conntrack_expect. This is bogus, since ip_conntrack and ip_conntrack_expect are communicated in different netlink message types. both should be encoded at the top level attributes, no extra nesting required. This patch addresses the issue. Signed-off-by: Harald Welte Signed-off-by: David S. Miller --- include/linux/netfilter/nfnetlink_conntrack.h | 3 +- net/ipv4/netfilter/ip_conntrack_netlink.c | 85 +++++++++---------- 2 files changed, 41 insertions(+), 47 deletions(-) diff --git a/include/linux/netfilter/nfnetlink_conntrack.h b/include/linux/netfilter/nfnetlink_conntrack.h index fb528e0e3bd9..5c55751c78e4 100644 --- a/include/linux/netfilter/nfnetlink_conntrack.h +++ b/include/linux/netfilter/nfnetlink_conntrack.h @@ -33,7 +33,6 @@ enum ctattr_type { CTA_COUNTERS_ORIG, CTA_COUNTERS_REPLY, CTA_USE, - CTA_EXPECT, CTA_ID, __CTA_MAX }; @@ -103,10 +102,12 @@ enum ctattr_protonat { enum ctattr_expect { CTA_EXPECT_UNSPEC, + CTA_EXPECT_MASTER, CTA_EXPECT_TUPLE, CTA_EXPECT_MASK, CTA_EXPECT_TIMEOUT, CTA_EXPECT_ID, + CTA_EXPECT_HELP_NAME, __CTA_EXPECT_MAX }; #define CTA_EXPECT_MAX (__CTA_EXPECT_MAX - 1) diff --git a/net/ipv4/netfilter/ip_conntrack_netlink.c b/net/ipv4/netfilter/ip_conntrack_netlink.c index 53d98974dcf0..f5bda82c2875 100644 --- a/net/ipv4/netfilter/ip_conntrack_netlink.c +++ b/net/ipv4/netfilter/ip_conntrack_netlink.c @@ -1100,18 +1100,21 @@ static inline int ctnetlink_exp_dump_expect(struct sk_buff *skb, const struct ip_conntrack_expect *exp) { + struct ip_conntrack *master = exp->master; u_int32_t timeout = htonl((exp->timeout.expires - jiffies) / HZ); u_int32_t id = htonl(exp->id); - struct nfattr *nest_parms = NFA_NEST(skb, CTA_EXPECT); if 
(ctnetlink_exp_dump_tuple(skb, &exp->tuple, CTA_EXPECT_TUPLE) < 0) goto nfattr_failure; if (ctnetlink_exp_dump_tuple(skb, &exp->mask, CTA_EXPECT_MASK) < 0) goto nfattr_failure; + if (ctnetlink_exp_dump_tuple(skb, + &master->tuplehash[IP_CT_DIR_ORIGINAL].tuple, + CTA_EXPECT_MASTER) < 0) + goto nfattr_failure; NFA_PUT(skb, CTA_EXPECT_TIMEOUT, sizeof(timeout), &timeout); NFA_PUT(skb, CTA_EXPECT_ID, sizeof(u_int32_t), &id); - NFA_NEST_END(skb, nest_parms); return 0; @@ -1259,10 +1262,8 @@ ctnetlink_get_expect(struct sock *ctnl, struct sk_buff *skb, return 0; } - if (cda[CTA_TUPLE_ORIG-1]) - err = ctnetlink_parse_tuple(cda, &tuple, CTA_TUPLE_ORIG); - else if (cda[CTA_TUPLE_REPLY-1]) - err = ctnetlink_parse_tuple(cda, &tuple, CTA_TUPLE_REPLY); + if (cda[CTA_EXPECT_MASTER-1]) + err = ctnetlink_parse_tuple(cda, &tuple, CTA_EXPECT_MASTER); else return -EINVAL; @@ -1310,13 +1311,33 @@ ctnetlink_del_expect(struct sock *ctnl, struct sk_buff *skb, struct ip_conntrack_helper *h; int err; - /* delete by tuple needs either orig or reply tuple */ - if (cda[CTA_TUPLE_ORIG-1]) - err = ctnetlink_parse_tuple(cda, &tuple, CTA_TUPLE_ORIG); - else if (cda[CTA_TUPLE_REPLY-1]) - err = ctnetlink_parse_tuple(cda, &tuple, CTA_TUPLE_REPLY); - else if (cda[CTA_HELP_NAME-1]) { - char *name = NFA_DATA(cda[CTA_HELP_NAME-1]); + if (cda[CTA_EXPECT_TUPLE-1]) { + /* delete a single expect by tuple */ + err = ctnetlink_parse_tuple(cda, &tuple, CTA_EXPECT_TUPLE); + if (err < 0) + return err; + + /* bump usage count to 2 */ + exp = ip_conntrack_expect_find_get(&tuple); + if (!exp) + return -ENOENT; + + if (cda[CTA_EXPECT_ID-1]) { + u_int32_t id = + *(u_int32_t *)NFA_DATA(cda[CTA_EXPECT_ID-1]); + if (exp->id != ntohl(id)) { + ip_conntrack_expect_put(exp); + return -ENOENT; + } + } + + /* after list removal, usage count == 1 */ + ip_conntrack_unexpect_related(exp); + /* have to put what we 'get' above. 
+ * after this line usage count == 0 */ + ip_conntrack_expect_put(exp); + } else if (cda[CTA_EXPECT_HELP_NAME-1]) { + char *name = NFA_DATA(cda[CTA_EXPECT_HELP_NAME-1]); /* delete all expectations for this helper */ write_lock_bh(&ip_conntrack_lock); @@ -1332,7 +1353,6 @@ ctnetlink_del_expect(struct sock *ctnl, struct sk_buff *skb, __ip_ct_expect_unlink_destroy(exp); } write_unlock(&ip_conntrack_lock); - return 0; } else { /* This basically means we have to flush everything*/ write_lock_bh(&ip_conntrack_lock); @@ -1342,30 +1362,8 @@ ctnetlink_del_expect(struct sock *ctnl, struct sk_buff *skb, __ip_ct_expect_unlink_destroy(exp); } write_unlock_bh(&ip_conntrack_lock); - return 0; } - if (err < 0) - return err; - - /* bump usage count to 2 */ - exp = ip_conntrack_expect_find_get(&tuple); - if (!exp) - return -ENOENT; - - if (cda[CTA_EXPECT_ID-1]) { - u_int32_t id = *(u_int32_t *)NFA_DATA(cda[CTA_EXPECT_ID-1]); - if (exp->id != ntohl(id)) { - ip_conntrack_expect_put(exp); - return -ENOENT; - } - } - - /* after list removal, usage count == 1 */ - ip_conntrack_unexpect_related(exp); - /* have to put what we 'get' above. 
after this line usage count == 0 */ - ip_conntrack_expect_put(exp); - return 0; } static int @@ -1385,21 +1383,14 @@ ctnetlink_create_expect(struct nfattr *cda[]) DEBUGP("entered %s\n", __FUNCTION__); + /* caller guarantees that those three CTA_EXPECT_* exist */ err = ctnetlink_parse_tuple(cda, &tuple, CTA_EXPECT_TUPLE); if (err < 0) return err; err = ctnetlink_parse_tuple(cda, &mask, CTA_EXPECT_MASK); if (err < 0) return err; - - if (cda[CTA_TUPLE_ORIG-1]) - err = ctnetlink_parse_tuple(cda, &master_tuple, CTA_TUPLE_ORIG); - else if (cda[CTA_TUPLE_REPLY-1]) - err = ctnetlink_parse_tuple(cda, &master_tuple, - CTA_TUPLE_REPLY); - else - return -EINVAL; - + err = ctnetlink_parse_tuple(cda, &master_tuple, CTA_EXPECT_MASTER); if (err < 0) return err; @@ -1444,7 +1435,9 @@ ctnetlink_new_expect(struct sock *ctnl, struct sk_buff *skb, DEBUGP("entered %s\n", __FUNCTION__); - if (!cda[CTA_EXPECT_TUPLE-1] || !cda[CTA_EXPECT_MASK-1]) + if (!cda[CTA_EXPECT_TUPLE-1] + || !cda[CTA_EXPECT_MASK-1] + || !cda[CTA_EXPECT_MASTER-1]) return -EINVAL; err = ctnetlink_parse_tuple(cda, &tuple, CTA_EXPECT_TUPLE); From 14a50bbaa51202b676a95e9b41bc5ed6c77aa9cc Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Tue, 9 Aug 2005 20:05:52 -0700 Subject: [PATCH 308/584] [NETFILTER]: ctnetlink: make sure event order is correct The following sequence is displayed during events dumping of an ICMP connection: [NEW] [DESTROY] [UPDATE] This happens because the event IPCT_DESTROY is delivered in death_by_timeout(), that is called from the icmp protocol helper (ct->timeout.function) once we see the reply. To fix this, we move this event to destroy_conntrack(). Signed-off-by: Pablo Neira Ayuso Signed-off-by: Harald Welte Signed-off-by: David S. 
Miller --- net/ipv4/netfilter/ip_conntrack_core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv4/netfilter/ip_conntrack_core.c b/net/ipv4/netfilter/ip_conntrack_core.c index 5c3f16eae2d8..dace93eacc5c 100644 --- a/net/ipv4/netfilter/ip_conntrack_core.c +++ b/net/ipv4/netfilter/ip_conntrack_core.c @@ -316,6 +316,7 @@ destroy_conntrack(struct nf_conntrack *nfct) IP_NF_ASSERT(atomic_read(&nfct->use) == 0); IP_NF_ASSERT(!timer_pending(&ct->timeout)); + ip_conntrack_event(IPCT_DESTROY, ct); set_bit(IPS_DYING_BIT, &ct->status); /* To make sure we don't get any weird locking issues here: @@ -355,7 +356,6 @@ static void death_by_timeout(unsigned long ul_conntrack) { struct ip_conntrack *ct = (void *)ul_conntrack; - ip_conntrack_event(IPCT_DESTROY, ct); write_lock_bh(&ip_conntrack_lock); /* Inside lock so preempt is disabled on module removal path. * Otherwise we can get spurious warnings. */ From 37012f7fd326eb3c959428a4fe7e203e6304fe43 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Tue, 9 Aug 2005 20:06:11 -0700 Subject: [PATCH 309/584] [NETFILTER]: fix conntrack refcount leak in unlink_expect() In unlink_expect(), the expectation is removed from the list so the refcount must be dropped as well. Signed-off-by: Pablo Neira Ayuso Signed-off-by: Harald Welte Signed-off-by: David S. 
Miller --- net/ipv4/netfilter/ip_conntrack_core.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/ipv4/netfilter/ip_conntrack_core.c b/net/ipv4/netfilter/ip_conntrack_core.c index dace93eacc5c..9261388d5ac2 100644 --- a/net/ipv4/netfilter/ip_conntrack_core.c +++ b/net/ipv4/netfilter/ip_conntrack_core.c @@ -204,6 +204,7 @@ static void unlink_expect(struct ip_conntrack_expect *exp) list_del(&exp->list); CONNTRACK_STAT_INC(expect_delete); exp->master->expecting--; + ip_conntrack_expect_put(exp); } void __ip_ct_expect_unlink_destroy(struct ip_conntrack_expect *exp) From 28b19d99ac6d92e4fb2fe34144c495019a638a64 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Tue, 9 Aug 2005 20:06:27 -0700 Subject: [PATCH 310/584] [NETFILTER]: Fix typo in ctnl_exp_cb array (no bug, just memory waste) This fixes the size of the ctnl_exp_cb array that is IPCTNL_MSG_EXP_MAX instead of IPCTNL_MSG_MAX. Simple typo. Signed-off-by: Pablo Neira Ayuso Signed-off-by: Harald Welte Signed-off-by: David S. Miller --- net/ipv4/netfilter/ip_conntrack_netlink.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv4/netfilter/ip_conntrack_netlink.c b/net/ipv4/netfilter/ip_conntrack_netlink.c index f5bda82c2875..e3ba449e3e1d 100644 --- a/net/ipv4/netfilter/ip_conntrack_netlink.c +++ b/net/ipv4/netfilter/ip_conntrack_netlink.c @@ -1490,7 +1490,7 @@ static struct nfnl_callback ctnl_cb[IPCTNL_MSG_MAX] = { .cap_required = CAP_NET_ADMIN }, }; -static struct nfnl_callback ctnl_exp_cb[IPCTNL_MSG_MAX] = { +static struct nfnl_callback ctnl_exp_cb[IPCTNL_MSG_EXP_MAX] = { [IPCTNL_MSG_EXP_GET] = { .call = ctnetlink_get_expect, .attr_count = CTA_EXPECT_MAX, .cap_required = CAP_NET_ADMIN }, From ff21d5774b4a186c98be6398eacde75d896db804 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Tue, 9 Aug 2005 20:06:42 -0700 Subject: [PATCH 311/584] [NETFILTER]: fix list traversal order in ctnetlink Currently conntracks are inserted after the head. 
That means that conntracks are sorted from the biggest to the smallest id. This happens because we use list_prepend (list_add) instead list_add_tail. This can result in problems during the list iteration. list_for_each(i, &ip_conntrack_hash[cb->args[0]]) { h = (struct ip_conntrack_tuple_hash *) i; if (DIRECTION(h) != IP_CT_DIR_ORIGINAL) continue; ct = tuplehash_to_ctrack(h); if (ct->id <= *id) continue; In that case just the first conntrack in the bucket will be dumped. To fix this, we iterate the list from the tail to the head via list_for_each_prev. Same thing for the list of expectations. Signed-off-by: Pablo Neira Ayuso Signed-off-by: Harald Welte Signed-off-by: David S. Miller --- net/ipv4/netfilter/ip_conntrack_netlink.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/net/ipv4/netfilter/ip_conntrack_netlink.c b/net/ipv4/netfilter/ip_conntrack_netlink.c index e3ba449e3e1d..1221a9c8bac2 100644 --- a/net/ipv4/netfilter/ip_conntrack_netlink.c +++ b/net/ipv4/netfilter/ip_conntrack_netlink.c @@ -404,7 +404,7 @@ ctnetlink_dump_table(struct sk_buff *skb, struct netlink_callback *cb) read_lock_bh(&ip_conntrack_lock); for (; cb->args[0] < ip_conntrack_htable_size; cb->args[0]++, *id = 0) { - list_for_each(i, &ip_conntrack_hash[cb->args[0]]) { + list_for_each_prev(i, &ip_conntrack_hash[cb->args[0]]) { h = (struct ip_conntrack_tuple_hash *) i; if (DIRECTION(h) != IP_CT_DIR_ORIGINAL) continue; @@ -441,7 +441,7 @@ ctnetlink_dump_table_w(struct sk_buff *skb, struct netlink_callback *cb) write_lock_bh(&ip_conntrack_lock); for (; cb->args[0] < ip_conntrack_htable_size; cb->args[0]++, *id = 0) { - list_for_each(i, &ip_conntrack_hash[cb->args[0]]) { + list_for_each_prev(i, &ip_conntrack_hash[cb->args[0]]) { h = (struct ip_conntrack_tuple_hash *) i; if (DIRECTION(h) != IP_CT_DIR_ORIGINAL) continue; @@ -1214,7 +1214,7 @@ ctnetlink_exp_dump_table(struct sk_buff *skb, struct netlink_callback *cb) DEBUGP("entered %s, last id=%llu\n", __FUNCTION__, *id); 
read_lock_bh(&ip_conntrack_lock); - list_for_each(i, &ip_conntrack_expect_list) { + list_for_each_prev(i, &ip_conntrack_expect_list) { exp = (struct ip_conntrack_expect *) i; if (exp->id <= *id) continue; From 2d8c4ce51903636ce0f60addc8134aa50ab8fa76 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 9 Aug 2005 20:07:13 -0700 Subject: [PATCH 312/584] [INET]: Generalise tcp_bind_hash & tcp_inherit_port This required moving tcp_bucket_cachep to inet_hashinfo. Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. Miller --- include/net/inet_hashtables.h | 32 +++++++++++++++++ include/net/tcp.h | 11 ++---- net/ipv4/inet_hashtables.c | 40 +++++++++++++++++++++ net/ipv4/tcp.c | 4 --- net/ipv4/tcp_ipv4.c | 68 +++-------------------------------- net/ipv6/tcp_ipv6.c | 6 ++-- 6 files changed, 81 insertions(+), 80 deletions(-) diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h index 3a6c11ca421d..da9705525f15 100644 --- a/include/net/inet_hashtables.h +++ b/include/net/inet_hashtables.h @@ -14,12 +14,15 @@ #ifndef _INET_HASHTABLES_H #define _INET_HASHTABLES_H +#include #include #include #include #include #include +#include + /* This is for all connections with a full identity, no wildcards. * New scheme, half the table is for TIME_WAIT, the other half is * for the rest. I'll experiment with dynamic table growth later. @@ -113,6 +116,7 @@ struct inet_hashinfo { atomic_t lhash_users; wait_queue_head_t lhash_wait; spinlock_t portalloc_lock; + kmem_cache_t *bind_bucket_cachep; }; static inline int inet_ehashfn(const __u32 laddr, const __u16 lport, @@ -148,6 +152,9 @@ static inline int inet_bhashfn(const __u16 lport, const int bhash_size) return lport & (bhash_size - 1); } +extern void inet_bind_hash(struct sock *sk, struct inet_bind_bucket *tb, + const unsigned short snum); + /* These can have wildcards, don't try too hard. 
*/ static inline int inet_lhashfn(const unsigned short num) { @@ -159,4 +166,29 @@ static inline int inet_sk_listen_hashfn(const struct sock *sk) return inet_lhashfn(inet_sk(sk)->num); } +/* Caller must disable local BH processing. */ +static inline void __inet_inherit_port(struct inet_hashinfo *table, + struct sock *sk, struct sock *child) +{ + const int bhash = inet_bhashfn(inet_sk(child)->num, table->bhash_size); + struct inet_bind_hashbucket *head = &table->bhash[bhash]; + struct inet_bind_bucket *tb; + + spin_lock(&head->lock); + tb = inet_sk(sk)->bind_hash; + sk_add_bind_node(child, &tb->owners); + inet_sk(child)->bind_hash = tb; + spin_unlock(&head->lock); +} + +static inline void inet_inherit_port(struct inet_hashinfo *table, + struct sock *sk, struct sock *child) +{ + local_bh_disable(); + __inet_inherit_port(table, sk, child); + local_bh_enable(); +} + +extern void inet_put_port(struct inet_hashinfo *table, struct sock *sk); + #endif /* _INET_HASHTABLES_H */ diff --git a/include/net/tcp.h b/include/net/tcp.h index 6c6c879e7e87..9eb8ff7c911e 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -51,14 +51,10 @@ extern struct inet_hashinfo tcp_hashinfo; #define tcp_lhash_users (tcp_hashinfo.lhash_users) #define tcp_lhash_wait (tcp_hashinfo.lhash_wait) #define tcp_portalloc_lock (tcp_hashinfo.portalloc_lock) - -extern kmem_cache_t *tcp_bucket_cachep; +#define tcp_bucket_cachep (tcp_hashinfo.bind_bucket_cachep) extern int tcp_port_rover; -extern void tcp_bind_hash(struct sock *sk, struct inet_bind_bucket *tb, - unsigned short snum); - #if (BITS_PER_LONG == 64) #define TCP_ADDRCMP_ALIGN_BYTES 8 #else @@ -549,9 +545,6 @@ DECLARE_SNMP_STAT(struct tcp_mib, tcp_statistics); #define TCP_ADD_STATS_BH(field, val) SNMP_ADD_STATS_BH(tcp_statistics, field, val) #define TCP_ADD_STATS_USER(field, val) SNMP_ADD_STATS_USER(tcp_statistics, field, val) -extern void tcp_put_port(struct sock *sk); -extern void tcp_inherit_port(struct sock *sk, struct sock *child); - extern 
void tcp_v4_err(struct sk_buff *skb, u32); extern void tcp_shutdown (struct sock *sk, int how); @@ -1268,7 +1261,7 @@ static __inline__ void tcp_set_state(struct sock *sk, int state) sk->sk_prot->unhash(sk); if (inet_sk(sk)->bind_hash && !(sk->sk_userlocks & SOCK_BINDPORT_LOCK)) - tcp_put_port(sk); + inet_put_port(&tcp_hashinfo, sk); /* fall through */ default: if (oldstate==TCP_ESTABLISHED) diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c index 343a890bd617..33d6cbe32cdc 100644 --- a/net/ipv4/inet_hashtables.c +++ b/net/ipv4/inet_hashtables.c @@ -14,6 +14,7 @@ */ #include +#include #include #include @@ -49,3 +50,42 @@ void inet_bind_bucket_destroy(kmem_cache_t *cachep, struct inet_bind_bucket *tb) kmem_cache_free(cachep, tb); } } + +void inet_bind_hash(struct sock *sk, struct inet_bind_bucket *tb, + const unsigned short snum) +{ + struct inet_sock *inet = inet_sk(sk); + inet->num = snum; + sk_add_bind_node(sk, &tb->owners); + inet->bind_hash = tb; +} + +EXPORT_SYMBOL(inet_bind_hash); + +/* + * Get rid of any references to a local port held by the given sock. 
+ */ +static void __inet_put_port(struct inet_hashinfo *hashinfo, struct sock *sk) +{ + struct inet_sock *inet = inet_sk(sk); + const int bhash = inet_bhashfn(inet->num, hashinfo->bhash_size); + struct inet_bind_hashbucket *head = &hashinfo->bhash[bhash]; + struct inet_bind_bucket *tb; + + spin_lock(&head->lock); + tb = inet->bind_hash; + __sk_del_bind_node(sk); + inet->bind_hash = NULL; + inet->num = 0; + inet_bind_bucket_destroy(hashinfo->bind_bucket_cachep, tb); + spin_unlock(&head->lock); +} + +void inet_put_port(struct inet_hashinfo *hashinfo, struct sock *sk) +{ + local_bh_disable(); + __inet_put_port(hashinfo, sk); + local_bh_enable(); +} + +EXPORT_SYMBOL(inet_put_port); diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index e54a410ca701..38c04c1a754c 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -271,10 +271,6 @@ int sysctl_tcp_fin_timeout = TCP_FIN_TIMEOUT; DEFINE_SNMP_STAT(struct tcp_mib, tcp_statistics); -kmem_cache_t *tcp_bucket_cachep; - -EXPORT_SYMBOL_GPL(tcp_bucket_cachep); - kmem_cache_t *tcp_timewait_cachep; atomic_t tcp_orphan_count = ATOMIC_INIT(0); diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 10a9b3ae3442..40fe4f5fca1c 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -104,37 +104,6 @@ struct inet_hashinfo __cacheline_aligned tcp_hashinfo = { int sysctl_local_port_range[2] = { 1024, 4999 }; int tcp_port_rover = 1024 - 1; -/* Caller must disable local BH processing. 
*/ -static __inline__ void __tcp_inherit_port(struct sock *sk, struct sock *child) -{ - struct inet_bind_hashbucket *head = - &tcp_bhash[inet_bhashfn(inet_sk(child)->num, - tcp_bhash_size)]; - struct inet_bind_bucket *tb; - - spin_lock(&head->lock); - tb = inet_sk(sk)->bind_hash; - sk_add_bind_node(child, &tb->owners); - inet_sk(child)->bind_hash = tb; - spin_unlock(&head->lock); -} - -inline void tcp_inherit_port(struct sock *sk, struct sock *child) -{ - local_bh_disable(); - __tcp_inherit_port(sk, child); - local_bh_enable(); -} - -void tcp_bind_hash(struct sock *sk, struct inet_bind_bucket *tb, - const unsigned short snum) -{ - struct inet_sock *inet = inet_sk(sk); - inet->num = snum; - sk_add_bind_node(sk, &tb->owners); - inet->bind_hash = tb; -} - static inline int tcp_bind_conflict(struct sock *sk, struct inet_bind_bucket *tb) { const u32 sk_rcv_saddr = tcp_v4_rcv_saddr(sk); @@ -248,7 +217,7 @@ tb_not_found: tb->fastreuse = 0; success: if (!inet_sk(sk)->bind_hash) - tcp_bind_hash(sk, tb, snum); + inet_bind_hash(sk, tb, snum); BUG_TRAP(inet_sk(sk)->bind_hash == tb); ret = 0; @@ -259,32 +228,6 @@ fail: return ret; } -/* Get rid of any references to a local port held by the - * given sock. - */ -static void __tcp_put_port(struct sock *sk) -{ - struct inet_sock *inet = inet_sk(sk); - struct inet_bind_hashbucket *head = &tcp_bhash[inet_bhashfn(inet->num, - tcp_bhash_size)]; - struct inet_bind_bucket *tb; - - spin_lock(&head->lock); - tb = inet->bind_hash; - __sk_del_bind_node(sk); - inet->bind_hash = NULL; - inet->num = 0; - inet_bind_bucket_destroy(tcp_bucket_cachep, tb); - spin_unlock(&head->lock); -} - -void tcp_put_port(struct sock *sk) -{ - local_bh_disable(); - __tcp_put_port(sk); - local_bh_enable(); -} - /* This lock without WQ_FLAG_EXCLUSIVE is good on UP and it can be very bad on SMP. * Look, when several writers sleep and reader wakes them up, all but one * immediately hit write lock and grab all the cpus. 
Exclusive sleep solves @@ -678,7 +621,7 @@ ok: hint += i; /* Head lock still held and bh's disabled */ - tcp_bind_hash(sk, tb, port); + inet_bind_hash(sk, tb, port); if (sk_unhashed(sk)) { inet_sk(sk)->sport = htons(port); __tcp_v4_hash(sk, 0); @@ -1537,7 +1480,7 @@ struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb, tcp_initialize_rcv_mss(newsk); __tcp_v4_hash(newsk, 0); - __tcp_inherit_port(sk, newsk); + __inet_inherit_port(&tcp_hashinfo, sk, newsk); return newsk; @@ -1942,7 +1885,7 @@ int tcp_v4_destroy_sock(struct sock *sk) /* Clean up a referenced TCP bind bucket. */ if (inet_sk(sk)->bind_hash) - tcp_put_port(sk); + inet_put_port(&tcp_hashinfo, sk); /* * If sendmsg cached page exists, toss it. @@ -2486,14 +2429,11 @@ void __init tcp_v4_init(struct net_proto_family *ops) } EXPORT_SYMBOL(ipv4_specific); -EXPORT_SYMBOL(tcp_bind_hash); EXPORT_SYMBOL(inet_bind_bucket_create); EXPORT_SYMBOL(tcp_hashinfo); -EXPORT_SYMBOL(tcp_inherit_port); EXPORT_SYMBOL(tcp_listen_wlock); EXPORT_SYMBOL(tcp_port_rover); EXPORT_SYMBOL(tcp_prot); -EXPORT_SYMBOL(tcp_put_port); EXPORT_SYMBOL(tcp_unhash); EXPORT_SYMBOL(tcp_v4_conn_request); EXPORT_SYMBOL(tcp_v4_connect); diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index a8ca7ba06c1c..bfbedb56bce2 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -205,7 +205,7 @@ tb_not_found: success: if (!inet_sk(sk)->bind_hash) - tcp_bind_hash(sk, tb, snum); + inet_bind_hash(sk, tb, snum); BUG_TRAP(inet_sk(sk)->bind_hash == tb); ret = 0; @@ -597,7 +597,7 @@ ok: hint += i; /* Head lock still held and bh's disabled */ - tcp_bind_hash(sk, tb, port); + inet_bind_hash(sk, tb, port); if (sk_unhashed(sk)) { inet_sk(sk)->sport = htons(port); __tcp_v6_hash(sk); @@ -1536,7 +1536,7 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb, newinet->daddr = newinet->saddr = newinet->rcv_saddr = LOOPBACK4_IPV6; __tcp_v6_hash(newsk); - tcp_inherit_port(sk, newsk); + inet_inherit_port(&tcp_hashinfo, sk, 
newsk); return newsk; From 6e04e02165a7209a71db553b7bc48d68421e5ebf Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 9 Aug 2005 20:07:35 -0700 Subject: [PATCH 313/584] [INET]: Move tcp_port_rover to inet_hashinfo Also expose all of the tcp_hashinfo members, i.e. killing those tcp_ehash, etc macros, this will more clearly expose already generic functions and some that need just a bit of work to become generic, as we'll see in the upcoming changesets. Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. Miller --- include/net/inet_hashtables.h | 1 + include/net/sock.h | 2 +- include/net/tcp.h | 26 +++------ net/ipv4/tcp.c | 42 +++++++------- net/ipv4/tcp_diag.c | 8 +-- net/ipv4/tcp_ipv4.c | 101 +++++++++++++++++----------------- net/ipv4/tcp_minisocks.c | 15 +++-- net/ipv6/tcp_ipv6.c | 51 +++++++++-------- 8 files changed, 118 insertions(+), 128 deletions(-) diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h index da9705525f15..da07411b36d2 100644 --- a/include/net/inet_hashtables.h +++ b/include/net/inet_hashtables.h @@ -117,6 +117,7 @@ struct inet_hashinfo { wait_queue_head_t lhash_wait; spinlock_t portalloc_lock; kmem_cache_t *bind_bucket_cachep; + int port_rover; }; static inline int inet_ehashfn(const __u32 laddr, const __u16 lport, diff --git a/include/net/sock.h b/include/net/sock.h index 69d869e41c35..391d00b5b7b4 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -136,7 +136,7 @@ struct sock_common { * @sk_no_check: %SO_NO_CHECK setting, wether or not checkup packets * @sk_route_caps: route capabilities (e.g. %NETIF_F_TSO) * @sk_lingertime: %SO_LINGER l_linger setting - * @sk_hashent: hash entry in several tables (e.g. tcp_ehash) + * @sk_hashent: hash entry in several tables (e.g. 
inet_hashinfo.ehash) * @sk_backlog: always used with the per-socket spinlock held * @sk_callback_lock: used with the callbacks in the end of this struct * @sk_error_queue: rarely used diff --git a/include/net/tcp.h b/include/net/tcp.h index 9eb8ff7c911e..99e47695d4b6 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -41,19 +41,7 @@ #endif #include -extern struct inet_hashinfo tcp_hashinfo; -#define tcp_ehash (tcp_hashinfo.ehash) -#define tcp_bhash (tcp_hashinfo.bhash) -#define tcp_ehash_size (tcp_hashinfo.ehash_size) -#define tcp_bhash_size (tcp_hashinfo.bhash_size) -#define tcp_listening_hash (tcp_hashinfo.listening_hash) -#define tcp_lhash_lock (tcp_hashinfo.lhash_lock) -#define tcp_lhash_users (tcp_hashinfo.lhash_users) -#define tcp_lhash_wait (tcp_hashinfo.lhash_wait) -#define tcp_portalloc_lock (tcp_hashinfo.portalloc_lock) -#define tcp_bucket_cachep (tcp_hashinfo.bind_bucket_cachep) - -extern int tcp_port_rover; +extern struct inet_hashinfo tcp_hashinfo; #if (BITS_PER_LONG == 64) #define TCP_ADDRCMP_ALIGN_BYTES 8 @@ -1463,21 +1451,21 @@ extern void tcp_listen_wlock(void); /* - We may sleep inside this lock. * - If sleeping is not required (or called from BH), - * use plain read_(un)lock(&tcp_lhash_lock). + * use plain read_(un)lock(&inet_hashinfo.lhash_lock). 
*/ static inline void tcp_listen_lock(void) { /* read_lock synchronizes to candidates to writers */ - read_lock(&tcp_lhash_lock); - atomic_inc(&tcp_lhash_users); - read_unlock(&tcp_lhash_lock); + read_lock(&tcp_hashinfo.lhash_lock); + atomic_inc(&tcp_hashinfo.lhash_users); + read_unlock(&tcp_hashinfo.lhash_lock); } static inline void tcp_listen_unlock(void) { - if (atomic_dec_and_test(&tcp_lhash_users)) - wake_up(&tcp_lhash_wait); + if (atomic_dec_and_test(&tcp_hashinfo.lhash_users)) + wake_up(&tcp_hashinfo.lhash_wait); } static inline int keepalive_intvl_when(const struct tcp_sock *tp) diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 38c04c1a754c..2f4b1a374bb7 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -2257,11 +2257,11 @@ void __init tcp_init(void) __skb_cb_too_small_for_tcp(sizeof(struct tcp_skb_cb), sizeof(skb->cb)); - tcp_bucket_cachep = kmem_cache_create("tcp_bind_bucket", - sizeof(struct inet_bind_bucket), - 0, SLAB_HWCACHE_ALIGN, - NULL, NULL); - if (!tcp_bucket_cachep) + tcp_hashinfo.bind_bucket_cachep = + kmem_cache_create("tcp_bind_bucket", + sizeof(struct inet_bind_bucket), 0, + SLAB_HWCACHE_ALIGN, NULL, NULL); + if (!tcp_hashinfo.bind_bucket_cachep) panic("tcp_init: Cannot alloc tcp_bind_bucket cache."); tcp_timewait_cachep = kmem_cache_create("tcp_tw_bucket", @@ -2276,7 +2276,7 @@ void __init tcp_init(void) * * The methodology is similar to that of the buffer cache. 
*/ - tcp_ehash = + tcp_hashinfo.ehash = alloc_large_system_hash("TCP established", sizeof(struct inet_ehash_bucket), thash_entries, @@ -2284,37 +2284,37 @@ void __init tcp_init(void) (25 - PAGE_SHIFT) : (27 - PAGE_SHIFT), HASH_HIGHMEM, - &tcp_ehash_size, + &tcp_hashinfo.ehash_size, NULL, 0); - tcp_ehash_size = (1 << tcp_ehash_size) >> 1; - for (i = 0; i < (tcp_ehash_size << 1); i++) { - rwlock_init(&tcp_ehash[i].lock); - INIT_HLIST_HEAD(&tcp_ehash[i].chain); + tcp_hashinfo.ehash_size = (1 << tcp_hashinfo.ehash_size) >> 1; + for (i = 0; i < (tcp_hashinfo.ehash_size << 1); i++) { + rwlock_init(&tcp_hashinfo.ehash[i].lock); + INIT_HLIST_HEAD(&tcp_hashinfo.ehash[i].chain); } - tcp_bhash = + tcp_hashinfo.bhash = alloc_large_system_hash("TCP bind", sizeof(struct inet_bind_hashbucket), - tcp_ehash_size, + tcp_hashinfo.ehash_size, (num_physpages >= 128 * 1024) ? (25 - PAGE_SHIFT) : (27 - PAGE_SHIFT), HASH_HIGHMEM, - &tcp_bhash_size, + &tcp_hashinfo.bhash_size, NULL, 64 * 1024); - tcp_bhash_size = 1 << tcp_bhash_size; - for (i = 0; i < tcp_bhash_size; i++) { - spin_lock_init(&tcp_bhash[i].lock); - INIT_HLIST_HEAD(&tcp_bhash[i].chain); + tcp_hashinfo.bhash_size = 1 << tcp_hashinfo.bhash_size; + for (i = 0; i < tcp_hashinfo.bhash_size; i++) { + spin_lock_init(&tcp_hashinfo.bhash[i].lock); + INIT_HLIST_HEAD(&tcp_hashinfo.bhash[i].chain); } /* Try to be a bit smarter and adjust defaults depending * on available memory. 
*/ for (order = 0; ((1 << order) << PAGE_SHIFT) < - (tcp_bhash_size * sizeof(struct inet_bind_hashbucket)); + (tcp_hashinfo.bhash_size * sizeof(struct inet_bind_hashbucket)); order++) ; if (order >= 4) { @@ -2329,7 +2329,7 @@ void __init tcp_init(void) sysctl_tcp_max_orphans >>= (3 - order); sysctl_max_syn_backlog = 128; } - tcp_port_rover = sysctl_local_port_range[0] - 1; + tcp_hashinfo.port_rover = sysctl_local_port_range[0] - 1; sysctl_tcp_mem[0] = 768 << order; sysctl_tcp_mem[1] = 1024 << order; @@ -2344,7 +2344,7 @@ void __init tcp_init(void) printk(KERN_INFO "TCP: Hash tables configured " "(established %d bind %d)\n", - tcp_ehash_size << 1, tcp_bhash_size); + tcp_hashinfo.ehash_size << 1, tcp_hashinfo.bhash_size); tcp_register_congestion_control(&tcp_reno); } diff --git a/net/ipv4/tcp_diag.c b/net/ipv4/tcp_diag.c index 5bb6a0f1c77b..0ae738b455f0 100644 --- a/net/ipv4/tcp_diag.c +++ b/net/ipv4/tcp_diag.c @@ -595,7 +595,7 @@ static int tcpdiag_dump(struct sk_buff *skb, struct netlink_callback *cb) struct hlist_node *node; num = 0; - sk_for_each(sk, node, &tcp_listening_hash[i]) { + sk_for_each(sk, node, &tcp_hashinfo.listening_hash[i]) { struct inet_sock *inet = inet_sk(sk); if (num < s_num) { @@ -645,8 +645,8 @@ skip_listen_ht: if (!(r->tcpdiag_states&~(TCPF_LISTEN|TCPF_SYN_RECV))) return skb->len; - for (i = s_i; i < tcp_ehash_size; i++) { - struct inet_ehash_bucket *head = &tcp_ehash[i]; + for (i = s_i; i < tcp_hashinfo.ehash_size; i++) { + struct inet_ehash_bucket *head = &tcp_hashinfo.ehash[i]; struct sock *sk; struct hlist_node *node; @@ -678,7 +678,7 @@ next_normal: if (r->tcpdiag_states&TCPF_TIME_WAIT) { sk_for_each(sk, node, - &tcp_ehash[i + tcp_ehash_size].chain) { + &tcp_hashinfo.ehash[i + tcp_hashinfo.ehash_size].chain) { struct inet_sock *inet = inet_sk(sk); if (num < s_num) diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 40fe4f5fca1c..f5373f9f00ac 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -94,6 +94,7 @@ struct 
inet_hashinfo __cacheline_aligned tcp_hashinfo = { .lhash_users = ATOMIC_INIT(0), .lhash_wait = __WAIT_QUEUE_HEAD_INITIALIZER(tcp_hashinfo.lhash_wait), .portalloc_lock = SPIN_LOCK_UNLOCKED, + .port_rover = 1024 - 1, }; /* @@ -102,7 +103,6 @@ struct inet_hashinfo __cacheline_aligned tcp_hashinfo = { * 32768-61000 */ int sysctl_local_port_range[2] = { 1024, 4999 }; -int tcp_port_rover = 1024 - 1; static inline int tcp_bind_conflict(struct sock *sk, struct inet_bind_bucket *tb) { @@ -146,16 +146,16 @@ static int tcp_v4_get_port(struct sock *sk, unsigned short snum) int remaining = (high - low) + 1; int rover; - spin_lock(&tcp_portalloc_lock); - if (tcp_port_rover < low) + spin_lock(&tcp_hashinfo.portalloc_lock); + if (tcp_hashinfo.port_rover < low) rover = low; else - rover = tcp_port_rover; + rover = tcp_hashinfo.port_rover; do { rover++; if (rover > high) rover = low; - head = &tcp_bhash[inet_bhashfn(rover, tcp_bhash_size)]; + head = &tcp_hashinfo.bhash[inet_bhashfn(rover, tcp_hashinfo.bhash_size)]; spin_lock(&head->lock); inet_bind_bucket_for_each(tb, node, &head->chain) if (tb->port == rover) @@ -164,8 +164,8 @@ static int tcp_v4_get_port(struct sock *sk, unsigned short snum) next: spin_unlock(&head->lock); } while (--remaining > 0); - tcp_port_rover = rover; - spin_unlock(&tcp_portalloc_lock); + tcp_hashinfo.port_rover = rover; + spin_unlock(&tcp_hashinfo.portalloc_lock); /* Exhausted local port range during search? 
It is not * possible for us to be holding one of the bind hash @@ -182,7 +182,7 @@ static int tcp_v4_get_port(struct sock *sk, unsigned short snum) */ snum = rover; } else { - head = &tcp_bhash[inet_bhashfn(snum, tcp_bhash_size)]; + head = &tcp_hashinfo.bhash[inet_bhashfn(snum, tcp_hashinfo.bhash_size)]; spin_lock(&head->lock); inet_bind_bucket_for_each(tb, node, &head->chain) if (tb->port == snum) @@ -205,7 +205,7 @@ tb_found: } tb_not_found: ret = 1; - if (!tb && (tb = inet_bind_bucket_create(tcp_bucket_cachep, head, snum)) == NULL) + if (!tb && (tb = inet_bind_bucket_create(tcp_hashinfo.bind_bucket_cachep, head, snum)) == NULL) goto fail_unlock; if (hlist_empty(&tb->owners)) { if (sk->sk_reuse && sk->sk_state != TCP_LISTEN) @@ -237,22 +237,22 @@ fail: void tcp_listen_wlock(void) { - write_lock(&tcp_lhash_lock); + write_lock(&tcp_hashinfo.lhash_lock); - if (atomic_read(&tcp_lhash_users)) { + if (atomic_read(&tcp_hashinfo.lhash_users)) { DEFINE_WAIT(wait); for (;;) { - prepare_to_wait_exclusive(&tcp_lhash_wait, + prepare_to_wait_exclusive(&tcp_hashinfo.lhash_wait, &wait, TASK_UNINTERRUPTIBLE); - if (!atomic_read(&tcp_lhash_users)) + if (!atomic_read(&tcp_hashinfo.lhash_users)) break; - write_unlock_bh(&tcp_lhash_lock); + write_unlock_bh(&tcp_hashinfo.lhash_lock); schedule(); - write_lock_bh(&tcp_lhash_lock); + write_lock_bh(&tcp_hashinfo.lhash_lock); } - finish_wait(&tcp_lhash_wait, &wait); + finish_wait(&tcp_hashinfo.lhash_wait, &wait); } } @@ -263,20 +263,20 @@ static __inline__ void __tcp_v4_hash(struct sock *sk, const int listen_possible) BUG_TRAP(sk_unhashed(sk)); if (listen_possible && sk->sk_state == TCP_LISTEN) { - list = &tcp_listening_hash[inet_sk_listen_hashfn(sk)]; - lock = &tcp_lhash_lock; + list = &tcp_hashinfo.listening_hash[inet_sk_listen_hashfn(sk)]; + lock = &tcp_hashinfo.lhash_lock; tcp_listen_wlock(); } else { - sk->sk_hashent = inet_sk_ehashfn(sk, tcp_ehash_size); - list = &tcp_ehash[sk->sk_hashent].chain; - lock = 
&tcp_ehash[sk->sk_hashent].lock; + sk->sk_hashent = inet_sk_ehashfn(sk, tcp_hashinfo.ehash_size); + list = &tcp_hashinfo.ehash[sk->sk_hashent].chain; + lock = &tcp_hashinfo.ehash[sk->sk_hashent].lock; write_lock(lock); } __sk_add_node(sk, list); sock_prot_inc_use(sk->sk_prot); write_unlock(lock); if (listen_possible && sk->sk_state == TCP_LISTEN) - wake_up(&tcp_lhash_wait); + wake_up(&tcp_hashinfo.lhash_wait); } static void tcp_v4_hash(struct sock *sk) @@ -298,9 +298,9 @@ void tcp_unhash(struct sock *sk) if (sk->sk_state == TCP_LISTEN) { local_bh_disable(); tcp_listen_wlock(); - lock = &tcp_lhash_lock; + lock = &tcp_hashinfo.lhash_lock; } else { - struct inet_ehash_bucket *head = &tcp_ehash[sk->sk_hashent]; + struct inet_ehash_bucket *head = &tcp_hashinfo.ehash[sk->sk_hashent]; lock = &head->lock; write_lock_bh(&head->lock); } @@ -311,7 +311,7 @@ void tcp_unhash(struct sock *sk) ende: if (sk->sk_state == TCP_LISTEN) - wake_up(&tcp_lhash_wait); + wake_up(&tcp_hashinfo.lhash_wait); } /* Don't inline this cruft. Here are some nice properties to @@ -366,8 +366,8 @@ static inline struct sock *tcp_v4_lookup_listener(const u32 daddr, struct sock *sk = NULL; struct hlist_head *head; - read_lock(&tcp_lhash_lock); - head = &tcp_listening_hash[inet_lhashfn(hnum)]; + read_lock(&tcp_hashinfo.lhash_lock); + head = &tcp_hashinfo.listening_hash[inet_lhashfn(hnum)]; if (!hlist_empty(head)) { struct inet_sock *inet = inet_sk((sk = __sk_head(head))); @@ -382,7 +382,7 @@ static inline struct sock *tcp_v4_lookup_listener(const u32 daddr, sherry_cache: sock_hold(sk); } - read_unlock(&tcp_lhash_lock); + read_unlock(&tcp_hashinfo.lhash_lock); return sk; } @@ -406,8 +406,8 @@ static inline struct sock *__tcp_v4_lookup_established(const u32 saddr, /* Optimize here for direct hit, only listening connections can * have wildcards anyways. 
*/ - const int hash = inet_ehashfn(daddr, hnum, saddr, sport, tcp_ehash_size); - head = &tcp_ehash[hash]; + const int hash = inet_ehashfn(daddr, hnum, saddr, sport, tcp_hashinfo.ehash_size); + head = &tcp_hashinfo.ehash[hash]; read_lock(&head->lock); sk_for_each(sk, node, &head->chain) { if (TCP_IPV4_MATCH(sk, acookie, saddr, daddr, ports, dif)) @@ -415,7 +415,7 @@ static inline struct sock *__tcp_v4_lookup_established(const u32 saddr, } /* Must check for a TIME_WAIT'er before going to listener hash. */ - sk_for_each(sk, node, &(head + tcp_ehash_size)->chain) { + sk_for_each(sk, node, &(head + tcp_hashinfo.ehash_size)->chain) { if (TCP_IPV4_TW_MATCH(sk, acookie, saddr, daddr, ports, dif)) goto hit; } @@ -469,8 +469,8 @@ static int __tcp_v4_check_established(struct sock *sk, __u16 lport, int dif = sk->sk_bound_dev_if; TCP_V4_ADDR_COOKIE(acookie, saddr, daddr) __u32 ports = TCP_COMBINED_PORTS(inet->dport, lport); - const int hash = inet_ehashfn(daddr, lport, saddr, inet->dport, tcp_ehash_size); - struct inet_ehash_bucket *head = &tcp_ehash[hash]; + const int hash = inet_ehashfn(daddr, lport, saddr, inet->dport, tcp_hashinfo.ehash_size); + struct inet_ehash_bucket *head = &tcp_hashinfo.ehash[hash]; struct sock *sk2; struct hlist_node *node; struct tcp_tw_bucket *tw; @@ -478,7 +478,7 @@ static int __tcp_v4_check_established(struct sock *sk, __u16 lport, write_lock(&head->lock); /* Check TIME-WAIT sockets first. 
*/ - sk_for_each(sk2, node, &(head + tcp_ehash_size)->chain) { + sk_for_each(sk2, node, &(head + tcp_hashinfo.ehash_size)->chain) { tw = (struct tcp_tw_bucket *)sk2; if (TCP_IPV4_TW_MATCH(sk2, acookie, saddr, daddr, ports, dif)) { @@ -582,7 +582,7 @@ static inline int tcp_v4_hash_connect(struct sock *sk) local_bh_disable(); for (i = 1; i <= range; i++) { port = low + (i + offset) % range; - head = &tcp_bhash[inet_bhashfn(port, tcp_bhash_size)]; + head = &tcp_hashinfo.bhash[inet_bhashfn(port, tcp_hashinfo.bhash_size)]; spin_lock(&head->lock); /* Does not bother with rcv_saddr checks, @@ -602,7 +602,7 @@ static inline int tcp_v4_hash_connect(struct sock *sk) } } - tb = inet_bind_bucket_create(tcp_bucket_cachep, head, port); + tb = inet_bind_bucket_create(tcp_hashinfo.bind_bucket_cachep, head, port); if (!tb) { spin_unlock(&head->lock); break; @@ -637,7 +637,7 @@ ok: goto out; } - head = &tcp_bhash[inet_bhashfn(snum, tcp_bhash_size)]; + head = &tcp_hashinfo.bhash[inet_bhashfn(snum, tcp_hashinfo.bhash_size)]; tb = inet_sk(sk)->bind_hash; spin_lock_bh(&head->lock); if (sk_head(&tb->owners) == sk && !sk->sk_bind_node.next) { @@ -1926,7 +1926,7 @@ static void *listening_get_next(struct seq_file *seq, void *cur) if (!sk) { st->bucket = 0; - sk = sk_head(&tcp_listening_hash[0]); + sk = sk_head(&tcp_hashinfo.listening_hash[0]); goto get_sk; } @@ -1980,7 +1980,7 @@ start_req: read_unlock_bh(&tp->accept_queue.syn_wait_lock); } if (++st->bucket < INET_LHTABLE_SIZE) { - sk = sk_head(&tcp_listening_hash[st->bucket]); + sk = sk_head(&tcp_hashinfo.listening_hash[st->bucket]); goto get_sk; } cur = NULL; @@ -2004,7 +2004,7 @@ static void *established_get_first(struct seq_file *seq) struct tcp_iter_state* st = seq->private; void *rc = NULL; - for (st->bucket = 0; st->bucket < tcp_ehash_size; ++st->bucket) { + for (st->bucket = 0; st->bucket < tcp_hashinfo.ehash_size; ++st->bucket) { struct sock *sk; struct hlist_node *node; struct tcp_tw_bucket *tw; @@ -2012,8 +2012,8 @@ static void 
*established_get_first(struct seq_file *seq) /* We can reschedule _before_ having picked the target: */ cond_resched_softirq(); - read_lock(&tcp_ehash[st->bucket].lock); - sk_for_each(sk, node, &tcp_ehash[st->bucket].chain) { + read_lock(&tcp_hashinfo.ehash[st->bucket].lock); + sk_for_each(sk, node, &tcp_hashinfo.ehash[st->bucket].chain) { if (sk->sk_family != st->family) { continue; } @@ -2022,14 +2022,14 @@ static void *established_get_first(struct seq_file *seq) } st->state = TCP_SEQ_STATE_TIME_WAIT; tw_for_each(tw, node, - &tcp_ehash[st->bucket + tcp_ehash_size].chain) { + &tcp_hashinfo.ehash[st->bucket + tcp_hashinfo.ehash_size].chain) { if (tw->tw_family != st->family) { continue; } rc = tw; goto out; } - read_unlock(&tcp_ehash[st->bucket].lock); + read_unlock(&tcp_hashinfo.ehash[st->bucket].lock); st->state = TCP_SEQ_STATE_ESTABLISHED; } out: @@ -2056,15 +2056,15 @@ get_tw: cur = tw; goto out; } - read_unlock(&tcp_ehash[st->bucket].lock); + read_unlock(&tcp_hashinfo.ehash[st->bucket].lock); st->state = TCP_SEQ_STATE_ESTABLISHED; /* We can reschedule between buckets: */ cond_resched_softirq(); - if (++st->bucket < tcp_ehash_size) { - read_lock(&tcp_ehash[st->bucket].lock); - sk = sk_head(&tcp_ehash[st->bucket].chain); + if (++st->bucket < tcp_hashinfo.ehash_size) { + read_lock(&tcp_hashinfo.ehash[st->bucket].lock); + sk = sk_head(&tcp_hashinfo.ehash[st->bucket].chain); } else { cur = NULL; goto out; @@ -2078,7 +2078,7 @@ get_tw: } st->state = TCP_SEQ_STATE_TIME_WAIT; - tw = tw_head(&tcp_ehash[st->bucket + tcp_ehash_size].chain); + tw = tw_head(&tcp_hashinfo.ehash[st->bucket + tcp_hashinfo.ehash_size].chain); goto get_tw; found: cur = sk; @@ -2173,7 +2173,7 @@ static void tcp_seq_stop(struct seq_file *seq, void *v) case TCP_SEQ_STATE_TIME_WAIT: case TCP_SEQ_STATE_ESTABLISHED: if (v) - read_unlock(&tcp_ehash[st->bucket].lock); + read_unlock(&tcp_hashinfo.ehash[st->bucket].lock); local_bh_enable(); break; } @@ -2432,7 +2432,6 @@ EXPORT_SYMBOL(ipv4_specific); 
EXPORT_SYMBOL(inet_bind_bucket_create); EXPORT_SYMBOL(tcp_hashinfo); EXPORT_SYMBOL(tcp_listen_wlock); -EXPORT_SYMBOL(tcp_port_rover); EXPORT_SYMBOL(tcp_prot); EXPORT_SYMBOL(tcp_unhash); EXPORT_SYMBOL(tcp_v4_conn_request); diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index 267cea1087e5..f29e2f6ebe1b 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -60,12 +60,11 @@ int tcp_tw_count; /* Must be called with locally disabled BHs. */ static void tcp_timewait_kill(struct tcp_tw_bucket *tw) { - struct inet_ehash_bucket *ehead; struct inet_bind_hashbucket *bhead; struct inet_bind_bucket *tb; - /* Unlink from established hashes. */ - ehead = &tcp_ehash[tw->tw_hashent]; + struct inet_ehash_bucket *ehead = &tcp_hashinfo.ehash[tw->tw_hashent]; + write_lock(&ehead->lock); if (hlist_unhashed(&tw->tw_node)) { write_unlock(&ehead->lock); @@ -76,12 +75,12 @@ static void tcp_timewait_kill(struct tcp_tw_bucket *tw) write_unlock(&ehead->lock); /* Disassociate with bind bucket. */ - bhead = &tcp_bhash[inet_bhashfn(tw->tw_num, tcp_bhash_size)]; + bhead = &tcp_hashinfo.bhash[inet_bhashfn(tw->tw_num, tcp_hashinfo.bhash_size)]; spin_lock(&bhead->lock); tb = tw->tw_tb; __hlist_del(&tw->tw_bind_node); tw->tw_tb = NULL; - inet_bind_bucket_destroy(tcp_bucket_cachep, tb); + inet_bind_bucket_destroy(tcp_hashinfo.bind_bucket_cachep, tb); spin_unlock(&bhead->lock); #ifdef SOCK_REFCNT_DEBUG @@ -297,13 +296,13 @@ kill: static void __tcp_tw_hashdance(struct sock *sk, struct tcp_tw_bucket *tw) { const struct inet_sock *inet = inet_sk(sk); - struct inet_ehash_bucket *ehead = &tcp_ehash[sk->sk_hashent]; + struct inet_ehash_bucket *ehead = &tcp_hashinfo.ehash[sk->sk_hashent]; struct inet_bind_hashbucket *bhead; /* Step 1: Put TW into bind hash. Original socket stays there too. Note, that any socket with inet->num != 0 MUST be bound in binding cache, even if it is closed. 
*/ - bhead = &tcp_bhash[inet_bhashfn(inet->num, tcp_bhash_size)]; + bhead = &tcp_hashinfo.bhash[inet_bhashfn(inet->num, tcp_hashinfo.bhash_size)]; spin_lock(&bhead->lock); tw->tw_tb = inet->bind_hash; BUG_TRAP(inet->bind_hash); @@ -317,7 +316,7 @@ static void __tcp_tw_hashdance(struct sock *sk, struct tcp_tw_bucket *tw) sock_prot_dec_use(sk->sk_prot); /* Step 3: Hash TW into TIMEWAIT half of established hash table. */ - tw_add_node(tw, &(ehead + tcp_ehash_size)->chain); + tw_add_node(tw, &(ehead + tcp_hashinfo.ehash_size)->chain); atomic_inc(&tw->tw_refcnt); write_unlock(&ehead->lock); diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index bfbedb56bce2..362ef5a64062 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -84,7 +84,7 @@ static __inline__ int tcp_v6_hashfn(struct in6_addr *laddr, u16 lport, hashent ^= (laddr->s6_addr32[3] ^ faddr->s6_addr32[3]); hashent ^= hashent>>16; hashent ^= hashent>>8; - return (hashent & (tcp_ehash_size - 1)); + return (hashent & (tcp_hashinfo.ehash_size - 1)); } static __inline__ int tcp_v6_sk_hashfn(struct sock *sk) @@ -138,15 +138,15 @@ static int tcp_v6_get_port(struct sock *sk, unsigned short snum) int remaining = (high - low) + 1; int rover; - spin_lock(&tcp_portalloc_lock); - if (tcp_port_rover < low) + spin_lock(&tcp_hashinfo.portalloc_lock); + if (tcp_hashinfo.port_rover < low) rover = low; else - rover = tcp_port_rover; + rover = tcp_hashinfo.port_rover; do { rover++; if (rover > high) rover = low; - head = &tcp_bhash[inet_bhashfn(rover, tcp_bhash_size)]; + head = &tcp_hashinfo.bhash[inet_bhashfn(rover, tcp_hashinfo.bhash_size)]; spin_lock(&head->lock); inet_bind_bucket_for_each(tb, node, &head->chain) if (tb->port == rover) @@ -155,8 +155,8 @@ static int tcp_v6_get_port(struct sock *sk, unsigned short snum) next: spin_unlock(&head->lock); } while (--remaining > 0); - tcp_port_rover = rover; - spin_unlock(&tcp_portalloc_lock); + tcp_hashinfo.port_rover = rover; + spin_unlock(&tcp_hashinfo.portalloc_lock); 
/* Exhausted local port range during search? It is not * possible for us to be holding one of the bind hash @@ -171,7 +171,7 @@ static int tcp_v6_get_port(struct sock *sk, unsigned short snum) /* OK, here is the one we will use. */ snum = rover; } else { - head = &tcp_bhash[inet_bhashfn(snum, tcp_bhash_size)]; + head = &tcp_hashinfo.bhash[inet_bhashfn(snum, tcp_hashinfo.bhash_size)]; spin_lock(&head->lock); inet_bind_bucket_for_each(tb, node, &head->chain) if (tb->port == snum) @@ -192,8 +192,11 @@ tb_found: } tb_not_found: ret = 1; - if (!tb && (tb = inet_bind_bucket_create(tcp_bucket_cachep, head, snum)) == NULL) - goto fail_unlock; + if (tb == NULL) { + tb = inet_bind_bucket_create(tcp_hashinfo.bind_bucket_cachep, head, snum); + if (tb == NULL) + goto fail_unlock; + } if (hlist_empty(&tb->owners)) { if (sk->sk_reuse && sk->sk_state != TCP_LISTEN) tb->fastreuse = 1; @@ -224,13 +227,13 @@ static __inline__ void __tcp_v6_hash(struct sock *sk) BUG_TRAP(sk_unhashed(sk)); if (sk->sk_state == TCP_LISTEN) { - list = &tcp_listening_hash[inet_sk_listen_hashfn(sk)]; - lock = &tcp_lhash_lock; + list = &tcp_hashinfo.listening_hash[inet_sk_listen_hashfn(sk)]; + lock = &tcp_hashinfo.lhash_lock; tcp_listen_wlock(); } else { sk->sk_hashent = tcp_v6_sk_hashfn(sk); - list = &tcp_ehash[sk->sk_hashent].chain; - lock = &tcp_ehash[sk->sk_hashent].lock; + list = &tcp_hashinfo.ehash[sk->sk_hashent].chain; + lock = &tcp_hashinfo.ehash[sk->sk_hashent].lock; write_lock(lock); } @@ -263,8 +266,8 @@ static struct sock *tcp_v6_lookup_listener(struct in6_addr *daddr, unsigned shor int score, hiscore; hiscore=0; - read_lock(&tcp_lhash_lock); - sk_for_each(sk, node, &tcp_listening_hash[inet_lhashfn(hnum)]) { + read_lock(&tcp_hashinfo.lhash_lock); + sk_for_each(sk, node, &tcp_hashinfo.listening_hash[inet_lhashfn(hnum)]) { if (inet_sk(sk)->num == hnum && sk->sk_family == PF_INET6) { struct ipv6_pinfo *np = inet6_sk(sk); @@ -291,7 +294,7 @@ static struct sock *tcp_v6_lookup_listener(struct in6_addr 
*daddr, unsigned shor } if (result) sock_hold(result); - read_unlock(&tcp_lhash_lock); + read_unlock(&tcp_hashinfo.lhash_lock); return result; } @@ -315,7 +318,7 @@ static inline struct sock *__tcp_v6_lookup_established(struct in6_addr *saddr, u * have wildcards anyways. */ hash = tcp_v6_hashfn(daddr, hnum, saddr, sport); - head = &tcp_ehash[hash]; + head = &tcp_hashinfo.ehash[hash]; read_lock(&head->lock); sk_for_each(sk, node, &head->chain) { /* For IPV6 do the cheaper port and family tests first. */ @@ -323,7 +326,7 @@ static inline struct sock *__tcp_v6_lookup_established(struct in6_addr *saddr, u goto hit; /* You sunk my battleship! */ } /* Must check for a TIME_WAIT'er before going to listener hash. */ - sk_for_each(sk, node, &(head + tcp_ehash_size)->chain) { + sk_for_each(sk, node, &(head + tcp_hashinfo.ehash_size)->chain) { /* FIXME: acme: check this... */ struct tcp_tw_bucket *tw = (struct tcp_tw_bucket *)sk; @@ -461,7 +464,7 @@ static int __tcp_v6_check_established(struct sock *sk, __u16 lport, int dif = sk->sk_bound_dev_if; u32 ports = TCP_COMBINED_PORTS(inet->dport, lport); int hash = tcp_v6_hashfn(daddr, inet->num, saddr, inet->dport); - struct inet_ehash_bucket *head = &tcp_ehash[hash]; + struct inet_ehash_bucket *head = &tcp_hashinfo.ehash[hash]; struct sock *sk2; struct hlist_node *node; struct tcp_tw_bucket *tw; @@ -469,7 +472,7 @@ static int __tcp_v6_check_established(struct sock *sk, __u16 lport, write_lock(&head->lock); /* Check TIME-WAIT sockets first. 
*/ - sk_for_each(sk2, node, &(head + tcp_ehash_size)->chain) { + sk_for_each(sk2, node, &(head + tcp_hashinfo.ehash_size)->chain) { tw = (struct tcp_tw_bucket*)sk2; if(*((__u32 *)&(tw->tw_dport)) == ports && @@ -558,7 +561,7 @@ static int tcp_v6_hash_connect(struct sock *sk) local_bh_disable(); for (i = 1; i <= range; i++) { port = low + (i + offset) % range; - head = &tcp_bhash[inet_bhashfn(port, tcp_bhash_size)]; + head = &tcp_hashinfo.bhash[inet_bhashfn(port, tcp_hashinfo.bhash_size)]; spin_lock(&head->lock); /* Does not bother with rcv_saddr checks, @@ -578,7 +581,7 @@ static int tcp_v6_hash_connect(struct sock *sk) } } - tb = inet_bind_bucket_create(tcp_bucket_cachep, head, port); + tb = inet_bind_bucket_create(tcp_hashinfo.bind_bucket_cachep, head, port); if (!tb) { spin_unlock(&head->lock); break; @@ -613,7 +616,7 @@ ok: goto out; } - head = &tcp_bhash[inet_bhashfn(snum, tcp_bhash_size)]; + head = &tcp_hashinfo.bhash[inet_bhashfn(snum, tcp_hashinfo.bhash_size)]; tb = inet_sk(sk)->bind_hash; spin_lock_bh(&head->lock); From f3f05f7046e7c85b04af390d95a82a27160dd5d0 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 9 Aug 2005 20:08:09 -0700 Subject: [PATCH 314/584] [INET]: Generalise the tcp_listen_ lock routines Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. 
Miller --- include/net/inet_hashtables.h | 48 ++++++++++++++++++++++++ include/net/tcp.h | 21 ----------- net/ipv4/inet_hashtables.c | 32 ++++++++++++++++ net/ipv4/tcp_diag.c | 8 ++-- net/ipv4/tcp_ipv4.c | 70 +++++------------------------------ net/ipv6/tcp_ipv6.c | 2 +- 6 files changed, 94 insertions(+), 87 deletions(-) diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h index da07411b36d2..f5d65121f7b7 100644 --- a/include/net/inet_hashtables.h +++ b/include/net/inet_hashtables.h @@ -19,10 +19,14 @@ #include #include #include +#include /* only for TCP_LISTEN, damn :-( */ #include +#include #include +#include + /* This is for all connections with a full identity, no wildcards. * New scheme, half the table is for TIME_WAIT, the other half is * for the rest. I'll experiment with dynamic table growth later. @@ -192,4 +196,48 @@ static inline void inet_inherit_port(struct inet_hashinfo *table, extern void inet_put_port(struct inet_hashinfo *table, struct sock *sk); +extern void inet_listen_wlock(struct inet_hashinfo *hashinfo); + +/* + * - We may sleep inside this lock. + * - If sleeping is not required (or called from BH), + * use plain read_(un)lock(&inet_hashinfo.lhash_lock). 
+ */ +static inline void inet_listen_lock(struct inet_hashinfo *hashinfo) +{ + /* read_lock synchronizes to candidates to writers */ + read_lock(&hashinfo->lhash_lock); + atomic_inc(&hashinfo->lhash_users); + read_unlock(&hashinfo->lhash_lock); +} + +static inline void inet_listen_unlock(struct inet_hashinfo *hashinfo) +{ + if (atomic_dec_and_test(&hashinfo->lhash_users)) + wake_up(&hashinfo->lhash_wait); +} + +static inline void __inet_hash(struct inet_hashinfo *hashinfo, + struct sock *sk, const int listen_possible) +{ + struct hlist_head *list; + rwlock_t *lock; + + BUG_TRAP(sk_unhashed(sk)); + if (listen_possible && sk->sk_state == TCP_LISTEN) { + list = &hashinfo->listening_hash[inet_sk_listen_hashfn(sk)]; + lock = &hashinfo->lhash_lock; + inet_listen_wlock(hashinfo); + } else { + sk->sk_hashent = inet_sk_ehashfn(sk, hashinfo->ehash_size); + list = &hashinfo->ehash[sk->sk_hashent].chain; + lock = &hashinfo->ehash[sk->sk_hashent].lock; + write_lock(lock); + } + __sk_add_node(sk, list); + sock_prot_inc_use(sk->sk_prot); + write_unlock(lock); + if (listen_possible && sk->sk_state == TCP_LISTEN) + wake_up(&hashinfo->lhash_wait); +} #endif /* _INET_HASHTABLES_H */ diff --git a/include/net/tcp.h b/include/net/tcp.h index 99e47695d4b6..bc110cc7022b 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1447,27 +1447,6 @@ static __inline__ void tcp_openreq_init(struct request_sock *req, extern void tcp_enter_memory_pressure(void); -extern void tcp_listen_wlock(void); - -/* - We may sleep inside this lock. - * - If sleeping is not required (or called from BH), - * use plain read_(un)lock(&inet_hashinfo.lhash_lock). 
- */ - -static inline void tcp_listen_lock(void) -{ - /* read_lock synchronizes to candidates to writers */ - read_lock(&tcp_hashinfo.lhash_lock); - atomic_inc(&tcp_hashinfo.lhash_users); - read_unlock(&tcp_hashinfo.lhash_lock); -} - -static inline void tcp_listen_unlock(void) -{ - if (atomic_dec_and_test(&tcp_hashinfo.lhash_users)) - wake_up(&tcp_hashinfo.lhash_wait); -} - static inline int keepalive_intvl_when(const struct tcp_sock *tp) { return tp->keepalive_intvl ? : sysctl_tcp_keepalive_intvl; diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c index 33d6cbe32cdc..06cbc6f689c5 100644 --- a/net/ipv4/inet_hashtables.c +++ b/net/ipv4/inet_hashtables.c @@ -15,7 +15,9 @@ #include #include +#include #include +#include #include @@ -89,3 +91,33 @@ void inet_put_port(struct inet_hashinfo *hashinfo, struct sock *sk) } EXPORT_SYMBOL(inet_put_port); + +/* + * This lock without WQ_FLAG_EXCLUSIVE is good on UP and it can be very bad on SMP. + * Look, when several writers sleep and reader wakes them up, all but one + * immediately hit write lock and grab all the cpus. Exclusive sleep solves + * this, _but_ remember, it adds useless work on UP machines (wake up each + * exclusive lock release). It should be ifdefed really. 
+ */ +void inet_listen_wlock(struct inet_hashinfo *hashinfo) +{ + write_lock(&hashinfo->lhash_lock); + + if (atomic_read(&hashinfo->lhash_users)) { + DEFINE_WAIT(wait); + + for (;;) { + prepare_to_wait_exclusive(&hashinfo->lhash_wait, + &wait, TASK_UNINTERRUPTIBLE); + if (!atomic_read(&hashinfo->lhash_users)) + break; + write_unlock_bh(&hashinfo->lhash_lock); + schedule(); + write_lock_bh(&hashinfo->lhash_lock); + } + + finish_wait(&hashinfo->lhash_wait, &wait); + } +} + +EXPORT_SYMBOL(inet_listen_wlock); diff --git a/net/ipv4/tcp_diag.c b/net/ipv4/tcp_diag.c index 0ae738b455f0..1a89a03c449b 100644 --- a/net/ipv4/tcp_diag.c +++ b/net/ipv4/tcp_diag.c @@ -589,7 +589,7 @@ static int tcpdiag_dump(struct sk_buff *skb, struct netlink_callback *cb) if (cb->args[0] == 0) { if (!(r->tcpdiag_states&(TCPF_LISTEN|TCPF_SYN_RECV))) goto skip_listen_ht; - tcp_listen_lock(); + inet_listen_lock(&tcp_hashinfo); for (i = s_i; i < INET_LHTABLE_SIZE; i++) { struct sock *sk; struct hlist_node *node; @@ -613,7 +613,7 @@ static int tcpdiag_dump(struct sk_buff *skb, struct netlink_callback *cb) goto syn_recv; if (tcpdiag_dump_sock(skb, sk, cb) < 0) { - tcp_listen_unlock(); + inet_listen_unlock(&tcp_hashinfo); goto done; } @@ -622,7 +622,7 @@ syn_recv: goto next_listen; if (tcpdiag_dump_reqs(skb, sk, cb) < 0) { - tcp_listen_unlock(); + inet_listen_unlock(&tcp_hashinfo); goto done; } @@ -636,7 +636,7 @@ next_listen: cb->args[3] = 0; cb->args[4] = 0; } - tcp_listen_unlock(); + inet_listen_unlock(&tcp_hashinfo); skip_listen_ht: cb->args[0] = 1; s_i = num = s_num = 0; diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index f5373f9f00ac..5f9ad95304ca 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -228,62 +228,11 @@ fail: return ret; } -/* This lock without WQ_FLAG_EXCLUSIVE is good on UP and it can be very bad on SMP. - * Look, when several writers sleep and reader wakes them up, all but one - * immediately hit write lock and grab all the cpus. 
Exclusive sleep solves - * this, _but_ remember, it adds useless work on UP machines (wake up each - * exclusive lock release). It should be ifdefed really. - */ - -void tcp_listen_wlock(void) -{ - write_lock(&tcp_hashinfo.lhash_lock); - - if (atomic_read(&tcp_hashinfo.lhash_users)) { - DEFINE_WAIT(wait); - - for (;;) { - prepare_to_wait_exclusive(&tcp_hashinfo.lhash_wait, - &wait, TASK_UNINTERRUPTIBLE); - if (!atomic_read(&tcp_hashinfo.lhash_users)) - break; - write_unlock_bh(&tcp_hashinfo.lhash_lock); - schedule(); - write_lock_bh(&tcp_hashinfo.lhash_lock); - } - - finish_wait(&tcp_hashinfo.lhash_wait, &wait); - } -} - -static __inline__ void __tcp_v4_hash(struct sock *sk, const int listen_possible) -{ - struct hlist_head *list; - rwlock_t *lock; - - BUG_TRAP(sk_unhashed(sk)); - if (listen_possible && sk->sk_state == TCP_LISTEN) { - list = &tcp_hashinfo.listening_hash[inet_sk_listen_hashfn(sk)]; - lock = &tcp_hashinfo.lhash_lock; - tcp_listen_wlock(); - } else { - sk->sk_hashent = inet_sk_ehashfn(sk, tcp_hashinfo.ehash_size); - list = &tcp_hashinfo.ehash[sk->sk_hashent].chain; - lock = &tcp_hashinfo.ehash[sk->sk_hashent].lock; - write_lock(lock); - } - __sk_add_node(sk, list); - sock_prot_inc_use(sk->sk_prot); - write_unlock(lock); - if (listen_possible && sk->sk_state == TCP_LISTEN) - wake_up(&tcp_hashinfo.lhash_wait); -} - static void tcp_v4_hash(struct sock *sk) { if (sk->sk_state != TCP_CLOSE) { local_bh_disable(); - __tcp_v4_hash(sk, 1); + __inet_hash(&tcp_hashinfo, sk, 1); local_bh_enable(); } } @@ -297,7 +246,7 @@ void tcp_unhash(struct sock *sk) if (sk->sk_state == TCP_LISTEN) { local_bh_disable(); - tcp_listen_wlock(); + inet_listen_wlock(&tcp_hashinfo); lock = &tcp_hashinfo.lhash_lock; } else { struct inet_ehash_bucket *head = &tcp_hashinfo.ehash[sk->sk_hashent]; @@ -624,7 +573,7 @@ ok: inet_bind_hash(sk, tb, port); if (sk_unhashed(sk)) { inet_sk(sk)->sport = htons(port); - __tcp_v4_hash(sk, 0); + __inet_hash(&tcp_hashinfo, sk, 0); } 
spin_unlock(&head->lock); @@ -641,7 +590,7 @@ ok: tb = inet_sk(sk)->bind_hash; spin_lock_bh(&head->lock); if (sk_head(&tb->owners) == sk && !sk->sk_bind_node.next) { - __tcp_v4_hash(sk, 0); + __inet_hash(&tcp_hashinfo, sk, 0); spin_unlock_bh(&head->lock); return 0; } else { @@ -1479,7 +1428,7 @@ struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb, newtp->advmss = dst_metric(dst, RTAX_ADVMSS); tcp_initialize_rcv_mss(newsk); - __tcp_v4_hash(newsk, 0); + __inet_hash(&tcp_hashinfo, newsk, 0); __inet_inherit_port(&tcp_hashinfo, sk, newsk); return newsk; @@ -2102,12 +2051,12 @@ static void *tcp_get_idx(struct seq_file *seq, loff_t pos) void *rc; struct tcp_iter_state* st = seq->private; - tcp_listen_lock(); + inet_listen_lock(&tcp_hashinfo); st->state = TCP_SEQ_STATE_LISTENING; rc = listening_get_idx(seq, &pos); if (!rc) { - tcp_listen_unlock(); + inet_listen_unlock(&tcp_hashinfo); local_bh_disable(); st->state = TCP_SEQ_STATE_ESTABLISHED; rc = established_get_idx(seq, pos); @@ -2140,7 +2089,7 @@ static void *tcp_seq_next(struct seq_file *seq, void *v, loff_t *pos) case TCP_SEQ_STATE_LISTENING: rc = listening_get_next(seq, v); if (!rc) { - tcp_listen_unlock(); + inet_listen_unlock(&tcp_hashinfo); local_bh_disable(); st->state = TCP_SEQ_STATE_ESTABLISHED; rc = established_get_first(seq); @@ -2168,7 +2117,7 @@ static void tcp_seq_stop(struct seq_file *seq, void *v) } case TCP_SEQ_STATE_LISTENING: if (v != SEQ_START_TOKEN) - tcp_listen_unlock(); + inet_listen_unlock(&tcp_hashinfo); break; case TCP_SEQ_STATE_TIME_WAIT: case TCP_SEQ_STATE_ESTABLISHED: @@ -2431,7 +2380,6 @@ void __init tcp_v4_init(struct net_proto_family *ops) EXPORT_SYMBOL(ipv4_specific); EXPORT_SYMBOL(inet_bind_bucket_create); EXPORT_SYMBOL(tcp_hashinfo); -EXPORT_SYMBOL(tcp_listen_wlock); EXPORT_SYMBOL(tcp_prot); EXPORT_SYMBOL(tcp_unhash); EXPORT_SYMBOL(tcp_v4_conn_request); diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 362ef5a64062..93a66b9a76e1 100644 --- 
a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -229,7 +229,7 @@ static __inline__ void __tcp_v6_hash(struct sock *sk) if (sk->sk_state == TCP_LISTEN) { list = &tcp_hashinfo.listening_hash[inet_sk_listen_hashfn(sk)]; lock = &tcp_hashinfo.lhash_lock; - tcp_listen_wlock(); + inet_listen_wlock(&tcp_hashinfo); } else { sk->sk_hashent = tcp_v6_sk_hashfn(sk); list = &tcp_hashinfo.ehash[sk->sk_hashent].chain; From c752f0739f09b803aed191c4765a3b6650a08653 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 9 Aug 2005 20:08:28 -0700 Subject: [PATCH 315/584] [TCP]: Move the tcp sock states to net/tcp_states.h Lots of places just need the states, not even linux/tcp.h, where this enum was, needs it. This speeds up development of the refactorings as fewer sources are rebuilt when things get moved from net/tcp.h. Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. Miller --- fs/smbfs/sock.c | 2 +- include/linux/tcp.h | 18 ------------------ include/net/dn.h | 1 + include/net/inet_hashtables.h | 2 +- include/net/ip6_route.h | 1 - include/net/ip_vs.h | 1 - include/net/sctp/constants.h | 2 +- include/net/tcp.h | 2 ++ include/net/tcp_states.h | 34 ++++++++++++++++++++++++++++++++++ net/appletalk/ddp.c | 2 +- net/ax25/af_ax25.c | 2 +- net/ax25/ax25_ds_in.c | 2 +- net/ax25/ax25_ds_timer.c | 2 +- net/ax25/ax25_in.c | 2 +- net/ax25/ax25_std_in.c | 2 +- net/ax25/ax25_std_timer.c | 2 +- net/ax25/ax25_subr.c | 2 +- net/core/datagram.c | 6 +++--- net/decnet/af_decnet.c | 2 +- net/decnet/dn_nsp_in.c | 2 +- net/ipv4/datagram.c | 2 +- net/ipv4/ipvs/ip_vs_app.c | 1 + net/ipv4/protocol.c | 1 - net/ipv4/raw.c | 2 +- net/ipv4/udp.c | 3 ++- net/ipv6/datagram.c | 1 + net/ipv6/raw.c | 1 + net/ipv6/udp.c | 1 + net/ipx/af_ipx.c | 2 +- net/ipx/ipx_proc.c | 2 +- net/irda/af_irda.c | 2 +- net/llc/af_llc.c | 2 +- net/llc/llc_conn.c | 2 +- net/llc/llc_if.c | 2 +- net/llc/llc_sap.c | 2 +- net/netrom/af_netrom.c | 2 +- net/netrom/nr_in.c | 2 +- net/netrom/nr_subr.c | 2 +-
net/netrom/nr_timer.c | 2 +- net/rose/af_rose.c | 2 +- net/rose/rose_in.c | 2 +- net/rose/rose_route.c | 2 +- net/rose/rose_subr.c | 2 +- net/rose/rose_timer.c | 2 +- net/sctp/ipv6.c | 2 +- net/sunrpc/svcsock.c | 3 +-- net/unix/af_unix.c | 2 +- net/unix/garbage.c | 2 +- net/wanrouter/af_wanpipe.c | 2 +- net/x25/af_x25.c | 2 +- net/x25/x25_in.c | 2 +- net/x25/x25_subr.c | 2 +- net/x25/x25_timer.c | 2 +- 53 files changed, 86 insertions(+), 66 deletions(-) create mode 100644 include/net/tcp_states.h diff --git a/fs/smbfs/sock.c b/fs/smbfs/sock.c index 93f3cd22a2e9..6815b1b12b68 100644 --- a/fs/smbfs/sock.c +++ b/fs/smbfs/sock.c @@ -15,12 +15,12 @@ #include #include #include -#include #include #include #include #include #include +#include #include #include diff --git a/include/linux/tcp.h b/include/linux/tcp.h index e70ab19652db..b88fe05fdcbf 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -55,24 +55,6 @@ struct tcphdr { __u16 urg_ptr; }; - -enum { - TCP_ESTABLISHED = 1, - TCP_SYN_SENT, - TCP_SYN_RECV, - TCP_FIN_WAIT1, - TCP_FIN_WAIT2, - TCP_TIME_WAIT, - TCP_CLOSE, - TCP_CLOSE_WAIT, - TCP_LAST_ACK, - TCP_LISTEN, - TCP_CLOSING, /* now a valid state */ - - TCP_MAX_STATES /* Leave at the end! 
*/ -}; - -#define TCP_STATE_MASK 0xF #define TCP_ACTION_FIN (1 << 7) enum { diff --git a/include/net/dn.h b/include/net/dn.h index 5551c46db397..c1dbbd222793 100644 --- a/include/net/dn.h +++ b/include/net/dn.h @@ -3,6 +3,7 @@ #include #include +#include #include typedef unsigned short dn_address; diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h index f5d65121f7b7..c816708fa556 100644 --- a/include/net/inet_hashtables.h +++ b/include/net/inet_hashtables.h @@ -19,11 +19,11 @@ #include #include #include -#include /* only for TCP_LISTEN, damn :-( */ #include #include #include +#include #include diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h index f920706d526b..1f2e428ca364 100644 --- a/include/net/ip6_route.h +++ b/include/net/ip6_route.h @@ -12,7 +12,6 @@ #include #include #include -#include #include #include diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index 52da5d26617a..7a3c43711a17 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h @@ -255,7 +255,6 @@ struct ip_vs_daemon_user { #include /* for struct atomic_t */ #include /* for struct neighbour */ #include /* for struct dst_entry */ -#include #include #include diff --git a/include/net/sctp/constants.h b/include/net/sctp/constants.h index 5999e5684bbf..c51541ee0247 100644 --- a/include/net/sctp/constants.h +++ b/include/net/sctp/constants.h @@ -47,10 +47,10 @@ #ifndef __sctp_constants_h__ #define __sctp_constants_h__ -#include /* For TCP states used in sctp_sock_state_t */ #include #include /* For ipv6hdr. */ #include +#include /* For TCP states used in sctp_sock_state_t */ /* Value used for stream negotiation. 
*/ enum { SCTP_MAX_STREAM = 0xffff }; diff --git a/include/net/tcp.h b/include/net/tcp.h index bc110cc7022b..9d026d81d8c8 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -36,6 +36,8 @@ #include #include #include +#include + #if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE) #include #endif diff --git a/include/net/tcp_states.h b/include/net/tcp_states.h new file mode 100644 index 000000000000..b9d4176b2d15 --- /dev/null +++ b/include/net/tcp_states.h @@ -0,0 +1,34 @@ +/* + * INET An implementation of the TCP/IP protocol suite for the LINUX + * operating system. INET is implemented using the BSD Socket + * interface as the means of communication with the user level. + * + * Definitions for the TCP protocol sk_state field. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ +#ifndef _LINUX_TCP_STATES_H +#define _LINUX_TCP_STATES_H + +enum { + TCP_ESTABLISHED = 1, + TCP_SYN_SENT, + TCP_SYN_RECV, + TCP_FIN_WAIT1, + TCP_FIN_WAIT2, + TCP_TIME_WAIT, + TCP_CLOSE, + TCP_CLOSE_WAIT, + TCP_LAST_ACK, + TCP_LISTEN, + TCP_CLOSING, /* Now a valid state */ + + TCP_MAX_STATES /* Leave at the end! 
*/ +}; + +#define TCP_STATE_MASK 0xF + +#endif /* _LINUX_TCP_STATES_H */ diff --git a/net/appletalk/ddp.c b/net/appletalk/ddp.c index ffde33cd09ba..1d31b3a3f1e5 100644 --- a/net/appletalk/ddp.c +++ b/net/appletalk/ddp.c @@ -53,12 +53,12 @@ #include #include -#include #include #include /* For TIOCOUTQ/INQ */ #include #include #include +#include #include #include diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c index a5c94f11547c..ea43dfb774e2 100644 --- a/net/ax25/af_ax25.c +++ b/net/ax25/af_ax25.c @@ -45,7 +45,7 @@ #include #include #include -#include +#include #include #include diff --git a/net/ax25/ax25_ds_in.c b/net/ax25/ax25_ds_in.c index 8adc0022cf58..5d0f8fb9d7aa 100644 --- a/net/ax25/ax25_ds_in.c +++ b/net/ax25/ax25_ds_in.c @@ -23,7 +23,7 @@ #include #include #include /* For ip_rcv */ -#include +#include #include #include #include diff --git a/net/ax25/ax25_ds_timer.c b/net/ax25/ax25_ds_timer.c index 3a8b67316fc3..061083efc1dc 100644 --- a/net/ax25/ax25_ds_timer.c +++ b/net/ax25/ax25_ds_timer.c @@ -18,7 +18,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/net/ax25/ax25_in.c b/net/ax25/ax25_in.c index 124eec8216d7..0357705d575d 100644 --- a/net/ax25/ax25_in.c +++ b/net/ax25/ax25_in.c @@ -27,7 +27,7 @@ #include #include #include /* For ip_rcv */ -#include +#include #include /* For arp_rcv */ #include #include diff --git a/net/ax25/ax25_std_in.c b/net/ax25/ax25_std_in.c index 7131873322c4..83a33387c061 100644 --- a/net/ax25/ax25_std_in.c +++ b/net/ax25/ax25_std_in.c @@ -30,7 +30,7 @@ #include #include #include /* For ip_rcv */ -#include +#include #include #include #include diff --git a/net/ax25/ax25_std_timer.c b/net/ax25/ax25_std_timer.c index 066897bc0749..a29c480a4dc1 100644 --- a/net/ax25/ax25_std_timer.c +++ b/net/ax25/ax25_std_timer.c @@ -24,7 +24,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/net/ax25/ax25_subr.c b/net/ax25/ax25_subr.c index 
eb7343c10a9f..c41dbe5fadee 100644 --- a/net/ax25/ax25_subr.c +++ b/net/ax25/ax25_subr.c @@ -24,7 +24,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/net/core/datagram.c b/net/core/datagram.c index fcee054b6f75..da9bf71421a7 100644 --- a/net/core/datagram.c +++ b/net/core/datagram.c @@ -43,7 +43,6 @@ #include #include #include -#include #include #include #include @@ -51,9 +50,10 @@ #include #include -#include -#include +#include +#include +#include /* * Is a socket 'connection oriented' ? diff --git a/net/decnet/af_decnet.c b/net/decnet/af_decnet.c index bd49dd97a09c..621680f127af 100644 --- a/net/decnet/af_decnet.c +++ b/net/decnet/af_decnet.c @@ -118,7 +118,7 @@ Version 0.0.6 2.1.110 07-aug-98 Eduardo Marcelo Serrat #include #include #include -#include +#include #include #include #include diff --git a/net/decnet/dn_nsp_in.c b/net/decnet/dn_nsp_in.c index 202dbde9850d..369f25b60f3f 100644 --- a/net/decnet/dn_nsp_in.c +++ b/net/decnet/dn_nsp_in.c @@ -60,7 +60,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/net/ipv4/datagram.c b/net/ipv4/datagram.c index b1db561f2542..3fd49f4282ac 100644 --- a/net/ipv4/datagram.c +++ b/net/ipv4/datagram.c @@ -17,8 +17,8 @@ #include #include #include -#include #include +#include int ip4_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) { diff --git a/net/ipv4/ipvs/ip_vs_app.c b/net/ipv4/ipvs/ip_vs_app.c index d9212addd193..6e092dadb388 100644 --- a/net/ipv4/ipvs/ip_vs_app.c +++ b/net/ipv4/ipvs/ip_vs_app.c @@ -26,6 +26,7 @@ #include #include #include +#include #include #include #include diff --git a/net/ipv4/protocol.c b/net/ipv4/protocol.c index 0db405a869f2..291831e792af 100644 --- a/net/ipv4/protocol.c +++ b/net/ipv4/protocol.c @@ -40,7 +40,6 @@ #include #include #include -#include #include #include #include diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index e222c5c26b32..304bb0a1d4f0 100644 --- a/net/ipv4/raw.c +++ 
b/net/ipv4/raw.c @@ -59,7 +59,6 @@ #include #include #include -#include #include #include #include @@ -71,6 +70,7 @@ #include #include #include +#include #include #include #include diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index dc4d07357e3a..a8135e1f528c 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -95,7 +95,8 @@ #include #include #include -#include +#include +#include #include #include #include diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c index 5229365cd8b4..761984f3bd9c 100644 --- a/net/ipv6/datagram.c +++ b/net/ipv6/datagram.c @@ -29,6 +29,7 @@ #include #include #include +#include #include #include diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index a082646e6f16..766e1c7179a2 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -49,6 +49,7 @@ #include #include #include +#include #include #include diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index eff050ac7049..2ffe34cc2ef8 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -51,6 +51,7 @@ #include #include #include +#include #include #include diff --git a/net/ipx/af_ipx.c b/net/ipx/af_ipx.c index 3a13c5d1d4d2..39d5939ccd6c 100644 --- a/net/ipx/af_ipx.c +++ b/net/ipx/af_ipx.c @@ -44,7 +44,6 @@ #include #include #include -#include #include #include @@ -52,6 +51,7 @@ #include #include #include +#include #include diff --git a/net/ipx/ipx_proc.c b/net/ipx/ipx_proc.c index b6761913445a..1f73d9ea434d 100644 --- a/net/ipx/ipx_proc.c +++ b/net/ipx/ipx_proc.c @@ -10,7 +10,7 @@ #include #include #include -#include +#include #include static __inline__ struct ipx_interface *ipx_get_interface_idx(loff_t pos) diff --git a/net/irda/af_irda.c b/net/irda/af_irda.c index 92c6e8d4e731..6f92f9c62990 100644 --- a/net/irda/af_irda.c +++ b/net/irda/af_irda.c @@ -56,7 +56,7 @@ #include #include -#include +#include #include diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c index f49b82da8264..66f55e514b56 100644 --- a/net/llc/af_llc.c +++ b/net/llc/af_llc.c @@ -23,13 +23,13 @@ #include #include #include -#include 
#include #include #include #include #include #include +#include /* remember: uninitialized global data is zeroed because its in .bss */ static u16 llc_ui_sap_last_autoport = LLC_SAP_DYN_START; diff --git a/net/llc/llc_conn.c b/net/llc/llc_conn.c index 571548619469..4c644bc70eae 100644 --- a/net/llc/llc_conn.c +++ b/net/llc/llc_conn.c @@ -16,7 +16,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/net/llc/llc_if.c b/net/llc/llc_if.c index 0f9fc48aeaf9..0f84f66018e4 100644 --- a/net/llc/llc_if.c +++ b/net/llc/llc_if.c @@ -15,7 +15,6 @@ #include #include #include -#include #include #include #include @@ -25,6 +24,7 @@ #include #include #include +#include u8 llc_mac_null_var[IFHWADDRLEN]; diff --git a/net/llc/llc_sap.c b/net/llc/llc_sap.c index 965c94eb4bbc..34228ef14985 100644 --- a/net/llc/llc_sap.c +++ b/net/llc/llc_sap.c @@ -21,7 +21,7 @@ #include #include #include -#include +#include #include /** diff --git a/net/netrom/af_netrom.c b/net/netrom/af_netrom.c index 162a85fed150..9aa8b14a81ab 100644 --- a/net/netrom/af_netrom.c +++ b/net/netrom/af_netrom.c @@ -39,7 +39,7 @@ #include #include #include -#include +#include #include #include diff --git a/net/netrom/nr_in.c b/net/netrom/nr_in.c index 9c44b3794126..2fcba9e24b2e 100644 --- a/net/netrom/nr_in.c +++ b/net/netrom/nr_in.c @@ -22,7 +22,7 @@ #include #include #include -#include +#include #include /* For ip_rcv */ #include #include diff --git a/net/netrom/nr_subr.c b/net/netrom/nr_subr.c index 252c1b3ecd78..587bed2674bf 100644 --- a/net/netrom/nr_subr.c +++ b/net/netrom/nr_subr.c @@ -21,7 +21,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/net/netrom/nr_timer.c b/net/netrom/nr_timer.c index faabda8088be..75b72d389ba9 100644 --- a/net/netrom/nr_timer.c +++ b/net/netrom/nr_timer.c @@ -22,7 +22,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/net/rose/af_rose.c b/net/rose/af_rose.c index 
5480caf8ccc2..c6e59f84c3ae 100644 --- a/net/rose/af_rose.c +++ b/net/rose/af_rose.c @@ -41,7 +41,7 @@ #include #include #include -#include +#include #include #include diff --git a/net/rose/rose_in.c b/net/rose/rose_in.c index ef475a1bb1ba..a52417bd0a16 100644 --- a/net/rose/rose_in.c +++ b/net/rose/rose_in.c @@ -27,7 +27,7 @@ #include #include #include /* For ip_rcv */ -#include +#include #include #include #include diff --git a/net/rose/rose_route.c b/net/rose/rose_route.c index 25da6f699fd0..4510cd7613ec 100644 --- a/net/rose/rose_route.c +++ b/net/rose/rose_route.c @@ -24,7 +24,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/net/rose/rose_subr.c b/net/rose/rose_subr.c index ae135e27799b..a29a3a960fd6 100644 --- a/net/rose/rose_subr.c +++ b/net/rose/rose_subr.c @@ -21,7 +21,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/net/rose/rose_timer.c b/net/rose/rose_timer.c index 84dd4403f792..50ae0371dab8 100644 --- a/net/rose/rose_timer.c +++ b/net/rose/rose_timer.c @@ -22,7 +22,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c index 4a6421a9fcab..fa3be2b8fb5f 100644 --- a/net/sctp/ipv6.c +++ b/net/sctp/ipv6.c @@ -66,8 +66,8 @@ #include #include -#include #include +#include #include #include #include diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index d0c3120d0233..e750cb685cb6 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -26,7 +26,6 @@ #include #include #include -#include #include #include #include @@ -34,7 +33,7 @@ #include #include #include -#include +#include #include #include diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index d403e34088ad..bc4c44552c1f 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -105,7 +105,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/net/unix/garbage.c b/net/unix/garbage.c index 
46252d2807bb..6ffc64e1712d 100644 --- a/net/unix/garbage.c +++ b/net/unix/garbage.c @@ -76,11 +76,11 @@ #include #include #include -#include #include #include #include +#include /* Internal data structures and random procedures: */ diff --git a/net/wanrouter/af_wanpipe.c b/net/wanrouter/af_wanpipe.c index d93b19faaab7..596cb96e5f47 100644 --- a/net/wanrouter/af_wanpipe.c +++ b/net/wanrouter/af_wanpipe.c @@ -57,7 +57,7 @@ #include #include #include -#include +#include #include #include diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c index 04bec047fa9a..020d73cc8414 100644 --- a/net/x25/af_x25.c +++ b/net/x25/af_x25.c @@ -47,7 +47,7 @@ #include #include #include -#include +#include #include #include #include /* For TIOCINQ/OUTQ */ diff --git a/net/x25/x25_in.c b/net/x25/x25_in.c index b0197c70a9fc..26146874b839 100644 --- a/net/x25/x25_in.c +++ b/net/x25/x25_in.c @@ -28,7 +28,7 @@ #include #include #include -#include +#include #include static int x25_queue_rx_frame(struct sock *sk, struct sk_buff *skb, int more) diff --git a/net/x25/x25_subr.c b/net/x25/x25_subr.c index e20cfadad4d9..8be9b8fbc24d 100644 --- a/net/x25/x25_subr.c +++ b/net/x25/x25_subr.c @@ -27,7 +27,7 @@ #include #include #include -#include +#include #include /* diff --git a/net/x25/x25_timer.c b/net/x25/x25_timer.c index d6a21a3ad80e..0a92e1da3922 100644 --- a/net/x25/x25_timer.c +++ b/net/x25/x25_timer.c @@ -23,7 +23,7 @@ #include #include #include -#include +#include #include static void x25_heartbeat_expiry(unsigned long); From 81849d106b1fb97f8e2d311c0c4d36347def55b8 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 9 Aug 2005 20:08:50 -0700 Subject: [PATCH 316/584] [INET]: Generalise tcp_v4_hash & tcp_unhash It really just makes the existing code be a helper function that tcp_v4_hash and tcp_unhash uses, specifying the right inet_hashinfo, tcp_hashinfo. 
One thing I'll investigate at some point is to have the inet_hashinfo pointer in sk_prot, so that we get all the hashtable information from the sk pointer, this can lead to some extra indirections that may well hurt performance/code size, we'll see. Ultimate idea would be that sk_prot would provide _all_ the information about a protocol implementation. Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. Miller --- include/net/inet_hashtables.h | 34 ++++++++++++++++++++++++++++++++++ net/ipv4/tcp_ipv4.c | 29 ++--------------------------- 2 files changed, 36 insertions(+), 27 deletions(-) diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h index c816708fa556..6731df2cea67 100644 --- a/include/net/inet_hashtables.h +++ b/include/net/inet_hashtables.h @@ -240,4 +240,38 @@ static inline void __inet_hash(struct inet_hashinfo *hashinfo, if (listen_possible && sk->sk_state == TCP_LISTEN) wake_up(&hashinfo->lhash_wait); } + +static inline void inet_hash(struct inet_hashinfo *hashinfo, struct sock *sk) +{ + if (sk->sk_state != TCP_CLOSE) { + local_bh_disable(); + __inet_hash(hashinfo, sk, 1); + local_bh_enable(); + } +} + +static inline void inet_unhash(struct inet_hashinfo *hashinfo, struct sock *sk) +{ + rwlock_t *lock; + + if (sk_unhashed(sk)) + goto out; + + if (sk->sk_state == TCP_LISTEN) { + local_bh_disable(); + inet_listen_wlock(hashinfo); + lock = &hashinfo->lhash_lock; + } else { + struct inet_ehash_bucket *head = &hashinfo->ehash[sk->sk_hashent]; + lock = &head->lock; + write_lock_bh(&head->lock); + } + + if (__sk_del_node_init(sk)) + sock_prot_dec_use(sk->sk_prot); + write_unlock_bh(lock); +out: + if (sk->sk_state == TCP_LISTEN) + wake_up(&hashinfo->lhash_wait); +} #endif /* _INET_HASHTABLES_H */ diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 5f9ad95304ca..dca1be67164b 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -230,37 +230,12 @@ fail: static void tcp_v4_hash(struct sock *sk) { - if (sk->sk_state 
!= TCP_CLOSE) { - local_bh_disable(); - __inet_hash(&tcp_hashinfo, sk, 1); - local_bh_enable(); - } + inet_hash(&tcp_hashinfo, sk); } void tcp_unhash(struct sock *sk) { - rwlock_t *lock; - - if (sk_unhashed(sk)) - goto ende; - - if (sk->sk_state == TCP_LISTEN) { - local_bh_disable(); - inet_listen_wlock(&tcp_hashinfo); - lock = &tcp_hashinfo.lhash_lock; - } else { - struct inet_ehash_bucket *head = &tcp_hashinfo.ehash[sk->sk_hashent]; - lock = &head->lock; - write_lock_bh(&head->lock); - } - - if (__sk_del_node_init(sk)) - sock_prot_dec_use(sk->sk_prot); - write_unlock_bh(lock); - - ende: - if (sk->sk_state == TCP_LISTEN) - wake_up(&tcp_hashinfo.lhash_wait); + inet_unhash(&tcp_hashinfo, sk); } /* Don't inline this cruft. Here are some nice properties to From 33b62231908c58ae04185e4f1063d1e35a7c8576 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 9 Aug 2005 20:09:06 -0700 Subject: [PATCH 317/584] [INET]: Generalise tcp_v4_lookup_listener [acme@toy net-2.6.14]$ grep built-in /tmp/before /tmp/after /tmp/before: 282560 13122 9312 304994 4a762 net/ipv4/built-in.o /tmp/after: 282560 13122 9312 304994 4a762 net/ipv4/built-in.o Will be used in DCCP, not exporting it right now not to get in Adrian Bunk's exported-but-not-used-on-modules radar 8) Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. 
Miller --- include/net/inet_hashtables.h | 36 ++++++++++++++++ net/ipv4/inet_hashtables.c | 41 ++++++++++++++++++ net/ipv4/tcp_ipv4.c | 81 +++-------------------------------- 3 files changed, 82 insertions(+), 76 deletions(-) diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h index 6731df2cea67..1c4fa0065a8e 100644 --- a/include/net/inet_hashtables.h +++ b/include/net/inet_hashtables.h @@ -16,8 +16,10 @@ #include #include +#include #include #include +#include #include #include #include @@ -274,4 +276,38 @@ out: if (sk->sk_state == TCP_LISTEN) wake_up(&hashinfo->lhash_wait); } + +extern struct sock *__inet_lookup_listener(const struct hlist_head *head, + const u32 daddr, + const unsigned short hnum, + const int dif); + +/* Optimize the common listener case. */ +static inline struct sock *inet_lookup_listener(struct inet_hashinfo *hashinfo, + const u32 daddr, + const unsigned short hnum, + const int dif) +{ + struct sock *sk = NULL; + struct hlist_head *head; + + read_lock(&hashinfo->lhash_lock); + head = &hashinfo->listening_hash[inet_lhashfn(hnum)]; + if (!hlist_empty(head)) { + const struct inet_sock *inet = inet_sk((sk = __sk_head(head))); + + if (inet->num == hnum && !sk->sk_node.next && + (!inet->rcv_saddr || inet->rcv_saddr == daddr) && + (sk->sk_family == PF_INET || !ipv6_only_sock(sk)) && + !sk->sk_bound_dev_if) + goto sherry_cache; + sk = __inet_lookup_listener(head, daddr, hnum, dif); + } + if (sk) { +sherry_cache: + sock_hold(sk); + } + read_unlock(&hashinfo->lhash_lock); + return sk; +} #endif /* _INET_HASHTABLES_H */ diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c index 06cbc6f689c5..88fcba05b7d6 100644 --- a/net/ipv4/inet_hashtables.c +++ b/net/ipv4/inet_hashtables.c @@ -121,3 +121,44 @@ void inet_listen_wlock(struct inet_hashinfo *hashinfo) } EXPORT_SYMBOL(inet_listen_wlock); + +/* + * Don't inline this cruft. Here are some nice properties to exploit here. 
The + * BSD API does not allow a listening sock to specify the remote port nor the + * remote address for the connection. So always assume those are both + * wildcarded during the search since they can never be otherwise. + */ +struct sock *__inet_lookup_listener(const struct hlist_head *head, const u32 daddr, + const unsigned short hnum, const int dif) +{ + struct sock *result = NULL, *sk; + const struct hlist_node *node; + int hiscore = -1; + + sk_for_each(sk, node, head) { + const struct inet_sock *inet = inet_sk(sk); + + if (inet->num == hnum && !ipv6_only_sock(sk)) { + const __u32 rcv_saddr = inet->rcv_saddr; + int score = sk->sk_family == PF_INET ? 1 : 0; + + if (rcv_saddr) { + if (rcv_saddr != daddr) + continue; + score += 2; + } + if (sk->sk_bound_dev_if) { + if (sk->sk_bound_dev_if != dif) + continue; + score += 2; + } + if (score == 5) + return sk; + if (score > hiscore) { + hiscore = score; + result = sk; + } + } + } + return result; +} diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index dca1be67164b..a678709b36f6 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -238,78 +238,6 @@ void tcp_unhash(struct sock *sk) inet_unhash(&tcp_hashinfo, sk); } -/* Don't inline this cruft. Here are some nice properties to - * exploit here. The BSD API does not allow a listening TCP - * to specify the remote port nor the remote address for the - * connection. So always assume those are both wildcarded - * during the search since they can never be otherwise. - */ -static struct sock *__tcp_v4_lookup_listener(struct hlist_head *head, - const u32 daddr, - const unsigned short hnum, - const int dif) -{ - struct sock *result = NULL, *sk; - struct hlist_node *node; - int score, hiscore; - - hiscore=-1; - sk_for_each(sk, node, head) { - struct inet_sock *inet = inet_sk(sk); - - if (inet->num == hnum && !ipv6_only_sock(sk)) { - __u32 rcv_saddr = inet->rcv_saddr; - - score = (sk->sk_family == PF_INET ? 
1 : 0); - if (rcv_saddr) { - if (rcv_saddr != daddr) - continue; - score+=2; - } - if (sk->sk_bound_dev_if) { - if (sk->sk_bound_dev_if != dif) - continue; - score+=2; - } - if (score == 5) - return sk; - if (score > hiscore) { - hiscore = score; - result = sk; - } - } - } - return result; -} - -/* Optimize the common listener case. */ -static inline struct sock *tcp_v4_lookup_listener(const u32 daddr, - const unsigned short hnum, - const int dif) -{ - struct sock *sk = NULL; - struct hlist_head *head; - - read_lock(&tcp_hashinfo.lhash_lock); - head = &tcp_hashinfo.listening_hash[inet_lhashfn(hnum)]; - if (!hlist_empty(head)) { - struct inet_sock *inet = inet_sk((sk = __sk_head(head))); - - if (inet->num == hnum && !sk->sk_node.next && - (!inet->rcv_saddr || inet->rcv_saddr == daddr) && - (sk->sk_family == PF_INET || !ipv6_only_sock(sk)) && - !sk->sk_bound_dev_if) - goto sherry_cache; - sk = __tcp_v4_lookup_listener(head, daddr, hnum, dif); - } - if (sk) { -sherry_cache: - sock_hold(sk); - } - read_unlock(&tcp_hashinfo.lhash_lock); - return sk; -} - /* Sockets in TCP_CLOSE state are _always_ taken out of the hash, so * we need not check it for TCP lookups anymore, thanks Alexey. -DaveM * @@ -358,7 +286,7 @@ static inline struct sock *__tcp_v4_lookup(u32 saddr, u16 sport, struct sock *sk = __tcp_v4_lookup_established(saddr, sport, daddr, hnum, dif); - return sk ? : tcp_v4_lookup_listener(daddr, hnum, dif); + return sk ? 
: inet_lookup_listener(&tcp_hashinfo, daddr, hnum, dif); } inline struct sock *tcp_v4_lookup(u32 saddr, u16 sport, u32 daddr, @@ -1641,9 +1569,10 @@ do_time_wait: switch (tcp_timewait_state_process((struct tcp_tw_bucket *)sk, skb, th, skb->len)) { case TCP_TW_SYN: { - struct sock *sk2 = tcp_v4_lookup_listener(skb->nh.iph->daddr, - ntohs(th->dest), - tcp_v4_iif(skb)); + struct sock *sk2 = inet_lookup_listener(&tcp_hashinfo, + skb->nh.iph->daddr, + ntohs(th->dest), + tcp_v4_iif(skb)); if (sk2) { tcp_tw_deschedule((struct tcp_tw_bucket *)sk); tcp_tw_put((struct tcp_tw_bucket *)sk); From 8feaf0c0a5488b3d898a9c207eb6678f44ba3f26 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 9 Aug 2005 20:09:30 -0700 Subject: [PATCH 318/584] [INET]: Generalise tcp_tw_bucket, aka TIME_WAIT sockets This paves the way to generalise the rest of the sock ID lookup routines and saves some bytes in TCPv4 TIME_WAIT sockets on distro kernels (where IPv6 is always built as a module): [root@qemu ~]# grep tw_sock /proc/slabinfo tw_sock_TCPv6 0 0 128 31 1 tw_sock_TCP 0 0 96 41 1 [root@qemu ~]# Now if a protocol wants to use the TIME_WAIT generic infrastructure it only has to set the sk_prot->twsk_obj_size field with the size of its inet_timewait_sock derived sock and proto_register will create sk_prot->twsk_slab, for now it's only for INET sockets, but we can introduce timewait_sock later if some non INET transport protocol wants to use this stuff. Next changesets will take advantage of this new infrastructure to generalise even more TCP code. [acme@toy net-2.6.14]$ grep built-in /tmp/before.size /tmp/after.size /tmp/before.size: 188646 11764 5068 205478 322a6 net/ipv4/built-in.o /tmp/after.size: 188144 11764 5068 204976 320b0 net/ipv4/built-in.o [acme@toy net-2.6.14]$ Tested with both IPv4 & IPv6 (::1 (localhost) & ::ffff:172.20.0.1 (qemu host)). Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S.
Miller --- include/linux/ipv6.h | 52 +++++++- include/linux/tcp.h | 15 +++ include/net/inet_hashtables.h | 41 +++++++ include/net/inet_timewait_sock.h | 142 ++++++++++++++++++++++ include/net/sock.h | 17 ++- include/net/tcp.h | 202 +------------------------------ net/core/sock.c | 35 +++++- net/ipv4/tcp.c | 10 -- net/ipv4/tcp_diag.c | 10 +- net/ipv4/tcp_ipv4.c | 107 ++++++++-------- net/ipv4/tcp_minisocks.c | 142 +++++++++++----------- net/ipv6/addrconf.c | 2 +- net/ipv6/tcp_ipv6.c | 100 ++++++++------- 13 files changed, 484 insertions(+), 391 deletions(-) create mode 100644 include/net/inet_timewait_sock.h diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index 6fcd6a0ade24..98fa32316e40 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -308,6 +308,41 @@ static inline void inet_sk_copy_descendant(struct sock *sk_to, #define __ipv6_only_sock(sk) (inet6_sk(sk)->ipv6only) #define ipv6_only_sock(sk) ((sk)->sk_family == PF_INET6 && __ipv6_only_sock(sk)) + +#include + +struct tcp6_timewait_sock { + struct tcp_timewait_sock tw_v6_sk; + struct in6_addr tw_v6_daddr; + struct in6_addr tw_v6_rcv_saddr; +}; + +static inline struct tcp6_timewait_sock *tcp6_twsk(const struct sock *sk) +{ + return (struct tcp6_timewait_sock *)sk; +} + +static inline struct in6_addr *__tcp_v6_rcv_saddr(const struct sock *sk) +{ + return likely(sk->sk_state != TCP_TIME_WAIT) ? + &inet6_sk(sk)->rcv_saddr : &tcp6_twsk(sk)->tw_v6_rcv_saddr; +} + +static inline struct in6_addr *tcp_v6_rcv_saddr(const struct sock *sk) +{ + return sk->sk_family == AF_INET6 ? __tcp_v6_rcv_saddr(sk) : NULL; +} + +static inline int tcp_twsk_ipv6only(const struct sock *sk) +{ + return inet_twsk(sk)->tw_ipv6only; +} + +static inline int tcp_v6_ipv6only(const struct sock *sk) +{ + return likely(sk->sk_state != TCP_TIME_WAIT) ? 
+ ipv6_only_sock(sk) : tcp_twsk_ipv6only(sk); +} #else #define __ipv6_only_sock(sk) 0 #define ipv6_only_sock(sk) 0 @@ -322,8 +357,19 @@ static inline struct raw6_sock *raw6_sk(const struct sock *sk) return NULL; } -#endif +#define __tcp_v6_rcv_saddr(__sk) NULL +#define tcp_v6_rcv_saddr(__sk) NULL +#define tcp_twsk_ipv6only(__sk) 0 +#define tcp_v6_ipv6only(__sk) 0 +#endif /* defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) */ -#endif +#define INET6_MATCH(__sk, __saddr, __daddr, __ports, __dif) \ + (((*((__u32 *)&(inet_sk(__sk)->dport))) == (__ports)) && \ + ((__sk)->sk_family == AF_INET6) && \ + ipv6_addr_equal(&inet6_sk(__sk)->daddr, (__saddr)) && \ + ipv6_addr_equal(&inet6_sk(__sk)->rcv_saddr, (__daddr)) && \ + (!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif)))) -#endif +#endif /* __KERNEL__ */ + +#endif /* _IPV6_H */ diff --git a/include/linux/tcp.h b/include/linux/tcp.h index b88fe05fdcbf..5d295b1b3de7 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -179,6 +179,7 @@ struct tcp_info #include #include #include +#include /* This defines a selective acknowledgement block. 
*/ struct tcp_sack_block { @@ -387,6 +388,20 @@ static inline struct tcp_sock *tcp_sk(const struct sock *sk) return (struct tcp_sock *)sk; } +struct tcp_timewait_sock { + struct inet_timewait_sock tw_sk; + __u32 tw_rcv_nxt; + __u32 tw_snd_nxt; + __u32 tw_rcv_wnd; + __u32 tw_ts_recent; + long tw_ts_recent_stamp; +}; + +static inline struct tcp_timewait_sock *tcp_twsk(const struct sock *sk) +{ + return (struct tcp_timewait_sock *)sk; +} + static inline void *tcp_ca(const struct tcp_sock *tp) { return (void *) tp->ca_priv; diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h index 1c4fa0065a8e..c38c637e0734 100644 --- a/include/net/inet_hashtables.h +++ b/include/net/inet_hashtables.h @@ -14,6 +14,8 @@ #ifndef _INET_HASHTABLES_H #define _INET_HASHTABLES_H +#include + #include #include #include @@ -310,4 +312,43 @@ sherry_cache: read_unlock(&hashinfo->lhash_lock); return sk; } + +/* Socket demux engine toys. */ +#ifdef __BIG_ENDIAN +#define INET_COMBINED_PORTS(__sport, __dport) \ + (((__u32)(__sport) << 16) | (__u32)(__dport)) +#else /* __LITTLE_ENDIAN */ +#define INET_COMBINED_PORTS(__sport, __dport) \ + (((__u32)(__dport) << 16) | (__u32)(__sport)) +#endif + +#if (BITS_PER_LONG == 64) +#ifdef __BIG_ENDIAN +#define INET_ADDR_COOKIE(__name, __saddr, __daddr) \ + const __u64 __name = (((__u64)(__saddr)) << 32) | ((__u64)(__daddr)); +#else /* __LITTLE_ENDIAN */ +#define INET_ADDR_COOKIE(__name, __saddr, __daddr) \ + const __u64 __name = (((__u64)(__daddr)) << 32) | ((__u64)(__saddr)); +#endif /* __BIG_ENDIAN */ +#define INET_MATCH(__sk, __cookie, __saddr, __daddr, __ports, __dif)\ + (((*((__u64 *)&(inet_sk(__sk)->daddr))) == (__cookie)) && \ + ((*((__u32 *)&(inet_sk(__sk)->dport))) == (__ports)) && \ + (!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif)))) +#define INET_TW_MATCH(__sk, __cookie, __saddr, __daddr, __ports, __dif)\ + (((*((__u64 *)&(inet_twsk(__sk)->tw_daddr))) == (__cookie)) && \ + ((*((__u32 
*)&(inet_twsk(__sk)->tw_dport))) == (__ports)) && \ + (!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif)))) +#else /* 32-bit arch */ +#define INET_ADDR_COOKIE(__name, __saddr, __daddr) +#define INET_MATCH(__sk, __cookie, __saddr, __daddr, __ports, __dif) \ + ((inet_sk(__sk)->daddr == (__saddr)) && \ + (inet_sk(__sk)->rcv_saddr == (__daddr)) && \ + ((*((__u32 *)&(inet_sk(__sk)->dport))) == (__ports)) && \ + (!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif)))) +#define INET_TW_MATCH(__sk, __cookie, __saddr, __daddr, __ports, __dif) \ + ((inet_twsk(__sk)->tw_daddr == (__saddr)) && \ + (inet_twsk(__sk)->tw_rcv_saddr == (__daddr)) && \ + ((*((__u32 *)&(inet_twsk(__sk)->tw_dport))) == (__ports)) && \ + (!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif)))) +#endif /* 64-bit arch */ #endif /* _INET_HASHTABLES_H */ diff --git a/include/net/inet_timewait_sock.h b/include/net/inet_timewait_sock.h new file mode 100644 index 000000000000..ce117048f2fd --- /dev/null +++ b/include/net/inet_timewait_sock.h @@ -0,0 +1,142 @@ +/* + * INET An implementation of the TCP/IP protocol suite for the LINUX + * operating system. INET is implemented using the BSD Socket + * interface as the means of communication with the user level. + * + * Definitions for a generic INET TIMEWAIT sock + * + * From code originally in net/tcp.h + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ +#ifndef _INET_TIMEWAIT_SOCK_ +#define _INET_TIMEWAIT_SOCK_ + +#include + +#include +#include + +#include +#include + +#include + +#if (BITS_PER_LONG == 64) +#define INET_TIMEWAIT_ADDRCMP_ALIGN_BYTES 8 +#else +#define INET_TIMEWAIT_ADDRCMP_ALIGN_BYTES 4 +#endif + +struct inet_bind_bucket; + +/* + * This is a TIME_WAIT sock. 
It works around the memory consumption + * problems of sockets in such a state on heavily loaded servers, but + * without violating the protocol specification. + */ +struct inet_timewait_sock { + /* + * Now struct sock also uses sock_common, so please just + * don't add nothing before this first member (__tw_common) --acme + */ + struct sock_common __tw_common; +#define tw_family __tw_common.skc_family +#define tw_state __tw_common.skc_state +#define tw_reuse __tw_common.skc_reuse +#define tw_bound_dev_if __tw_common.skc_bound_dev_if +#define tw_node __tw_common.skc_node +#define tw_bind_node __tw_common.skc_bind_node +#define tw_refcnt __tw_common.skc_refcnt +#define tw_prot __tw_common.skc_prot + volatile unsigned char tw_substate; + /* 3 bits hole, try to pack */ + unsigned char tw_rcv_wscale; + /* Socket demultiplex comparisons on incoming packets. */ + /* these five are in inet_sock */ + __u16 tw_sport; + __u32 tw_daddr __attribute__((aligned(INET_TIMEWAIT_ADDRCMP_ALIGN_BYTES))); + __u32 tw_rcv_saddr; + __u16 tw_dport; + __u16 tw_num; + /* And these are ours. 
*/ + __u8 tw_ipv6only:1; + /* 31 bits hole, try to pack */ + int tw_hashent; + int tw_timeout; + unsigned long tw_ttd; + struct inet_bind_bucket *tw_tb; + struct hlist_node tw_death_node; +}; + +static inline void inet_twsk_add_node(struct inet_timewait_sock *tw, + struct hlist_head *list) +{ + hlist_add_head(&tw->tw_node, list); +} + +static inline void inet_twsk_add_bind_node(struct inet_timewait_sock *tw, + struct hlist_head *list) +{ + hlist_add_head(&tw->tw_bind_node, list); +} + +static inline int inet_twsk_dead_hashed(const struct inet_timewait_sock *tw) +{ + return tw->tw_death_node.pprev != NULL; +} + +static inline void inet_twsk_dead_node_init(struct inet_timewait_sock *tw) +{ + tw->tw_death_node.pprev = NULL; +} + +static inline void __inet_twsk_del_dead_node(struct inet_timewait_sock *tw) +{ + __hlist_del(&tw->tw_death_node); + inet_twsk_dead_node_init(tw); +} + +static inline int inet_twsk_del_dead_node(struct inet_timewait_sock *tw) +{ + if (inet_twsk_dead_hashed(tw)) { + __inet_twsk_del_dead_node(tw); + return 1; + } + return 0; +} + +#define inet_twsk_for_each(tw, node, head) \ + hlist_for_each_entry(tw, node, head, tw_node) + +#define inet_twsk_for_each_inmate(tw, node, jail) \ + hlist_for_each_entry(tw, node, jail, tw_death_node) + +#define inet_twsk_for_each_inmate_safe(tw, node, safe, jail) \ + hlist_for_each_entry_safe(tw, node, safe, jail, tw_death_node) + +static inline struct inet_timewait_sock *inet_twsk(const struct sock *sk) +{ + return (struct inet_timewait_sock *)sk; +} + +static inline u32 inet_rcv_saddr(const struct sock *sk) +{ + return likely(sk->sk_state != TCP_TIME_WAIT) ? 
+ inet_sk(sk)->rcv_saddr : inet_twsk(sk)->tw_rcv_saddr; +} + +static inline void inet_twsk_put(struct inet_timewait_sock *tw) +{ + if (atomic_dec_and_test(&tw->tw_refcnt)) { +#ifdef SOCK_REFCNT_DEBUG + printk(KERN_DEBUG "%s timewait_sock %p released\n", + tw->tw_prot->name, tw); +#endif + kmem_cache_free(tw->tw_prot->twsk_slab, tw); + } +} +#endif /* _INET_TIMEWAIT_SOCK_ */ diff --git a/include/net/sock.h b/include/net/sock.h index 391d00b5b7b4..c902c57bf2b7 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -88,6 +88,7 @@ do { spin_lock_init(&((__sk)->sk_lock.slock)); \ } while(0) struct sock; +struct proto; /** * struct sock_common - minimal network layer representation of sockets @@ -98,10 +99,11 @@ struct sock; * @skc_node: main hash linkage for various protocol lookup tables * @skc_bind_node: bind hash linkage for various protocol lookup tables * @skc_refcnt: reference count + * @skc_prot: protocol handlers inside a network family * * This is the minimal network layer representation of sockets, the header - * for struct sock and struct tcp_tw_bucket. - */ + * for struct sock and struct inet_timewait_sock. 
+ */ struct sock_common { unsigned short skc_family; volatile unsigned char skc_state; @@ -110,11 +112,12 @@ struct sock_common { struct hlist_node skc_node; struct hlist_node skc_bind_node; atomic_t skc_refcnt; + struct proto *skc_prot; }; /** * struct sock - network layer representation of sockets - * @__sk_common: shared layout with tcp_tw_bucket + * @__sk_common: shared layout with inet_timewait_sock * @sk_shutdown: mask of %SEND_SHUTDOWN and/or %RCV_SHUTDOWN * @sk_userlocks: %SO_SNDBUF and %SO_RCVBUF settings * @sk_lock: synchronizer @@ -140,7 +143,6 @@ struct sock_common { * @sk_backlog: always used with the per-socket spinlock held * @sk_callback_lock: used with the callbacks in the end of this struct * @sk_error_queue: rarely used - * @sk_prot: protocol handlers inside a network family * @sk_prot_creator: sk_prot of original sock creator (see ipv6_setsockopt, IPV6_ADDRFORM for instance) * @sk_err: last error * @sk_err_soft: errors that don't cause failure but are the cause of a persistent failure not just 'timed out' @@ -173,7 +175,7 @@ struct sock_common { */ struct sock { /* - * Now struct tcp_tw_bucket also uses sock_common, so please just + * Now struct inet_timewait_sock also uses sock_common, so please just * don't add nothing before this first member (__sk_common) --acme */ struct sock_common __sk_common; @@ -184,6 +186,7 @@ struct sock { #define sk_node __sk_common.skc_node #define sk_bind_node __sk_common.skc_bind_node #define sk_refcnt __sk_common.skc_refcnt +#define sk_prot __sk_common.skc_prot unsigned char sk_shutdown : 2, sk_no_check : 2, sk_userlocks : 4; @@ -218,7 +221,6 @@ struct sock { struct sk_buff *tail; } sk_backlog; struct sk_buff_head sk_error_queue; - struct proto *sk_prot; struct proto *sk_prot_creator; rwlock_t sk_callback_lock; int sk_err, @@ -557,6 +559,9 @@ struct proto { kmem_cache_t *slab; unsigned int obj_size; + kmem_cache_t *twsk_slab; + unsigned int twsk_obj_size; + struct request_sock_ops *rsk_prot; struct module *owner; 
diff --git a/include/net/tcp.h b/include/net/tcp.h index 9d026d81d8c8..cf8e664176ad 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -38,207 +38,14 @@ #include #include -#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE) -#include -#endif #include extern struct inet_hashinfo tcp_hashinfo; -#if (BITS_PER_LONG == 64) -#define TCP_ADDRCMP_ALIGN_BYTES 8 -#else -#define TCP_ADDRCMP_ALIGN_BYTES 4 -#endif - -/* This is a TIME_WAIT bucket. It works around the memory consumption - * problems of sockets in such a state on heavily loaded servers, but - * without violating the protocol specification. - */ -struct tcp_tw_bucket { - /* - * Now struct sock also uses sock_common, so please just - * don't add nothing before this first member (__tw_common) --acme - */ - struct sock_common __tw_common; -#define tw_family __tw_common.skc_family -#define tw_state __tw_common.skc_state -#define tw_reuse __tw_common.skc_reuse -#define tw_bound_dev_if __tw_common.skc_bound_dev_if -#define tw_node __tw_common.skc_node -#define tw_bind_node __tw_common.skc_bind_node -#define tw_refcnt __tw_common.skc_refcnt - volatile unsigned char tw_substate; - unsigned char tw_rcv_wscale; - __u16 tw_sport; - /* Socket demultiplex comparisons on incoming packets. */ - /* these five are in inet_sock */ - __u32 tw_daddr - __attribute__((aligned(TCP_ADDRCMP_ALIGN_BYTES))); - __u32 tw_rcv_saddr; - __u16 tw_dport; - __u16 tw_num; - /* And these are ours. 
*/ - int tw_hashent; - int tw_timeout; - __u32 tw_rcv_nxt; - __u32 tw_snd_nxt; - __u32 tw_rcv_wnd; - __u32 tw_ts_recent; - long tw_ts_recent_stamp; - unsigned long tw_ttd; - struct inet_bind_bucket *tw_tb; - struct hlist_node tw_death_node; -#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) - struct in6_addr tw_v6_daddr; - struct in6_addr tw_v6_rcv_saddr; - int tw_v6_ipv6only; -#endif -}; - -static __inline__ void tw_add_node(struct tcp_tw_bucket *tw, - struct hlist_head *list) -{ - hlist_add_head(&tw->tw_node, list); -} - -static __inline__ void tw_add_bind_node(struct tcp_tw_bucket *tw, - struct hlist_head *list) -{ - hlist_add_head(&tw->tw_bind_node, list); -} - -static inline int tw_dead_hashed(struct tcp_tw_bucket *tw) -{ - return tw->tw_death_node.pprev != NULL; -} - -static __inline__ void tw_dead_node_init(struct tcp_tw_bucket *tw) -{ - tw->tw_death_node.pprev = NULL; -} - -static __inline__ void __tw_del_dead_node(struct tcp_tw_bucket *tw) -{ - __hlist_del(&tw->tw_death_node); - tw_dead_node_init(tw); -} - -static __inline__ int tw_del_dead_node(struct tcp_tw_bucket *tw) -{ - if (tw_dead_hashed(tw)) { - __tw_del_dead_node(tw); - return 1; - } - return 0; -} - -#define tw_for_each(tw, node, head) \ - hlist_for_each_entry(tw, node, head, tw_node) - -#define tw_for_each_inmate(tw, node, jail) \ - hlist_for_each_entry(tw, node, jail, tw_death_node) - -#define tw_for_each_inmate_safe(tw, node, safe, jail) \ - hlist_for_each_entry_safe(tw, node, safe, jail, tw_death_node) - -#define tcptw_sk(__sk) ((struct tcp_tw_bucket *)(__sk)) - -static inline u32 tcp_v4_rcv_saddr(const struct sock *sk) -{ - return likely(sk->sk_state != TCP_TIME_WAIT) ? - inet_sk(sk)->rcv_saddr : tcptw_sk(sk)->tw_rcv_saddr; -} - -#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) -static inline struct in6_addr *__tcp_v6_rcv_saddr(const struct sock *sk) -{ - return likely(sk->sk_state != TCP_TIME_WAIT) ? 
- &inet6_sk(sk)->rcv_saddr : &tcptw_sk(sk)->tw_v6_rcv_saddr; -} - -static inline struct in6_addr *tcp_v6_rcv_saddr(const struct sock *sk) -{ - return sk->sk_family == AF_INET6 ? __tcp_v6_rcv_saddr(sk) : NULL; -} - -#define tcptw_sk_ipv6only(__sk) (tcptw_sk(__sk)->tw_v6_ipv6only) - -static inline int tcp_v6_ipv6only(const struct sock *sk) -{ - return likely(sk->sk_state != TCP_TIME_WAIT) ? - ipv6_only_sock(sk) : tcptw_sk_ipv6only(sk); -} -#else -# define __tcp_v6_rcv_saddr(__sk) NULL -# define tcp_v6_rcv_saddr(__sk) NULL -# define tcptw_sk_ipv6only(__sk) 0 -# define tcp_v6_ipv6only(__sk) 0 -#endif - -extern kmem_cache_t *tcp_timewait_cachep; - -static inline void tcp_tw_put(struct tcp_tw_bucket *tw) -{ - if (atomic_dec_and_test(&tw->tw_refcnt)) { -#ifdef SOCK_REFCNT_DEBUG - printk(KERN_DEBUG "tw_bucket %p released\n", tw); -#endif - kmem_cache_free(tcp_timewait_cachep, tw); - } -} - extern atomic_t tcp_orphan_count; extern int tcp_tw_count; extern void tcp_time_wait(struct sock *sk, int state, int timeo); -extern void tcp_tw_deschedule(struct tcp_tw_bucket *tw); - - -/* Socket demux engine toys. 
*/ -#ifdef __BIG_ENDIAN -#define TCP_COMBINED_PORTS(__sport, __dport) \ - (((__u32)(__sport)<<16) | (__u32)(__dport)) -#else /* __LITTLE_ENDIAN */ -#define TCP_COMBINED_PORTS(__sport, __dport) \ - (((__u32)(__dport)<<16) | (__u32)(__sport)) -#endif - -#if (BITS_PER_LONG == 64) -#ifdef __BIG_ENDIAN -#define TCP_V4_ADDR_COOKIE(__name, __saddr, __daddr) \ - __u64 __name = (((__u64)(__saddr))<<32)|((__u64)(__daddr)); -#else /* __LITTLE_ENDIAN */ -#define TCP_V4_ADDR_COOKIE(__name, __saddr, __daddr) \ - __u64 __name = (((__u64)(__daddr))<<32)|((__u64)(__saddr)); -#endif /* __BIG_ENDIAN */ -#define TCP_IPV4_MATCH(__sk, __cookie, __saddr, __daddr, __ports, __dif)\ - (((*((__u64 *)&(inet_sk(__sk)->daddr)))== (__cookie)) && \ - ((*((__u32 *)&(inet_sk(__sk)->dport)))== (__ports)) && \ - (!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif)))) -#define TCP_IPV4_TW_MATCH(__sk, __cookie, __saddr, __daddr, __ports, __dif)\ - (((*((__u64 *)&(tcptw_sk(__sk)->tw_daddr))) == (__cookie)) && \ - ((*((__u32 *)&(tcptw_sk(__sk)->tw_dport))) == (__ports)) && \ - (!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif)))) -#else /* 32-bit arch */ -#define TCP_V4_ADDR_COOKIE(__name, __saddr, __daddr) -#define TCP_IPV4_MATCH(__sk, __cookie, __saddr, __daddr, __ports, __dif)\ - ((inet_sk(__sk)->daddr == (__saddr)) && \ - (inet_sk(__sk)->rcv_saddr == (__daddr)) && \ - ((*((__u32 *)&(inet_sk(__sk)->dport)))== (__ports)) && \ - (!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif)))) -#define TCP_IPV4_TW_MATCH(__sk, __cookie, __saddr, __daddr, __ports, __dif)\ - ((tcptw_sk(__sk)->tw_daddr == (__saddr)) && \ - (tcptw_sk(__sk)->tw_rcv_saddr == (__daddr)) && \ - ((*((__u32 *)&(tcptw_sk(__sk)->tw_dport))) == (__ports)) && \ - (!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif)))) -#endif /* 64-bit arch */ - -#define TCP_IPV6_MATCH(__sk, __saddr, __daddr, __ports, __dif) \ - (((*((__u32 *)&(inet_sk(__sk)->dport)))== (__ports)) && \ - 
((__sk)->sk_family == AF_INET6) && \ - ipv6_addr_equal(&inet6_sk(__sk)->daddr, (__saddr)) && \ - ipv6_addr_equal(&inet6_sk(__sk)->rcv_saddr, (__daddr)) && \ - (!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif)))) +extern void tcp_tw_deschedule(struct inet_timewait_sock *tw); #define MAX_TCP_HEADER (128 + MAX_HEADER) @@ -543,7 +350,7 @@ extern int tcp_v4_rcv(struct sk_buff *skb); extern int tcp_v4_remember_stamp(struct sock *sk); -extern int tcp_v4_tw_remember_stamp(struct tcp_tw_bucket *tw); +extern int tcp_v4_tw_remember_stamp(struct inet_timewait_sock *tw); extern int tcp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, size_t size); @@ -616,10 +423,9 @@ enum tcp_tw_status }; -extern enum tcp_tw_status tcp_timewait_state_process(struct tcp_tw_bucket *tw, +extern enum tcp_tw_status tcp_timewait_state_process(struct inet_timewait_sock *tw, struct sk_buff *skb, - struct tcphdr *th, - unsigned len); + const struct tcphdr *th); extern struct sock * tcp_check_req(struct sock *sk,struct sk_buff *skb, struct request_sock *req, diff --git a/net/core/sock.c b/net/core/sock.c index a1a23be10aa3..aba31fedf2ac 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -1378,7 +1378,8 @@ static LIST_HEAD(proto_list); int proto_register(struct proto *prot, int alloc_slab) { - char *request_sock_slab_name; + char *request_sock_slab_name = NULL; + char *timewait_sock_slab_name; int rc = -ENOBUFS; if (alloc_slab) { @@ -1409,6 +1410,23 @@ int proto_register(struct proto *prot, int alloc_slab) goto out_free_request_sock_slab_name; } } + + if (prot->twsk_obj_size) { + static const char mask[] = "tw_sock_%s"; + + timewait_sock_slab_name = kmalloc(strlen(prot->name) + sizeof(mask) - 1, GFP_KERNEL); + + if (timewait_sock_slab_name == NULL) + goto out_free_request_sock_slab; + + sprintf(timewait_sock_slab_name, mask, prot->name); + prot->twsk_slab = kmem_cache_create(timewait_sock_slab_name, + prot->twsk_obj_size, + 0, SLAB_HWCACHE_ALIGN, + NULL, NULL); + if 
(prot->twsk_slab == NULL) + goto out_free_timewait_sock_slab_name; + } } write_lock(&proto_list_lock); @@ -1417,6 +1435,13 @@ int proto_register(struct proto *prot, int alloc_slab) rc = 0; out: return rc; +out_free_timewait_sock_slab_name: + kfree(timewait_sock_slab_name); +out_free_request_sock_slab: + if (prot->rsk_prot && prot->rsk_prot->slab) { + kmem_cache_destroy(prot->rsk_prot->slab); + prot->rsk_prot->slab = NULL; + } out_free_request_sock_slab_name: kfree(request_sock_slab_name); out_free_sock_slab: @@ -1444,6 +1469,14 @@ void proto_unregister(struct proto *prot) prot->rsk_prot->slab = NULL; } + if (prot->twsk_slab != NULL) { + const char *name = kmem_cache_name(prot->twsk_slab); + + kmem_cache_destroy(prot->twsk_slab); + kfree(name); + prot->twsk_slab = NULL; + } + list_del(&prot->node); write_unlock(&proto_list_lock); } diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 2f4b1a374bb7..f1a708bf7a97 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -271,8 +271,6 @@ int sysctl_tcp_fin_timeout = TCP_FIN_TIMEOUT; DEFINE_SNMP_STAT(struct tcp_mib, tcp_statistics); -kmem_cache_t *tcp_timewait_cachep; - atomic_t tcp_orphan_count = ATOMIC_INIT(0); int sysctl_tcp_mem[3]; @@ -2264,13 +2262,6 @@ void __init tcp_init(void) if (!tcp_hashinfo.bind_bucket_cachep) panic("tcp_init: Cannot alloc tcp_bind_bucket cache."); - tcp_timewait_cachep = kmem_cache_create("tcp_tw_bucket", - sizeof(struct tcp_tw_bucket), - 0, SLAB_HWCACHE_ALIGN, - NULL, NULL); - if (!tcp_timewait_cachep) - panic("tcp_init: Cannot alloc tcp_tw_bucket cache."); - /* Size and allocate the main established and bind bucket * hash tables. 
* @@ -2363,4 +2354,3 @@ EXPORT_SYMBOL(tcp_sendpage); EXPORT_SYMBOL(tcp_setsockopt); EXPORT_SYMBOL(tcp_shutdown); EXPORT_SYMBOL(tcp_statistics); -EXPORT_SYMBOL(tcp_timewait_cachep); diff --git a/net/ipv4/tcp_diag.c b/net/ipv4/tcp_diag.c index 1a89a03c449b..6f2d6f2276b9 100644 --- a/net/ipv4/tcp_diag.c +++ b/net/ipv4/tcp_diag.c @@ -81,7 +81,7 @@ static int tcpdiag_fill(struct sk_buff *skb, struct sock *sk, r->id.tcpdiag_cookie[1] = (u32)(((unsigned long)sk >> 31) >> 1); if (r->tcpdiag_state == TCP_TIME_WAIT) { - struct tcp_tw_bucket *tw = (struct tcp_tw_bucket*)sk; + const struct inet_timewait_sock *tw = inet_twsk(sk); long tmo = tw->tw_ttd - jiffies; if (tmo < 0) tmo = 0; @@ -99,10 +99,12 @@ static int tcpdiag_fill(struct sk_buff *skb, struct sock *sk, r->tcpdiag_inode = 0; #ifdef CONFIG_IP_TCPDIAG_IPV6 if (r->tcpdiag_family == AF_INET6) { + const struct tcp6_timewait_sock *tcp6tw = tcp6_twsk(sk); + ipv6_addr_copy((struct in6_addr *)r->id.tcpdiag_src, - &tw->tw_v6_rcv_saddr); + &tcp6tw->tw_v6_rcv_saddr); ipv6_addr_copy((struct in6_addr *)r->id.tcpdiag_dst, - &tw->tw_v6_daddr); + &tcp6tw->tw_v6_daddr); } #endif nlh->nlmsg_len = skb->tail - b; @@ -239,7 +241,7 @@ static int tcpdiag_get_exact(struct sk_buff *in_skb, const struct nlmsghdr *nlh) out: if (sk) { if (sk->sk_state == TCP_TIME_WAIT) - tcp_tw_put((struct tcp_tw_bucket*)sk); + inet_twsk_put((struct inet_timewait_sock *)sk); else sock_put(sk); } diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index a678709b36f6..ce423e48ebe0 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -106,7 +106,7 @@ int sysctl_local_port_range[2] = { 1024, 4999 }; static inline int tcp_bind_conflict(struct sock *sk, struct inet_bind_bucket *tb) { - const u32 sk_rcv_saddr = tcp_v4_rcv_saddr(sk); + const u32 sk_rcv_saddr = inet_rcv_saddr(sk); struct sock *sk2; struct hlist_node *node; int reuse = sk->sk_reuse; @@ -119,7 +119,7 @@ static inline int tcp_bind_conflict(struct sock *sk, struct inet_bind_bucket *tb 
sk->sk_bound_dev_if == sk2->sk_bound_dev_if)) { if (!reuse || !sk2->sk_reuse || sk2->sk_state == TCP_LISTEN) { - const u32 sk2_rcv_saddr = tcp_v4_rcv_saddr(sk2); + const u32 sk2_rcv_saddr = inet_rcv_saddr(sk2); if (!sk2_rcv_saddr || !sk_rcv_saddr || sk2_rcv_saddr == sk_rcv_saddr) break; @@ -251,10 +251,10 @@ static inline struct sock *__tcp_v4_lookup_established(const u32 saddr, const int dif) { struct inet_ehash_bucket *head; - TCP_V4_ADDR_COOKIE(acookie, saddr, daddr) - __u32 ports = TCP_COMBINED_PORTS(sport, hnum); + INET_ADDR_COOKIE(acookie, saddr, daddr) + const __u32 ports = INET_COMBINED_PORTS(sport, hnum); struct sock *sk; - struct hlist_node *node; + const struct hlist_node *node; /* Optimize here for direct hit, only listening connections can * have wildcards anyways. */ @@ -262,13 +262,13 @@ static inline struct sock *__tcp_v4_lookup_established(const u32 saddr, head = &tcp_hashinfo.ehash[hash]; read_lock(&head->lock); sk_for_each(sk, node, &head->chain) { - if (TCP_IPV4_MATCH(sk, acookie, saddr, daddr, ports, dif)) + if (INET_MATCH(sk, acookie, saddr, daddr, ports, dif)) goto hit; /* You sunk my battleship! */ } /* Must check for a TIME_WAIT'er before going to listener hash. 
*/ sk_for_each(sk, node, &(head + tcp_hashinfo.ehash_size)->chain) { - if (TCP_IPV4_TW_MATCH(sk, acookie, saddr, daddr, ports, dif)) + if (INET_TW_MATCH(sk, acookie, saddr, daddr, ports, dif)) goto hit; } sk = NULL; @@ -313,27 +313,28 @@ static inline __u32 tcp_v4_init_sequence(struct sock *sk, struct sk_buff *skb) /* called with local bh disabled */ static int __tcp_v4_check_established(struct sock *sk, __u16 lport, - struct tcp_tw_bucket **twp) + struct inet_timewait_sock **twp) { struct inet_sock *inet = inet_sk(sk); u32 daddr = inet->rcv_saddr; u32 saddr = inet->daddr; int dif = sk->sk_bound_dev_if; - TCP_V4_ADDR_COOKIE(acookie, saddr, daddr) - __u32 ports = TCP_COMBINED_PORTS(inet->dport, lport); + INET_ADDR_COOKIE(acookie, saddr, daddr) + const __u32 ports = INET_COMBINED_PORTS(inet->dport, lport); const int hash = inet_ehashfn(daddr, lport, saddr, inet->dport, tcp_hashinfo.ehash_size); struct inet_ehash_bucket *head = &tcp_hashinfo.ehash[hash]; struct sock *sk2; - struct hlist_node *node; - struct tcp_tw_bucket *tw; + const struct hlist_node *node; + struct inet_timewait_sock *tw; write_lock(&head->lock); /* Check TIME-WAIT sockets first. */ sk_for_each(sk2, node, &(head + tcp_hashinfo.ehash_size)->chain) { - tw = (struct tcp_tw_bucket *)sk2; + tw = inet_twsk(sk2); - if (TCP_IPV4_TW_MATCH(sk2, acookie, saddr, daddr, ports, dif)) { + if (INET_TW_MATCH(sk2, acookie, saddr, daddr, ports, dif)) { + const struct tcp_timewait_sock *tcptw = tcp_twsk(sk2); struct tcp_sock *tp = tcp_sk(sk); /* With PAWS, it is safe from the viewpoint @@ -350,15 +351,15 @@ static int __tcp_v4_check_established(struct sock *sk, __u16 lport, fall back to VJ's scheme and use initial timestamp retrieved from peer table. 
*/ - if (tw->tw_ts_recent_stamp && + if (tcptw->tw_ts_recent_stamp && (!twp || (sysctl_tcp_tw_reuse && xtime.tv_sec - - tw->tw_ts_recent_stamp > 1))) { - if ((tp->write_seq = - tw->tw_snd_nxt + 65535 + 2) == 0) + tcptw->tw_ts_recent_stamp > 1))) { + tp->write_seq = tcptw->tw_snd_nxt + 65535 + 2; + if (tp->write_seq == 0) tp->write_seq = 1; - tp->rx_opt.ts_recent = tw->tw_ts_recent; - tp->rx_opt.ts_recent_stamp = tw->tw_ts_recent_stamp; + tp->rx_opt.ts_recent = tcptw->tw_ts_recent; + tp->rx_opt.ts_recent_stamp = tcptw->tw_ts_recent_stamp; sock_hold(sk2); goto unique; } else @@ -369,7 +370,7 @@ static int __tcp_v4_check_established(struct sock *sk, __u16 lport, /* And established part... */ sk_for_each(sk2, node, &head->chain) { - if (TCP_IPV4_MATCH(sk2, acookie, saddr, daddr, ports, dif)) + if (INET_MATCH(sk2, acookie, saddr, daddr, ports, dif)) goto not_unique; } @@ -392,7 +393,7 @@ unique: tcp_tw_deschedule(tw); NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED); - tcp_tw_put(tw); + inet_twsk_put(tw); } return 0; @@ -429,7 +430,7 @@ static inline int tcp_v4_hash_connect(struct sock *sk) static u32 hint; u32 offset = hint + connect_port_offset(sk); struct hlist_node *node; - struct tcp_tw_bucket *tw = NULL; + struct inet_timewait_sock *tw = NULL; local_bh_disable(); for (i = 1; i <= range; i++) { @@ -482,7 +483,7 @@ ok: if (tw) { tcp_tw_deschedule(tw); - tcp_tw_put(tw); + inet_twsk_put(tw); } ret = 0; @@ -757,7 +758,7 @@ void tcp_v4_err(struct sk_buff *skb, u32 info) return; } if (sk->sk_state == TCP_TIME_WAIT) { - tcp_tw_put((struct tcp_tw_bucket *)sk); + inet_twsk_put((struct inet_timewait_sock *)sk); return; } @@ -1002,12 +1003,13 @@ static void tcp_v4_send_ack(struct sk_buff *skb, u32 seq, u32 ack, static void tcp_v4_timewait_ack(struct sock *sk, struct sk_buff *skb) { - struct tcp_tw_bucket *tw = (struct tcp_tw_bucket *)sk; + struct inet_timewait_sock *tw = inet_twsk(sk); + const struct tcp_timewait_sock *tcptw = tcp_twsk(sk); - tcp_v4_send_ack(skb, tw->tw_snd_nxt, 
tw->tw_rcv_nxt, - tw->tw_rcv_wnd >> tw->tw_rcv_wscale, tw->tw_ts_recent); + tcp_v4_send_ack(skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt, + tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale, tcptw->tw_ts_recent); - tcp_tw_put(tw); + inet_twsk_put(tw); } static void tcp_v4_reqsk_send_ack(struct sk_buff *skb, struct request_sock *req) @@ -1368,7 +1370,7 @@ static struct sock *tcp_v4_hnd_req(struct sock *sk, struct sk_buff *skb) bh_lock_sock(nsk); return nsk; } - tcp_tw_put((struct tcp_tw_bucket *)nsk); + inet_twsk_put((struct inet_timewait_sock *)nsk); return NULL; } @@ -1557,25 +1559,25 @@ discard_and_relse: do_time_wait: if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) { - tcp_tw_put((struct tcp_tw_bucket *) sk); + inet_twsk_put((struct inet_timewait_sock *) sk); goto discard_it; } if (skb->len < (th->doff << 2) || tcp_checksum_complete(skb)) { TCP_INC_STATS_BH(TCP_MIB_INERRS); - tcp_tw_put((struct tcp_tw_bucket *) sk); + inet_twsk_put((struct inet_timewait_sock *) sk); goto discard_it; } - switch (tcp_timewait_state_process((struct tcp_tw_bucket *)sk, - skb, th, skb->len)) { + switch (tcp_timewait_state_process((struct inet_timewait_sock *)sk, + skb, th)) { case TCP_TW_SYN: { struct sock *sk2 = inet_lookup_listener(&tcp_hashinfo, skb->nh.iph->daddr, ntohs(th->dest), tcp_v4_iif(skb)); if (sk2) { - tcp_tw_deschedule((struct tcp_tw_bucket *)sk); - tcp_tw_put((struct tcp_tw_bucket *)sk); + tcp_tw_deschedule((struct inet_timewait_sock *)sk); + inet_twsk_put((struct inet_timewait_sock *)sk); sk = sk2; goto process; } @@ -1639,18 +1641,18 @@ int tcp_v4_remember_stamp(struct sock *sk) return 0; } -int tcp_v4_tw_remember_stamp(struct tcp_tw_bucket *tw) +int tcp_v4_tw_remember_stamp(struct inet_timewait_sock *tw) { - struct inet_peer *peer = NULL; - - peer = inet_getpeer(tw->tw_daddr, 1); + struct inet_peer *peer = inet_getpeer(tw->tw_daddr, 1); if (peer) { - if ((s32)(peer->tcp_ts - tw->tw_ts_recent) <= 0 || + const struct tcp_timewait_sock *tcptw = tcp_twsk((struct sock *)tw); + + 
if ((s32)(peer->tcp_ts - tcptw->tw_ts_recent) <= 0 || (peer->tcp_ts_stamp + TCP_PAWS_MSL < xtime.tv_sec && - peer->tcp_ts_stamp <= tw->tw_ts_recent_stamp)) { - peer->tcp_ts_stamp = tw->tw_ts_recent_stamp; - peer->tcp_ts = tw->tw_ts_recent; + peer->tcp_ts_stamp <= tcptw->tw_ts_recent_stamp)) { + peer->tcp_ts_stamp = tcptw->tw_ts_recent_stamp; + peer->tcp_ts = tcptw->tw_ts_recent; } inet_putpeer(peer); return 1; @@ -1758,13 +1760,13 @@ EXPORT_SYMBOL(tcp_v4_destroy_sock); #ifdef CONFIG_PROC_FS /* Proc filesystem TCP sock list dumping. */ -static inline struct tcp_tw_bucket *tw_head(struct hlist_head *head) +static inline struct inet_timewait_sock *tw_head(struct hlist_head *head) { return hlist_empty(head) ? NULL : - list_entry(head->first, struct tcp_tw_bucket, tw_node); + list_entry(head->first, struct inet_timewait_sock, tw_node); } -static inline struct tcp_tw_bucket *tw_next(struct tcp_tw_bucket *tw) +static inline struct inet_timewait_sock *tw_next(struct inet_timewait_sock *tw) { return tw->tw_node.next ? 
hlist_entry(tw->tw_node.next, typeof(*tw), tw_node) : NULL; @@ -1860,7 +1862,7 @@ static void *established_get_first(struct seq_file *seq) for (st->bucket = 0; st->bucket < tcp_hashinfo.ehash_size; ++st->bucket) { struct sock *sk; struct hlist_node *node; - struct tcp_tw_bucket *tw; + struct inet_timewait_sock *tw; /* We can reschedule _before_ having picked the target: */ cond_resched_softirq(); @@ -1874,8 +1876,8 @@ static void *established_get_first(struct seq_file *seq) goto out; } st->state = TCP_SEQ_STATE_TIME_WAIT; - tw_for_each(tw, node, - &tcp_hashinfo.ehash[st->bucket + tcp_hashinfo.ehash_size].chain) { + inet_twsk_for_each(tw, node, + &tcp_hashinfo.ehash[st->bucket + tcp_hashinfo.ehash_size].chain) { if (tw->tw_family != st->family) { continue; } @@ -1892,7 +1894,7 @@ out: static void *established_get_next(struct seq_file *seq, void *cur) { struct sock *sk = cur; - struct tcp_tw_bucket *tw; + struct inet_timewait_sock *tw; struct hlist_node *node; struct tcp_iter_state* st = seq->private; @@ -2159,7 +2161,7 @@ static void get_tcp4_sock(struct sock *sp, char *tmpbuf, int i) tp->snd_ssthresh >= 0xFFFF ? 
-1 : tp->snd_ssthresh); } -static void get_timewait4_sock(struct tcp_tw_bucket *tw, char *tmpbuf, int i) +static void get_timewait4_sock(struct inet_timewait_sock *tw, char *tmpbuf, int i) { unsigned int dest, src; __u16 destp, srcp; @@ -2261,6 +2263,7 @@ struct proto tcp_prot = { .sysctl_rmem = sysctl_tcp_rmem, .max_header = MAX_TCP_HEADER, .obj_size = sizeof(struct tcp_sock), + .twsk_obj_size = sizeof(struct tcp_timewait_sock), .rsk_prot = &tcp_request_sock_ops, }; diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index f29e2f6ebe1b..5b5a49335fbb 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -41,7 +41,7 @@ int sysctl_tcp_max_tw_buckets = NR_FILE*2; int sysctl_tcp_syncookies = SYNC_INIT; int sysctl_tcp_abort_on_overflow; -static void tcp_tw_schedule(struct tcp_tw_bucket *tw, int timeo); +static void tcp_tw_schedule(struct inet_timewait_sock *tw, int timeo); static __inline__ int tcp_in_window(u32 seq, u32 end_seq, u32 s_win, u32 e_win) { @@ -58,7 +58,7 @@ int tcp_tw_count; /* Must be called with locally disabled BHs. */ -static void tcp_timewait_kill(struct tcp_tw_bucket *tw) +static void tcp_timewait_kill(struct inet_timewait_sock *tw) { struct inet_bind_hashbucket *bhead; struct inet_bind_bucket *tb; @@ -85,11 +85,11 @@ static void tcp_timewait_kill(struct tcp_tw_bucket *tw) #ifdef SOCK_REFCNT_DEBUG if (atomic_read(&tw->tw_refcnt) != 1) { - printk(KERN_DEBUG "tw_bucket %p refcnt=%d\n", tw, - atomic_read(&tw->tw_refcnt)); + printk(KERN_DEBUG "%s timewait_sock %p refcnt=%d\n", + tw->tw_prot->name, tw, atomic_read(&tw->tw_refcnt)); } #endif - tcp_tw_put(tw); + inet_twsk_put(tw); } /* @@ -121,19 +121,20 @@ static void tcp_timewait_kill(struct tcp_tw_bucket *tw) * to avoid misread sequence numbers, states etc. 
--ANK */ enum tcp_tw_status -tcp_timewait_state_process(struct tcp_tw_bucket *tw, struct sk_buff *skb, - struct tcphdr *th, unsigned len) +tcp_timewait_state_process(struct inet_timewait_sock *tw, struct sk_buff *skb, + const struct tcphdr *th) { + struct tcp_timewait_sock *tcptw = tcp_twsk((struct sock *)tw); struct tcp_options_received tmp_opt; int paws_reject = 0; tmp_opt.saw_tstamp = 0; - if (th->doff > (sizeof(struct tcphdr) >> 2) && tw->tw_ts_recent_stamp) { + if (th->doff > (sizeof(*th) >> 2) && tcptw->tw_ts_recent_stamp) { tcp_parse_options(skb, &tmp_opt, 0); if (tmp_opt.saw_tstamp) { - tmp_opt.ts_recent = tw->tw_ts_recent; - tmp_opt.ts_recent_stamp = tw->tw_ts_recent_stamp; + tmp_opt.ts_recent = tcptw->tw_ts_recent; + tmp_opt.ts_recent_stamp = tcptw->tw_ts_recent_stamp; paws_reject = tcp_paws_check(&tmp_opt, th->rst); } } @@ -144,20 +145,20 @@ tcp_timewait_state_process(struct tcp_tw_bucket *tw, struct sk_buff *skb, /* Out of window, send ACK */ if (paws_reject || !tcp_in_window(TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq, - tw->tw_rcv_nxt, - tw->tw_rcv_nxt + tw->tw_rcv_wnd)) + tcptw->tw_rcv_nxt, + tcptw->tw_rcv_nxt + tcptw->tw_rcv_wnd)) return TCP_TW_ACK; if (th->rst) goto kill; - if (th->syn && !before(TCP_SKB_CB(skb)->seq, tw->tw_rcv_nxt)) + if (th->syn && !before(TCP_SKB_CB(skb)->seq, tcptw->tw_rcv_nxt)) goto kill_with_rst; /* Dup ACK? */ - if (!after(TCP_SKB_CB(skb)->end_seq, tw->tw_rcv_nxt) || + if (!after(TCP_SKB_CB(skb)->end_seq, tcptw->tw_rcv_nxt) || TCP_SKB_CB(skb)->end_seq == TCP_SKB_CB(skb)->seq) { - tcp_tw_put(tw); + inet_twsk_put(tw); return TCP_TW_SUCCESS; } @@ -165,19 +166,19 @@ tcp_timewait_state_process(struct tcp_tw_bucket *tw, struct sk_buff *skb, * reset. */ if (!th->fin || - TCP_SKB_CB(skb)->end_seq != tw->tw_rcv_nxt + 1) { + TCP_SKB_CB(skb)->end_seq != tcptw->tw_rcv_nxt + 1) { kill_with_rst: tcp_tw_deschedule(tw); - tcp_tw_put(tw); + inet_twsk_put(tw); return TCP_TW_RST; } /* FIN arrived, enter true time-wait state. 
*/ - tw->tw_substate = TCP_TIME_WAIT; - tw->tw_rcv_nxt = TCP_SKB_CB(skb)->end_seq; + tw->tw_substate = TCP_TIME_WAIT; + tcptw->tw_rcv_nxt = TCP_SKB_CB(skb)->end_seq; if (tmp_opt.saw_tstamp) { - tw->tw_ts_recent_stamp = xtime.tv_sec; - tw->tw_ts_recent = tmp_opt.rcv_tsval; + tcptw->tw_ts_recent_stamp = xtime.tv_sec; + tcptw->tw_ts_recent = tmp_opt.rcv_tsval; } /* I am shamed, but failed to make it more elegant. @@ -186,7 +187,7 @@ kill_with_rst: * do not undertsnad recycling in any case, it not * a big problem in practice. --ANK */ if (tw->tw_family == AF_INET && - sysctl_tcp_tw_recycle && tw->tw_ts_recent_stamp && + sysctl_tcp_tw_recycle && tcptw->tw_ts_recent_stamp && tcp_v4_tw_remember_stamp(tw)) tcp_tw_schedule(tw, tw->tw_timeout); else @@ -212,7 +213,7 @@ kill_with_rst: */ if (!paws_reject && - (TCP_SKB_CB(skb)->seq == tw->tw_rcv_nxt && + (TCP_SKB_CB(skb)->seq == tcptw->tw_rcv_nxt && (TCP_SKB_CB(skb)->seq == TCP_SKB_CB(skb)->end_seq || th->rst))) { /* In window segment, it may be only reset or bare ack. 
*/ @@ -224,18 +225,18 @@ kill_with_rst: if (sysctl_tcp_rfc1337 == 0) { kill: tcp_tw_deschedule(tw); - tcp_tw_put(tw); + inet_twsk_put(tw); return TCP_TW_SUCCESS; } } tcp_tw_schedule(tw, TCP_TIMEWAIT_LEN); if (tmp_opt.saw_tstamp) { - tw->tw_ts_recent = tmp_opt.rcv_tsval; - tw->tw_ts_recent_stamp = xtime.tv_sec; + tcptw->tw_ts_recent = tmp_opt.rcv_tsval; + tcptw->tw_ts_recent_stamp = xtime.tv_sec; } - tcp_tw_put(tw); + inet_twsk_put(tw); return TCP_TW_SUCCESS; } @@ -257,9 +258,10 @@ kill: */ if (th->syn && !th->rst && !th->ack && !paws_reject && - (after(TCP_SKB_CB(skb)->seq, tw->tw_rcv_nxt) || - (tmp_opt.saw_tstamp && (s32)(tw->tw_ts_recent - tmp_opt.rcv_tsval) < 0))) { - u32 isn = tw->tw_snd_nxt + 65535 + 2; + (after(TCP_SKB_CB(skb)->seq, tcptw->tw_rcv_nxt) || + (tmp_opt.saw_tstamp && + (s32)(tcptw->tw_ts_recent - tmp_opt.rcv_tsval) < 0))) { + u32 isn = tcptw->tw_snd_nxt + 65535 + 2; if (isn == 0) isn++; TCP_SKB_CB(skb)->when = isn; @@ -284,7 +286,7 @@ kill: */ return TCP_TW_ACK; } - tcp_tw_put(tw); + inet_twsk_put(tw); return TCP_TW_SUCCESS; } @@ -293,7 +295,7 @@ kill: * relevant info into it from the SK, and mess with hash chains * and list linkage. */ -static void __tcp_tw_hashdance(struct sock *sk, struct tcp_tw_bucket *tw) +static void __tcp_tw_hashdance(struct sock *sk, struct inet_timewait_sock *tw) { const struct inet_sock *inet = inet_sk(sk); struct inet_ehash_bucket *ehead = &tcp_hashinfo.ehash[sk->sk_hashent]; @@ -306,7 +308,7 @@ static void __tcp_tw_hashdance(struct sock *sk, struct tcp_tw_bucket *tw) spin_lock(&bhead->lock); tw->tw_tb = inet->bind_hash; BUG_TRAP(inet->bind_hash); - tw_add_bind_node(tw, &tw->tw_tb->owners); + inet_twsk_add_bind_node(tw, &tw->tw_tb->owners); spin_unlock(&bhead->lock); write_lock(&ehead->lock); @@ -316,7 +318,7 @@ static void __tcp_tw_hashdance(struct sock *sk, struct tcp_tw_bucket *tw) sock_prot_dec_use(sk->sk_prot); /* Step 3: Hash TW into TIMEWAIT half of established hash table. 
*/ - tw_add_node(tw, &(ehead + tcp_hashinfo.ehash_size)->chain); + inet_twsk_add_node(tw, &(ehead + tcp_hashinfo.ehash_size)->chain); atomic_inc(&tw->tw_refcnt); write_unlock(&ehead->lock); @@ -327,19 +329,23 @@ static void __tcp_tw_hashdance(struct sock *sk, struct tcp_tw_bucket *tw) */ void tcp_time_wait(struct sock *sk, int state, int timeo) { - struct tcp_tw_bucket *tw = NULL; - struct tcp_sock *tp = tcp_sk(sk); + struct inet_timewait_sock *tw = NULL; + const struct tcp_sock *tp = tcp_sk(sk); int recycle_ok = 0; if (sysctl_tcp_tw_recycle && tp->rx_opt.ts_recent_stamp) recycle_ok = tp->af_specific->remember_stamp(sk); if (tcp_tw_count < sysctl_tcp_max_tw_buckets) - tw = kmem_cache_alloc(tcp_timewait_cachep, SLAB_ATOMIC); + tw = kmem_cache_alloc(sk->sk_prot_creator->twsk_slab, SLAB_ATOMIC); - if(tw != NULL) { - struct inet_sock *inet = inet_sk(sk); - int rto = (tp->rto<<2) - (tp->rto>>1); + if (tw != NULL) { + struct tcp_timewait_sock *tcptw = tcp_twsk((struct sock *)tw); + const struct inet_sock *inet = inet_sk(sk); + const int rto = (tp->rto << 2) - (tp->rto >> 1); + + /* Remember our protocol */ + tw->tw_prot = sk->sk_prot_creator; /* Give us an identity. 
*/ tw->tw_daddr = inet->daddr; @@ -356,25 +362,23 @@ void tcp_time_wait(struct sock *sk, int state, int timeo) atomic_set(&tw->tw_refcnt, 1); tw->tw_hashent = sk->sk_hashent; - tw->tw_rcv_nxt = tp->rcv_nxt; - tw->tw_snd_nxt = tp->snd_nxt; - tw->tw_rcv_wnd = tcp_receive_window(tp); - tw->tw_ts_recent = tp->rx_opt.ts_recent; - tw->tw_ts_recent_stamp = tp->rx_opt.ts_recent_stamp; - tw_dead_node_init(tw); + tcptw->tw_rcv_nxt = tp->rcv_nxt; + tcptw->tw_snd_nxt = tp->snd_nxt; + tcptw->tw_rcv_wnd = tcp_receive_window(tp); + tcptw->tw_ts_recent = tp->rx_opt.ts_recent; + tcptw->tw_ts_recent_stamp = tp->rx_opt.ts_recent_stamp; + inet_twsk_dead_node_init(tw); #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) if (tw->tw_family == PF_INET6) { struct ipv6_pinfo *np = inet6_sk(sk); + struct tcp6_timewait_sock *tcp6tw = tcp6_twsk((struct sock *)tw); - ipv6_addr_copy(&tw->tw_v6_daddr, &np->daddr); - ipv6_addr_copy(&tw->tw_v6_rcv_saddr, &np->rcv_saddr); - tw->tw_v6_ipv6only = np->ipv6only; - } else { - memset(&tw->tw_v6_daddr, 0, sizeof(tw->tw_v6_daddr)); - memset(&tw->tw_v6_rcv_saddr, 0, sizeof(tw->tw_v6_rcv_saddr)); - tw->tw_v6_ipv6only = 0; - } + ipv6_addr_copy(&tcp6tw->tw_v6_daddr, &np->daddr); + ipv6_addr_copy(&tcp6tw->tw_v6_rcv_saddr, &np->rcv_saddr); + tw->tw_ipv6only = np->ipv6only; + } else + tw->tw_ipv6only = 0; #endif /* Linkage updates. */ __tcp_tw_hashdance(sk, tw); @@ -392,7 +396,7 @@ void tcp_time_wait(struct sock *sk, int state, int timeo) } tcp_tw_schedule(tw, timeo); - tcp_tw_put(tw); + inet_twsk_put(tw); } else { /* Sorry, if we're out of memory, just CLOSE this * socket up. We've got bigger problems than @@ -427,7 +431,7 @@ static u32 twkill_thread_slots; /* Returns non-zero if quota exceeded. 
*/ static int tcp_do_twkill_work(int slot, unsigned int quota) { - struct tcp_tw_bucket *tw; + struct inet_timewait_sock *tw; struct hlist_node *node; unsigned int killed; int ret; @@ -441,11 +445,11 @@ static int tcp_do_twkill_work(int slot, unsigned int quota) killed = 0; ret = 0; rescan: - tw_for_each_inmate(tw, node, &tcp_tw_death_row[slot]) { - __tw_del_dead_node(tw); + inet_twsk_for_each_inmate(tw, node, &tcp_tw_death_row[slot]) { + __inet_twsk_del_dead_node(tw); spin_unlock(&tw_death_lock); tcp_timewait_kill(tw); - tcp_tw_put(tw); + inet_twsk_put(tw); killed++; spin_lock(&tw_death_lock); if (killed > quota) { @@ -531,11 +535,11 @@ static void twkill_work(void *dummy) */ /* This is for handling early-kills of TIME_WAIT sockets. */ -void tcp_tw_deschedule(struct tcp_tw_bucket *tw) +void tcp_tw_deschedule(struct inet_timewait_sock *tw) { spin_lock(&tw_death_lock); - if (tw_del_dead_node(tw)) { - tcp_tw_put(tw); + if (inet_twsk_del_dead_node(tw)) { + inet_twsk_put(tw); if (--tcp_tw_count == 0) del_timer(&tcp_tw_timer); } @@ -552,7 +556,7 @@ static struct timer_list tcp_twcal_timer = TIMER_INITIALIZER(tcp_twcal_tick, 0, 0); static struct hlist_head tcp_twcal_row[TCP_TW_RECYCLE_SLOTS]; -static void tcp_tw_schedule(struct tcp_tw_bucket *tw, int timeo) +static void tcp_tw_schedule(struct inet_timewait_sock *tw, const int timeo) { struct hlist_head *list; int slot; @@ -586,7 +590,7 @@ static void tcp_tw_schedule(struct tcp_tw_bucket *tw, int timeo) spin_lock(&tw_death_lock); /* Unlink it, if it was scheduled */ - if (tw_del_dead_node(tw)) + if (inet_twsk_del_dead_node(tw)) tcp_tw_count--; else atomic_inc(&tw->tw_refcnt); @@ -644,13 +648,13 @@ void tcp_twcal_tick(unsigned long dummy) for (n=0; nrcv_saddr; const struct in6_addr *sk2_rcv_saddr6 = tcp_v6_rcv_saddr(sk2); u32 sk_rcv_saddr = inet_sk(sk)->rcv_saddr; - u32 sk2_rcv_saddr = tcp_v4_rcv_saddr(sk2); + u32 sk2_rcv_saddr = inet_rcv_saddr(sk2); int sk_ipv6only = ipv6_only_sock(sk); int sk2_ipv6only = 
tcp_v6_ipv6only(sk2); int addr_type = ipv6_addr_type(sk_rcv_saddr6); diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 93a66b9a76e1..af8ad5bb273b 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -308,33 +308,32 @@ static inline struct sock *__tcp_v6_lookup_established(struct in6_addr *saddr, u struct in6_addr *daddr, u16 hnum, int dif) { - struct inet_ehash_bucket *head; struct sock *sk; - struct hlist_node *node; - __u32 ports = TCP_COMBINED_PORTS(sport, hnum); - int hash; - + const struct hlist_node *node; + const __u32 ports = INET_COMBINED_PORTS(sport, hnum); /* Optimize here for direct hit, only listening connections can * have wildcards anyways. */ - hash = tcp_v6_hashfn(daddr, hnum, saddr, sport); - head = &tcp_hashinfo.ehash[hash]; + const int hash = tcp_v6_hashfn(daddr, hnum, saddr, sport); + struct inet_ehash_bucket *head = &tcp_hashinfo.ehash[hash]; + read_lock(&head->lock); sk_for_each(sk, node, &head->chain) { /* For IPV6 do the cheaper port and family tests first. */ - if(TCP_IPV6_MATCH(sk, saddr, daddr, ports, dif)) + if (INET6_MATCH(sk, saddr, daddr, ports, dif)) goto hit; /* You sunk my battleship! */ } /* Must check for a TIME_WAIT'er before going to listener hash. */ sk_for_each(sk, node, &(head + tcp_hashinfo.ehash_size)->chain) { - /* FIXME: acme: check this... 
*/ - struct tcp_tw_bucket *tw = (struct tcp_tw_bucket *)sk; + const struct inet_timewait_sock *tw = inet_twsk(sk); if(*((__u32 *)&(tw->tw_dport)) == ports && sk->sk_family == PF_INET6) { - if(ipv6_addr_equal(&tw->tw_v6_daddr, saddr) && - ipv6_addr_equal(&tw->tw_v6_rcv_saddr, daddr) && - (!sk->sk_bound_dev_if || sk->sk_bound_dev_if == dif)) + const struct tcp6_timewait_sock *tcp6tw = tcp6_twsk(sk); + + if (ipv6_addr_equal(&tcp6tw->tw_v6_daddr, saddr) && + ipv6_addr_equal(&tcp6tw->tw_v6_rcv_saddr, daddr) && + (!sk->sk_bound_dev_if || sk->sk_bound_dev_if == dif)) goto hit; } } @@ -455,43 +454,46 @@ static __u32 tcp_v6_init_sequence(struct sock *sk, struct sk_buff *skb) } static int __tcp_v6_check_established(struct sock *sk, __u16 lport, - struct tcp_tw_bucket **twp) + struct inet_timewait_sock **twp) { struct inet_sock *inet = inet_sk(sk); struct ipv6_pinfo *np = inet6_sk(sk); struct in6_addr *daddr = &np->rcv_saddr; struct in6_addr *saddr = &np->daddr; int dif = sk->sk_bound_dev_if; - u32 ports = TCP_COMBINED_PORTS(inet->dport, lport); - int hash = tcp_v6_hashfn(daddr, inet->num, saddr, inet->dport); + const u32 ports = INET_COMBINED_PORTS(inet->dport, lport); + const int hash = tcp_v6_hashfn(daddr, inet->num, saddr, inet->dport); struct inet_ehash_bucket *head = &tcp_hashinfo.ehash[hash]; struct sock *sk2; - struct hlist_node *node; - struct tcp_tw_bucket *tw; + const struct hlist_node *node; + struct inet_timewait_sock *tw; write_lock(&head->lock); /* Check TIME-WAIT sockets first. 
*/ sk_for_each(sk2, node, &(head + tcp_hashinfo.ehash_size)->chain) { - tw = (struct tcp_tw_bucket*)sk2; + const struct tcp6_timewait_sock *tcp6tw = tcp6_twsk(sk2); + + tw = inet_twsk(sk2); if(*((__u32 *)&(tw->tw_dport)) == ports && sk2->sk_family == PF_INET6 && - ipv6_addr_equal(&tw->tw_v6_daddr, saddr) && - ipv6_addr_equal(&tw->tw_v6_rcv_saddr, daddr) && + ipv6_addr_equal(&tcp6tw->tw_v6_daddr, saddr) && + ipv6_addr_equal(&tcp6tw->tw_v6_rcv_saddr, daddr) && sk2->sk_bound_dev_if == sk->sk_bound_dev_if) { + const struct tcp_timewait_sock *tcptw = tcp_twsk(sk2); struct tcp_sock *tp = tcp_sk(sk); - if (tw->tw_ts_recent_stamp && - (!twp || (sysctl_tcp_tw_reuse && - xtime.tv_sec - - tw->tw_ts_recent_stamp > 1))) { + if (tcptw->tw_ts_recent_stamp && + (!twp || + (sysctl_tcp_tw_reuse && + xtime.tv_sec - tcptw->tw_ts_recent_stamp > 1))) { /* See comment in tcp_ipv4.c */ - tp->write_seq = tw->tw_snd_nxt + 65535 + 2; + tp->write_seq = tcptw->tw_snd_nxt + 65535 + 2; if (!tp->write_seq) tp->write_seq = 1; - tp->rx_opt.ts_recent = tw->tw_ts_recent; - tp->rx_opt.ts_recent_stamp = tw->tw_ts_recent_stamp; + tp->rx_opt.ts_recent = tcptw->tw_ts_recent; + tp->rx_opt.ts_recent_stamp = tcptw->tw_ts_recent_stamp; sock_hold(sk2); goto unique; } else @@ -502,7 +504,7 @@ static int __tcp_v6_check_established(struct sock *sk, __u16 lport, /* And established part... 
*/ sk_for_each(sk2, node, &head->chain) { - if(TCP_IPV6_MATCH(sk2, saddr, daddr, ports, dif)) + if (INET6_MATCH(sk2, saddr, daddr, ports, dif)) goto not_unique; } @@ -521,7 +523,7 @@ unique: tcp_tw_deschedule(tw); NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED); - tcp_tw_put(tw); + inet_twsk_put(tw); } return 0; @@ -556,7 +558,7 @@ static int tcp_v6_hash_connect(struct sock *sk) static u32 hint; u32 offset = hint + tcpv6_port_offset(sk); struct hlist_node *node; - struct tcp_tw_bucket *tw = NULL; + struct inet_timewait_sock *tw = NULL; local_bh_disable(); for (i = 1; i <= range; i++) { @@ -609,7 +611,7 @@ ok: if (tw) { tcp_tw_deschedule(tw); - tcp_tw_put(tw); + inet_twsk_put(tw); } ret = 0; @@ -845,7 +847,7 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, } if (sk->sk_state == TCP_TIME_WAIT) { - tcp_tw_put((struct tcp_tw_bucket*)sk); + inet_twsk_put((struct inet_timewait_sock *)sk); return; } @@ -1223,12 +1225,14 @@ static void tcp_v6_send_ack(struct sk_buff *skb, u32 seq, u32 ack, u32 win, u32 static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb) { - struct tcp_tw_bucket *tw = (struct tcp_tw_bucket *)sk; + struct inet_timewait_sock *tw = inet_twsk(sk); + const struct tcp_timewait_sock *tcptw = tcp_twsk(sk); - tcp_v6_send_ack(skb, tw->tw_snd_nxt, tw->tw_rcv_nxt, - tw->tw_rcv_wnd >> tw->tw_rcv_wscale, tw->tw_ts_recent); + tcp_v6_send_ack(skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt, + tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale, + tcptw->tw_ts_recent); - tcp_tw_put(tw); + inet_twsk_put(tw); } static void tcp_v6_reqsk_send_ack(struct sk_buff *skb, struct request_sock *req) @@ -1261,7 +1265,7 @@ static struct sock *tcp_v6_hnd_req(struct sock *sk,struct sk_buff *skb) bh_lock_sock(nsk); return nsk; } - tcp_tw_put((struct tcp_tw_bucket*)nsk); + inet_twsk_put((struct inet_timewait_sock *)nsk); return NULL; } @@ -1798,26 +1802,26 @@ discard_and_relse: do_time_wait: if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) { - tcp_tw_put((struct 
tcp_tw_bucket *) sk); + inet_twsk_put((struct inet_timewait_sock *)sk); goto discard_it; } if (skb->len < (th->doff<<2) || tcp_checksum_complete(skb)) { TCP_INC_STATS_BH(TCP_MIB_INERRS); - tcp_tw_put((struct tcp_tw_bucket *) sk); + inet_twsk_put((struct inet_timewait_sock *)sk); goto discard_it; } - switch(tcp_timewait_state_process((struct tcp_tw_bucket *)sk, - skb, th, skb->len)) { + switch (tcp_timewait_state_process((struct inet_timewait_sock *)sk, + skb, th)) { case TCP_TW_SYN: { struct sock *sk2; sk2 = tcp_v6_lookup_listener(&skb->nh.ipv6h->daddr, ntohs(th->dest), tcp_v6_iif(skb)); if (sk2 != NULL) { - tcp_tw_deschedule((struct tcp_tw_bucket *)sk); - tcp_tw_put((struct tcp_tw_bucket *)sk); + tcp_tw_deschedule((struct inet_timewait_sock *)sk); + inet_twsk_put((struct inet_timewait_sock *)sk); sk = sk2; goto process; } @@ -2137,17 +2141,18 @@ static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i) } static void get_timewait6_sock(struct seq_file *seq, - struct tcp_tw_bucket *tw, int i) + struct inet_timewait_sock *tw, int i) { struct in6_addr *dest, *src; __u16 destp, srcp; + struct tcp6_timewait_sock *tcp6tw = tcp6_twsk((struct sock *)tw); int ttd = tw->tw_ttd - jiffies; if (ttd < 0) ttd = 0; - dest = &tw->tw_v6_daddr; - src = &tw->tw_v6_rcv_saddr; + dest = &tcp6tw->tw_v6_daddr; + src = &tcp6tw->tw_v6_rcv_saddr; destp = ntohs(tw->tw_dport); srcp = ntohs(tw->tw_sport); @@ -2244,6 +2249,7 @@ struct proto tcpv6_prot = { .sysctl_rmem = sysctl_tcp_rmem, .max_header = MAX_TCP_HEADER, .obj_size = sizeof(struct tcp6_sock), + .twsk_obj_size = sizeof(struct tcp6_timewait_sock), .rsk_prot = &tcp6_request_sock_ops, }; From e48c414ee61f4ac8d5cff2973e66a7cbc8a93aa5 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 9 Aug 2005 20:09:46 -0700 Subject: [PATCH 319/584] [INET]: Generalise the TCP sock ID lookup routines And also some TIME_WAIT functions. 
[acme@toy net-2.6.14]$ grep built-in /tmp/before.size /tmp/after.size /tmp/before.size: 282955 13122 9312 305389 4a8ed net/ipv4/built-in.o /tmp/after.size: 281566 13122 9312 304000 4a380 net/ipv4/built-in.o [acme@toy net-2.6.14]$ I kept them still inlined, will uninline at some point to see what would be the performance difference. Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. Miller --- include/net/inet_hashtables.h | 77 +++++++++++++++++++++++++++-- include/net/inet_timewait_sock.h | 9 ++++ include/net/sock.h | 12 ++--- net/ipv4/Makefile | 1 + net/ipv4/inet_hashtables.c | 2 + net/ipv4/inet_timewait_sock.c | 83 ++++++++++++++++++++++++++++++++ net/ipv4/tcp_diag.c | 8 ++- net/ipv4/tcp_ipv4.c | 83 +++----------------------------- net/ipv4/tcp_minisocks.c | 78 ++---------------------------- 9 files changed, 188 insertions(+), 165 deletions(-) create mode 100644 net/ipv4/inet_timewait_sock.c diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h index c38c637e0734..b5c0d64ea741 100644 --- a/include/net/inet_hashtables.h +++ b/include/net/inet_hashtables.h @@ -30,6 +30,7 @@ #include #include +#include /* This is for all connections with a full identity, no wildcards. * New scheme, half the table is for TIME_WAIT, the other half is @@ -285,13 +286,13 @@ extern struct sock *__inet_lookup_listener(const struct hlist_head *head, const int dif); /* Optimize the common listener case. 
*/ -static inline struct sock *inet_lookup_listener(struct inet_hashinfo *hashinfo, - const u32 daddr, - const unsigned short hnum, - const int dif) +static inline struct sock * + inet_lookup_listener(struct inet_hashinfo *hashinfo, + const u32 daddr, + const unsigned short hnum, const int dif) { struct sock *sk = NULL; - struct hlist_head *head; + const struct hlist_head *head; read_lock(&hashinfo->lhash_lock); head = &hashinfo->listening_hash[inet_lhashfn(hnum)]; @@ -351,4 +352,70 @@ sherry_cache: ((*((__u32 *)&(inet_twsk(__sk)->tw_dport))) == (__ports)) && \ (!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif)))) #endif /* 64-bit arch */ + +/* + * Sockets in TCP_CLOSE state are _always_ taken out of the hash, so we need + * not check it for lookups anymore, thanks Alexey. -DaveM + * + * Local BH must be disabled here. + */ +static inline struct sock * + __inet_lookup_established(struct inet_hashinfo *hashinfo, + const u32 saddr, const u16 sport, + const u32 daddr, const u16 hnum, + const int dif) +{ + INET_ADDR_COOKIE(acookie, saddr, daddr) + const __u32 ports = INET_COMBINED_PORTS(sport, hnum); + struct sock *sk; + const struct hlist_node *node; + /* Optimize here for direct hit, only listening connections can + * have wildcards anyways. + */ + const int hash = inet_ehashfn(daddr, hnum, saddr, sport, hashinfo->ehash_size); + struct inet_ehash_bucket *head = &hashinfo->ehash[hash]; + + read_lock(&head->lock); + sk_for_each(sk, node, &head->chain) { + if (INET_MATCH(sk, acookie, saddr, daddr, ports, dif)) + goto hit; /* You sunk my battleship! */ + } + + /* Must check for a TIME_WAIT'er before going to listener hash. 
*/ + sk_for_each(sk, node, &(head + hashinfo->ehash_size)->chain) { + if (INET_TW_MATCH(sk, acookie, saddr, daddr, ports, dif)) + goto hit; + } + sk = NULL; +out: + read_unlock(&head->lock); + return sk; +hit: + sock_hold(sk); + goto out; +} + +static inline struct sock *__inet_lookup(struct inet_hashinfo *hashinfo, + const u32 saddr, const u16 sport, + const u32 daddr, const u16 hnum, + const int dif) +{ + struct sock *sk = __inet_lookup_established(hashinfo, saddr, sport, daddr, + hnum, dif); + return sk ? : inet_lookup_listener(hashinfo, daddr, hnum, dif); +} + +static inline struct sock *inet_lookup(struct inet_hashinfo *hashinfo, + const u32 saddr, const u16 sport, + const u32 daddr, const u16 dport, + const int dif) +{ + struct sock *sk; + + local_bh_disable(); + sk = __inet_lookup(hashinfo, saddr, sport, daddr, ntohs(dport), dif); + local_bh_enable(); + + return sk; +} #endif /* _INET_HASHTABLES_H */ diff --git a/include/net/inet_timewait_sock.h b/include/net/inet_timewait_sock.h index ce117048f2fd..020f28058fd4 100644 --- a/include/net/inet_timewait_sock.h +++ b/include/net/inet_timewait_sock.h @@ -17,6 +17,7 @@ #include +#include #include #include @@ -32,6 +33,7 @@ #endif struct inet_bind_bucket; +struct inet_hashinfo; /* * This is a TIME_WAIT sock. 
It works around the memory consumption @@ -139,4 +141,11 @@ static inline void inet_twsk_put(struct inet_timewait_sock *tw) kmem_cache_free(tw->tw_prot->twsk_slab, tw); } } + +extern void __inet_twsk_kill(struct inet_timewait_sock *tw, + struct inet_hashinfo *hashinfo); + +extern void __inet_twsk_hashdance(struct inet_timewait_sock *tw, + struct sock *sk, + struct inet_hashinfo *hashinfo); #endif /* _INET_TIMEWAIT_SOCK_ */ diff --git a/include/net/sock.h b/include/net/sock.h index c902c57bf2b7..bdae0a5eadf5 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -255,28 +255,28 @@ struct sock { /* * Hashed lists helper routines */ -static inline struct sock *__sk_head(struct hlist_head *head) +static inline struct sock *__sk_head(const struct hlist_head *head) { return hlist_entry(head->first, struct sock, sk_node); } -static inline struct sock *sk_head(struct hlist_head *head) +static inline struct sock *sk_head(const struct hlist_head *head) { return hlist_empty(head) ? NULL : __sk_head(head); } -static inline struct sock *sk_next(struct sock *sk) +static inline struct sock *sk_next(const struct sock *sk) { return sk->sk_node.next ? hlist_entry(sk->sk_node.next, struct sock, sk_node) : NULL; } -static inline int sk_unhashed(struct sock *sk) +static inline int sk_unhashed(const struct sock *sk) { return hlist_unhashed(&sk->sk_node); } -static inline int sk_hashed(struct sock *sk) +static inline int sk_hashed(const struct sock *sk) { return sk->sk_node.pprev != NULL; } @@ -494,7 +494,7 @@ extern int sk_wait_data(struct sock *sk, long *timeo); struct request_sock_ops; /* Here is the right place to enable sock refcounting debugging */ -#define SOCK_REFCNT_DEBUG +//#define SOCK_REFCNT_DEBUG /* Networking protocol blocks we attach to sockets. 
* socket layer -> transport layer interface diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile index 2d8d30e83eb0..6650d18e400f 100644 --- a/net/ipv4/Makefile +++ b/net/ipv4/Makefile @@ -5,6 +5,7 @@ obj-y := route.o inetpeer.o protocol.o \ ip_input.o ip_fragment.o ip_forward.o ip_options.o \ ip_output.o ip_sockglue.o inet_hashtables.o \ + inet_timewait_sock.o \ tcp.o tcp_input.o tcp_output.o tcp_timer.o tcp_ipv4.o \ tcp_minisocks.o tcp_cong.o \ datagram.o raw.o udp.o arp.o icmp.o devinet.o af_inet.o igmp.o \ diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c index 88fcba05b7d6..d94e962958a4 100644 --- a/net/ipv4/inet_hashtables.c +++ b/net/ipv4/inet_hashtables.c @@ -162,3 +162,5 @@ struct sock *__inet_lookup_listener(const struct hlist_head *head, const u32 dad } return result; } + +EXPORT_SYMBOL_GPL(__inet_lookup_listener); diff --git a/net/ipv4/inet_timewait_sock.c b/net/ipv4/inet_timewait_sock.c new file mode 100644 index 000000000000..d38d160faeb7 --- /dev/null +++ b/net/ipv4/inet_timewait_sock.c @@ -0,0 +1,83 @@ +/* + * INET An implementation of the TCP/IP protocol suite for the LINUX + * operating system. INET is implemented using the BSD Socket + * interface as the means of communication with the user level. + * + * Generic TIME_WAIT sockets functions + * + * From code orinally in TCP + */ + +#include + +#include +#include + +/* Must be called with locally disabled BHs. */ +void __inet_twsk_kill(struct inet_timewait_sock *tw, struct inet_hashinfo *hashinfo) +{ + struct inet_bind_hashbucket *bhead; + struct inet_bind_bucket *tb; + /* Unlink from established hashes. */ + struct inet_ehash_bucket *ehead = &hashinfo->ehash[tw->tw_hashent]; + + write_lock(&ehead->lock); + if (hlist_unhashed(&tw->tw_node)) { + write_unlock(&ehead->lock); + return; + } + __hlist_del(&tw->tw_node); + sk_node_init(&tw->tw_node); + write_unlock(&ehead->lock); + + /* Disassociate with bind bucket. 
*/ + bhead = &hashinfo->bhash[inet_bhashfn(tw->tw_num, hashinfo->bhash_size)]; + spin_lock(&bhead->lock); + tb = tw->tw_tb; + __hlist_del(&tw->tw_bind_node); + tw->tw_tb = NULL; + inet_bind_bucket_destroy(hashinfo->bind_bucket_cachep, tb); + spin_unlock(&bhead->lock); +#ifdef SOCK_REFCNT_DEBUG + if (atomic_read(&tw->tw_refcnt) != 1) { + printk(KERN_DEBUG "%s timewait_sock %p refcnt=%d\n", + tw->tw_prot->name, tw, atomic_read(&tw->tw_refcnt)); + } +#endif + inet_twsk_put(tw); +} + +/* + * Enter the time wait state. This is called with locally disabled BH. + * Essentially we whip up a timewait bucket, copy the relevant info into it + * from the SK, and mess with hash chains and list linkage. + */ +void __inet_twsk_hashdance(struct inet_timewait_sock *tw, struct sock *sk, + struct inet_hashinfo *hashinfo) +{ + const struct inet_sock *inet = inet_sk(sk); + struct inet_ehash_bucket *ehead = &hashinfo->ehash[sk->sk_hashent]; + struct inet_bind_hashbucket *bhead; + /* Step 1: Put TW into bind hash. Original socket stays there too. + Note, that any socket with inet->num != 0 MUST be bound in + binding cache, even if it is closed. + */ + bhead = &hashinfo->bhash[inet_bhashfn(inet->num, hashinfo->bhash_size)]; + spin_lock(&bhead->lock); + tw->tw_tb = inet->bind_hash; + BUG_TRAP(inet->bind_hash); + inet_twsk_add_bind_node(tw, &tw->tw_tb->owners); + spin_unlock(&bhead->lock); + + write_lock(&ehead->lock); + + /* Step 2: Remove SK from established hash. */ + if (__sk_del_node_init(sk)) + sock_prot_dec_use(sk->sk_prot); + + /* Step 3: Hash TW into TIMEWAIT half of established hash table. 
*/ + inet_twsk_add_node(tw, &(ehead + hashinfo->ehash_size)->chain); + atomic_inc(&tw->tw_refcnt); + + write_unlock(&ehead->lock); +} diff --git a/net/ipv4/tcp_diag.c b/net/ipv4/tcp_diag.c index 6f2d6f2276b9..60c6a797cc50 100644 --- a/net/ipv4/tcp_diag.c +++ b/net/ipv4/tcp_diag.c @@ -174,8 +174,6 @@ nlmsg_failure: return -1; } -extern struct sock *tcp_v4_lookup(u32 saddr, u16 sport, u32 daddr, u16 dport, - int dif); #ifdef CONFIG_IP_TCPDIAG_IPV6 extern struct sock *tcp_v6_lookup(struct in6_addr *saddr, u16 sport, struct in6_addr *daddr, u16 dport, @@ -197,9 +195,9 @@ static int tcpdiag_get_exact(struct sk_buff *in_skb, const struct nlmsghdr *nlh) struct sk_buff *rep; if (req->tcpdiag_family == AF_INET) { - sk = tcp_v4_lookup(req->id.tcpdiag_dst[0], req->id.tcpdiag_dport, - req->id.tcpdiag_src[0], req->id.tcpdiag_sport, - req->id.tcpdiag_if); + sk = inet_lookup(&tcp_hashinfo, req->id.tcpdiag_dst[0], + req->id.tcpdiag_dport, req->id.tcpdiag_src[0], + req->id.tcpdiag_sport, req->id.tcpdiag_if); } #ifdef CONFIG_IP_TCPDIAG_IPV6 else if (req->tcpdiag_family == AF_INET6) { diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index ce423e48ebe0..e7e91e60ac74 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -238,71 +238,6 @@ void tcp_unhash(struct sock *sk) inet_unhash(&tcp_hashinfo, sk); } -/* Sockets in TCP_CLOSE state are _always_ taken out of the hash, so - * we need not check it for TCP lookups anymore, thanks Alexey. -DaveM - * - * Local BH must be disabled here. - */ - -static inline struct sock *__tcp_v4_lookup_established(const u32 saddr, - const u16 sport, - const u32 daddr, - const u16 hnum, - const int dif) -{ - struct inet_ehash_bucket *head; - INET_ADDR_COOKIE(acookie, saddr, daddr) - const __u32 ports = INET_COMBINED_PORTS(sport, hnum); - struct sock *sk; - const struct hlist_node *node; - /* Optimize here for direct hit, only listening connections can - * have wildcards anyways. 
- */ - const int hash = inet_ehashfn(daddr, hnum, saddr, sport, tcp_hashinfo.ehash_size); - head = &tcp_hashinfo.ehash[hash]; - read_lock(&head->lock); - sk_for_each(sk, node, &head->chain) { - if (INET_MATCH(sk, acookie, saddr, daddr, ports, dif)) - goto hit; /* You sunk my battleship! */ - } - - /* Must check for a TIME_WAIT'er before going to listener hash. */ - sk_for_each(sk, node, &(head + tcp_hashinfo.ehash_size)->chain) { - if (INET_TW_MATCH(sk, acookie, saddr, daddr, ports, dif)) - goto hit; - } - sk = NULL; -out: - read_unlock(&head->lock); - return sk; -hit: - sock_hold(sk); - goto out; -} - -static inline struct sock *__tcp_v4_lookup(u32 saddr, u16 sport, - u32 daddr, u16 hnum, int dif) -{ - struct sock *sk = __tcp_v4_lookup_established(saddr, sport, - daddr, hnum, dif); - - return sk ? : inet_lookup_listener(&tcp_hashinfo, daddr, hnum, dif); -} - -inline struct sock *tcp_v4_lookup(u32 saddr, u16 sport, u32 daddr, - u16 dport, int dif) -{ - struct sock *sk; - - local_bh_disable(); - sk = __tcp_v4_lookup(saddr, sport, daddr, ntohs(dport), dif); - local_bh_enable(); - - return sk; -} - -EXPORT_SYMBOL_GPL(tcp_v4_lookup); - static inline __u32 tcp_v4_init_sequence(struct sock *sk, struct sk_buff *skb) { return secure_tcp_sequence_number(skb->nh.iph->daddr, @@ -751,8 +686,8 @@ void tcp_v4_err(struct sk_buff *skb, u32 info) return; } - sk = tcp_v4_lookup(iph->daddr, th->dest, iph->saddr, - th->source, tcp_v4_iif(skb)); + sk = inet_lookup(&tcp_hashinfo, iph->daddr, th->dest, iph->saddr, + th->source, tcp_v4_iif(skb)); if (!sk) { ICMP_INC_STATS_BH(ICMP_MIB_INERRORS); return; @@ -1359,11 +1294,9 @@ static struct sock *tcp_v4_hnd_req(struct sock *sk, struct sk_buff *skb) if (req) return tcp_check_req(sk, skb, req, prev); - nsk = __tcp_v4_lookup_established(skb->nh.iph->saddr, - th->source, - skb->nh.iph->daddr, - ntohs(th->dest), - tcp_v4_iif(skb)); + nsk = __inet_lookup_established(&tcp_hashinfo, skb->nh.iph->saddr, + th->source, skb->nh.iph->daddr, + 
ntohs(th->dest), tcp_v4_iif(skb)); if (nsk) { if (nsk->sk_state != TCP_TIME_WAIT) { @@ -1505,9 +1438,9 @@ int tcp_v4_rcv(struct sk_buff *skb) TCP_SKB_CB(skb)->flags = skb->nh.iph->tos; TCP_SKB_CB(skb)->sacked = 0; - sk = __tcp_v4_lookup(skb->nh.iph->saddr, th->source, - skb->nh.iph->daddr, ntohs(th->dest), - tcp_v4_iif(skb)); + sk = __inet_lookup(&tcp_hashinfo, skb->nh.iph->saddr, th->source, + skb->nh.iph->daddr, ntohs(th->dest), + tcp_v4_iif(skb)); if (!sk) goto no_tcp_socket; diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index 5b5a49335fbb..4112f7a6d108 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -56,42 +56,6 @@ static __inline__ int tcp_in_window(u32 seq, u32 end_seq, u32 s_win, u32 e_win) int tcp_tw_count; - -/* Must be called with locally disabled BHs. */ -static void tcp_timewait_kill(struct inet_timewait_sock *tw) -{ - struct inet_bind_hashbucket *bhead; - struct inet_bind_bucket *tb; - /* Unlink from established hashes. */ - struct inet_ehash_bucket *ehead = &tcp_hashinfo.ehash[tw->tw_hashent]; - - write_lock(&ehead->lock); - if (hlist_unhashed(&tw->tw_node)) { - write_unlock(&ehead->lock); - return; - } - __hlist_del(&tw->tw_node); - sk_node_init(&tw->tw_node); - write_unlock(&ehead->lock); - - /* Disassociate with bind bucket. 
*/ - bhead = &tcp_hashinfo.bhash[inet_bhashfn(tw->tw_num, tcp_hashinfo.bhash_size)]; - spin_lock(&bhead->lock); - tb = tw->tw_tb; - __hlist_del(&tw->tw_bind_node); - tw->tw_tb = NULL; - inet_bind_bucket_destroy(tcp_hashinfo.bind_bucket_cachep, tb); - spin_unlock(&bhead->lock); - -#ifdef SOCK_REFCNT_DEBUG - if (atomic_read(&tw->tw_refcnt) != 1) { - printk(KERN_DEBUG "%s timewait_sock %p refcnt=%d\n", - tw->tw_prot->name, tw, atomic_read(&tw->tw_refcnt)); - } -#endif - inet_twsk_put(tw); -} - /* * * Main purpose of TIME-WAIT state is to close connection gracefully, * when one of ends sits in LAST-ACK or CLOSING retransmitting FIN @@ -290,40 +254,6 @@ kill: return TCP_TW_SUCCESS; } -/* Enter the time wait state. This is called with locally disabled BH. - * Essentially we whip up a timewait bucket, copy the - * relevant info into it from the SK, and mess with hash chains - * and list linkage. - */ -static void __tcp_tw_hashdance(struct sock *sk, struct inet_timewait_sock *tw) -{ - const struct inet_sock *inet = inet_sk(sk); - struct inet_ehash_bucket *ehead = &tcp_hashinfo.ehash[sk->sk_hashent]; - struct inet_bind_hashbucket *bhead; - /* Step 1: Put TW into bind hash. Original socket stays there too. - Note, that any socket with inet->num != 0 MUST be bound in - binding cache, even if it is closed. - */ - bhead = &tcp_hashinfo.bhash[inet_bhashfn(inet->num, tcp_hashinfo.bhash_size)]; - spin_lock(&bhead->lock); - tw->tw_tb = inet->bind_hash; - BUG_TRAP(inet->bind_hash); - inet_twsk_add_bind_node(tw, &tw->tw_tb->owners); - spin_unlock(&bhead->lock); - - write_lock(&ehead->lock); - - /* Step 2: Remove SK from established hash. */ - if (__sk_del_node_init(sk)) - sock_prot_dec_use(sk->sk_prot); - - /* Step 3: Hash TW into TIMEWAIT half of established hash table. */ - inet_twsk_add_node(tw, &(ehead + tcp_hashinfo.ehash_size)->chain); - atomic_inc(&tw->tw_refcnt); - - write_unlock(&ehead->lock); -} - /* * Move a socket to time-wait or dead fin-wait-2 state. 
*/ @@ -381,7 +311,7 @@ void tcp_time_wait(struct sock *sk, int state, int timeo) tw->tw_ipv6only = 0; #endif /* Linkage updates. */ - __tcp_tw_hashdance(sk, tw); + __inet_twsk_hashdance(tw, sk, &tcp_hashinfo); /* Get the TIME_WAIT timeout firing. */ if (timeo < rto) @@ -448,7 +378,7 @@ rescan: inet_twsk_for_each_inmate(tw, node, &tcp_tw_death_row[slot]) { __inet_twsk_del_dead_node(tw); spin_unlock(&tw_death_lock); - tcp_timewait_kill(tw); + __inet_twsk_kill(tw, &tcp_hashinfo); inet_twsk_put(tw); killed++; spin_lock(&tw_death_lock); @@ -544,7 +474,7 @@ void tcp_tw_deschedule(struct inet_timewait_sock *tw) del_timer(&tcp_tw_timer); } spin_unlock(&tw_death_lock); - tcp_timewait_kill(tw); + __inet_twsk_kill(tw, &tcp_hashinfo); } /* Short-time timewait calendar */ @@ -653,7 +583,7 @@ void tcp_twcal_tick(unsigned long dummy) inet_twsk_for_each_inmate_safe(tw, node, safe, &tcp_twcal_row[slot]) { __inet_twsk_del_dead_node(tw); - tcp_timewait_kill(tw); + __inet_twsk_kill(tw, &tcp_hashinfo); inet_twsk_put(tw); killed++; } From c676270bcd25015b978722ec0352c330dcc87883 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 9 Aug 2005 20:09:59 -0700 Subject: [PATCH 320/584] [INET_TWSK]: Introduce inet_twsk_alloc With the parts of tcp_time_wait that are not TCP specific, tcp_time_wait uses it and so will dccp_time_wait. Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. 
Miller --- include/net/inet_timewait_sock.h | 3 +++ net/ipv4/inet_timewait_sock.c | 28 ++++++++++++++++++++++++++++ net/ipv4/tcp_minisocks.c | 24 ++---------------------- 3 files changed, 33 insertions(+), 22 deletions(-) diff --git a/include/net/inet_timewait_sock.h b/include/net/inet_timewait_sock.h index 020f28058fd4..e00861b16696 100644 --- a/include/net/inet_timewait_sock.h +++ b/include/net/inet_timewait_sock.h @@ -142,6 +142,9 @@ static inline void inet_twsk_put(struct inet_timewait_sock *tw) } } +extern struct inet_timewait_sock *inet_twsk_alloc(const struct sock *sk, + const int state); + extern void __inet_twsk_kill(struct inet_timewait_sock *tw, struct inet_hashinfo *hashinfo); diff --git a/net/ipv4/inet_timewait_sock.c b/net/ipv4/inet_timewait_sock.c index d38d160faeb7..ceb577c74237 100644 --- a/net/ipv4/inet_timewait_sock.c +++ b/net/ipv4/inet_timewait_sock.c @@ -81,3 +81,31 @@ void __inet_twsk_hashdance(struct inet_timewait_sock *tw, struct sock *sk, write_unlock(&ehead->lock); } + +struct inet_timewait_sock *inet_twsk_alloc(const struct sock *sk, const int state) +{ + struct inet_timewait_sock *tw = kmem_cache_alloc(sk->sk_prot_creator->twsk_slab, + SLAB_ATOMIC); + if (tw != NULL) { + const struct inet_sock *inet = inet_sk(sk); + + /* Give us an identity. 
*/ + tw->tw_daddr = inet->daddr; + tw->tw_rcv_saddr = inet->rcv_saddr; + tw->tw_bound_dev_if = sk->sk_bound_dev_if; + tw->tw_num = inet->num; + tw->tw_state = TCP_TIME_WAIT; + tw->tw_substate = state; + tw->tw_sport = inet->sport; + tw->tw_dport = inet->dport; + tw->tw_family = sk->sk_family; + tw->tw_reuse = sk->sk_reuse; + tw->tw_hashent = sk->sk_hashent; + tw->tw_ipv6only = 0; + tw->tw_prot = sk->sk_prot_creator; + atomic_set(&tw->tw_refcnt, 1); + inet_twsk_dead_node_init(tw); + } + + return tw; +} diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index 4112f7a6d108..66ce1790a94f 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -267,37 +267,18 @@ void tcp_time_wait(struct sock *sk, int state, int timeo) recycle_ok = tp->af_specific->remember_stamp(sk); if (tcp_tw_count < sysctl_tcp_max_tw_buckets) - tw = kmem_cache_alloc(sk->sk_prot_creator->twsk_slab, SLAB_ATOMIC); + tw = inet_twsk_alloc(sk, state); if (tw != NULL) { struct tcp_timewait_sock *tcptw = tcp_twsk((struct sock *)tw); - const struct inet_sock *inet = inet_sk(sk); const int rto = (tp->rto << 2) - (tp->rto >> 1); - /* Remember our protocol */ - tw->tw_prot = sk->sk_prot_creator; - - /* Give us an identity. 
*/ - tw->tw_daddr = inet->daddr; - tw->tw_rcv_saddr = inet->rcv_saddr; - tw->tw_bound_dev_if = sk->sk_bound_dev_if; - tw->tw_num = inet->num; - tw->tw_state = TCP_TIME_WAIT; - tw->tw_substate = state; - tw->tw_sport = inet->sport; - tw->tw_dport = inet->dport; - tw->tw_family = sk->sk_family; - tw->tw_reuse = sk->sk_reuse; tw->tw_rcv_wscale = tp->rx_opt.rcv_wscale; - atomic_set(&tw->tw_refcnt, 1); - - tw->tw_hashent = sk->sk_hashent; tcptw->tw_rcv_nxt = tp->rcv_nxt; tcptw->tw_snd_nxt = tp->snd_nxt; tcptw->tw_rcv_wnd = tcp_receive_window(tp); tcptw->tw_ts_recent = tp->rx_opt.ts_recent; tcptw->tw_ts_recent_stamp = tp->rx_opt.ts_recent_stamp; - inet_twsk_dead_node_init(tw); #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) if (tw->tw_family == PF_INET6) { @@ -307,8 +288,7 @@ void tcp_time_wait(struct sock *sk, int state, int timeo) ipv6_addr_copy(&tcp6tw->tw_v6_daddr, &np->daddr); ipv6_addr_copy(&tcp6tw->tw_v6_rcv_saddr, &np->rcv_saddr); tw->tw_ipv6only = np->ipv6only; - } else - tw->tw_ipv6only = 0; + } #endif /* Linkage updates. */ __inet_twsk_hashdance(tw, sk, &tcp_hashinfo); From 87d11ceb9deb7a3f13fdee6e89d9bb6be7d27a71 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 9 Aug 2005 20:10:12 -0700 Subject: [PATCH 321/584] [SOCK]: Introduce sk_clone Out of tcp_create_openreq_child, will be used in dccp_create_openreq_child, and is a nice sock function anyway. Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. 
Miller --- include/net/sock.h | 2 ++ net/core/sock.c | 74 ++++++++++++++++++++++++++++++++++++++++ net/ipv4/tcp_minisocks.c | 70 ++----------------------------------- 3 files changed, 79 insertions(+), 67 deletions(-) diff --git a/include/net/sock.h b/include/net/sock.h index bdae0a5eadf5..828dc082fcb7 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -737,6 +737,8 @@ extern struct sock *sk_alloc(int family, unsigned int __nocast priority, struct proto *prot, int zero_it); extern void sk_free(struct sock *sk); +extern struct sock *sk_clone(const struct sock *sk, + const unsigned int __nocast priority); extern struct sk_buff *sock_wmalloc(struct sock *sk, unsigned long size, int force, diff --git a/net/core/sock.c b/net/core/sock.c index aba31fedf2ac..ccd10fd65682 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -700,6 +700,80 @@ void sk_free(struct sock *sk) module_put(owner); } +struct sock *sk_clone(const struct sock *sk, const unsigned int __nocast priority) +{ + struct sock *newsk = sk_alloc(sk->sk_family, priority, sk->sk_prot, 0); + + if (newsk != NULL) { + struct sk_filter *filter; + + memcpy(newsk, sk, sk->sk_prot->obj_size); + + /* SANITY */ + sk_node_init(&newsk->sk_node); + sock_lock_init(newsk); + bh_lock_sock(newsk); + + atomic_set(&newsk->sk_rmem_alloc, 0); + atomic_set(&newsk->sk_wmem_alloc, 0); + atomic_set(&newsk->sk_omem_alloc, 0); + skb_queue_head_init(&newsk->sk_receive_queue); + skb_queue_head_init(&newsk->sk_write_queue); + + rwlock_init(&newsk->sk_dst_lock); + rwlock_init(&newsk->sk_callback_lock); + + newsk->sk_dst_cache = NULL; + newsk->sk_wmem_queued = 0; + newsk->sk_forward_alloc = 0; + newsk->sk_send_head = NULL; + newsk->sk_backlog.head = newsk->sk_backlog.tail = NULL; + newsk->sk_userlocks = sk->sk_userlocks & ~SOCK_BINDPORT_LOCK; + + sock_reset_flag(newsk, SOCK_DONE); + skb_queue_head_init(&newsk->sk_error_queue); + + filter = newsk->sk_filter; + if (filter != NULL) + sk_filter_charge(newsk, filter); + + if 
(unlikely(xfrm_sk_clone_policy(newsk))) { + /* It is still raw copy of parent, so invalidate + * destructor and make plain sk_free() */ + newsk->sk_destruct = NULL; + sk_free(newsk); + newsk = NULL; + goto out; + } + + newsk->sk_err = 0; + newsk->sk_priority = 0; + atomic_set(&newsk->sk_refcnt, 2); + + /* + * Increment the counter in the same struct proto as the master + * sock (sk_refcnt_debug_inc uses newsk->sk_prot->socks, that + * is the same as sk->sk_prot->socks, as this field was copied + * with memcpy). + * + * This _changes_ the previous behaviour, where + * tcp_create_openreq_child always was incrementing the + * equivalent to tcp_prot->socks (inet_sock_nr), so this have + * to be taken into account in all callers. -acme + */ + sk_refcnt_debug_inc(newsk); + newsk->sk_socket = NULL; + newsk->sk_sleep = NULL; + + if (newsk->sk_prot->sockets_allocated) + atomic_inc(newsk->sk_prot->sockets_allocated); + } +out: + return newsk; +} + +EXPORT_SYMBOL_GPL(sk_clone); + void __init sk_init(void) { if (num_physpages <= 4096) { diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index 66ce1790a94f..8b6cd8d80662 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -599,67 +599,26 @@ out: */ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req, struct sk_buff *skb) { - /* allocate the newsk from the same slab of the master sock, - * if not, at sk_free time we'll try to free it from the wrong - * slabcache (i.e. 
is it TCPv4 or v6?), this is handled thru sk->sk_prot -acme */ - struct sock *newsk = sk_alloc(PF_INET, GFP_ATOMIC, sk->sk_prot, 0); + struct sock *newsk = sk_clone(sk, GFP_ATOMIC); - if(newsk != NULL) { + if (newsk != NULL) { struct inet_request_sock *ireq = inet_rsk(req); struct tcp_request_sock *treq = tcp_rsk(req); struct inet_sock *newinet = inet_sk(newsk); struct tcp_sock *newtp; - struct sk_filter *filter; - memcpy(newsk, sk, sizeof(struct tcp_sock)); newsk->sk_state = TCP_SYN_RECV; - - /* SANITY */ - sk_node_init(&newsk->sk_node); newinet->bind_hash = NULL; /* Clone the TCP header template */ newinet->dport = ireq->rmt_port; - - sock_lock_init(newsk); - bh_lock_sock(newsk); - - rwlock_init(&newsk->sk_dst_lock); - newsk->sk_dst_cache = NULL; - atomic_set(&newsk->sk_rmem_alloc, 0); - skb_queue_head_init(&newsk->sk_receive_queue); - atomic_set(&newsk->sk_wmem_alloc, 0); - skb_queue_head_init(&newsk->sk_write_queue); - atomic_set(&newsk->sk_omem_alloc, 0); - newsk->sk_wmem_queued = 0; - newsk->sk_forward_alloc = 0; - - sock_reset_flag(newsk, SOCK_DONE); - newsk->sk_userlocks = sk->sk_userlocks & ~SOCK_BINDPORT_LOCK; - newsk->sk_backlog.head = newsk->sk_backlog.tail = NULL; - newsk->sk_send_head = NULL; - rwlock_init(&newsk->sk_callback_lock); - skb_queue_head_init(&newsk->sk_error_queue); newsk->sk_write_space = sk_stream_write_space; - if ((filter = newsk->sk_filter) != NULL) - sk_filter_charge(newsk, filter); - - if (unlikely(xfrm_sk_clone_policy(newsk))) { - /* It is still raw copy of parent, so invalidate - * destructor and make plain sk_free() */ - newsk->sk_destruct = NULL; - sk_free(newsk); - return NULL; - } - /* Now setup tcp_sock */ newtp = tcp_sk(newsk); newtp->pred_flags = 0; newtp->rcv_nxt = treq->rcv_isn + 1; - newtp->snd_nxt = treq->snt_isn + 1; - newtp->snd_una = treq->snt_isn + 1; - newtp->snd_sml = treq->snt_isn + 1; + newtp->snd_nxt = newtp->snd_una = newtp->snd_sml = treq->snt_isn + 1; tcp_prequeue_init(newtp); @@ -710,32 +669,9 @@ struct 
sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req, /* Deinitialize accept_queue to trap illegal accesses. */ memset(&newtp->accept_queue, 0, sizeof(newtp->accept_queue)); - /* Back to base struct sock members. */ - newsk->sk_err = 0; - newsk->sk_priority = 0; - atomic_set(&newsk->sk_refcnt, 2); - - /* - * Increment the counter in the same struct proto as the master - * sock (sk_refcnt_debug_inc uses newsk->sk_prot->socks, that - * is the same as sk->sk_prot->socks, as this field was copied - * with memcpy), same rationale as the first comment in this - * function. - * - * This _changes_ the previous behaviour, where - * tcp_create_openreq_child always was incrementing the - * equivalent to tcp_prot->socks (inet_sock_nr), so this have - * to be taken into account in all callers. -acme - */ - sk_refcnt_debug_inc(newsk); - - atomic_inc(&tcp_sockets_allocated); - if (sock_flag(newsk, SOCK_KEEPOPEN)) tcp_reset_keepalive_timer(newsk, keepalive_time_when(newtp)); - newsk->sk_socket = NULL; - newsk->sk_sleep = NULL; newtp->rx_opt.tstamp_ok = ireq->tstamp_ok; if((newtp->rx_opt.sack_ok = ireq->sack_ok) != 0) { From 463c84b97f24010a67cd871746d6a7e4c925a5f9 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 9 Aug 2005 20:10:42 -0700 Subject: [PATCH 322/584] [NET]: Introduce inet_connection_sock This creates struct inet_connection_sock, moving members out of struct tcp_sock that are shareable with other INET connection oriented protocols, such as DCCP, that in my private tree already uses most of these members. The functions that operate on these members were renamed, using an inet_csk_ prefix while not being moved yet to a new file, so as to ease the review of these changes. Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. 
Miller --- include/linux/ip.h | 2 - include/linux/ipv6.h | 8 +- include/linux/tcp.h | 39 +---- include/net/inet_connection_sock.h | 86 ++++++++++ include/net/inet_hashtables.h | 6 +- include/net/request_sock.h | 6 +- include/net/sock.h | 3 - include/net/tcp.h | 222 ++++++++++++------------ include/net/tcp_ecn.h | 2 +- net/ipv4/inet_hashtables.c | 15 +- net/ipv4/inet_timewait_sock.c | 5 +- net/ipv4/syncookies.c | 2 +- net/ipv4/tcp.c | 90 +++++----- net/ipv4/tcp_diag.c | 21 +-- net/ipv4/tcp_input.c | 266 +++++++++++++++-------------- net/ipv4/tcp_ipv4.c | 158 +++++++++-------- net/ipv4/tcp_minisocks.c | 28 +-- net/ipv4/tcp_output.c | 86 +++++----- net/ipv4/tcp_timer.c | 179 ++++++++++--------- net/ipv6/addrconf.c | 2 +- net/ipv6/tcp_ipv6.c | 54 +++--- 21 files changed, 692 insertions(+), 588 deletions(-) create mode 100644 include/net/inet_connection_sock.h diff --git a/include/linux/ip.h b/include/linux/ip.h index 2c54bbd3da76..33e8a19a1a0f 100644 --- a/include/linux/ip.h +++ b/include/linux/ip.h @@ -128,7 +128,6 @@ static inline struct inet_request_sock *inet_rsk(const struct request_sock *sk) return (struct inet_request_sock *)sk; } -struct inet_bind_bucket; struct ipv6_pinfo; struct inet_sock { @@ -158,7 +157,6 @@ struct inet_sock { int mc_index; /* Multicast device index */ __u32 mc_addr; struct ip_mc_socklist *mc_list; /* Group array */ - struct inet_bind_bucket *bind_hash; /* * Following members are used to retain the infomation to build * an ip header on each ip fragmentation while the socket is corked. diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index 98fa32316e40..88591913c94f 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -333,15 +333,15 @@ static inline struct in6_addr *tcp_v6_rcv_saddr(const struct sock *sk) return sk->sk_family == AF_INET6 ? 
__tcp_v6_rcv_saddr(sk) : NULL; } -static inline int tcp_twsk_ipv6only(const struct sock *sk) +static inline int inet_twsk_ipv6only(const struct sock *sk) { return inet_twsk(sk)->tw_ipv6only; } -static inline int tcp_v6_ipv6only(const struct sock *sk) +static inline int inet_v6_ipv6only(const struct sock *sk) { return likely(sk->sk_state != TCP_TIME_WAIT) ? - ipv6_only_sock(sk) : tcp_twsk_ipv6only(sk); + ipv6_only_sock(sk) : inet_twsk_ipv6only(sk); } #else #define __ipv6_only_sock(sk) 0 @@ -360,7 +360,7 @@ static inline struct raw6_sock *raw6_sk(const struct sock *sk) #define __tcp_v6_rcv_saddr(__sk) NULL #define tcp_v6_rcv_saddr(__sk) NULL #define tcp_twsk_ipv6only(__sk) 0 -#define tcp_v6_ipv6only(__sk) 0 +#define inet_v6_ipv6only(__sk) 0 #endif /* defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) */ #define INET6_MATCH(__sk, __saddr, __daddr, __ports, __dif) \ diff --git a/include/linux/tcp.h b/include/linux/tcp.h index 5d295b1b3de7..800930fac388 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -177,8 +177,8 @@ struct tcp_info #include #include -#include #include +#include #include /* This defines a selective acknowledgement block. 
*/ @@ -219,8 +219,8 @@ static inline struct tcp_request_sock *tcp_rsk(const struct request_sock *req) } struct tcp_sock { - /* inet_sock has to be the first member of tcp_sock */ - struct inet_sock inet; + /* inet_connection_sock has to be the first member of tcp_sock */ + struct inet_connection_sock inet_conn; int tcp_header_len; /* Bytes of tcp header to send */ /* @@ -241,18 +241,6 @@ struct tcp_sock { __u32 snd_sml; /* Last byte of the most recently transmitted small packet */ __u32 rcv_tstamp; /* timestamp of last received ACK (for keepalives) */ __u32 lsndtime; /* timestamp of last sent data packet (for restart window) */ - /* Delayed ACK control data */ - struct { - __u8 pending; /* ACK is pending */ - __u8 quick; /* Scheduled number of quick acks */ - __u8 pingpong; /* The session is interactive */ - __u8 blocked; /* Delayed ACK was blocked by socket lock*/ - __u32 ato; /* Predicted tick of soft clock */ - unsigned long timeout; /* Currently scheduled timeout */ - __u32 lrcvtime; /* timestamp of last received data packet*/ - __u16 last_seg_size; /* Size of last incoming segment */ - __u16 rcv_mss; /* MSS used for delayed ACK decisions */ - } ack; /* Data for direct copy to user */ struct { @@ -271,8 +259,8 @@ struct tcp_sock { __u16 xmit_size_goal; /* Goal for segmenting output packets */ __u16 ext_header_len; /* Network protocol overhead (IP/IPv6 options) */ __u8 ca_state; /* State of fast-retransmit machine */ - __u8 retransmits; /* Number of unrecovered RTO timeouts. */ + __u8 keepalive_probes; /* num of allowed keep alive probes */ __u16 advmss; /* Advertised MSS */ __u32 window_clamp; /* Maximal window to advertise */ __u32 rcv_ssthresh; /* Current window clamp */ @@ -281,7 +269,7 @@ struct tcp_sock { __u8 reordering; /* Packet reordering metric. */ __u8 frto_counter; /* Number of new acks after RTO */ - __u8 unused; + __u8 nonagle; /* Disable Nagle algorithm? 
*/ __u8 defer_accept; /* User waits for some data after accept() */ /* RTT measurement */ @@ -290,19 +278,13 @@ struct tcp_sock { __u32 mdev_max; /* maximal mdev for the last rtt period */ __u32 rttvar; /* smoothed mdev_max */ __u32 rtt_seq; /* sequence number to update rttvar */ - __u32 rto; /* retransmit timeout */ __u32 packets_out; /* Packets which are "in flight" */ __u32 left_out; /* Packets which leaved network */ __u32 retrans_out; /* Retransmitted packets out */ - __u8 backoff; /* backoff */ /* * Options received (usually on last packet, some only on SYN packets). */ - __u8 nonagle; /* Disable Nagle algorithm? */ - __u8 keepalive_probes; /* num of allowed keep alive probes */ - - __u8 probes_out; /* unanswered 0 window probes */ struct tcp_options_received rx_opt; /* @@ -315,11 +297,6 @@ struct tcp_sock { __u32 snd_cwnd_used; __u32 snd_cwnd_stamp; - /* Two commonly used timers in both sender and receiver paths. */ - unsigned long timeout; - struct timer_list retransmit_timer; /* Resend (no ack) */ - struct timer_list delack_timer; /* Ack delay */ - struct sk_buff_head out_of_order_queue; /* Out of order segments go here */ struct tcp_func *af_specific; /* Operations which are AF_INET{4,6} specific */ @@ -334,7 +311,7 @@ struct tcp_sock { struct tcp_sack_block duplicate_sack[1]; /* D-SACK block */ struct tcp_sack_block selective_acks[4]; /* The SACKS themselves*/ - __u8 syn_retries; /* num of allowed syn retries */ + __u8 probes_out; /* unanswered 0 window probes */ __u8 ecn_flags; /* ECN status bits. */ __u16 prior_ssthresh; /* ssthresh saved at recovery start */ __u32 lost_out; /* Lost packets */ @@ -349,14 +326,12 @@ struct tcp_sock { int undo_retrans; /* number of undoable retransmissions. */ __u32 urg_seq; /* Seq of received urgent pointer */ __u16 urg_data; /* Saved octet of OOB data and control flags */ - __u8 pending; /* Scheduled timer event */ __u8 urg_mode; /* In urgent mode */ + /* ONE BYTE HOLE, TRY TO PACK! 
*/ __u32 snd_up; /* Urgent pointer */ __u32 total_retrans; /* Total retransmits for entire connection */ - struct request_sock_queue accept_queue; /* FIFO of established children */ - unsigned int keepalive_time; /* time before keep alive takes place */ unsigned int keepalive_intvl; /* time interval between keep alive probes */ int linger2; diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h new file mode 100644 index 000000000000..ef609396e41b --- /dev/null +++ b/include/net/inet_connection_sock.h @@ -0,0 +1,86 @@ +/* + * NET Generic infrastructure for INET connection oriented protocols. + * + * Definitions for inet_connection_sock + * + * Authors: Many people, see the TCP sources + * + * From code originally in TCP + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ +#ifndef _INET_CONNECTION_SOCK_H +#define _INET_CONNECTION_SOCK_H + +#include +#include +#include + +struct inet_bind_bucket; +struct inet_hashinfo; + +/** inet_connection_sock - INET connection oriented sock + * + * @icsk_accept_queue: FIFO of established children + * @icsk_bind_hash: Bind node + * @icsk_timeout: Timeout + * @icsk_retransmit_timer: Resend (no ack) + * @icsk_rto: Retransmit timeout + * @icsk_retransmits: Number of unrecovered [RTO] timeouts + * @icsk_pending: Scheduled timer event + * @icsk_backoff: Backoff + * @icsk_syn_retries: Number of allowed SYN (or equivalent) retries + * @icsk_ack: Delayed ACK control data + */ +struct inet_connection_sock { + /* inet_sock has to be the first member! 
*/ + struct inet_sock icsk_inet; + struct request_sock_queue icsk_accept_queue; + struct inet_bind_bucket *icsk_bind_hash; + unsigned long icsk_timeout; + struct timer_list icsk_retransmit_timer; + struct timer_list icsk_delack_timer; + __u32 icsk_rto; + __u8 icsk_retransmits; + __u8 icsk_pending; + __u8 icsk_backoff; + __u8 icsk_syn_retries; + struct { + __u8 pending; /* ACK is pending */ + __u8 quick; /* Scheduled number of quick acks */ + __u8 pingpong; /* The session is interactive */ + __u8 blocked; /* Delayed ACK was blocked by socket lock */ + __u32 ato; /* Predicted tick of soft clock */ + unsigned long timeout; /* Currently scheduled timeout */ + __u32 lrcvtime; /* timestamp of last received data packet */ + __u16 last_seg_size; /* Size of last incoming segment */ + __u16 rcv_mss; /* MSS used for delayed ACK decisions */ + } icsk_ack; +}; + +static inline struct inet_connection_sock *inet_csk(const struct sock *sk) +{ + return (struct inet_connection_sock *)sk; +} + +extern void inet_csk_init_xmit_timers(struct sock *sk, + void (*retransmit_handler)(unsigned long), + void (*delack_handler)(unsigned long), + void (*keepalive_handler)(unsigned long)); +extern void inet_csk_clear_xmit_timers(struct sock *sk); + +extern struct request_sock *inet_csk_search_req(const struct sock *sk, + struct request_sock ***prevp, + const __u16 rport, + const __u32 raddr, + const __u32 laddr); +extern int inet_csk_get_port(struct inet_hashinfo *hashinfo, + struct sock *sk, unsigned short snum); + +extern struct dst_entry* inet_csk_route_req(struct sock *sk, + const struct request_sock *req); + +#endif /* _INET_CONNECTION_SOCK_H */ diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h index b5c0d64ea741..f0c21c07f894 100644 --- a/include/net/inet_hashtables.h +++ b/include/net/inet_hashtables.h @@ -17,7 +17,6 @@ #include #include -#include #include #include #include @@ -26,6 +25,7 @@ #include #include +#include #include #include @@ -185,9 +185,9 @@ static 
inline void __inet_inherit_port(struct inet_hashinfo *table, struct inet_bind_bucket *tb; spin_lock(&head->lock); - tb = inet_sk(sk)->bind_hash; + tb = inet_csk(sk)->icsk_bind_hash; sk_add_bind_node(child, &tb->owners); - inet_sk(child)->bind_hash = tb; + inet_csk(child)->icsk_bind_hash = tb; spin_unlock(&head->lock); } diff --git a/include/net/request_sock.h b/include/net/request_sock.h index 334717bf9ef6..b7c7eecbe64d 100644 --- a/include/net/request_sock.h +++ b/include/net/request_sock.h @@ -224,17 +224,17 @@ static inline int reqsk_queue_added(struct request_sock_queue *queue) return prev_qlen; } -static inline int reqsk_queue_len(struct request_sock_queue *queue) +static inline int reqsk_queue_len(const struct request_sock_queue *queue) { return queue->listen_opt != NULL ? queue->listen_opt->qlen : 0; } -static inline int reqsk_queue_len_young(struct request_sock_queue *queue) +static inline int reqsk_queue_len_young(const struct request_sock_queue *queue) { return queue->listen_opt->qlen_young; } -static inline int reqsk_queue_is_full(struct request_sock_queue *queue) +static inline int reqsk_queue_is_full(const struct request_sock_queue *queue) { return queue->listen_opt->qlen >> queue->listen_opt->max_qlen_log; } diff --git a/include/net/sock.h b/include/net/sock.h index 828dc082fcb7..48cc337a6566 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -493,9 +493,6 @@ extern int sk_wait_data(struct sock *sk, long *timeo); struct request_sock_ops; -/* Here is the right place to enable sock refcounting debugging */ -//#define SOCK_REFCNT_DEBUG - /* Networking protocol blocks we attach to sockets. 
* socket layer -> transport layer interface * transport -> network interface is defined by struct inet_proto diff --git a/include/net/tcp.h b/include/net/tcp.h index cf8e664176ad..a943c79c88b0 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -19,10 +19,11 @@ #define _TCP_H #define TCP_DEBUG 1 +#define INET_CSK_DEBUG 1 #define FASTRETRANS_DEBUG 1 /* Cancel timers, when they are not required. */ -#undef TCP_CLEAR_TIMERS +#undef INET_CSK_CLEAR_TIMERS #include #include @@ -205,10 +206,10 @@ extern void tcp_tw_deschedule(struct inet_timewait_sock *tw); #define TCPOLEN_SACK_BASE_ALIGNED 4 #define TCPOLEN_SACK_PERBLOCK 8 -#define TCP_TIME_RETRANS 1 /* Retransmit timer */ -#define TCP_TIME_DACK 2 /* Delayed ack timer */ -#define TCP_TIME_PROBE0 3 /* Zero window probe timer */ -#define TCP_TIME_KEEPOPEN 4 /* Keepalive timer */ +#define ICSK_TIME_RETRANS 1 /* Retransmit timer */ +#define ICSK_TIME_DACK 2 /* Delayed ack timer */ +#define ICSK_TIME_PROBE0 3 /* Zero window probe timer */ +#define ICSK_TIME_KEEPOPEN 4 /* Keepalive timer */ /* Flags in tp->nonagle */ #define TCP_NAGLE_OFF 1 /* Nagle's algo is disabled */ @@ -257,9 +258,9 @@ extern atomic_t tcp_sockets_allocated; extern int tcp_memory_pressure; #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) -#define TCP_INET_FAMILY(fam) ((fam) == AF_INET) +#define AF_INET_FAMILY(fam) ((fam) == AF_INET) #else -#define TCP_INET_FAMILY(fam) 1 +#define AF_INET_FAMILY(fam) 1 #endif /* @@ -372,41 +373,42 @@ extern int tcp_rcv_established(struct sock *sk, extern void tcp_rcv_space_adjust(struct sock *sk); -enum tcp_ack_state_t -{ - TCP_ACK_SCHED = 1, - TCP_ACK_TIMER = 2, - TCP_ACK_PUSHED= 4 +enum inet_csk_ack_state_t { + ICSK_ACK_SCHED = 1, + ICSK_ACK_TIMER = 2, + ICSK_ACK_PUSHED = 4 }; -static inline void tcp_schedule_ack(struct tcp_sock *tp) +static inline void inet_csk_schedule_ack(struct sock *sk) { - tp->ack.pending |= TCP_ACK_SCHED; + inet_csk(sk)->icsk_ack.pending |= ICSK_ACK_SCHED; } -static inline int 
tcp_ack_scheduled(struct tcp_sock *tp) +static inline int inet_csk_ack_scheduled(const struct sock *sk) { - return tp->ack.pending&TCP_ACK_SCHED; + return inet_csk(sk)->icsk_ack.pending & ICSK_ACK_SCHED; } -static __inline__ void tcp_dec_quickack_mode(struct tcp_sock *tp, unsigned int pkts) +static inline void tcp_dec_quickack_mode(struct sock *sk, + const unsigned int pkts) { - if (tp->ack.quick) { - if (pkts >= tp->ack.quick) { - tp->ack.quick = 0; + struct inet_connection_sock *icsk = inet_csk(sk); + if (icsk->icsk_ack.quick) { + if (pkts >= icsk->icsk_ack.quick) { + icsk->icsk_ack.quick = 0; /* Leaving quickack mode we deflate ATO. */ - tp->ack.ato = TCP_ATO_MIN; + icsk->icsk_ack.ato = TCP_ATO_MIN; } else - tp->ack.quick -= pkts; + icsk->icsk_ack.quick -= pkts; } } -extern void tcp_enter_quickack_mode(struct tcp_sock *tp); +extern void tcp_enter_quickack_mode(struct sock *sk); -static __inline__ void tcp_delack_init(struct tcp_sock *tp) +static inline void inet_csk_delack_init(struct sock *sk) { - memset(&tp->ack, 0, sizeof(tp->ack)); + memset(&inet_csk(sk)->icsk_ack, 0, sizeof(inet_csk(sk)->icsk_ack)); } static inline void tcp_clear_options(struct tcp_options_received *rx_opt) @@ -440,7 +442,7 @@ extern void tcp_update_metrics(struct sock *sk); extern void tcp_close(struct sock *sk, long timeout); -extern struct sock * tcp_accept(struct sock *sk, int flags, int *err); +extern struct sock * inet_csk_accept(struct sock *sk, int flags, int *err); extern unsigned int tcp_poll(struct file * file, struct socket *sock, struct poll_table_struct *wait); extern int tcp_getsockopt(struct sock *sk, int level, @@ -534,15 +536,18 @@ extern void tcp_cwnd_application_limited(struct sock *sk); /* tcp_timer.c */ extern void tcp_init_xmit_timers(struct sock *); -extern void tcp_clear_xmit_timers(struct sock *); +static inline void tcp_clear_xmit_timers(struct sock *sk) +{ + inet_csk_clear_xmit_timers(sk); +} -extern void tcp_delete_keepalive_timer(struct sock *); -extern void 
tcp_reset_keepalive_timer(struct sock *, unsigned long); +extern void inet_csk_delete_keepalive_timer(struct sock *sk); +extern void inet_csk_reset_keepalive_timer(struct sock *sk, unsigned long timeout); extern unsigned int tcp_sync_mss(struct sock *sk, u32 pmtu); extern unsigned int tcp_current_mss(struct sock *sk, int large); -#ifdef TCP_DEBUG -extern const char tcp_timer_bug_msg[]; +#ifdef INET_CSK_DEBUG +extern const char inet_csk_timer_bug_msg[]; #endif /* tcp_diag.c */ @@ -554,70 +559,58 @@ typedef int (*sk_read_actor_t)(read_descriptor_t *, struct sk_buff *, extern int tcp_read_sock(struct sock *sk, read_descriptor_t *desc, sk_read_actor_t recv_actor); -static inline void tcp_clear_xmit_timer(struct sock *sk, int what) +static inline void inet_csk_clear_xmit_timer(struct sock *sk, const int what) { - struct tcp_sock *tp = tcp_sk(sk); + struct inet_connection_sock *icsk = inet_csk(sk); - switch (what) { - case TCP_TIME_RETRANS: - case TCP_TIME_PROBE0: - tp->pending = 0; - -#ifdef TCP_CLEAR_TIMERS - sk_stop_timer(sk, &tp->retransmit_timer); + if (what == ICSK_TIME_RETRANS || what == ICSK_TIME_PROBE0) { + icsk->icsk_pending = 0; +#ifdef INET_CSK_CLEAR_TIMERS + sk_stop_timer(sk, &icsk->icsk_retransmit_timer); #endif - break; - case TCP_TIME_DACK: - tp->ack.blocked = 0; - tp->ack.pending = 0; - -#ifdef TCP_CLEAR_TIMERS - sk_stop_timer(sk, &tp->delack_timer); + } else if (what == ICSK_TIME_DACK) { + icsk->icsk_ack.blocked = icsk->icsk_ack.pending = 0; +#ifdef INET_CSK_CLEAR_TIMERS + sk_stop_timer(sk, &icsk->icsk_delack_timer); #endif - break; - default: -#ifdef TCP_DEBUG - printk(tcp_timer_bug_msg); + } +#ifdef INET_CSK_DEBUG + else { + pr_debug(inet_csk_timer_bug_msg); + } #endif - return; - }; - } /* * Reset the retransmission timer */ -static inline void tcp_reset_xmit_timer(struct sock *sk, int what, unsigned long when) +static inline void inet_csk_reset_xmit_timer(struct sock *sk, const int what, + unsigned long when) { - struct tcp_sock *tp = tcp_sk(sk); + 
struct inet_connection_sock *icsk = inet_csk(sk); if (when > TCP_RTO_MAX) { -#ifdef TCP_DEBUG - printk(KERN_DEBUG "reset_xmit_timer sk=%p %d when=0x%lx, caller=%p\n", sk, what, when, current_text_addr()); +#ifdef INET_CSK_DEBUG + pr_debug("reset_xmit_timer: sk=%p %d when=0x%lx, caller=%p\n", + sk, what, when, current_text_addr()); #endif when = TCP_RTO_MAX; } - switch (what) { - case TCP_TIME_RETRANS: - case TCP_TIME_PROBE0: - tp->pending = what; - tp->timeout = jiffies+when; - sk_reset_timer(sk, &tp->retransmit_timer, tp->timeout); - break; - - case TCP_TIME_DACK: - tp->ack.pending |= TCP_ACK_TIMER; - tp->ack.timeout = jiffies+when; - sk_reset_timer(sk, &tp->delack_timer, tp->ack.timeout); - break; - - default: -#ifdef TCP_DEBUG - printk(tcp_timer_bug_msg); + if (what == ICSK_TIME_RETRANS || what == ICSK_TIME_PROBE0) { + icsk->icsk_pending = what; + icsk->icsk_timeout = jiffies + when; + sk_reset_timer(sk, &icsk->icsk_retransmit_timer, icsk->icsk_timeout); + } else if (what == ICSK_TIME_DACK) { + icsk->icsk_ack.pending |= ICSK_ACK_TIMER; + icsk->icsk_ack.timeout = jiffies + when; + sk_reset_timer(sk, &icsk->icsk_delack_timer, icsk->icsk_ack.timeout); + } +#ifdef INET_CSK_DEBUG + else { + pr_debug(inet_csk_timer_bug_msg); + } #endif - return; - }; } /* Initialize RCV_MSS value. 
@@ -637,7 +630,7 @@ static inline void tcp_initialize_rcv_mss(struct sock *sk) hint = min(hint, TCP_MIN_RCVMSS); hint = max(hint, TCP_MIN_MSS); - tp->ack.rcv_mss = hint; + inet_csk(sk)->icsk_ack.rcv_mss = hint; } static __inline__ void __tcp_fast_path_on(struct tcp_sock *tp, u32 snd_wnd) @@ -772,7 +765,7 @@ static inline void tcp_packets_out_inc(struct sock *sk, tp->packets_out += tcp_skb_pcount(skb); if (!orig) - tcp_reset_xmit_timer(sk, TCP_TIME_RETRANS, tp->rto); + inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, inet_csk(sk)->icsk_rto); } static inline void tcp_packets_out_dec(struct tcp_sock *tp, @@ -939,8 +932,9 @@ static __inline__ void tcp_minshall_update(struct tcp_sock *tp, int mss, static __inline__ void tcp_check_probe_timer(struct sock *sk, struct tcp_sock *tp) { - if (!tp->packets_out && !tp->pending) - tcp_reset_xmit_timer(sk, TCP_TIME_PROBE0, tp->rto); + const struct inet_connection_sock *icsk = inet_csk(sk); + if (!tp->packets_out && !icsk->icsk_pending) + inet_csk_reset_xmit_timer(sk, ICSK_TIME_PROBE0, icsk->icsk_rto); } static __inline__ void tcp_push_pending_frames(struct sock *sk, @@ -1021,8 +1015,9 @@ static __inline__ int tcp_prequeue(struct sock *sk, struct sk_buff *skb) tp->ucopy.memory = 0; } else if (skb_queue_len(&tp->ucopy.prequeue) == 1) { wake_up_interruptible(sk->sk_sleep); - if (!tcp_ack_scheduled(tp)) - tcp_reset_xmit_timer(sk, TCP_TIME_DACK, (3*TCP_RTO_MIN)/4); + if (!inet_csk_ack_scheduled(sk)) + inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK, + (3 * TCP_RTO_MIN) / 4); } return 1; } @@ -1055,7 +1050,7 @@ static __inline__ void tcp_set_state(struct sock *sk, int state) TCP_INC_STATS(TCP_MIB_ESTABRESETS); sk->sk_prot->unhash(sk); - if (inet_sk(sk)->bind_hash && + if (inet_csk(sk)->icsk_bind_hash && !(sk->sk_userlocks & SOCK_BINDPORT_LOCK)) inet_put_port(&tcp_hashinfo, sk); /* fall through */ @@ -1186,51 +1181,55 @@ static inline int tcp_full_space(const struct sock *sk) return tcp_win_from_space(sk->sk_rcvbuf); } -static inline void 
tcp_acceptq_queue(struct sock *sk, struct request_sock *req, - struct sock *child) +static inline void inet_csk_reqsk_queue_add(struct sock *sk, + struct request_sock *req, + struct sock *child) { - reqsk_queue_add(&tcp_sk(sk)->accept_queue, req, sk, child); + reqsk_queue_add(&inet_csk(sk)->icsk_accept_queue, req, sk, child); } -static inline void -tcp_synq_removed(struct sock *sk, struct request_sock *req) +static inline void inet_csk_reqsk_queue_removed(struct sock *sk, + struct request_sock *req) { - if (reqsk_queue_removed(&tcp_sk(sk)->accept_queue, req) == 0) - tcp_delete_keepalive_timer(sk); + if (reqsk_queue_removed(&inet_csk(sk)->icsk_accept_queue, req) == 0) + inet_csk_delete_keepalive_timer(sk); } -static inline void tcp_synq_added(struct sock *sk) +static inline void inet_csk_reqsk_queue_added(struct sock *sk, + const unsigned long timeout) { - if (reqsk_queue_added(&tcp_sk(sk)->accept_queue) == 0) - tcp_reset_keepalive_timer(sk, TCP_TIMEOUT_INIT); + if (reqsk_queue_added(&inet_csk(sk)->icsk_accept_queue) == 0) + inet_csk_reset_keepalive_timer(sk, timeout); } -static inline int tcp_synq_len(struct sock *sk) +static inline int inet_csk_reqsk_queue_len(const struct sock *sk) { - return reqsk_queue_len(&tcp_sk(sk)->accept_queue); + return reqsk_queue_len(&inet_csk(sk)->icsk_accept_queue); } -static inline int tcp_synq_young(struct sock *sk) +static inline int inet_csk_reqsk_queue_young(const struct sock *sk) { - return reqsk_queue_len_young(&tcp_sk(sk)->accept_queue); + return reqsk_queue_len_young(&inet_csk(sk)->icsk_accept_queue); } -static inline int tcp_synq_is_full(struct sock *sk) +static inline int inet_csk_reqsk_queue_is_full(const struct sock *sk) { - return reqsk_queue_is_full(&tcp_sk(sk)->accept_queue); + return reqsk_queue_is_full(&inet_csk(sk)->icsk_accept_queue); } -static inline void tcp_synq_unlink(struct tcp_sock *tp, struct request_sock *req, - struct request_sock **prev) +static inline void inet_csk_reqsk_queue_unlink(struct sock *sk, + 
struct request_sock *req, + struct request_sock **prev) { - reqsk_queue_unlink(&tp->accept_queue, req, prev); + reqsk_queue_unlink(&inet_csk(sk)->icsk_accept_queue, req, prev); } -static inline void tcp_synq_drop(struct sock *sk, struct request_sock *req, - struct request_sock **prev) +static inline void inet_csk_reqsk_queue_drop(struct sock *sk, + struct request_sock *req, + struct request_sock **prev) { - tcp_synq_unlink(tcp_sk(sk), req, prev); - tcp_synq_removed(sk, req); + inet_csk_reqsk_queue_unlink(sk, req, prev); + inet_csk_reqsk_queue_removed(sk, req); reqsk_free(req); } @@ -1265,12 +1264,13 @@ static inline int keepalive_time_when(const struct tcp_sock *tp) return tp->keepalive_time ? : sysctl_tcp_keepalive_time; } -static inline int tcp_fin_time(const struct tcp_sock *tp) +static inline int tcp_fin_time(const struct sock *sk) { - int fin_timeout = tp->linger2 ? : sysctl_tcp_fin_timeout; + int fin_timeout = tcp_sk(sk)->linger2 ? : sysctl_tcp_fin_timeout; + const int rto = inet_csk(sk)->icsk_rto; - if (fin_timeout < (tp->rto<<2) - (tp->rto>>1)) - fin_timeout = (tp->rto<<2) - (tp->rto>>1); + if (fin_timeout < (rto << 2) - (rto >> 1)) + fin_timeout = (rto << 2) - (rto >> 1); return fin_timeout; } diff --git a/include/net/tcp_ecn.h b/include/net/tcp_ecn.h index 64980ee8c92a..c6b84397448d 100644 --- a/include/net/tcp_ecn.h +++ b/include/net/tcp_ecn.h @@ -88,7 +88,7 @@ static inline void TCP_ECN_check_ce(struct tcp_sock *tp, struct sk_buff *skb) * it is surely retransmit. It is not in ECN RFC, * but Linux follows this rule. 
*/ else if (INET_ECN_is_not_ect((TCP_SKB_CB(skb)->flags))) - tcp_enter_quickack_mode(tp); + tcp_enter_quickack_mode((struct sock *)tp); } } diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c index d94e962958a4..e8d29fe736d2 100644 --- a/net/ipv4/inet_hashtables.c +++ b/net/ipv4/inet_hashtables.c @@ -19,6 +19,7 @@ #include #include +#include #include /* @@ -56,10 +57,9 @@ void inet_bind_bucket_destroy(kmem_cache_t *cachep, struct inet_bind_bucket *tb) void inet_bind_hash(struct sock *sk, struct inet_bind_bucket *tb, const unsigned short snum) { - struct inet_sock *inet = inet_sk(sk); - inet->num = snum; + inet_sk(sk)->num = snum; sk_add_bind_node(sk, &tb->owners); - inet->bind_hash = tb; + inet_csk(sk)->icsk_bind_hash = tb; } EXPORT_SYMBOL(inet_bind_hash); @@ -69,16 +69,15 @@ EXPORT_SYMBOL(inet_bind_hash); */ static void __inet_put_port(struct inet_hashinfo *hashinfo, struct sock *sk) { - struct inet_sock *inet = inet_sk(sk); - const int bhash = inet_bhashfn(inet->num, hashinfo->bhash_size); + const int bhash = inet_bhashfn(inet_sk(sk)->num, hashinfo->bhash_size); struct inet_bind_hashbucket *head = &hashinfo->bhash[bhash]; struct inet_bind_bucket *tb; spin_lock(&head->lock); - tb = inet->bind_hash; + tb = inet_csk(sk)->icsk_bind_hash; __sk_del_bind_node(sk); - inet->bind_hash = NULL; - inet->num = 0; + inet_csk(sk)->icsk_bind_hash = NULL; + inet_sk(sk)->num = 0; inet_bind_bucket_destroy(hashinfo->bind_bucket_cachep, tb); spin_unlock(&head->lock); } diff --git a/net/ipv4/inet_timewait_sock.c b/net/ipv4/inet_timewait_sock.c index ceb577c74237..5cba59b869fe 100644 --- a/net/ipv4/inet_timewait_sock.c +++ b/net/ipv4/inet_timewait_sock.c @@ -56,6 +56,7 @@ void __inet_twsk_hashdance(struct inet_timewait_sock *tw, struct sock *sk, struct inet_hashinfo *hashinfo) { const struct inet_sock *inet = inet_sk(sk); + const struct inet_connection_sock *icsk = inet_csk(sk); struct inet_ehash_bucket *ehead = &hashinfo->ehash[sk->sk_hashent]; struct 
inet_bind_hashbucket *bhead; /* Step 1: Put TW into bind hash. Original socket stays there too. @@ -64,8 +65,8 @@ void __inet_twsk_hashdance(struct inet_timewait_sock *tw, struct sock *sk, */ bhead = &hashinfo->bhash[inet_bhashfn(inet->num, hashinfo->bhash_size)]; spin_lock(&bhead->lock); - tw->tw_tb = inet->bind_hash; - BUG_TRAP(inet->bind_hash); + tw->tw_tb = icsk->icsk_bind_hash; + BUG_TRAP(icsk->icsk_bind_hash); inet_twsk_add_bind_node(tw, &tw->tw_tb->owners); spin_unlock(&bhead->lock); diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c index 72d014442185..8692cb9d4bdb 100644 --- a/net/ipv4/syncookies.c +++ b/net/ipv4/syncookies.c @@ -180,7 +180,7 @@ static inline struct sock *get_cookie_sock(struct sock *sk, struct sk_buff *skb, child = tp->af_specific->syn_recv_sock(sk, skb, req, dst); if (child) - tcp_acceptq_queue(sk, req, child); + inet_csk_reqsk_queue_add(sk, req, child); else reqsk_free(req); diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index f1a708bf7a97..8177b86570db 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -313,7 +313,7 @@ EXPORT_SYMBOL(tcp_enter_memory_pressure); static __inline__ unsigned int tcp_listen_poll(struct sock *sk, poll_table *wait) { - return !reqsk_queue_empty(&tcp_sk(sk)->accept_queue) ? (POLLIN | POLLRDNORM) : 0; + return !reqsk_queue_empty(&inet_csk(sk)->icsk_accept_queue) ? 
(POLLIN | POLLRDNORM) : 0; } /* @@ -458,15 +458,15 @@ int tcp_ioctl(struct sock *sk, int cmd, unsigned long arg) int tcp_listen_start(struct sock *sk) { struct inet_sock *inet = inet_sk(sk); - struct tcp_sock *tp = tcp_sk(sk); - int rc = reqsk_queue_alloc(&tp->accept_queue, TCP_SYNQ_HSIZE); + struct inet_connection_sock *icsk = inet_csk(sk); + int rc = reqsk_queue_alloc(&icsk->icsk_accept_queue, TCP_SYNQ_HSIZE); if (rc != 0) return rc; sk->sk_max_ack_backlog = 0; sk->sk_ack_backlog = 0; - tcp_delack_init(tp); + inet_csk_delack_init(sk); /* There is race window here: we announce ourselves listening, * but this transition is still not validated by get_port(). @@ -484,7 +484,7 @@ int tcp_listen_start(struct sock *sk) } sk->sk_state = TCP_CLOSE; - __reqsk_queue_destroy(&tp->accept_queue); + __reqsk_queue_destroy(&icsk->icsk_accept_queue); return -EADDRINUSE; } @@ -495,14 +495,14 @@ int tcp_listen_start(struct sock *sk) static void tcp_listen_stop (struct sock *sk) { - struct tcp_sock *tp = tcp_sk(sk); + struct inet_connection_sock *icsk = inet_csk(sk); struct request_sock *acc_req; struct request_sock *req; - tcp_delete_keepalive_timer(sk); + inet_csk_delete_keepalive_timer(sk); /* make all the listen_opt local to us */ - acc_req = reqsk_queue_yank_acceptq(&tp->accept_queue); + acc_req = reqsk_queue_yank_acceptq(&icsk->icsk_accept_queue); /* Following specs, it would be better either to send FIN * (and enter FIN-WAIT-1, it is normal close) @@ -512,7 +512,7 @@ static void tcp_listen_stop (struct sock *sk) * To be honest, we are not able to make either * of the variants now. 
--ANK */ - reqsk_queue_destroy(&tp->accept_queue); + reqsk_queue_destroy(&icsk->icsk_accept_queue); while ((req = acc_req) != NULL) { struct sock *child = req->sk; @@ -1039,20 +1039,21 @@ static void cleanup_rbuf(struct sock *sk, int copied) BUG_TRAP(!skb || before(tp->copied_seq, TCP_SKB_CB(skb)->end_seq)); #endif - if (tcp_ack_scheduled(tp)) { + if (inet_csk_ack_scheduled(sk)) { + const struct inet_connection_sock *icsk = inet_csk(sk); /* Delayed ACKs frequently hit locked sockets during bulk * receive. */ - if (tp->ack.blocked || + if (icsk->icsk_ack.blocked || /* Once-per-two-segments ACK was not sent by tcp_input.c */ - tp->rcv_nxt - tp->rcv_wup > tp->ack.rcv_mss || + tp->rcv_nxt - tp->rcv_wup > icsk->icsk_ack.rcv_mss || /* * If this read emptied read buffer, we send ACK, if * connection is not bidirectional, user drained * receive buffer and there was a small segment * in queue. */ - (copied > 0 && (tp->ack.pending & TCP_ACK_PUSHED) && - !tp->ack.pingpong && !atomic_read(&sk->sk_rmem_alloc))) + (copied > 0 && (icsk->icsk_ack.pending & ICSK_ACK_PUSHED) && + !icsk->icsk_ack.pingpong && !atomic_read(&sk->sk_rmem_alloc))) time_to_ack = 1; } @@ -1569,7 +1570,7 @@ void tcp_destroy_sock(struct sock *sk) BUG_TRAP(sk_unhashed(sk)); /* If it has not 0 inet_sk(sk)->num, it must be bound */ - BUG_TRAP(!inet_sk(sk)->num || inet_sk(sk)->bind_hash); + BUG_TRAP(!inet_sk(sk)->num || inet_csk(sk)->icsk_bind_hash); sk->sk_prot->destroy(sk); @@ -1698,10 +1699,10 @@ adjudge_to_death: tcp_send_active_reset(sk, GFP_ATOMIC); NET_INC_STATS_BH(LINUX_MIB_TCPABORTONLINGER); } else { - int tmo = tcp_fin_time(tp); + const int tmo = tcp_fin_time(sk); if (tmo > TCP_TIMEWAIT_LEN) { - tcp_reset_keepalive_timer(sk, tcp_fin_time(tp)); + inet_csk_reset_keepalive_timer(sk, tcp_fin_time(sk)); } else { atomic_inc(&tcp_orphan_count); tcp_time_wait(sk, TCP_FIN_WAIT2, tmo); @@ -1746,6 +1747,7 @@ static inline int tcp_need_reset(int state) int tcp_disconnect(struct sock *sk, int flags) { struct 
inet_sock *inet = inet_sk(sk); + struct inet_connection_sock *icsk = inet_csk(sk); struct tcp_sock *tp = tcp_sk(sk); int err = 0; int old_state = sk->sk_state; @@ -1782,7 +1784,7 @@ int tcp_disconnect(struct sock *sk, int flags) tp->srtt = 0; if ((tp->write_seq += tp->max_window + 2) == 0) tp->write_seq = 1; - tp->backoff = 0; + icsk->icsk_backoff = 0; tp->snd_cwnd = 2; tp->probes_out = 0; tp->packets_out = 0; @@ -1790,13 +1792,13 @@ int tcp_disconnect(struct sock *sk, int flags) tp->snd_cwnd_cnt = 0; tcp_set_ca_state(tp, TCP_CA_Open); tcp_clear_retrans(tp); - tcp_delack_init(tp); + inet_csk_delack_init(sk); sk->sk_send_head = NULL; tp->rx_opt.saw_tstamp = 0; tcp_sack_reset(&tp->rx_opt); __sk_dst_reset(sk); - BUG_TRAP(!inet->num || inet->bind_hash); + BUG_TRAP(!inet->num || icsk->icsk_bind_hash); sk->sk_error_report(sk); return err; @@ -1808,7 +1810,7 @@ int tcp_disconnect(struct sock *sk, int flags) */ static int wait_for_connect(struct sock *sk, long timeo) { - struct tcp_sock *tp = tcp_sk(sk); + struct inet_connection_sock *icsk = inet_csk(sk); DEFINE_WAIT(wait); int err; @@ -1830,11 +1832,11 @@ static int wait_for_connect(struct sock *sk, long timeo) prepare_to_wait_exclusive(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE); release_sock(sk); - if (reqsk_queue_empty(&tp->accept_queue)) + if (reqsk_queue_empty(&icsk->icsk_accept_queue)) timeo = schedule_timeout(timeo); lock_sock(sk); err = 0; - if (!reqsk_queue_empty(&tp->accept_queue)) + if (!reqsk_queue_empty(&icsk->icsk_accept_queue)) break; err = -EINVAL; if (sk->sk_state != TCP_LISTEN) @@ -1854,9 +1856,9 @@ static int wait_for_connect(struct sock *sk, long timeo) * This will accept the next outstanding connection. 
*/ -struct sock *tcp_accept(struct sock *sk, int flags, int *err) +struct sock *inet_csk_accept(struct sock *sk, int flags, int *err) { - struct tcp_sock *tp = tcp_sk(sk); + struct inet_connection_sock *icsk = inet_csk(sk); struct sock *newsk; int error; @@ -1870,7 +1872,7 @@ struct sock *tcp_accept(struct sock *sk, int flags, int *err) goto out_err; /* Find already established connection */ - if (reqsk_queue_empty(&tp->accept_queue)) { + if (reqsk_queue_empty(&icsk->icsk_accept_queue)) { long timeo = sock_rcvtimeo(sk, flags & O_NONBLOCK); /* If this is a non blocking socket don't sleep */ @@ -1883,7 +1885,7 @@ struct sock *tcp_accept(struct sock *sk, int flags, int *err) goto out_err; } - newsk = reqsk_queue_get_child(&tp->accept_queue, sk); + newsk = reqsk_queue_get_child(&icsk->icsk_accept_queue, sk); BUG_TRAP(newsk->sk_state != TCP_SYN_RECV); out: release_sock(sk); @@ -1901,6 +1903,7 @@ int tcp_setsockopt(struct sock *sk, int level, int optname, char __user *optval, int optlen) { struct tcp_sock *tp = tcp_sk(sk); + struct inet_connection_sock *icsk = inet_csk(sk); int val; int err = 0; @@ -1999,7 +2002,7 @@ int tcp_setsockopt(struct sock *sk, int level, int optname, char __user *optval, elapsed = tp->keepalive_time - elapsed; else elapsed = 0; - tcp_reset_keepalive_timer(sk, elapsed); + inet_csk_reset_keepalive_timer(sk, elapsed); } } break; @@ -2019,7 +2022,7 @@ int tcp_setsockopt(struct sock *sk, int level, int optname, char __user *optval, if (val < 1 || val > MAX_TCP_SYNCNT) err = -EINVAL; else - tp->syn_retries = val; + icsk->icsk_syn_retries = val; break; case TCP_LINGER2: @@ -2058,16 +2061,16 @@ int tcp_setsockopt(struct sock *sk, int level, int optname, char __user *optval, case TCP_QUICKACK: if (!val) { - tp->ack.pingpong = 1; + icsk->icsk_ack.pingpong = 1; } else { - tp->ack.pingpong = 0; + icsk->icsk_ack.pingpong = 0; if ((1 << sk->sk_state) & (TCPF_ESTABLISHED | TCPF_CLOSE_WAIT) && - tcp_ack_scheduled(tp)) { - tp->ack.pending |= TCP_ACK_PUSHED; + 
inet_csk_ack_scheduled(sk)) { + icsk->icsk_ack.pending |= ICSK_ACK_PUSHED; cleanup_rbuf(sk, 1); if (!(val & 1)) - tp->ack.pingpong = 1; + icsk->icsk_ack.pingpong = 1; } } break; @@ -2084,15 +2087,16 @@ int tcp_setsockopt(struct sock *sk, int level, int optname, char __user *optval, void tcp_get_info(struct sock *sk, struct tcp_info *info) { struct tcp_sock *tp = tcp_sk(sk); + const struct inet_connection_sock *icsk = inet_csk(sk); u32 now = tcp_time_stamp; memset(info, 0, sizeof(*info)); info->tcpi_state = sk->sk_state; info->tcpi_ca_state = tp->ca_state; - info->tcpi_retransmits = tp->retransmits; + info->tcpi_retransmits = icsk->icsk_retransmits; info->tcpi_probes = tp->probes_out; - info->tcpi_backoff = tp->backoff; + info->tcpi_backoff = icsk->icsk_backoff; if (tp->rx_opt.tstamp_ok) info->tcpi_options |= TCPI_OPT_TIMESTAMPS; @@ -2107,10 +2111,10 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info) if (tp->ecn_flags&TCP_ECN_OK) info->tcpi_options |= TCPI_OPT_ECN; - info->tcpi_rto = jiffies_to_usecs(tp->rto); - info->tcpi_ato = jiffies_to_usecs(tp->ack.ato); + info->tcpi_rto = jiffies_to_usecs(icsk->icsk_rto); + info->tcpi_ato = jiffies_to_usecs(icsk->icsk_ack.ato); info->tcpi_snd_mss = tp->mss_cache; - info->tcpi_rcv_mss = tp->ack.rcv_mss; + info->tcpi_rcv_mss = icsk->icsk_ack.rcv_mss; info->tcpi_unacked = tp->packets_out; info->tcpi_sacked = tp->sacked_out; @@ -2119,7 +2123,7 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info) info->tcpi_fackets = tp->fackets_out; info->tcpi_last_data_sent = jiffies_to_msecs(now - tp->lsndtime); - info->tcpi_last_data_recv = jiffies_to_msecs(now - tp->ack.lrcvtime); + info->tcpi_last_data_recv = jiffies_to_msecs(now - icsk->icsk_ack.lrcvtime); info->tcpi_last_ack_recv = jiffies_to_msecs(now - tp->rcv_tstamp); info->tcpi_pmtu = tp->pmtu_cookie; @@ -2179,7 +2183,7 @@ int tcp_getsockopt(struct sock *sk, int level, int optname, char __user *optval, val = tp->keepalive_probes ? 
: sysctl_tcp_keepalive_probes; break; case TCP_SYNCNT: - val = tp->syn_retries ? : sysctl_tcp_syn_retries; + val = inet_csk(sk)->icsk_syn_retries ? : sysctl_tcp_syn_retries; break; case TCP_LINGER2: val = tp->linger2; @@ -2209,7 +2213,7 @@ int tcp_getsockopt(struct sock *sk, int level, int optname, char __user *optval, return 0; } case TCP_QUICKACK: - val = !tp->ack.pingpong; + val = !inet_csk(sk)->icsk_ack.pingpong; break; case TCP_CONGESTION: @@ -2340,7 +2344,7 @@ void __init tcp_init(void) tcp_register_congestion_control(&tcp_reno); } -EXPORT_SYMBOL(tcp_accept); +EXPORT_SYMBOL(inet_csk_accept); EXPORT_SYMBOL(tcp_close); EXPORT_SYMBOL(tcp_destroy_sock); EXPORT_SYMBOL(tcp_disconnect); diff --git a/net/ipv4/tcp_diag.c b/net/ipv4/tcp_diag.c index 60c6a797cc50..5f4c74f45e82 100644 --- a/net/ipv4/tcp_diag.c +++ b/net/ipv4/tcp_diag.c @@ -48,8 +48,9 @@ static struct sock *tcpnl; static int tcpdiag_fill(struct sk_buff *skb, struct sock *sk, int ext, u32 pid, u32 seq, u16 nlmsg_flags) { - struct inet_sock *inet = inet_sk(sk); + const struct inet_sock *inet = inet_sk(sk); struct tcp_sock *tp = tcp_sk(sk); + const struct inet_connection_sock *icsk = inet_csk(sk); struct tcpdiagmsg *r; struct nlmsghdr *nlh; struct tcp_info *info = NULL; @@ -129,14 +130,14 @@ static int tcpdiag_fill(struct sk_buff *skb, struct sock *sk, #define EXPIRES_IN_MS(tmo) ((tmo-jiffies)*1000+HZ-1)/HZ - if (tp->pending == TCP_TIME_RETRANS) { + if (icsk->icsk_pending == ICSK_TIME_RETRANS) { r->tcpdiag_timer = 1; - r->tcpdiag_retrans = tp->retransmits; - r->tcpdiag_expires = EXPIRES_IN_MS(tp->timeout); - } else if (tp->pending == TCP_TIME_PROBE0) { + r->tcpdiag_retrans = icsk->icsk_retransmits; + r->tcpdiag_expires = EXPIRES_IN_MS(icsk->icsk_timeout); + } else if (icsk->icsk_pending == ICSK_TIME_PROBE0) { r->tcpdiag_timer = 4; r->tcpdiag_retrans = tp->probes_out; - r->tcpdiag_expires = EXPIRES_IN_MS(tp->timeout); + r->tcpdiag_expires = EXPIRES_IN_MS(icsk->icsk_timeout); } else if 
(timer_pending(&sk->sk_timer)) { r->tcpdiag_timer = 2; r->tcpdiag_retrans = tp->probes_out; @@ -497,7 +498,7 @@ static int tcpdiag_dump_reqs(struct sk_buff *skb, struct sock *sk, { struct tcpdiag_entry entry; struct tcpdiagreq *r = NLMSG_DATA(cb->nlh); - struct tcp_sock *tp = tcp_sk(sk); + struct inet_connection_sock *icsk = inet_csk(sk); struct listen_sock *lopt; struct rtattr *bc = NULL; struct inet_sock *inet = inet_sk(sk); @@ -513,9 +514,9 @@ static int tcpdiag_dump_reqs(struct sk_buff *skb, struct sock *sk, entry.family = sk->sk_family; - read_lock_bh(&tp->accept_queue.syn_wait_lock); + read_lock_bh(&icsk->icsk_accept_queue.syn_wait_lock); - lopt = tp->accept_queue.listen_opt; + lopt = icsk->icsk_accept_queue.listen_opt; if (!lopt || !lopt->qlen) goto out; @@ -572,7 +573,7 @@ static int tcpdiag_dump_reqs(struct sk_buff *skb, struct sock *sk, } out: - read_unlock_bh(&tp->accept_queue.syn_wait_lock); + read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock); return err; } diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index ffa24025cd02..8a8c5c2d90cb 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -114,20 +114,21 @@ int sysctl_tcp_moderate_rcvbuf = 1; /* Adapt the MSS value used to make delayed ack decision to the * real world. */ -static inline void tcp_measure_rcv_mss(struct tcp_sock *tp, - struct sk_buff *skb) +static inline void tcp_measure_rcv_mss(struct sock *sk, + const struct sk_buff *skb) { - unsigned int len, lss; + struct inet_connection_sock *icsk = inet_csk(sk); + const unsigned int lss = icsk->icsk_ack.last_seg_size; + unsigned int len; - lss = tp->ack.last_seg_size; - tp->ack.last_seg_size = 0; + icsk->icsk_ack.last_seg_size = 0; /* skb->len may jitter because of SACKs, even if peer * sends good full-sized frames. 
*/ len = skb->len; - if (len >= tp->ack.rcv_mss) { - tp->ack.rcv_mss = len; + if (len >= icsk->icsk_ack.rcv_mss) { + icsk->icsk_ack.rcv_mss = len; } else { /* Otherwise, we make more careful check taking into account, * that SACKs block is variable. @@ -147,41 +148,44 @@ static inline void tcp_measure_rcv_mss(struct tcp_sock *tp, * tcp header plus fixed timestamp option length. * Resulting "len" is MSS free of SACK jitter. */ - len -= tp->tcp_header_len; - tp->ack.last_seg_size = len; + len -= tcp_sk(sk)->tcp_header_len; + icsk->icsk_ack.last_seg_size = len; if (len == lss) { - tp->ack.rcv_mss = len; + icsk->icsk_ack.rcv_mss = len; return; } } - tp->ack.pending |= TCP_ACK_PUSHED; + icsk->icsk_ack.pending |= ICSK_ACK_PUSHED; } } -static void tcp_incr_quickack(struct tcp_sock *tp) +static void tcp_incr_quickack(struct sock *sk) { - unsigned quickacks = tp->rcv_wnd/(2*tp->ack.rcv_mss); + struct inet_connection_sock *icsk = inet_csk(sk); + unsigned quickacks = tcp_sk(sk)->rcv_wnd / (2 * icsk->icsk_ack.rcv_mss); if (quickacks==0) quickacks=2; - if (quickacks > tp->ack.quick) - tp->ack.quick = min(quickacks, TCP_MAX_QUICKACKS); + if (quickacks > icsk->icsk_ack.quick) + icsk->icsk_ack.quick = min(quickacks, TCP_MAX_QUICKACKS); } -void tcp_enter_quickack_mode(struct tcp_sock *tp) +void tcp_enter_quickack_mode(struct sock *sk) { - tcp_incr_quickack(tp); - tp->ack.pingpong = 0; - tp->ack.ato = TCP_ATO_MIN; + struct inet_connection_sock *icsk = inet_csk(sk); + tcp_incr_quickack(sk); + icsk->icsk_ack.pingpong = 0; + icsk->icsk_ack.ato = TCP_ATO_MIN; } /* Send ACKs quickly, if "quick" count is not exhausted * and the session is not interactive. 
*/ -static __inline__ int tcp_in_quickack_mode(struct tcp_sock *tp) +static inline int tcp_in_quickack_mode(const struct sock *sk) { - return (tp->ack.quick && !tp->ack.pingpong); + const struct inet_connection_sock *icsk = inet_csk(sk); + return icsk->icsk_ack.quick && !icsk->icsk_ack.pingpong; } /* Buffer size and advertised window tuning. @@ -224,8 +228,8 @@ static void tcp_fixup_sndbuf(struct sock *sk) */ /* Slow part of check#2. */ -static int __tcp_grow_window(struct sock *sk, struct tcp_sock *tp, - struct sk_buff *skb) +static int __tcp_grow_window(const struct sock *sk, struct tcp_sock *tp, + const struct sk_buff *skb) { /* Optimize this! */ int truesize = tcp_win_from_space(skb->truesize)/2; @@ -233,7 +237,7 @@ static int __tcp_grow_window(struct sock *sk, struct tcp_sock *tp, while (tp->rcv_ssthresh <= window) { if (truesize <= skb->len) - return 2*tp->ack.rcv_mss; + return 2 * inet_csk(sk)->icsk_ack.rcv_mss; truesize >>= 1; window >>= 1; @@ -260,7 +264,7 @@ static inline void tcp_grow_window(struct sock *sk, struct tcp_sock *tp, if (incr) { tp->rcv_ssthresh = min(tp->rcv_ssthresh + incr, tp->window_clamp); - tp->ack.quick |= 1; + inet_csk(sk)->icsk_ack.quick |= 1; } } } @@ -325,7 +329,7 @@ static void tcp_clamp_window(struct sock *sk, struct tcp_sock *tp) unsigned int app_win = tp->rcv_nxt - tp->copied_seq; int ofo_win = 0; - tp->ack.quick = 0; + inet_csk(sk)->icsk_ack.quick = 0; skb_queue_walk(&tp->out_of_order_queue, skb) { ofo_win += skb->len; @@ -346,8 +350,8 @@ static void tcp_clamp_window(struct sock *sk, struct tcp_sock *tp) app_win += ofo_win; if (atomic_read(&sk->sk_rmem_alloc) >= 2 * sk->sk_rcvbuf) app_win >>= 1; - if (app_win > tp->ack.rcv_mss) - app_win -= tp->ack.rcv_mss; + if (app_win > inet_csk(sk)->icsk_ack.rcv_mss) + app_win -= inet_csk(sk)->icsk_ack.rcv_mss; app_win = max(app_win, 2U*tp->advmss); if (!ofo_win) @@ -415,11 +419,12 @@ new_measure: tp->rcv_rtt_est.time = tcp_time_stamp; } -static inline void tcp_rcv_rtt_measure_ts(struct 
tcp_sock *tp, struct sk_buff *skb) +static inline void tcp_rcv_rtt_measure_ts(struct sock *sk, const struct sk_buff *skb) { + struct tcp_sock *tp = tcp_sk(sk); if (tp->rx_opt.rcv_tsecr && (TCP_SKB_CB(skb)->end_seq - - TCP_SKB_CB(skb)->seq >= tp->ack.rcv_mss)) + TCP_SKB_CB(skb)->seq >= inet_csk(sk)->icsk_ack.rcv_mss)) tcp_rcv_rtt_update(tp, tcp_time_stamp - tp->rx_opt.rcv_tsecr, 0); } @@ -492,41 +497,42 @@ new_measure: */ static void tcp_event_data_recv(struct sock *sk, struct tcp_sock *tp, struct sk_buff *skb) { + struct inet_connection_sock *icsk = inet_csk(sk); u32 now; - tcp_schedule_ack(tp); + inet_csk_schedule_ack(sk); - tcp_measure_rcv_mss(tp, skb); + tcp_measure_rcv_mss(sk, skb); tcp_rcv_rtt_measure(tp); now = tcp_time_stamp; - if (!tp->ack.ato) { + if (!icsk->icsk_ack.ato) { /* The _first_ data packet received, initialize * delayed ACK engine. */ - tcp_incr_quickack(tp); - tp->ack.ato = TCP_ATO_MIN; + tcp_incr_quickack(sk); + icsk->icsk_ack.ato = TCP_ATO_MIN; } else { - int m = now - tp->ack.lrcvtime; + int m = now - icsk->icsk_ack.lrcvtime; if (m <= TCP_ATO_MIN/2) { /* The fastest case is the first. */ - tp->ack.ato = (tp->ack.ato>>1) + TCP_ATO_MIN/2; - } else if (m < tp->ack.ato) { - tp->ack.ato = (tp->ack.ato>>1) + m; - if (tp->ack.ato > tp->rto) - tp->ack.ato = tp->rto; - } else if (m > tp->rto) { + icsk->icsk_ack.ato = (icsk->icsk_ack.ato >> 1) + TCP_ATO_MIN / 2; + } else if (m < icsk->icsk_ack.ato) { + icsk->icsk_ack.ato = (icsk->icsk_ack.ato >> 1) + m; + if (icsk->icsk_ack.ato > icsk->icsk_rto) + icsk->icsk_ack.ato = icsk->icsk_rto; + } else if (m > icsk->icsk_rto) { /* Too long gap. Apparently sender falled to * restart window, so that we send ACKs quickly. 
*/ - tcp_incr_quickack(tp); + tcp_incr_quickack(sk); sk_stream_mem_reclaim(sk); } } - tp->ack.lrcvtime = now; + icsk->icsk_ack.lrcvtime = now; TCP_ECN_check_ce(tp, skb); @@ -611,8 +617,9 @@ static void tcp_rtt_estimator(struct tcp_sock *tp, __u32 mrtt, u32 *usrtt) /* Calculate rto without backoff. This is the second half of Van Jacobson's * routine referred to above. */ -static inline void tcp_set_rto(struct tcp_sock *tp) +static inline void tcp_set_rto(struct sock *sk) { + const struct tcp_sock *tp = tcp_sk(sk); /* Old crap is replaced with new one. 8) * * More seriously: @@ -623,7 +630,7 @@ static inline void tcp_set_rto(struct tcp_sock *tp) * is invisible. Actually, Linux-2.4 also generates erratic * ACKs in some curcumstances. */ - tp->rto = (tp->srtt >> 3) + tp->rttvar; + inet_csk(sk)->icsk_rto = (tp->srtt >> 3) + tp->rttvar; /* 2. Fixups made earlier cannot be right. * If we do not estimate RTO correctly without them, @@ -635,10 +642,10 @@ static inline void tcp_set_rto(struct tcp_sock *tp) /* NOTE: clamping at TCP_RTO_MIN is not required, current algo * guarantees that rto is higher. */ -static inline void tcp_bound_rto(struct tcp_sock *tp) +static inline void tcp_bound_rto(struct sock *sk) { - if (tp->rto > TCP_RTO_MAX) - tp->rto = TCP_RTO_MAX; + if (inet_csk(sk)->icsk_rto > TCP_RTO_MAX) + inet_csk(sk)->icsk_rto = TCP_RTO_MAX; } /* Save metrics learned by this TCP session. @@ -658,7 +665,7 @@ void tcp_update_metrics(struct sock *sk) if (dst && (dst->flags&DST_HOST)) { int m; - if (tp->backoff || !tp->srtt) { + if (inet_csk(sk)->icsk_backoff || !tp->srtt) { /* This session failed to estimate rtt. Why? * Probably, no packets returned in time. * Reset our results. 
@@ -801,9 +808,9 @@ static void tcp_init_metrics(struct sock *sk) tp->mdev = dst_metric(dst, RTAX_RTTVAR); tp->mdev_max = tp->rttvar = max(tp->mdev, TCP_RTO_MIN); } - tcp_set_rto(tp); - tcp_bound_rto(tp); - if (tp->rto < TCP_TIMEOUT_INIT && !tp->rx_opt.saw_tstamp) + tcp_set_rto(sk); + tcp_bound_rto(sk); + if (inet_csk(sk)->icsk_rto < TCP_TIMEOUT_INIT && !tp->rx_opt.saw_tstamp) goto reset; tp->snd_cwnd = tcp_init_cwnd(tp, dst); tp->snd_cwnd_stamp = tcp_time_stamp; @@ -817,7 +824,7 @@ reset: if (!tp->rx_opt.saw_tstamp && tp->srtt) { tp->srtt = 0; tp->mdev = tp->mdev_max = tp->rttvar = TCP_TIMEOUT_INIT; - tp->rto = TCP_TIMEOUT_INIT; + inet_csk(sk)->icsk_rto = TCP_TIMEOUT_INIT; } } @@ -1118,7 +1125,7 @@ void tcp_enter_frto(struct sock *sk) if (tp->ca_state <= TCP_CA_Disorder || tp->snd_una == tp->high_seq || - (tp->ca_state == TCP_CA_Loss && !tp->retransmits)) { + (tp->ca_state == TCP_CA_Loss && !inet_csk(sk)->icsk_retransmits)) { tp->prior_ssthresh = tcp_current_ssthresh(tp); tp->snd_ssthresh = tp->ca_ops->ssthresh(tp); tcp_ca_event(tp, CA_EVENT_FRTO); @@ -1214,7 +1221,7 @@ void tcp_enter_loss(struct sock *sk, int how) /* Reduce ssthresh if it has not yet been made inside this window. 
*/ if (tp->ca_state <= TCP_CA_Disorder || tp->snd_una == tp->high_seq || - (tp->ca_state == TCP_CA_Loss && !tp->retransmits)) { + (tp->ca_state == TCP_CA_Loss && !inet_csk(sk)->icsk_retransmits)) { tp->prior_ssthresh = tcp_current_ssthresh(tp); tp->snd_ssthresh = tp->ca_ops->ssthresh(tp); tcp_ca_event(tp, CA_EVENT_LOSS); @@ -1253,7 +1260,7 @@ void tcp_enter_loss(struct sock *sk, int how) TCP_ECN_queue_cwr(tp); } -static int tcp_check_sack_reneging(struct sock *sk, struct tcp_sock *tp) +static int tcp_check_sack_reneging(struct sock *sk) { struct sk_buff *skb; @@ -1268,9 +1275,10 @@ static int tcp_check_sack_reneging(struct sock *sk, struct tcp_sock *tp) NET_INC_STATS_BH(LINUX_MIB_TCPSACKRENEGING); tcp_enter_loss(sk, 1); - tp->retransmits++; + inet_csk(sk)->icsk_retransmits++; tcp_retransmit_skb(sk, skb_peek(&sk->sk_write_queue)); - tcp_reset_xmit_timer(sk, TCP_TIME_RETRANS, tp->rto); + inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, + inet_csk(sk)->icsk_rto); return 1; } return 0; @@ -1281,15 +1289,15 @@ static inline int tcp_fackets_out(struct tcp_sock *tp) return IsReno(tp) ? tp->sacked_out+1 : tp->fackets_out; } -static inline int tcp_skb_timedout(struct tcp_sock *tp, struct sk_buff *skb) +static inline int tcp_skb_timedout(struct sock *sk, struct sk_buff *skb) { - return (tcp_time_stamp - TCP_SKB_CB(skb)->when > tp->rto); + return (tcp_time_stamp - TCP_SKB_CB(skb)->when > inet_csk(sk)->icsk_rto); } static inline int tcp_head_timedout(struct sock *sk, struct tcp_sock *tp) { return tp->packets_out && - tcp_skb_timedout(tp, skb_peek(&sk->sk_write_queue)); + tcp_skb_timedout(sk, skb_peek(&sk->sk_write_queue)); } /* Linux NewReno/SACK/FACK/ECN state machine. 
@@ -1509,7 +1517,7 @@ static void tcp_update_scoreboard(struct sock *sk, struct tcp_sock *tp) struct sk_buff *skb; sk_stream_for_retrans_queue(skb, sk) { - if (tcp_skb_timedout(tp, skb) && + if (tcp_skb_timedout(sk, skb) && !(TCP_SKB_CB(skb)->sacked&TCPCB_TAGBITS)) { TCP_SKB_CB(skb)->sacked |= TCPCB_LOST; tp->lost_out += tcp_skb_pcount(skb); @@ -1676,7 +1684,7 @@ static int tcp_try_undo_loss(struct sock *sk, struct tcp_sock *tp) tp->left_out = tp->sacked_out; tcp_undo_cwr(tp, 1); NET_INC_STATS_BH(LINUX_MIB_TCPLOSSUNDO); - tp->retransmits = 0; + inet_csk(sk)->icsk_retransmits = 0; tp->undo_marker = 0; if (!IsReno(tp)) tcp_set_ca_state(tp, TCP_CA_Open); @@ -1750,7 +1758,7 @@ tcp_fastretrans_alert(struct sock *sk, u32 prior_snd_una, tp->prior_ssthresh = 0; /* B. In all the states check for reneging SACKs. */ - if (tp->sacked_out && tcp_check_sack_reneging(sk, tp)) + if (tp->sacked_out && tcp_check_sack_reneging(sk)) return; /* C. Process data loss notification, provided it is valid. */ @@ -1774,7 +1782,7 @@ tcp_fastretrans_alert(struct sock *sk, u32 prior_snd_una, } else if (!before(tp->snd_una, tp->high_seq)) { switch (tp->ca_state) { case TCP_CA_Loss: - tp->retransmits = 0; + inet_csk(sk)->icsk_retransmits = 0; if (tcp_try_undo_recovery(sk, tp)) return; break; @@ -1824,7 +1832,7 @@ tcp_fastretrans_alert(struct sock *sk, u32 prior_snd_una, break; case TCP_CA_Loss: if (flag&FLAG_DATA_ACKED) - tp->retransmits = 0; + inet_csk(sk)->icsk_retransmits = 0; if (!tcp_try_undo_loss(sk, tp)) { tcp_moderate_cwnd(tp); tcp_xmit_retransmit_queue(sk); @@ -1881,10 +1889,8 @@ tcp_fastretrans_alert(struct sock *sk, u32 prior_snd_una, /* Read draft-ietf-tcplw-high-performance before mucking * with this code. 
(Superceeds RFC1323) */ -static void tcp_ack_saw_tstamp(struct tcp_sock *tp, u32 *usrtt, int flag) +static void tcp_ack_saw_tstamp(struct sock *sk, u32 *usrtt, int flag) { - __u32 seq_rtt; - /* RTTM Rule: A TSecr value received in a segment is used to * update the averaged RTT measurement only if the segment * acknowledges some new data, i.e., only if it advances the @@ -1900,14 +1906,15 @@ static void tcp_ack_saw_tstamp(struct tcp_sock *tp, u32 *usrtt, int flag) * answer arrives rto becomes 120 seconds! If at least one of segments * in window is lost... Voila. --ANK (010210) */ - seq_rtt = tcp_time_stamp - tp->rx_opt.rcv_tsecr; + struct tcp_sock *tp = tcp_sk(sk); + const __u32 seq_rtt = tcp_time_stamp - tp->rx_opt.rcv_tsecr; tcp_rtt_estimator(tp, seq_rtt, usrtt); - tcp_set_rto(tp); - tp->backoff = 0; - tcp_bound_rto(tp); + tcp_set_rto(sk); + inet_csk(sk)->icsk_backoff = 0; + tcp_bound_rto(sk); } -static void tcp_ack_no_tstamp(struct tcp_sock *tp, u32 seq_rtt, u32 *usrtt, int flag) +static void tcp_ack_no_tstamp(struct sock *sk, u32 seq_rtt, u32 *usrtt, int flag) { /* We don't have a timestamp. Can only use * packets that are not retransmitted to determine @@ -1921,20 +1928,21 @@ static void tcp_ack_no_tstamp(struct tcp_sock *tp, u32 seq_rtt, u32 *usrtt, int if (flag & FLAG_RETRANS_DATA_ACKED) return; - tcp_rtt_estimator(tp, seq_rtt, usrtt); - tcp_set_rto(tp); - tp->backoff = 0; - tcp_bound_rto(tp); + tcp_rtt_estimator(tcp_sk(sk), seq_rtt, usrtt); + tcp_set_rto(sk); + inet_csk(sk)->icsk_backoff = 0; + tcp_bound_rto(sk); } -static inline void tcp_ack_update_rtt(struct tcp_sock *tp, - int flag, s32 seq_rtt, u32 *usrtt) +static inline void tcp_ack_update_rtt(struct sock *sk, const int flag, + const s32 seq_rtt, u32 *usrtt) { + const struct tcp_sock *tp = tcp_sk(sk); /* Note that peer MAY send zero echo. In this case it is ignored. 
(rfc1323) */ if (tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr) - tcp_ack_saw_tstamp(tp, usrtt, flag); + tcp_ack_saw_tstamp(sk, usrtt, flag); else if (seq_rtt >= 0) - tcp_ack_no_tstamp(tp, seq_rtt, usrtt, flag); + tcp_ack_no_tstamp(sk, seq_rtt, usrtt, flag); } static inline void tcp_cong_avoid(struct tcp_sock *tp, u32 ack, u32 rtt, @@ -1951,9 +1959,9 @@ static inline void tcp_cong_avoid(struct tcp_sock *tp, u32 ack, u32 rtt, static inline void tcp_ack_packets_out(struct sock *sk, struct tcp_sock *tp) { if (!tp->packets_out) { - tcp_clear_xmit_timer(sk, TCP_TIME_RETRANS); + inet_csk_clear_xmit_timer(sk, ICSK_TIME_RETRANS); } else { - tcp_reset_xmit_timer(sk, TCP_TIME_RETRANS, tp->rto); + inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, inet_csk(sk)->icsk_rto); } } @@ -2090,7 +2098,7 @@ static int tcp_clean_rtx_queue(struct sock *sk, __s32 *seq_rtt_p, s32 *seq_usrtt } if (acked&FLAG_ACKED) { - tcp_ack_update_rtt(tp, acked, seq_rtt, seq_usrtt); + tcp_ack_update_rtt(sk, acked, seq_rtt, seq_usrtt); tcp_ack_packets_out(sk, tp); if (tp->ca_ops->pkts_acked) @@ -2125,20 +2133,21 @@ static int tcp_clean_rtx_queue(struct sock *sk, __s32 *seq_rtt_p, s32 *seq_usrtt static void tcp_ack_probe(struct sock *sk) { - struct tcp_sock *tp = tcp_sk(sk); + const struct tcp_sock *tp = tcp_sk(sk); + struct inet_connection_sock *icsk = inet_csk(sk); /* Was it a usable window open? */ if (!after(TCP_SKB_CB(sk->sk_send_head)->end_seq, tp->snd_una + tp->snd_wnd)) { - tp->backoff = 0; - tcp_clear_xmit_timer(sk, TCP_TIME_PROBE0); + icsk->icsk_backoff = 0; + inet_csk_clear_xmit_timer(sk, ICSK_TIME_PROBE0); /* Socket must be waked up by subsequent tcp_data_snd_check(). * This function is not for random using! 
*/ } else { - tcp_reset_xmit_timer(sk, TCP_TIME_PROBE0, - min(tp->rto << tp->backoff, TCP_RTO_MAX)); + inet_csk_reset_xmit_timer(sk, ICSK_TIME_PROBE0, + min(icsk->icsk_rto << icsk->icsk_backoff, TCP_RTO_MAX)); } } @@ -2157,8 +2166,8 @@ static inline int tcp_may_raise_cwnd(struct tcp_sock *tp, int flag) /* Check that window update is acceptable. * The function assumes that snd_una<=ack<=snd_next. */ -static inline int tcp_may_update_window(struct tcp_sock *tp, u32 ack, - u32 ack_seq, u32 nwin) +static inline int tcp_may_update_window(const struct tcp_sock *tp, const u32 ack, + const u32 ack_seq, const u32 nwin) { return (after(ack, tp->snd_una) || after(ack_seq, tp->snd_wl1) || @@ -2500,8 +2509,9 @@ static inline void tcp_replace_ts_recent(struct tcp_sock *tp, u32 seq) * up to bandwidth of 18Gigabit/sec. 8) ] */ -static int tcp_disordered_ack(struct tcp_sock *tp, struct sk_buff *skb) +static int tcp_disordered_ack(const struct sock *sk, const struct sk_buff *skb) { + struct tcp_sock *tp = tcp_sk(sk); struct tcphdr *th = skb->h.th; u32 seq = TCP_SKB_CB(skb)->seq; u32 ack = TCP_SKB_CB(skb)->ack_seq; @@ -2516,14 +2526,15 @@ static int tcp_disordered_ack(struct tcp_sock *tp, struct sk_buff *skb) !tcp_may_update_window(tp, ack, seq, ntohs(th->window) << tp->rx_opt.snd_wscale) && /* 4. ... and sits in replay window. */ - (s32)(tp->rx_opt.ts_recent - tp->rx_opt.rcv_tsval) <= (tp->rto*1024)/HZ); + (s32)(tp->rx_opt.ts_recent - tp->rx_opt.rcv_tsval) <= (inet_csk(sk)->icsk_rto * 1024) / HZ); } -static inline int tcp_paws_discard(struct tcp_sock *tp, struct sk_buff *skb) +static inline int tcp_paws_discard(const struct sock *sk, const struct sk_buff *skb) { + const struct tcp_sock *tp = tcp_sk(sk); return ((s32)(tp->rx_opt.ts_recent - tp->rx_opt.rcv_tsval) > TCP_PAWS_WINDOW && xtime.tv_sec < tp->rx_opt.ts_recent_stamp + TCP_PAWS_24DAYS && - !tcp_disordered_ack(tp, skb)); + !tcp_disordered_ack(sk, skb)); } /* Check segment sequence number for validity. 
@@ -2586,7 +2597,7 @@ static void tcp_fin(struct sk_buff *skb, struct sock *sk, struct tcphdr *th) { struct tcp_sock *tp = tcp_sk(sk); - tcp_schedule_ack(tp); + inet_csk_schedule_ack(sk); sk->sk_shutdown |= RCV_SHUTDOWN; sock_set_flag(sk, SOCK_DONE); @@ -2596,7 +2607,7 @@ static void tcp_fin(struct sk_buff *skb, struct sock *sk, struct tcphdr *th) case TCP_ESTABLISHED: /* Move to CLOSE_WAIT */ tcp_set_state(sk, TCP_CLOSE_WAIT); - tp->ack.pingpong = 1; + inet_csk(sk)->icsk_ack.pingpong = 1; break; case TCP_CLOSE_WAIT: @@ -2694,7 +2705,7 @@ static void tcp_send_dupack(struct sock *sk, struct sk_buff *skb) if (TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq && before(TCP_SKB_CB(skb)->seq, tp->rcv_nxt)) { NET_INC_STATS_BH(LINUX_MIB_DELAYEDACKLOST); - tcp_enter_quickack_mode(tp); + tcp_enter_quickack_mode(sk); if (tp->rx_opt.sack_ok && sysctl_tcp_dsack) { u32 end_seq = TCP_SKB_CB(skb)->end_seq; @@ -2942,7 +2953,7 @@ queue_and_out: * gap in queue is filled. */ if (skb_queue_empty(&tp->out_of_order_queue)) - tp->ack.pingpong = 0; + inet_csk(sk)->icsk_ack.pingpong = 0; } if (tp->rx_opt.num_sacks) @@ -2963,8 +2974,8 @@ queue_and_out: tcp_dsack_set(tp, TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq); out_of_window: - tcp_enter_quickack_mode(tp); - tcp_schedule_ack(tp); + tcp_enter_quickack_mode(sk); + inet_csk_schedule_ack(sk); drop: __kfree_skb(skb); return; @@ -2974,7 +2985,7 @@ drop: if (!before(TCP_SKB_CB(skb)->seq, tp->rcv_nxt + tcp_receive_window(tp))) goto out_of_window; - tcp_enter_quickack_mode(tp); + tcp_enter_quickack_mode(sk); if (before(TCP_SKB_CB(skb)->seq, tp->rcv_nxt)) { /* Partial packet, seq < rcv_next < end_seq */ @@ -3003,7 +3014,7 @@ drop: /* Disable header prediction. 
*/ tp->pred_flags = 0; - tcp_schedule_ack(tp); + inet_csk_schedule_ack(sk); SOCK_DEBUG(sk, "out of order segment: rcv_next %X seq %X - %X\n", tp->rcv_nxt, TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq); @@ -3373,13 +3384,13 @@ static void __tcp_ack_snd_check(struct sock *sk, int ofo_possible) struct tcp_sock *tp = tcp_sk(sk); /* More than one full frame received... */ - if (((tp->rcv_nxt - tp->rcv_wup) > tp->ack.rcv_mss + if (((tp->rcv_nxt - tp->rcv_wup) > inet_csk(sk)->icsk_ack.rcv_mss /* ... and right edge of window advances far enough. * (tcp_recvmsg() will send ACK otherwise). Or... */ && __tcp_select_window(sk) >= tp->rcv_wnd) || /* We ACK each frame or... */ - tcp_in_quickack_mode(tp) || + tcp_in_quickack_mode(sk) || /* We have out of order data. */ (ofo_possible && skb_peek(&tp->out_of_order_queue))) { @@ -3393,8 +3404,7 @@ static void __tcp_ack_snd_check(struct sock *sk, int ofo_possible) static __inline__ void tcp_ack_snd_check(struct sock *sk) { - struct tcp_sock *tp = tcp_sk(sk); - if (!tcp_ack_scheduled(tp)) { + if (!inet_csk_ack_scheduled(sk)) { /* We sent a data segment already. */ return; } @@ -3648,7 +3658,7 @@ int tcp_rcv_established(struct sock *sk, struct sk_buff *skb, tp->rcv_nxt == tp->rcv_wup) tcp_store_ts_recent(tp); - tcp_rcv_rtt_measure_ts(tp, skb); + tcp_rcv_rtt_measure_ts(sk, skb); /* We know that such packets are checksummed * on entry. 
@@ -3681,7 +3691,7 @@ int tcp_rcv_established(struct sock *sk, struct sk_buff *skb, tp->rcv_nxt == tp->rcv_wup) tcp_store_ts_recent(tp); - tcp_rcv_rtt_measure_ts(tp, skb); + tcp_rcv_rtt_measure_ts(sk, skb); __skb_pull(skb, tcp_header_len); tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq; @@ -3702,7 +3712,7 @@ int tcp_rcv_established(struct sock *sk, struct sk_buff *skb, tp->rcv_nxt == tp->rcv_wup) tcp_store_ts_recent(tp); - tcp_rcv_rtt_measure_ts(tp, skb); + tcp_rcv_rtt_measure_ts(sk, skb); if ((int)skb->truesize > sk->sk_forward_alloc) goto step5; @@ -3722,7 +3732,7 @@ int tcp_rcv_established(struct sock *sk, struct sk_buff *skb, /* Well, only one small jumplet in fast path... */ tcp_ack(sk, skb, FLAG_DATA); tcp_data_snd_check(sk, tp); - if (!tcp_ack_scheduled(tp)) + if (!inet_csk_ack_scheduled(sk)) goto no_ack; } @@ -3744,7 +3754,7 @@ slow_path: * RFC1323: H1. Apply PAWS check first. */ if (tcp_fast_parse_options(skb, th, tp) && tp->rx_opt.saw_tstamp && - tcp_paws_discard(tp, skb)) { + tcp_paws_discard(sk, skb)) { if (!th->rst) { NET_INC_STATS_BH(LINUX_MIB_PAWSESTABREJECTED); tcp_send_dupack(sk, skb); @@ -3791,7 +3801,7 @@ step5: if(th->ack) tcp_ack(sk, skb, FLAG_SLOWPATH); - tcp_rcv_rtt_measure_ts(tp, skb); + tcp_rcv_rtt_measure_ts(sk, skb); /* Process urgent data. */ tcp_urg(sk, skb, th); @@ -3933,7 +3943,7 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb, tcp_init_buffer_space(sk); if (sock_flag(sk, SOCK_KEEPOPEN)) - tcp_reset_keepalive_timer(sk, keepalive_time_when(tp)); + inet_csk_reset_keepalive_timer(sk, keepalive_time_when(tp)); if (!tp->rx_opt.snd_wscale) __tcp_fast_path_on(tp, tp->snd_wnd); @@ -3945,7 +3955,7 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb, sk_wake_async(sk, 0, POLL_OUT); } - if (sk->sk_write_pending || tp->defer_accept || tp->ack.pingpong) { + if (sk->sk_write_pending || tp->defer_accept || inet_csk(sk)->icsk_ack.pingpong) { /* Save one ACK. 
Data will be ready after * several ticks, if write_pending is set. * @@ -3953,12 +3963,12 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb, * look so _wonderfully_ clever, that I was not able * to stand against the temptation 8) --ANK */ - tcp_schedule_ack(tp); - tp->ack.lrcvtime = tcp_time_stamp; - tp->ack.ato = TCP_ATO_MIN; - tcp_incr_quickack(tp); - tcp_enter_quickack_mode(tp); - tcp_reset_xmit_timer(sk, TCP_TIME_DACK, TCP_DELACK_MAX); + inet_csk_schedule_ack(sk); + inet_csk(sk)->icsk_ack.lrcvtime = tcp_time_stamp; + inet_csk(sk)->icsk_ack.ato = TCP_ATO_MIN; + tcp_incr_quickack(sk); + tcp_enter_quickack_mode(sk); + inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK, TCP_DELACK_MAX); discard: __kfree_skb(skb); @@ -4114,7 +4124,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb, } if (tcp_fast_parse_options(skb, th, tp) && tp->rx_opt.saw_tstamp && - tcp_paws_discard(tp, skb)) { + tcp_paws_discard(sk, skb)) { if (!th->rst) { NET_INC_STATS_BH(LINUX_MIB_PAWSESTABREJECTED); tcp_send_dupack(sk, skb); @@ -4183,7 +4193,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb, */ if (tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr && !tp->srtt) - tcp_ack_saw_tstamp(tp, 0, 0); + tcp_ack_saw_tstamp(sk, 0, 0); if (tp->rx_opt.tstamp_ok) tp->advmss -= TCPOLEN_TSTAMP_ALIGNED; @@ -4230,9 +4240,9 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb, return 1; } - tmo = tcp_fin_time(tp); + tmo = tcp_fin_time(sk); if (tmo > TCP_TIMEWAIT_LEN) { - tcp_reset_keepalive_timer(sk, tmo - TCP_TIMEWAIT_LEN); + inet_csk_reset_keepalive_timer(sk, tmo - TCP_TIMEWAIT_LEN); } else if (th->fin || sock_owned_by_user(sk)) { /* Bad case. We could lose such FIN otherwise. * It is not a big problem, but it looks confusing @@ -4240,7 +4250,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb, * if it spins in bh_lock_sock(), but it is really * marginal case. 
*/ - tcp_reset_keepalive_timer(sk, tmo); + inet_csk_reset_keepalive_timer(sk, tmo); } else { tcp_time_wait(sk, TCP_FIN_WAIT2, tmo); goto discard; diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index e7e91e60ac74..2cd41265d17f 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -104,7 +104,7 @@ struct inet_hashinfo __cacheline_aligned tcp_hashinfo = { */ int sysctl_local_port_range[2] = { 1024, 4999 }; -static inline int tcp_bind_conflict(struct sock *sk, struct inet_bind_bucket *tb) +static inline int inet_csk_bind_conflict(struct sock *sk, struct inet_bind_bucket *tb) { const u32 sk_rcv_saddr = inet_rcv_saddr(sk); struct sock *sk2; @@ -113,7 +113,7 @@ static inline int tcp_bind_conflict(struct sock *sk, struct inet_bind_bucket *tb sk_for_each_bound(sk2, node, &tb->owners) { if (sk != sk2 && - !tcp_v6_ipv6only(sk2) && + !inet_v6_ipv6only(sk2) && (!sk->sk_bound_dev_if || !sk2->sk_bound_dev_if || sk->sk_bound_dev_if == sk2->sk_bound_dev_if)) { @@ -132,7 +132,8 @@ static inline int tcp_bind_conflict(struct sock *sk, struct inet_bind_bucket *tb /* Obtain a reference to a local port for the given sock, * if snum is zero it means select any available local port. 
*/ -static int tcp_v4_get_port(struct sock *sk, unsigned short snum) +int inet_csk_get_port(struct inet_hashinfo *hashinfo, + struct sock *sk, unsigned short snum) { struct inet_bind_hashbucket *head; struct hlist_node *node; @@ -146,16 +147,16 @@ static int tcp_v4_get_port(struct sock *sk, unsigned short snum) int remaining = (high - low) + 1; int rover; - spin_lock(&tcp_hashinfo.portalloc_lock); - if (tcp_hashinfo.port_rover < low) + spin_lock(&hashinfo->portalloc_lock); + if (hashinfo->port_rover < low) rover = low; else - rover = tcp_hashinfo.port_rover; + rover = hashinfo->port_rover; do { rover++; if (rover > high) rover = low; - head = &tcp_hashinfo.bhash[inet_bhashfn(rover, tcp_hashinfo.bhash_size)]; + head = &hashinfo->bhash[inet_bhashfn(rover, hashinfo->bhash_size)]; spin_lock(&head->lock); inet_bind_bucket_for_each(tb, node, &head->chain) if (tb->port == rover) @@ -164,8 +165,8 @@ static int tcp_v4_get_port(struct sock *sk, unsigned short snum) next: spin_unlock(&head->lock); } while (--remaining > 0); - tcp_hashinfo.port_rover = rover; - spin_unlock(&tcp_hashinfo.portalloc_lock); + hashinfo->port_rover = rover; + spin_unlock(&hashinfo->portalloc_lock); /* Exhausted local port range during search? 
It is not * possible for us to be holding one of the bind hash @@ -182,7 +183,7 @@ static int tcp_v4_get_port(struct sock *sk, unsigned short snum) */ snum = rover; } else { - head = &tcp_hashinfo.bhash[inet_bhashfn(snum, tcp_hashinfo.bhash_size)]; + head = &hashinfo->bhash[inet_bhashfn(snum, hashinfo->bhash_size)]; spin_lock(&head->lock); inet_bind_bucket_for_each(tb, node, &head->chain) if (tb->port == snum) @@ -199,13 +200,13 @@ tb_found: goto success; } else { ret = 1; - if (tcp_bind_conflict(sk, tb)) + if (inet_csk_bind_conflict(sk, tb)) goto fail_unlock; } } tb_not_found: ret = 1; - if (!tb && (tb = inet_bind_bucket_create(tcp_hashinfo.bind_bucket_cachep, head, snum)) == NULL) + if (!tb && (tb = inet_bind_bucket_create(hashinfo->bind_bucket_cachep, head, snum)) == NULL) goto fail_unlock; if (hlist_empty(&tb->owners)) { if (sk->sk_reuse && sk->sk_state != TCP_LISTEN) @@ -216,9 +217,9 @@ tb_not_found: (!sk->sk_reuse || sk->sk_state == TCP_LISTEN)) tb->fastreuse = 0; success: - if (!inet_sk(sk)->bind_hash) + if (!inet_csk(sk)->icsk_bind_hash) inet_bind_hash(sk, tb, snum); - BUG_TRAP(inet_sk(sk)->bind_hash == tb); + BUG_TRAP(inet_csk(sk)->icsk_bind_hash == tb); ret = 0; fail_unlock: @@ -228,6 +229,11 @@ fail: return ret; } +static int tcp_v4_get_port(struct sock *sk, unsigned short snum) +{ + return inet_csk_get_port(&tcp_hashinfo, sk, snum); +} + static void tcp_v4_hash(struct sock *sk) { inet_hash(&tcp_hashinfo, sk); @@ -426,7 +432,7 @@ ok: } head = &tcp_hashinfo.bhash[inet_bhashfn(snum, tcp_hashinfo.bhash_size)]; - tb = inet_sk(sk)->bind_hash; + tb = inet_csk(sk)->icsk_bind_hash; spin_lock_bh(&head->lock); if (sk_head(&tb->owners) == sk && !sk->sk_bind_node.next) { __inet_hash(&tcp_hashinfo, sk, 0); @@ -557,25 +563,28 @@ failure: return err; } -static __inline__ int tcp_v4_iif(struct sk_buff *skb) +static inline int inet_iif(const struct sk_buff *skb) { return ((struct rtable *)skb->dst)->rt_iif; } -static __inline__ u32 tcp_v4_synq_hash(u32 raddr, u16 rport, 
u32 rnd) +static inline u32 inet_synq_hash(const u32 raddr, const u16 rport, + const u32 rnd, const u16 synq_hsize) { - return (jhash_2words(raddr, (u32) rport, rnd) & (TCP_SYNQ_HSIZE - 1)); + return jhash_2words(raddr, (u32)rport, rnd) & (synq_hsize - 1); } -static struct request_sock *tcp_v4_search_req(struct tcp_sock *tp, - struct request_sock ***prevp, - __u16 rport, - __u32 raddr, __u32 laddr) +struct request_sock *inet_csk_search_req(const struct sock *sk, + struct request_sock ***prevp, + const __u16 rport, const __u32 raddr, + const __u32 laddr) { - struct listen_sock *lopt = tp->accept_queue.listen_opt; + const struct inet_connection_sock *icsk = inet_csk(sk); + struct listen_sock *lopt = icsk->icsk_accept_queue.listen_opt; struct request_sock *req, **prev; - for (prev = &lopt->syn_table[tcp_v4_synq_hash(raddr, rport, lopt->hash_rnd)]; + for (prev = &lopt->syn_table[inet_synq_hash(raddr, rport, lopt->hash_rnd, + lopt->nr_table_entries)]; (req = *prev) != NULL; prev = &req->dl_next) { const struct inet_request_sock *ireq = inet_rsk(req); @@ -583,7 +592,7 @@ static struct request_sock *tcp_v4_search_req(struct tcp_sock *tp, if (ireq->rmt_port == rport && ireq->rmt_addr == raddr && ireq->loc_addr == laddr && - TCP_INET_FAMILY(req->rsk_ops->family)) { + AF_INET_FAMILY(req->rsk_ops->family)) { BUG_TRAP(!req->sk); *prevp = prev; break; @@ -595,12 +604,13 @@ static struct request_sock *tcp_v4_search_req(struct tcp_sock *tp, static void tcp_v4_synq_add(struct sock *sk, struct request_sock *req) { - struct tcp_sock *tp = tcp_sk(sk); - struct listen_sock *lopt = tp->accept_queue.listen_opt; - u32 h = tcp_v4_synq_hash(inet_rsk(req)->rmt_addr, inet_rsk(req)->rmt_port, lopt->hash_rnd); + struct inet_connection_sock *icsk = inet_csk(sk); + struct listen_sock *lopt = icsk->icsk_accept_queue.listen_opt; + const u32 h = inet_synq_hash(inet_rsk(req)->rmt_addr, inet_rsk(req)->rmt_port, + lopt->hash_rnd, lopt->nr_table_entries); - reqsk_queue_hash_req(&tp->accept_queue, h, 
req, TCP_TIMEOUT_INIT); - tcp_synq_added(sk); + reqsk_queue_hash_req(&icsk->icsk_accept_queue, h, req, TCP_TIMEOUT_INIT); + inet_csk_reqsk_queue_added(sk, TCP_TIMEOUT_INIT); } @@ -687,7 +697,7 @@ void tcp_v4_err(struct sk_buff *skb, u32 info) } sk = inet_lookup(&tcp_hashinfo, iph->daddr, th->dest, iph->saddr, - th->source, tcp_v4_iif(skb)); + th->source, inet_iif(skb)); if (!sk) { ICMP_INC_STATS_BH(ICMP_MIB_INERRORS); return; @@ -747,8 +757,8 @@ void tcp_v4_err(struct sk_buff *skb, u32 info) if (sock_owned_by_user(sk)) goto out; - req = tcp_v4_search_req(tp, &prev, th->dest, - iph->daddr, iph->saddr); + req = inet_csk_search_req(sk, &prev, th->dest, + iph->daddr, iph->saddr); if (!req) goto out; @@ -768,7 +778,7 @@ void tcp_v4_err(struct sk_buff *skb, u32 info) * created socket, and POSIX does not want network * errors returned from accept(). */ - tcp_synq_drop(sk, req, prev); + inet_csk_reqsk_queue_drop(sk, req, prev); goto out; case TCP_SYN_SENT: @@ -953,8 +963,8 @@ static void tcp_v4_reqsk_send_ack(struct sk_buff *skb, struct request_sock *req) req->ts_recent); } -static struct dst_entry* tcp_v4_route_req(struct sock *sk, - struct request_sock *req) +struct dst_entry* inet_csk_route_req(struct sock *sk, + const struct request_sock *req) { struct rtable *rt; const struct inet_request_sock *ireq = inet_rsk(req); @@ -966,7 +976,7 @@ static struct dst_entry* tcp_v4_route_req(struct sock *sk, ireq->rmt_addr), .saddr = ireq->loc_addr, .tos = RT_CONN_FLAGS(sk) } }, - .proto = IPPROTO_TCP, + .proto = sk->sk_protocol, .uli_u = { .ports = { .sport = inet_sk(sk)->sport, .dport = ireq->rmt_port } } }; @@ -996,7 +1006,7 @@ static int tcp_v4_send_synack(struct sock *sk, struct request_sock *req, struct sk_buff * skb; /* First, grab a route. 
*/ - if (!dst && (dst = tcp_v4_route_req(sk, req)) == NULL) + if (!dst && (dst = inet_csk_route_req(sk, req)) == NULL) goto out; skb = tcp_make_synack(sk, dst, req); @@ -1098,7 +1108,7 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) * limitations, they conserve resources and peer is * evidently real one. */ - if (tcp_synq_is_full(sk) && !isn) { + if (inet_csk_reqsk_queue_is_full(sk) && !isn) { #ifdef CONFIG_SYN_COOKIES if (sysctl_tcp_syncookies) { want_cookie = 1; @@ -1112,7 +1122,7 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) * clogging syn queue with openreqs with exponentially increasing * timeout. */ - if (sk_acceptq_is_full(sk) && tcp_synq_young(sk) > 1) + if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1) goto drop; req = reqsk_alloc(&tcp_request_sock_ops); @@ -1169,7 +1179,7 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) */ if (tmp_opt.saw_tstamp && sysctl_tcp_tw_recycle && - (dst = tcp_v4_route_req(sk, req)) != NULL && + (dst = inet_csk_route_req(sk, req)) != NULL && (peer = rt_get_peer((struct rtable *)dst)) != NULL && peer->v4daddr == saddr) { if (xtime.tv_sec < peer->tcp_ts_stamp + TCP_PAWS_MSL && @@ -1182,7 +1192,7 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) } /* Kill the following clause, if you dislike this way. 
*/ else if (!sysctl_tcp_syncookies && - (sysctl_max_syn_backlog - tcp_synq_len(sk) < + (sysctl_max_syn_backlog - inet_csk_reqsk_queue_len(sk) < (sysctl_max_syn_backlog >> 2)) && (!peer || !peer->tcp_ts_stamp) && (!dst || !dst_metric(dst, RTAX_RTT))) { @@ -1240,7 +1250,7 @@ struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb, if (sk_acceptq_is_full(sk)) goto exit_overflow; - if (!dst && (dst = tcp_v4_route_req(sk, req)) == NULL) + if (!dst && (dst = inet_csk_route_req(sk, req)) == NULL) goto exit; newsk = tcp_create_openreq_child(sk, req, skb); @@ -1257,7 +1267,7 @@ struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb, newinet->saddr = ireq->loc_addr; newinet->opt = ireq->opt; ireq->opt = NULL; - newinet->mc_index = tcp_v4_iif(skb); + newinet->mc_index = inet_iif(skb); newinet->mc_ttl = skb->nh.iph->ttl; newtp->ext_header_len = 0; if (newinet->opt) @@ -1285,18 +1295,17 @@ static struct sock *tcp_v4_hnd_req(struct sock *sk, struct sk_buff *skb) { struct tcphdr *th = skb->h.th; struct iphdr *iph = skb->nh.iph; - struct tcp_sock *tp = tcp_sk(sk); struct sock *nsk; struct request_sock **prev; /* Find possible connection requests. 
*/ - struct request_sock *req = tcp_v4_search_req(tp, &prev, th->source, - iph->saddr, iph->daddr); + struct request_sock *req = inet_csk_search_req(sk, &prev, th->source, + iph->saddr, iph->daddr); if (req) return tcp_check_req(sk, skb, req, prev); nsk = __inet_lookup_established(&tcp_hashinfo, skb->nh.iph->saddr, th->source, skb->nh.iph->daddr, - ntohs(th->dest), tcp_v4_iif(skb)); + ntohs(th->dest), inet_iif(skb)); if (nsk) { if (nsk->sk_state != TCP_TIME_WAIT) { @@ -1440,7 +1449,7 @@ int tcp_v4_rcv(struct sk_buff *skb) sk = __inet_lookup(&tcp_hashinfo, skb->nh.iph->saddr, th->source, skb->nh.iph->daddr, ntohs(th->dest), - tcp_v4_iif(skb)); + inet_iif(skb)); if (!sk) goto no_tcp_socket; @@ -1507,7 +1516,7 @@ do_time_wait: struct sock *sk2 = inet_lookup_listener(&tcp_hashinfo, skb->nh.iph->daddr, ntohs(th->dest), - tcp_v4_iif(skb)); + inet_iif(skb)); if (sk2) { tcp_tw_deschedule((struct inet_timewait_sock *)sk); inet_twsk_put((struct inet_timewait_sock *)sk); @@ -1619,7 +1628,7 @@ static int tcp_v4_init_sock(struct sock *sk) tcp_init_xmit_timers(sk); tcp_prequeue_init(tp); - tp->rto = TCP_TIMEOUT_INIT; + inet_csk(sk)->icsk_rto = TCP_TIMEOUT_INIT; tp->mdev = TCP_TIMEOUT_INIT; /* So many TCP implementations out there (incorrectly) count the @@ -1672,7 +1681,7 @@ int tcp_v4_destroy_sock(struct sock *sk) __skb_queue_purge(&tp->ucopy.prequeue); /* Clean up a referenced TCP bind bucket. 
*/ - if (inet_sk(sk)->bind_hash) + if (inet_csk(sk)->icsk_bind_hash) inet_put_port(&tcp_hashinfo, sk); /* @@ -1707,7 +1716,7 @@ static inline struct inet_timewait_sock *tw_next(struct inet_timewait_sock *tw) static void *listening_get_next(struct seq_file *seq, void *cur) { - struct tcp_sock *tp; + struct inet_connection_sock *icsk; struct hlist_node *node; struct sock *sk = cur; struct tcp_iter_state* st = seq->private; @@ -1723,7 +1732,7 @@ static void *listening_get_next(struct seq_file *seq, void *cur) if (st->state == TCP_SEQ_STATE_OPENREQ) { struct request_sock *req = cur; - tp = tcp_sk(st->syn_wait_sk); + icsk = inet_csk(st->syn_wait_sk); req = req->dl_next; while (1) { while (req) { @@ -1736,17 +1745,17 @@ static void *listening_get_next(struct seq_file *seq, void *cur) if (++st->sbucket >= TCP_SYNQ_HSIZE) break; get_req: - req = tp->accept_queue.listen_opt->syn_table[st->sbucket]; + req = icsk->icsk_accept_queue.listen_opt->syn_table[st->sbucket]; } sk = sk_next(st->syn_wait_sk); st->state = TCP_SEQ_STATE_LISTENING; - read_unlock_bh(&tp->accept_queue.syn_wait_lock); + read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock); } else { - tp = tcp_sk(sk); - read_lock_bh(&tp->accept_queue.syn_wait_lock); - if (reqsk_queue_len(&tp->accept_queue)) + icsk = inet_csk(sk); + read_lock_bh(&icsk->icsk_accept_queue.syn_wait_lock); + if (reqsk_queue_len(&icsk->icsk_accept_queue)) goto start_req; - read_unlock_bh(&tp->accept_queue.syn_wait_lock); + read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock); sk = sk_next(sk); } get_sk: @@ -1755,9 +1764,9 @@ get_sk: cur = sk; goto out; } - tp = tcp_sk(sk); - read_lock_bh(&tp->accept_queue.syn_wait_lock); - if (reqsk_queue_len(&tp->accept_queue)) { + icsk = inet_csk(sk); + read_lock_bh(&icsk->icsk_accept_queue.syn_wait_lock); + if (reqsk_queue_len(&icsk->icsk_accept_queue)) { start_req: st->uid = sock_i_uid(sk); st->syn_wait_sk = sk; @@ -1765,7 +1774,7 @@ start_req: st->sbucket = 0; goto get_req; } - 
read_unlock_bh(&tp->accept_queue.syn_wait_lock); + read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock); } if (++st->bucket < INET_LHTABLE_SIZE) { sk = sk_head(&tcp_hashinfo.listening_hash[st->bucket]); @@ -1951,8 +1960,8 @@ static void tcp_seq_stop(struct seq_file *seq, void *v) switch (st->state) { case TCP_SEQ_STATE_OPENREQ: if (v) { - struct tcp_sock *tp = tcp_sk(st->syn_wait_sk); - read_unlock_bh(&tp->accept_queue.syn_wait_lock); + struct inet_connection_sock *icsk = inet_csk(st->syn_wait_sk); + read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock); } case TCP_SEQ_STATE_LISTENING: if (v != SEQ_START_TOKEN) @@ -2058,18 +2067,19 @@ static void get_tcp4_sock(struct sock *sp, char *tmpbuf, int i) int timer_active; unsigned long timer_expires; struct tcp_sock *tp = tcp_sk(sp); + const struct inet_connection_sock *icsk = inet_csk(sp); struct inet_sock *inet = inet_sk(sp); unsigned int dest = inet->daddr; unsigned int src = inet->rcv_saddr; __u16 destp = ntohs(inet->dport); __u16 srcp = ntohs(inet->sport); - if (tp->pending == TCP_TIME_RETRANS) { + if (icsk->icsk_pending == ICSK_TIME_RETRANS) { timer_active = 1; - timer_expires = tp->timeout; - } else if (tp->pending == TCP_TIME_PROBE0) { + timer_expires = icsk->icsk_timeout; + } else if (icsk->icsk_pending == ICSK_TIME_PROBE0) { timer_active = 4; - timer_expires = tp->timeout; + timer_expires = icsk->icsk_timeout; } else if (timer_pending(&sp->sk_timer)) { timer_active = 2; timer_expires = sp->sk_timer.expires; @@ -2084,12 +2094,14 @@ static void get_tcp4_sock(struct sock *sp, char *tmpbuf, int i) tp->write_seq - tp->snd_una, tp->rcv_nxt - tp->copied_seq, timer_active, jiffies_to_clock_t(timer_expires - jiffies), - tp->retransmits, + icsk->icsk_retransmits, sock_i_uid(sp), tp->probes_out, sock_i_ino(sp), atomic_read(&sp->sk_refcnt), sp, - tp->rto, tp->ack.ato, (tp->ack.quick << 1) | tp->ack.pingpong, + icsk->icsk_rto, + icsk->icsk_ack.ato, + (icsk->icsk_ack.quick << 1) | icsk->icsk_ack.pingpong, tp->snd_cwnd, 
tp->snd_ssthresh >= 0xFFFF ? -1 : tp->snd_ssthresh); } @@ -2174,7 +2186,7 @@ struct proto tcp_prot = { .close = tcp_close, .connect = tcp_v4_connect, .disconnect = tcp_disconnect, - .accept = tcp_accept, + .accept = inet_csk_accept, .ioctl = tcp_ioctl, .init = tcp_v4_init_sock, .destroy = tcp_v4_destroy_sock, diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index 8b6cd8d80662..56823704eb7d 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -271,7 +271,8 @@ void tcp_time_wait(struct sock *sk, int state, int timeo) if (tw != NULL) { struct tcp_timewait_sock *tcptw = tcp_twsk((struct sock *)tw); - const int rto = (tp->rto << 2) - (tp->rto >> 1); + const struct inet_connection_sock *icsk = inet_csk(sk); + const int rto = (icsk->icsk_rto << 2) - (icsk->icsk_rto >> 1); tw->tw_rcv_wscale = tp->rx_opt.rcv_wscale; tcptw->tw_rcv_nxt = tp->rcv_nxt; @@ -605,10 +606,11 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req, struct inet_request_sock *ireq = inet_rsk(req); struct tcp_request_sock *treq = tcp_rsk(req); struct inet_sock *newinet = inet_sk(newsk); + struct inet_connection_sock *newicsk = inet_csk(newsk); struct tcp_sock *newtp; newsk->sk_state = TCP_SYN_RECV; - newinet->bind_hash = NULL; + newicsk->icsk_bind_hash = NULL; /* Clone the TCP header template */ newinet->dport = ireq->rmt_port; @@ -624,11 +626,11 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req, tcp_init_wl(newtp, treq->snt_isn, treq->rcv_isn); - newtp->retransmits = 0; - newtp->backoff = 0; + newicsk->icsk_retransmits = 0; + newicsk->icsk_backoff = 0; newtp->srtt = 0; newtp->mdev = TCP_TIMEOUT_INIT; - newtp->rto = TCP_TIMEOUT_INIT; + newicsk->icsk_rto = TCP_TIMEOUT_INIT; newtp->packets_out = 0; newtp->left_out = 0; @@ -667,11 +669,11 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req, newtp->rx_opt.num_sacks = 0; newtp->urg_data = 0; /* Deinitialize accept_queue to trap 
illegal accesses. */ - memset(&newtp->accept_queue, 0, sizeof(newtp->accept_queue)); + memset(&newicsk->icsk_accept_queue, 0, sizeof(newicsk->icsk_accept_queue)); if (sock_flag(newsk, SOCK_KEEPOPEN)) - tcp_reset_keepalive_timer(newsk, - keepalive_time_when(newtp)); + inet_csk_reset_keepalive_timer(newsk, + keepalive_time_when(newtp)); newtp->rx_opt.tstamp_ok = ireq->tstamp_ok; if((newtp->rx_opt.sack_ok = ireq->sack_ok) != 0) { @@ -701,7 +703,7 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req, newtp->tcp_header_len = sizeof(struct tcphdr); } if (skb->len >= TCP_MIN_RCVMSS+newtp->tcp_header_len) - newtp->ack.last_seg_size = skb->len-newtp->tcp_header_len; + newicsk->icsk_ack.last_seg_size = skb->len - newtp->tcp_header_len; newtp->rx_opt.mss_clamp = req->mss; TCP_ECN_openreq_child(newtp, req); if (newtp->ecn_flags&TCP_ECN_OK) @@ -881,10 +883,10 @@ struct sock *tcp_check_req(struct sock *sk,struct sk_buff *skb, if (child == NULL) goto listen_overflow; - tcp_synq_unlink(tp, req, prev); - tcp_synq_removed(sk, req); + inet_csk_reqsk_queue_unlink(sk, req, prev); + inet_csk_reqsk_queue_removed(sk, req); - tcp_acceptq_queue(sk, req, child); + inet_csk_reqsk_queue_add(sk, req, child); return child; listen_overflow: @@ -898,7 +900,7 @@ struct sock *tcp_check_req(struct sock *sk,struct sk_buff *skb, if (!(flg & TCP_FLAG_RST)) req->rsk_ops->send_reset(skb); - tcp_synq_drop(sk, req, prev); + inet_csk_reqsk_queue_drop(sk, req, prev); return NULL; } diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index a4d1eb9a0926..6f0a7e30ceac 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -105,8 +105,9 @@ static __u16 tcp_advertise_mss(struct sock *sk) /* RFC2861. Reset CWND after idle period longer RTO to "restart window". * This is the first part of cwnd validation mechanism. 
*/ -static void tcp_cwnd_restart(struct tcp_sock *tp, struct dst_entry *dst) +static void tcp_cwnd_restart(struct sock *sk, struct dst_entry *dst) { + struct tcp_sock *tp = tcp_sk(sk); s32 delta = tcp_time_stamp - tp->lsndtime; u32 restart_cwnd = tcp_init_cwnd(tp, dst); u32 cwnd = tp->snd_cwnd; @@ -116,7 +117,7 @@ static void tcp_cwnd_restart(struct tcp_sock *tp, struct dst_entry *dst) tp->snd_ssthresh = tcp_current_ssthresh(tp); restart_cwnd = min(restart_cwnd, cwnd); - while ((delta -= tp->rto) > 0 && cwnd > restart_cwnd) + while ((delta -= inet_csk(sk)->icsk_rto) > 0 && cwnd > restart_cwnd) cwnd >>= 1; tp->snd_cwnd = max(cwnd, restart_cwnd); tp->snd_cwnd_stamp = tcp_time_stamp; @@ -126,26 +127,25 @@ static void tcp_cwnd_restart(struct tcp_sock *tp, struct dst_entry *dst) static inline void tcp_event_data_sent(struct tcp_sock *tp, struct sk_buff *skb, struct sock *sk) { - u32 now = tcp_time_stamp; + struct inet_connection_sock *icsk = inet_csk(sk); + const u32 now = tcp_time_stamp; - if (!tp->packets_out && (s32)(now - tp->lsndtime) > tp->rto) - tcp_cwnd_restart(tp, __sk_dst_get(sk)); + if (!tp->packets_out && (s32)(now - tp->lsndtime) > icsk->icsk_rto) + tcp_cwnd_restart(sk, __sk_dst_get(sk)); tp->lsndtime = now; /* If it is a reply for ato after last received * packet, enter pingpong mode. */ - if ((u32)(now - tp->ack.lrcvtime) < tp->ack.ato) - tp->ack.pingpong = 1; + if ((u32)(now - icsk->icsk_ack.lrcvtime) < icsk->icsk_ack.ato) + icsk->icsk_ack.pingpong = 1; } static __inline__ void tcp_event_ack_sent(struct sock *sk, unsigned int pkts) { - struct tcp_sock *tp = tcp_sk(sk); - - tcp_dec_quickack_mode(tp, pkts); - tcp_clear_xmit_timer(sk, TCP_TIME_DACK); + tcp_dec_quickack_mode(sk, pkts); + inet_csk_clear_xmit_timer(sk, ICSK_TIME_DACK); } /* Determine a window scaling and initial window to offer. 
@@ -696,7 +696,7 @@ static inline void tcp_cwnd_validate(struct sock *sk, struct tcp_sock *tp) if (tp->packets_out > tp->snd_cwnd_used) tp->snd_cwnd_used = tp->packets_out; - if ((s32)(tcp_time_stamp - tp->snd_cwnd_stamp) >= tp->rto) + if ((s32)(tcp_time_stamp - tp->snd_cwnd_stamp) >= inet_csk(sk)->icsk_rto) tcp_cwnd_application_limited(sk); } } @@ -1147,6 +1147,7 @@ void tcp_push_one(struct sock *sk, unsigned int mss_now) */ u32 __tcp_select_window(struct sock *sk) { + struct inet_connection_sock *icsk = inet_csk(sk); struct tcp_sock *tp = tcp_sk(sk); /* MSS for the peer's data. Previous verions used mss_clamp * here. I don't know if the value based on our guesses @@ -1154,7 +1155,7 @@ u32 __tcp_select_window(struct sock *sk) * but may be worse for the performance because of rcv_mss * fluctuations. --SAW 1998/11/1 */ - int mss = tp->ack.rcv_mss; + int mss = icsk->icsk_ack.rcv_mss; int free_space = tcp_space(sk); int full_space = min_t(int, tp->window_clamp, tcp_full_space(sk)); int window; @@ -1163,7 +1164,7 @@ u32 __tcp_select_window(struct sock *sk) mss = full_space; if (free_space < full_space/2) { - tp->ack.quick = 0; + icsk->icsk_ack.quick = 0; if (tcp_memory_pressure) tp->rcv_ssthresh = min(tp->rcv_ssthresh, 4U*tp->advmss); @@ -1491,7 +1492,8 @@ void tcp_xmit_retransmit_queue(struct sock *sk) if (skb == skb_peek(&sk->sk_write_queue)) - tcp_reset_xmit_timer(sk, TCP_TIME_RETRANS, tp->rto); + inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, + inet_csk(sk)->icsk_rto); } packet_cnt -= tcp_skb_pcount(skb); @@ -1544,7 +1546,7 @@ void tcp_xmit_retransmit_queue(struct sock *sk) break; if (skb == skb_peek(&sk->sk_write_queue)) - tcp_reset_xmit_timer(sk, TCP_TIME_RETRANS, tp->rto); + inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, inet_csk(sk)->icsk_rto); NET_INC_STATS_BH(LINUX_MIB_TCPFORWARDRETRANS); } @@ -1780,8 +1782,8 @@ static inline void tcp_connect_init(struct sock *sk) tp->rcv_wup = 0; tp->copied_seq = 0; - tp->rto = TCP_TIMEOUT_INIT; - tp->retransmits = 0; + 
inet_csk(sk)->icsk_rto = TCP_TIMEOUT_INIT; + inet_csk(sk)->icsk_retransmits = 0; tcp_clear_retrans(tp); } @@ -1824,7 +1826,7 @@ int tcp_connect(struct sock *sk) TCP_INC_STATS(TCP_MIB_ACTIVEOPENS); /* Timer for repeating the SYN until an answer. */ - tcp_reset_xmit_timer(sk, TCP_TIME_RETRANS, tp->rto); + inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, inet_csk(sk)->icsk_rto); return 0; } @@ -1834,20 +1836,21 @@ int tcp_connect(struct sock *sk) */ void tcp_send_delayed_ack(struct sock *sk) { - struct tcp_sock *tp = tcp_sk(sk); - int ato = tp->ack.ato; + struct inet_connection_sock *icsk = inet_csk(sk); + int ato = icsk->icsk_ack.ato; unsigned long timeout; if (ato > TCP_DELACK_MIN) { + const struct tcp_sock *tp = tcp_sk(sk); int max_ato = HZ/2; - if (tp->ack.pingpong || (tp->ack.pending&TCP_ACK_PUSHED)) + if (icsk->icsk_ack.pingpong || (icsk->icsk_ack.pending & ICSK_ACK_PUSHED)) max_ato = TCP_DELACK_MAX; /* Slow path, intersegment interval is "high". */ /* If some rtt estimate is known, use it to bound delayed ack. - * Do not use tp->rto here, use results of rtt measurements + * Do not use inet_csk(sk)->icsk_rto here, use results of rtt measurements * directly. */ if (tp->srtt) { @@ -1864,21 +1867,22 @@ void tcp_send_delayed_ack(struct sock *sk) timeout = jiffies + ato; /* Use new timeout only if there wasn't a older one earlier. */ - if (tp->ack.pending&TCP_ACK_TIMER) { + if (icsk->icsk_ack.pending & ICSK_ACK_TIMER) { /* If delack timer was blocked or is about to expire, * send ACK now. 
*/ - if (tp->ack.blocked || time_before_eq(tp->ack.timeout, jiffies+(ato>>2))) { + if (icsk->icsk_ack.blocked || + time_before_eq(icsk->icsk_ack.timeout, jiffies + (ato >> 2))) { tcp_send_ack(sk); return; } - if (!time_before(timeout, tp->ack.timeout)) - timeout = tp->ack.timeout; + if (!time_before(timeout, icsk->icsk_ack.timeout)) + timeout = icsk->icsk_ack.timeout; } - tp->ack.pending |= TCP_ACK_SCHED|TCP_ACK_TIMER; - tp->ack.timeout = timeout; - sk_reset_timer(sk, &tp->delack_timer, timeout); + icsk->icsk_ack.pending |= ICSK_ACK_SCHED | ICSK_ACK_TIMER; + icsk->icsk_ack.timeout = timeout; + sk_reset_timer(sk, &icsk->icsk_delack_timer, timeout); } /* This routine sends an ack and also updates the window. */ @@ -1895,9 +1899,9 @@ void tcp_send_ack(struct sock *sk) */ buff = alloc_skb(MAX_TCP_HEADER, GFP_ATOMIC); if (buff == NULL) { - tcp_schedule_ack(tp); - tp->ack.ato = TCP_ATO_MIN; - tcp_reset_xmit_timer(sk, TCP_TIME_DACK, TCP_DELACK_MAX); + inet_csk_schedule_ack(sk); + inet_csk(sk)->icsk_ack.ato = TCP_ATO_MIN; + inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK, TCP_DELACK_MAX); return; } @@ -2011,6 +2015,7 @@ int tcp_write_wakeup(struct sock *sk) */ void tcp_send_probe0(struct sock *sk) { + struct inet_connection_sock *icsk = inet_csk(sk); struct tcp_sock *tp = tcp_sk(sk); int err; @@ -2019,16 +2024,16 @@ void tcp_send_probe0(struct sock *sk) if (tp->packets_out || !sk->sk_send_head) { /* Cancel probe timer, if it is not required. 
*/ tp->probes_out = 0; - tp->backoff = 0; + icsk->icsk_backoff = 0; return; } if (err <= 0) { - if (tp->backoff < sysctl_tcp_retries2) - tp->backoff++; + if (icsk->icsk_backoff < sysctl_tcp_retries2) + icsk->icsk_backoff++; tp->probes_out++; - tcp_reset_xmit_timer (sk, TCP_TIME_PROBE0, - min(tp->rto << tp->backoff, TCP_RTO_MAX)); + inet_csk_reset_xmit_timer(sk, ICSK_TIME_PROBE0, + min(icsk->icsk_rto << icsk->icsk_backoff, TCP_RTO_MAX)); } else { /* If packet was not sent due to local congestion, * do not backoff and do not remember probes_out. @@ -2038,8 +2043,9 @@ void tcp_send_probe0(struct sock *sk) */ if (!tp->probes_out) tp->probes_out=1; - tcp_reset_xmit_timer (sk, TCP_TIME_PROBE0, - min(tp->rto << tp->backoff, TCP_RESOURCE_PROBE_INTERVAL)); + inet_csk_reset_xmit_timer(sk, ICSK_TIME_PROBE0, + min(icsk->icsk_rto << icsk->icsk_backoff, + TCP_RESOURCE_PROBE_INTERVAL)); } } diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index 0084227438c2..0b71380ee42f 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -36,9 +36,9 @@ static void tcp_write_timer(unsigned long); static void tcp_delack_timer(unsigned long); static void tcp_keepalive_timer (unsigned long data); -#ifdef TCP_DEBUG -const char tcp_timer_bug_msg[] = KERN_DEBUG "tcpbug: unknown timer value\n"; -EXPORT_SYMBOL(tcp_timer_bug_msg); +#ifdef INET_CSK_DEBUG +const char inet_csk_timer_bug_msg[] = "inet_csk BUG: unknown timer value\n"; +EXPORT_SYMBOL(inet_csk_timer_bug_msg); #endif /* @@ -46,38 +46,43 @@ EXPORT_SYMBOL(tcp_timer_bug_msg); * We may wish use just one timer maintaining a list of expire jiffies * to optimize. 
*/ +void inet_csk_init_xmit_timers(struct sock *sk, + void (*retransmit_handler)(unsigned long), + void (*delack_handler)(unsigned long), + void (*keepalive_handler)(unsigned long)) +{ + struct inet_connection_sock *icsk = inet_csk(sk); + + init_timer(&icsk->icsk_retransmit_timer); + init_timer(&icsk->icsk_delack_timer); + init_timer(&sk->sk_timer); + + icsk->icsk_retransmit_timer.function = retransmit_handler; + icsk->icsk_delack_timer.function = delack_handler; + sk->sk_timer.function = keepalive_handler; + + icsk->icsk_retransmit_timer.data = + icsk->icsk_delack_timer.data = + sk->sk_timer.data = (unsigned long)sk; + + icsk->icsk_pending = icsk->icsk_ack.pending = 0; +} + +void inet_csk_clear_xmit_timers(struct sock *sk) +{ + struct inet_connection_sock *icsk = inet_csk(sk); + + icsk->icsk_pending = icsk->icsk_ack.pending = icsk->icsk_ack.blocked = 0; + + sk_stop_timer(sk, &icsk->icsk_retransmit_timer); + sk_stop_timer(sk, &icsk->icsk_delack_timer); + sk_stop_timer(sk, &sk->sk_timer); +} void tcp_init_xmit_timers(struct sock *sk) { - struct tcp_sock *tp = tcp_sk(sk); - - init_timer(&tp->retransmit_timer); - tp->retransmit_timer.function=&tcp_write_timer; - tp->retransmit_timer.data = (unsigned long) sk; - tp->pending = 0; - - init_timer(&tp->delack_timer); - tp->delack_timer.function=&tcp_delack_timer; - tp->delack_timer.data = (unsigned long) sk; - tp->ack.pending = 0; - - init_timer(&sk->sk_timer); - sk->sk_timer.function = &tcp_keepalive_timer; - sk->sk_timer.data = (unsigned long)sk; -} - -void tcp_clear_xmit_timers(struct sock *sk) -{ - struct tcp_sock *tp = tcp_sk(sk); - - tp->pending = 0; - sk_stop_timer(sk, &tp->retransmit_timer); - - tp->ack.pending = 0; - tp->ack.blocked = 0; - sk_stop_timer(sk, &tp->delack_timer); - - sk_stop_timer(sk, &sk->sk_timer); + inet_csk_init_xmit_timers(sk, &tcp_write_timer, &tcp_delack_timer, + &tcp_keepalive_timer); } static void tcp_write_err(struct sock *sk) @@ -155,15 +160,15 @@ static int tcp_orphan_retries(struct sock 
*sk, int alive) /* A write timeout has occurred. Process the after effects. */ static int tcp_write_timeout(struct sock *sk) { - struct tcp_sock *tp = tcp_sk(sk); + const struct inet_connection_sock *icsk = inet_csk(sk); int retry_until; if ((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV)) { - if (tp->retransmits) + if (icsk->icsk_retransmits) dst_negative_advice(&sk->sk_dst_cache); - retry_until = tp->syn_retries ? : sysctl_tcp_syn_retries; + retry_until = icsk->icsk_syn_retries ? : sysctl_tcp_syn_retries; } else { - if (tp->retransmits >= sysctl_tcp_retries1) { + if (icsk->icsk_retransmits >= sysctl_tcp_retries1) { /* NOTE. draft-ietf-tcpimpl-pmtud-01.txt requires pmtu black hole detection. :-( @@ -189,16 +194,16 @@ static int tcp_write_timeout(struct sock *sk) retry_until = sysctl_tcp_retries2; if (sock_flag(sk, SOCK_DEAD)) { - int alive = (tp->rto < TCP_RTO_MAX); + const int alive = (icsk->icsk_rto < TCP_RTO_MAX); retry_until = tcp_orphan_retries(sk, alive); - if (tcp_out_of_resources(sk, alive || tp->retransmits < retry_until)) + if (tcp_out_of_resources(sk, alive || icsk->icsk_retransmits < retry_until)) return 1; } } - if (tp->retransmits >= retry_until) { + if (icsk->icsk_retransmits >= retry_until) { /* Has it gone just too far? */ tcp_write_err(sk); return 1; @@ -210,26 +215,27 @@ static void tcp_delack_timer(unsigned long data) { struct sock *sk = (struct sock*)data; struct tcp_sock *tp = tcp_sk(sk); + struct inet_connection_sock *icsk = inet_csk(sk); bh_lock_sock(sk); if (sock_owned_by_user(sk)) { /* Try again later. 
*/ - tp->ack.blocked = 1; + icsk->icsk_ack.blocked = 1; NET_INC_STATS_BH(LINUX_MIB_DELAYEDACKLOCKED); - sk_reset_timer(sk, &tp->delack_timer, jiffies + TCP_DELACK_MIN); + sk_reset_timer(sk, &icsk->icsk_delack_timer, jiffies + TCP_DELACK_MIN); goto out_unlock; } sk_stream_mem_reclaim(sk); - if (sk->sk_state == TCP_CLOSE || !(tp->ack.pending & TCP_ACK_TIMER)) + if (sk->sk_state == TCP_CLOSE || !(icsk->icsk_ack.pending & ICSK_ACK_TIMER)) goto out; - if (time_after(tp->ack.timeout, jiffies)) { - sk_reset_timer(sk, &tp->delack_timer, tp->ack.timeout); + if (time_after(icsk->icsk_ack.timeout, jiffies)) { + sk_reset_timer(sk, &icsk->icsk_delack_timer, icsk->icsk_ack.timeout); goto out; } - tp->ack.pending &= ~TCP_ACK_TIMER; + icsk->icsk_ack.pending &= ~ICSK_ACK_TIMER; if (!skb_queue_empty(&tp->ucopy.prequeue)) { struct sk_buff *skb; @@ -242,16 +248,16 @@ static void tcp_delack_timer(unsigned long data) tp->ucopy.memory = 0; } - if (tcp_ack_scheduled(tp)) { - if (!tp->ack.pingpong) { + if (inet_csk_ack_scheduled(sk)) { + if (!icsk->icsk_ack.pingpong) { /* Delayed ACK missed: inflate ATO. */ - tp->ack.ato = min(tp->ack.ato << 1, tp->rto); + icsk->icsk_ack.ato = min(icsk->icsk_ack.ato << 1, icsk->icsk_rto); } else { /* Delayed ACK missed: leave pingpong mode and * deflate ATO. 
*/ - tp->ack.pingpong = 0; - tp->ack.ato = TCP_ATO_MIN; + icsk->icsk_ack.pingpong = 0; + icsk->icsk_ack.ato = TCP_ATO_MIN; } tcp_send_ack(sk); NET_INC_STATS_BH(LINUX_MIB_DELAYEDACKS); @@ -294,7 +300,8 @@ static void tcp_probe_timer(struct sock *sk) max_probes = sysctl_tcp_retries2; if (sock_flag(sk, SOCK_DEAD)) { - int alive = ((tp->rto<backoff) < TCP_RTO_MAX); + const struct inet_connection_sock *icsk = inet_csk(sk); + const int alive = ((icsk->icsk_rto << icsk->icsk_backoff) < TCP_RTO_MAX); max_probes = tcp_orphan_retries(sk, alive); @@ -317,6 +324,7 @@ static void tcp_probe_timer(struct sock *sk) static void tcp_retransmit_timer(struct sock *sk) { struct tcp_sock *tp = tcp_sk(sk); + struct inet_connection_sock *icsk = inet_csk(sk); if (!tp->packets_out) goto out; @@ -351,7 +359,7 @@ static void tcp_retransmit_timer(struct sock *sk) if (tcp_write_timeout(sk)) goto out; - if (tp->retransmits == 0) { + if (icsk->icsk_retransmits == 0) { if (tp->ca_state == TCP_CA_Disorder || tp->ca_state == TCP_CA_Recovery) { if (tp->rx_opt.sack_ok) { if (tp->ca_state == TCP_CA_Recovery) @@ -381,10 +389,10 @@ static void tcp_retransmit_timer(struct sock *sk) /* Retransmission failed because of local congestion, * do not backoff. */ - if (!tp->retransmits) - tp->retransmits=1; - tcp_reset_xmit_timer(sk, TCP_TIME_RETRANS, - min(tp->rto, TCP_RESOURCE_PROBE_INTERVAL)); + if (!icsk->icsk_retransmits) + icsk->icsk_retransmits = 1; + inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, + min(icsk->icsk_rto, TCP_RESOURCE_PROBE_INTERVAL)); goto out; } @@ -403,13 +411,13 @@ static void tcp_retransmit_timer(struct sock *sk) * implemented ftp to mars will work nicely. We will have to fix * the 120 second clamps though! 
*/ - tp->backoff++; - tp->retransmits++; + icsk->icsk_backoff++; + icsk->icsk_retransmits++; out_reset_timer: - tp->rto = min(tp->rto << 1, TCP_RTO_MAX); - tcp_reset_xmit_timer(sk, TCP_TIME_RETRANS, tp->rto); - if (tp->retransmits > sysctl_tcp_retries1) + icsk->icsk_rto = min(icsk->icsk_rto << 1, TCP_RTO_MAX); + inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, icsk->icsk_rto); + if (icsk->icsk_retransmits > sysctl_tcp_retries1) __sk_dst_reset(sk); out:; @@ -418,32 +426,32 @@ out:; static void tcp_write_timer(unsigned long data) { struct sock *sk = (struct sock*)data; - struct tcp_sock *tp = tcp_sk(sk); + struct inet_connection_sock *icsk = inet_csk(sk); int event; bh_lock_sock(sk); if (sock_owned_by_user(sk)) { /* Try again later */ - sk_reset_timer(sk, &tp->retransmit_timer, jiffies + (HZ / 20)); + sk_reset_timer(sk, &icsk->icsk_retransmit_timer, jiffies + (HZ / 20)); goto out_unlock; } - if (sk->sk_state == TCP_CLOSE || !tp->pending) + if (sk->sk_state == TCP_CLOSE || !icsk->icsk_pending) goto out; - if (time_after(tp->timeout, jiffies)) { - sk_reset_timer(sk, &tp->retransmit_timer, tp->timeout); + if (time_after(icsk->icsk_timeout, jiffies)) { + sk_reset_timer(sk, &icsk->icsk_retransmit_timer, icsk->icsk_timeout); goto out; } - event = tp->pending; - tp->pending = 0; + event = icsk->icsk_pending; + icsk->icsk_pending = 0; switch (event) { - case TCP_TIME_RETRANS: + case ICSK_TIME_RETRANS: tcp_retransmit_timer(sk); break; - case TCP_TIME_PROBE0: + case ICSK_TIME_PROBE0: tcp_probe_timer(sk); break; } @@ -463,8 +471,9 @@ out_unlock: static void tcp_synack_timer(struct sock *sk) { struct tcp_sock *tp = tcp_sk(sk); - struct listen_sock *lopt = tp->accept_queue.listen_opt; - int max_retries = tp->syn_retries ? : sysctl_tcp_synack_retries; + struct inet_connection_sock *icsk = inet_csk(sk); + struct listen_sock *lopt = icsk->icsk_accept_queue.listen_opt; + int max_retries = icsk->icsk_syn_retries ? 
: sysctl_tcp_synack_retries; int thresh = max_retries; unsigned long now = jiffies; struct request_sock **reqp, *req; @@ -526,8 +535,8 @@ static void tcp_synack_timer(struct sock *sk) } /* Drop this request */ - tcp_synq_unlink(tp, req, reqp); - reqsk_queue_removed(&tp->accept_queue, req); + inet_csk_reqsk_queue_unlink(sk, req, reqp); + reqsk_queue_removed(&icsk->icsk_accept_queue, req); reqsk_free(req); continue; } @@ -541,15 +550,15 @@ static void tcp_synack_timer(struct sock *sk) lopt->clock_hand = i; if (lopt->qlen) - tcp_reset_keepalive_timer(sk, TCP_SYNQ_INTERVAL); + inet_csk_reset_keepalive_timer(sk, TCP_SYNQ_INTERVAL); } -void tcp_delete_keepalive_timer (struct sock *sk) +void inet_csk_delete_keepalive_timer(struct sock *sk) { sk_stop_timer(sk, &sk->sk_timer); } -void tcp_reset_keepalive_timer (struct sock *sk, unsigned long len) +void inet_csk_reset_keepalive_timer(struct sock *sk, unsigned long len) { sk_reset_timer(sk, &sk->sk_timer, jiffies + len); } @@ -560,9 +569,9 @@ void tcp_set_keepalive(struct sock *sk, int val) return; if (val && !sock_flag(sk, SOCK_KEEPOPEN)) - tcp_reset_keepalive_timer(sk, keepalive_time_when(tcp_sk(sk))); + inet_csk_reset_keepalive_timer(sk, keepalive_time_when(tcp_sk(sk))); else if (!val) - tcp_delete_keepalive_timer(sk); + inet_csk_delete_keepalive_timer(sk); } @@ -576,7 +585,7 @@ static void tcp_keepalive_timer (unsigned long data) bh_lock_sock(sk); if (sock_owned_by_user(sk)) { /* Try again later. 
*/ - tcp_reset_keepalive_timer (sk, HZ/20); + inet_csk_reset_keepalive_timer (sk, HZ/20); goto out; } @@ -587,7 +596,7 @@ static void tcp_keepalive_timer (unsigned long data) if (sk->sk_state == TCP_FIN_WAIT2 && sock_flag(sk, SOCK_DEAD)) { if (tp->linger2 >= 0) { - int tmo = tcp_fin_time(tp) - TCP_TIMEWAIT_LEN; + const int tmo = tcp_fin_time(sk) - TCP_TIMEWAIT_LEN; if (tmo > 0) { tcp_time_wait(sk, TCP_FIN_WAIT2, tmo); @@ -634,7 +643,7 @@ static void tcp_keepalive_timer (unsigned long data) sk_stream_mem_reclaim(sk); resched: - tcp_reset_keepalive_timer (sk, elapsed); + inet_csk_reset_keepalive_timer (sk, elapsed); goto out; death: @@ -645,7 +654,7 @@ out: sock_put(sk); } -EXPORT_SYMBOL(tcp_clear_xmit_timers); -EXPORT_SYMBOL(tcp_delete_keepalive_timer); +EXPORT_SYMBOL(inet_csk_clear_xmit_timers); +EXPORT_SYMBOL(inet_csk_delete_keepalive_timer); EXPORT_SYMBOL(tcp_init_xmit_timers); -EXPORT_SYMBOL(tcp_reset_keepalive_timer); +EXPORT_SYMBOL(inet_csk_reset_keepalive_timer); diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 4582d9cf4bbe..b9c3da349492 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -1043,7 +1043,7 @@ int ipv6_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2) u32 sk_rcv_saddr = inet_sk(sk)->rcv_saddr; u32 sk2_rcv_saddr = inet_rcv_saddr(sk2); int sk_ipv6only = ipv6_only_sock(sk); - int sk2_ipv6only = tcp_v6_ipv6only(sk2); + int sk2_ipv6only = inet_v6_ipv6only(sk2); int addr_type = ipv6_addr_type(sk_rcv_saddr6); int addr_type2 = sk2_rcv_saddr6 ? 
ipv6_addr_type(sk2_rcv_saddr6) : IPV6_ADDR_MAPPED; diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index af8ad5bb273b..b9c7003b7f8b 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -207,9 +207,9 @@ tb_not_found: tb->fastreuse = 0; success: - if (!inet_sk(sk)->bind_hash) + if (!inet_csk(sk)->icsk_bind_hash) inet_bind_hash(sk, tb, snum); - BUG_TRAP(inet_sk(sk)->bind_hash == tb); + BUG_TRAP(inet_csk(sk)->icsk_bind_hash == tb); ret = 0; fail_unlock: @@ -381,7 +381,7 @@ EXPORT_SYMBOL_GPL(tcp_v6_lookup); * Open request hash tables. */ -static u32 tcp_v6_synq_hash(struct in6_addr *raddr, u16 rport, u32 rnd) +static u32 tcp_v6_synq_hash(const struct in6_addr *raddr, const u16 rport, const u32 rnd) { u32 a, b, c; @@ -401,14 +401,15 @@ static u32 tcp_v6_synq_hash(struct in6_addr *raddr, u16 rport, u32 rnd) return c & (TCP_SYNQ_HSIZE - 1); } -static struct request_sock *tcp_v6_search_req(struct tcp_sock *tp, +static struct request_sock *tcp_v6_search_req(const struct sock *sk, struct request_sock ***prevp, __u16 rport, struct in6_addr *raddr, struct in6_addr *laddr, int iif) { - struct listen_sock *lopt = tp->accept_queue.listen_opt; + const struct inet_connection_sock *icsk = inet_csk(sk); + struct listen_sock *lopt = icsk->icsk_accept_queue.listen_opt; struct request_sock *req, **prev; for (prev = &lopt->syn_table[tcp_v6_synq_hash(raddr, rport, lopt->hash_rnd)]; @@ -619,7 +620,7 @@ ok: } head = &tcp_hashinfo.bhash[inet_bhashfn(snum, tcp_hashinfo.bhash_size)]; - tb = inet_sk(sk)->bind_hash; + tb = inet_csk(sk)->icsk_bind_hash; spin_lock_bh(&head->lock); if (sk_head(&tb->owners) == sk && !sk->sk_bind_node.next) { @@ -925,7 +926,7 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, if (sock_owned_by_user(sk)) goto out; - req = tcp_v6_search_req(tp, &prev, th->dest, &hdr->daddr, + req = tcp_v6_search_req(sk, &prev, th->dest, &hdr->daddr, &hdr->saddr, tcp_v6_iif(skb)); if (!req) goto out; @@ -940,7 +941,7 @@ static void 
tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, goto out; } - tcp_synq_drop(sk, req, prev); + inet_csk_reqsk_queue_drop(sk, req, prev); goto out; case TCP_SYN_SENT: @@ -1245,11 +1246,10 @@ static struct sock *tcp_v6_hnd_req(struct sock *sk,struct sk_buff *skb) { struct request_sock *req, **prev; struct tcphdr *th = skb->h.th; - struct tcp_sock *tp = tcp_sk(sk); struct sock *nsk; /* Find possible connection requests. */ - req = tcp_v6_search_req(tp, &prev, th->source, &skb->nh.ipv6h->saddr, + req = tcp_v6_search_req(sk, &prev, th->source, &skb->nh.ipv6h->saddr, &skb->nh.ipv6h->daddr, tcp_v6_iif(skb)); if (req) return tcp_check_req(sk, skb, req, prev); @@ -1278,12 +1278,12 @@ static struct sock *tcp_v6_hnd_req(struct sock *sk,struct sk_buff *skb) static void tcp_v6_synq_add(struct sock *sk, struct request_sock *req) { - struct tcp_sock *tp = tcp_sk(sk); - struct listen_sock *lopt = tp->accept_queue.listen_opt; - u32 h = tcp_v6_synq_hash(&tcp6_rsk(req)->rmt_addr, inet_rsk(req)->rmt_port, lopt->hash_rnd); + struct inet_connection_sock *icsk = inet_csk(sk); + struct listen_sock *lopt = icsk->icsk_accept_queue.listen_opt; + const u32 h = tcp_v6_synq_hash(&tcp6_rsk(req)->rmt_addr, inet_rsk(req)->rmt_port, lopt->hash_rnd); - reqsk_queue_hash_req(&tp->accept_queue, h, req, TCP_TIMEOUT_INIT); - tcp_synq_added(sk); + reqsk_queue_hash_req(&icsk->icsk_accept_queue, h, req, TCP_TIMEOUT_INIT); + inet_csk_reqsk_queue_added(sk, TCP_TIMEOUT_INIT); } @@ -1308,13 +1308,13 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb) /* * There are no SYN attacks on IPv6, yet... 
*/ - if (tcp_synq_is_full(sk) && !isn) { + if (inet_csk_reqsk_queue_is_full(sk) && !isn) { if (net_ratelimit()) printk(KERN_INFO "TCPv6: dropping request, synflood is possible\n"); goto drop; } - if (sk_acceptq_is_full(sk) && tcp_synq_young(sk) > 1) + if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1) goto drop; req = reqsk_alloc(&tcp6_request_sock_ops); @@ -2015,7 +2015,7 @@ static int tcp_v6_init_sock(struct sock *sk) tcp_init_xmit_timers(sk); tcp_prequeue_init(tp); - tp->rto = TCP_TIMEOUT_INIT; + inet_csk(sk)->icsk_rto = TCP_TIMEOUT_INIT; tp->mdev = TCP_TIMEOUT_INIT; /* So many TCP implementations out there (incorrectly) count the @@ -2098,18 +2098,20 @@ static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i) unsigned long timer_expires; struct inet_sock *inet = inet_sk(sp); struct tcp_sock *tp = tcp_sk(sp); + const struct inet_connection_sock *icsk = inet_csk(sp); struct ipv6_pinfo *np = inet6_sk(sp); dest = &np->daddr; src = &np->rcv_saddr; destp = ntohs(inet->dport); srcp = ntohs(inet->sport); - if (tp->pending == TCP_TIME_RETRANS) { + + if (icsk->icsk_pending == ICSK_TIME_RETRANS) { timer_active = 1; - timer_expires = tp->timeout; - } else if (tp->pending == TCP_TIME_PROBE0) { + timer_expires = icsk->icsk_timeout; + } else if (icsk->icsk_pending == ICSK_TIME_PROBE0) { timer_active = 4; - timer_expires = tp->timeout; + timer_expires = icsk->icsk_timeout; } else if (timer_pending(&sp->sk_timer)) { timer_active = 2; timer_expires = sp->sk_timer.expires; @@ -2130,12 +2132,14 @@ static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i) tp->write_seq-tp->snd_una, tp->rcv_nxt-tp->copied_seq, timer_active, jiffies_to_clock_t(timer_expires - jiffies), - tp->retransmits, + icsk->icsk_retransmits, sock_i_uid(sp), tp->probes_out, sock_i_ino(sp), atomic_read(&sp->sk_refcnt), sp, - tp->rto, tp->ack.ato, (tp->ack.quick<<1)|tp->ack.pingpong, + icsk->icsk_rto, + icsk->icsk_ack.ato, + (icsk->icsk_ack.quick << 1 ) | 
icsk->icsk_ack.pingpong, tp->snd_cwnd, tp->snd_ssthresh>=0xFFFF?-1:tp->snd_ssthresh ); } @@ -2227,7 +2231,7 @@ struct proto tcpv6_prot = { .close = tcp_close, .connect = tcp_v6_connect, .disconnect = tcp_disconnect, - .accept = tcp_accept, + .accept = inet_csk_accept, .ioctl = tcp_ioctl, .init = tcp_v6_init_sock, .destroy = tcp_v6_destroy_sock, From 3f421baa4720b708022f8bcc52a61e5cd6f10bf8 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 9 Aug 2005 20:11:08 -0700 Subject: [PATCH 323/584] [NET]: Just move the inet_connection_sock function from tcp sources Completing the previous changeset, this also generalises tcp_v4_synq_add, renaming it to inet_csk_reqsk_queue_hash_add, already geing used in the DCCP tree, which I plan to merge RSN. Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. Miller --- include/linux/ipv6.h | 7 +- include/net/inet_connection_sock.h | 152 +++++++++++ include/net/tcp.h | 160 +----------- net/ipv4/Makefile | 2 +- net/ipv4/inet_connection_sock.c | 401 +++++++++++++++++++++++++++++ net/ipv4/tcp.c | 93 ------- net/ipv4/tcp_input.c | 10 +- net/ipv4/tcp_ipv4.c | 210 +-------------- net/ipv4/tcp_output.c | 19 +- net/ipv4/tcp_timer.c | 65 +---- 10 files changed, 588 insertions(+), 531 deletions(-) create mode 100644 net/ipv4/inet_connection_sock.c diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index 88591913c94f..777339b68464 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -333,15 +333,10 @@ static inline struct in6_addr *tcp_v6_rcv_saddr(const struct sock *sk) return sk->sk_family == AF_INET6 ? __tcp_v6_rcv_saddr(sk) : NULL; } -static inline int inet_twsk_ipv6only(const struct sock *sk) -{ - return inet_twsk(sk)->tw_ipv6only; -} - static inline int inet_v6_ipv6only(const struct sock *sk) { return likely(sk->sk_state != TCP_TIME_WAIT) ? 
- ipv6_only_sock(sk) : inet_twsk_ipv6only(sk); + ipv6_only_sock(sk) : inet_twsk(sk)->tw_ipv6only; } #else #define __ipv6_only_sock(sk) 0 diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h index ef609396e41b..97e002001c1a 100644 --- a/include/net/inet_connection_sock.h +++ b/include/net/inet_connection_sock.h @@ -16,9 +16,15 @@ #define _INET_CONNECTION_SOCK_H #include +#include #include #include +#define INET_CSK_DEBUG 1 + +/* Cancel timers, when they are not required. */ +#undef INET_CSK_CLEAR_TIMERS + struct inet_bind_bucket; struct inet_hashinfo; @@ -61,17 +67,107 @@ struct inet_connection_sock { } icsk_ack; }; +#define ICSK_TIME_RETRANS 1 /* Retransmit timer */ +#define ICSK_TIME_DACK 2 /* Delayed ack timer */ +#define ICSK_TIME_PROBE0 3 /* Zero window probe timer */ +#define ICSK_TIME_KEEPOPEN 4 /* Keepalive timer */ + static inline struct inet_connection_sock *inet_csk(const struct sock *sk) { return (struct inet_connection_sock *)sk; } +enum inet_csk_ack_state_t { + ICSK_ACK_SCHED = 1, + ICSK_ACK_TIMER = 2, + ICSK_ACK_PUSHED = 4 +}; + extern void inet_csk_init_xmit_timers(struct sock *sk, void (*retransmit_handler)(unsigned long), void (*delack_handler)(unsigned long), void (*keepalive_handler)(unsigned long)); extern void inet_csk_clear_xmit_timers(struct sock *sk); +static inline void inet_csk_schedule_ack(struct sock *sk) +{ + inet_csk(sk)->icsk_ack.pending |= ICSK_ACK_SCHED; +} + +static inline int inet_csk_ack_scheduled(const struct sock *sk) +{ + return inet_csk(sk)->icsk_ack.pending & ICSK_ACK_SCHED; +} + +static inline void inet_csk_delack_init(struct sock *sk) +{ + memset(&inet_csk(sk)->icsk_ack, 0, sizeof(inet_csk(sk)->icsk_ack)); +} + +extern void inet_csk_delete_keepalive_timer(struct sock *sk); +extern void inet_csk_reset_keepalive_timer(struct sock *sk, unsigned long timeout); + +#ifdef INET_CSK_DEBUG +extern const char inet_csk_timer_bug_msg[]; +#endif + +static inline void inet_csk_clear_xmit_timer(struct sock 
*sk, const int what) +{ + struct inet_connection_sock *icsk = inet_csk(sk); + + if (what == ICSK_TIME_RETRANS || what == ICSK_TIME_PROBE0) { + icsk->icsk_pending = 0; +#ifdef INET_CSK_CLEAR_TIMERS + sk_stop_timer(sk, &icsk->icsk_retransmit_timer); +#endif + } else if (what == ICSK_TIME_DACK) { + icsk->icsk_ack.blocked = icsk->icsk_ack.pending = 0; +#ifdef INET_CSK_CLEAR_TIMERS + sk_stop_timer(sk, &icsk->icsk_delack_timer); +#endif + } +#ifdef INET_CSK_DEBUG + else { + pr_debug(inet_csk_timer_bug_msg); + } +#endif +} + +/* + * Reset the retransmission timer + */ +static inline void inet_csk_reset_xmit_timer(struct sock *sk, const int what, + unsigned long when, + const unsigned long max_when) +{ + struct inet_connection_sock *icsk = inet_csk(sk); + + if (when > max_when) { +#ifdef INET_CSK_DEBUG + pr_debug("reset_xmit_timer: sk=%p %d when=0x%lx, caller=%p\n", + sk, what, when, current_text_addr()); +#endif + when = max_when; + } + + if (what == ICSK_TIME_RETRANS || what == ICSK_TIME_PROBE0) { + icsk->icsk_pending = what; + icsk->icsk_timeout = jiffies + when; + sk_reset_timer(sk, &icsk->icsk_retransmit_timer, icsk->icsk_timeout); + } else if (what == ICSK_TIME_DACK) { + icsk->icsk_ack.pending |= ICSK_ACK_TIMER; + icsk->icsk_ack.timeout = jiffies + when; + sk_reset_timer(sk, &icsk->icsk_delack_timer, icsk->icsk_ack.timeout); + } +#ifdef INET_CSK_DEBUG + else { + pr_debug(inet_csk_timer_bug_msg); + } +#endif +} + +extern struct sock *inet_csk_accept(struct sock *sk, int flags, int *err); + extern struct request_sock *inet_csk_search_req(const struct sock *sk, struct request_sock ***prevp, const __u16 rport, @@ -83,4 +179,60 @@ extern int inet_csk_get_port(struct inet_hashinfo *hashinfo, extern struct dst_entry* inet_csk_route_req(struct sock *sk, const struct request_sock *req); +static inline void inet_csk_reqsk_queue_add(struct sock *sk, + struct request_sock *req, + struct sock *child) +{ + reqsk_queue_add(&inet_csk(sk)->icsk_accept_queue, req, sk, child); +} + 
+extern void inet_csk_reqsk_queue_hash_add(struct sock *sk, + struct request_sock *req, + const unsigned timeout); + +static inline void inet_csk_reqsk_queue_removed(struct sock *sk, + struct request_sock *req) +{ + if (reqsk_queue_removed(&inet_csk(sk)->icsk_accept_queue, req) == 0) + inet_csk_delete_keepalive_timer(sk); +} + +static inline void inet_csk_reqsk_queue_added(struct sock *sk, + const unsigned long timeout) +{ + if (reqsk_queue_added(&inet_csk(sk)->icsk_accept_queue) == 0) + inet_csk_reset_keepalive_timer(sk, timeout); +} + +static inline int inet_csk_reqsk_queue_len(const struct sock *sk) +{ + return reqsk_queue_len(&inet_csk(sk)->icsk_accept_queue); +} + +static inline int inet_csk_reqsk_queue_young(const struct sock *sk) +{ + return reqsk_queue_len_young(&inet_csk(sk)->icsk_accept_queue); +} + +static inline int inet_csk_reqsk_queue_is_full(const struct sock *sk) +{ + return reqsk_queue_is_full(&inet_csk(sk)->icsk_accept_queue); +} + +static inline void inet_csk_reqsk_queue_unlink(struct sock *sk, + struct request_sock *req, + struct request_sock **prev) +{ + reqsk_queue_unlink(&inet_csk(sk)->icsk_accept_queue, req, prev); +} + +static inline void inet_csk_reqsk_queue_drop(struct sock *sk, + struct request_sock *req, + struct request_sock **prev) +{ + inet_csk_reqsk_queue_unlink(sk, req, prev); + inet_csk_reqsk_queue_removed(sk, req); + reqsk_free(req); +} + #endif /* _INET_CONNECTION_SOCK_H */ diff --git a/include/net/tcp.h b/include/net/tcp.h index a943c79c88b0..dd9a5a288f88 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -19,18 +19,16 @@ #define _TCP_H #define TCP_DEBUG 1 -#define INET_CSK_DEBUG 1 #define FASTRETRANS_DEBUG 1 -/* Cancel timers, when they are not required. 
*/ -#undef INET_CSK_CLEAR_TIMERS - #include #include #include #include #include #include + +#include #include #include #include @@ -206,11 +204,6 @@ extern void tcp_tw_deschedule(struct inet_timewait_sock *tw); #define TCPOLEN_SACK_BASE_ALIGNED 4 #define TCPOLEN_SACK_PERBLOCK 8 -#define ICSK_TIME_RETRANS 1 /* Retransmit timer */ -#define ICSK_TIME_DACK 2 /* Delayed ack timer */ -#define ICSK_TIME_PROBE0 3 /* Zero window probe timer */ -#define ICSK_TIME_KEEPOPEN 4 /* Keepalive timer */ - /* Flags in tp->nonagle */ #define TCP_NAGLE_OFF 1 /* Nagle's algo is disabled */ #define TCP_NAGLE_CORK 2 /* Socket is corked */ @@ -257,12 +250,6 @@ extern atomic_t tcp_memory_allocated; extern atomic_t tcp_sockets_allocated; extern int tcp_memory_pressure; -#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) -#define AF_INET_FAMILY(fam) ((fam) == AF_INET) -#else -#define AF_INET_FAMILY(fam) 1 -#endif - /* * Pointers to address related TCP functions * (i.e. things that depend on the address family) @@ -373,22 +360,6 @@ extern int tcp_rcv_established(struct sock *sk, extern void tcp_rcv_space_adjust(struct sock *sk); -enum inet_csk_ack_state_t { - ICSK_ACK_SCHED = 1, - ICSK_ACK_TIMER = 2, - ICSK_ACK_PUSHED = 4 -}; - -static inline void inet_csk_schedule_ack(struct sock *sk) -{ - inet_csk(sk)->icsk_ack.pending |= ICSK_ACK_SCHED; -} - -static inline int inet_csk_ack_scheduled(const struct sock *sk) -{ - return inet_csk(sk)->icsk_ack.pending & ICSK_ACK_SCHED; -} - static inline void tcp_dec_quickack_mode(struct sock *sk, const unsigned int pkts) { @@ -406,11 +377,6 @@ static inline void tcp_dec_quickack_mode(struct sock *sk, extern void tcp_enter_quickack_mode(struct sock *sk); -static inline void inet_csk_delack_init(struct sock *sk) -{ - memset(&inet_csk(sk)->icsk_ack, 0, sizeof(inet_csk(sk)->icsk_ack)); -} - static inline void tcp_clear_options(struct tcp_options_received *rx_opt) { rx_opt->tstamp_ok = rx_opt->sack_ok = rx_opt->wscale_ok = rx_opt->snd_wscale = 0; @@ -442,7 
+408,6 @@ extern void tcp_update_metrics(struct sock *sk); extern void tcp_close(struct sock *sk, long timeout); -extern struct sock * inet_csk_accept(struct sock *sk, int flags, int *err); extern unsigned int tcp_poll(struct file * file, struct socket *sock, struct poll_table_struct *wait); extern int tcp_getsockopt(struct sock *sk, int level, @@ -541,15 +506,9 @@ static inline void tcp_clear_xmit_timers(struct sock *sk) inet_csk_clear_xmit_timers(sk); } -extern void inet_csk_delete_keepalive_timer(struct sock *sk); -extern void inet_csk_reset_keepalive_timer(struct sock *sk, unsigned long timeout); extern unsigned int tcp_sync_mss(struct sock *sk, u32 pmtu); extern unsigned int tcp_current_mss(struct sock *sk, int large); -#ifdef INET_CSK_DEBUG -extern const char inet_csk_timer_bug_msg[]; -#endif - /* tcp_diag.c */ extern void tcp_get_info(struct sock *, struct tcp_info *); @@ -559,60 +518,6 @@ typedef int (*sk_read_actor_t)(read_descriptor_t *, struct sk_buff *, extern int tcp_read_sock(struct sock *sk, read_descriptor_t *desc, sk_read_actor_t recv_actor); -static inline void inet_csk_clear_xmit_timer(struct sock *sk, const int what) -{ - struct inet_connection_sock *icsk = inet_csk(sk); - - if (what == ICSK_TIME_RETRANS || what == ICSK_TIME_PROBE0) { - icsk->icsk_pending = 0; -#ifdef INET_CSK_CLEAR_TIMERS - sk_stop_timer(sk, &icsk->icsk_retransmit_timer); -#endif - } else if (what == ICSK_TIME_DACK) { - icsk->icsk_ack.blocked = icsk->icsk_ack.pending = 0; -#ifdef INET_CSK_CLEAR_TIMERS - sk_stop_timer(sk, &icsk->icsk_delack_timer); -#endif - } -#ifdef INET_CSK_DEBUG - else { - pr_debug(inet_csk_timer_bug_msg); - } -#endif -} - -/* - * Reset the retransmission timer - */ -static inline void inet_csk_reset_xmit_timer(struct sock *sk, const int what, - unsigned long when) -{ - struct inet_connection_sock *icsk = inet_csk(sk); - - if (when > TCP_RTO_MAX) { -#ifdef INET_CSK_DEBUG - pr_debug("reset_xmit_timer: sk=%p %d when=0x%lx, caller=%p\n", - sk, what, when, 
current_text_addr()); -#endif - when = TCP_RTO_MAX; - } - - if (what == ICSK_TIME_RETRANS || what == ICSK_TIME_PROBE0) { - icsk->icsk_pending = what; - icsk->icsk_timeout = jiffies + when; - sk_reset_timer(sk, &icsk->icsk_retransmit_timer, icsk->icsk_timeout); - } else if (what == ICSK_TIME_DACK) { - icsk->icsk_ack.pending |= ICSK_ACK_TIMER; - icsk->icsk_ack.timeout = jiffies + when; - sk_reset_timer(sk, &icsk->icsk_delack_timer, icsk->icsk_ack.timeout); - } -#ifdef INET_CSK_DEBUG - else { - pr_debug(inet_csk_timer_bug_msg); - } -#endif -} - /* Initialize RCV_MSS value. * RCV_MSS is an our guess about MSS used by the peer. * We haven't any direct information about the MSS. @@ -765,7 +670,8 @@ static inline void tcp_packets_out_inc(struct sock *sk, tp->packets_out += tcp_skb_pcount(skb); if (!orig) - inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, inet_csk(sk)->icsk_rto); + inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, + inet_csk(sk)->icsk_rto, TCP_RTO_MAX); } static inline void tcp_packets_out_dec(struct tcp_sock *tp, @@ -934,7 +840,8 @@ static __inline__ void tcp_check_probe_timer(struct sock *sk, struct tcp_sock *t { const struct inet_connection_sock *icsk = inet_csk(sk); if (!tp->packets_out && !icsk->icsk_pending) - inet_csk_reset_xmit_timer(sk, ICSK_TIME_PROBE0, icsk->icsk_rto); + inet_csk_reset_xmit_timer(sk, ICSK_TIME_PROBE0, + icsk->icsk_rto, TCP_RTO_MAX); } static __inline__ void tcp_push_pending_frames(struct sock *sk, @@ -1017,7 +924,8 @@ static __inline__ int tcp_prequeue(struct sock *sk, struct sk_buff *skb) wake_up_interruptible(sk->sk_sleep); if (!inet_csk_ack_scheduled(sk)) inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK, - (3 * TCP_RTO_MIN) / 4); + (3 * TCP_RTO_MIN) / 4, + TCP_RTO_MAX); } return 1; } @@ -1181,58 +1089,6 @@ static inline int tcp_full_space(const struct sock *sk) return tcp_win_from_space(sk->sk_rcvbuf); } -static inline void inet_csk_reqsk_queue_add(struct sock *sk, - struct request_sock *req, - struct sock *child) -{ - 
reqsk_queue_add(&inet_csk(sk)->icsk_accept_queue, req, sk, child); -} - -static inline void inet_csk_reqsk_queue_removed(struct sock *sk, - struct request_sock *req) -{ - if (reqsk_queue_removed(&inet_csk(sk)->icsk_accept_queue, req) == 0) - inet_csk_delete_keepalive_timer(sk); -} - -static inline void inet_csk_reqsk_queue_added(struct sock *sk, - const unsigned long timeout) -{ - if (reqsk_queue_added(&inet_csk(sk)->icsk_accept_queue) == 0) - inet_csk_reset_keepalive_timer(sk, timeout); -} - -static inline int inet_csk_reqsk_queue_len(const struct sock *sk) -{ - return reqsk_queue_len(&inet_csk(sk)->icsk_accept_queue); -} - -static inline int inet_csk_reqsk_queue_young(const struct sock *sk) -{ - return reqsk_queue_len_young(&inet_csk(sk)->icsk_accept_queue); -} - -static inline int inet_csk_reqsk_queue_is_full(const struct sock *sk) -{ - return reqsk_queue_is_full(&inet_csk(sk)->icsk_accept_queue); -} - -static inline void inet_csk_reqsk_queue_unlink(struct sock *sk, - struct request_sock *req, - struct request_sock **prev) -{ - reqsk_queue_unlink(&inet_csk(sk)->icsk_accept_queue, req, prev); -} - -static inline void inet_csk_reqsk_queue_drop(struct sock *sk, - struct request_sock *req, - struct request_sock **prev) -{ - inet_csk_reqsk_queue_unlink(sk, req, prev); - inet_csk_reqsk_queue_removed(sk, req); - reqsk_free(req); -} - static __inline__ void tcp_openreq_init(struct request_sock *req, struct tcp_options_received *rx_opt, struct sk_buff *skb) diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile index 6650d18e400f..ea0e1d87dc7e 100644 --- a/net/ipv4/Makefile +++ b/net/ipv4/Makefile @@ -5,7 +5,7 @@ obj-y := route.o inetpeer.o protocol.o \ ip_input.o ip_fragment.o ip_forward.o ip_options.o \ ip_output.o ip_sockglue.o inet_hashtables.o \ - inet_timewait_sock.o \ + inet_timewait_sock.o inet_connection_sock.o \ tcp.o tcp_input.o tcp_output.o tcp_timer.o tcp_ipv4.o \ tcp_minisocks.o tcp_cong.o \ datagram.o raw.o udp.o arp.o icmp.o devinet.o af_inet.o igmp.o \ diff 
--git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c new file mode 100644 index 000000000000..2712400a8bb8 --- /dev/null +++ b/net/ipv4/inet_connection_sock.c @@ -0,0 +1,401 @@ +/* + * INET An implementation of the TCP/IP protocol suite for the LINUX + * operating system. INET is implemented using the BSD Socket + * interface as the means of communication with the user level. + * + * Support for INET connection oriented protocols. + * + * Authors: See the TCP sources + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or(at your option) any later version. + */ + +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#ifdef INET_CSK_DEBUG +const char inet_csk_timer_bug_msg[] = "inet_csk BUG: unknown timer value\n"; +EXPORT_SYMBOL(inet_csk_timer_bug_msg); +#endif + +/* + * This array holds the first and last local port number. + * For high-usage systems, use sysctl to change this to + * 32768-61000 + */ +int sysctl_local_port_range[2] = { 1024, 4999 }; + +static inline int inet_csk_bind_conflict(struct sock *sk, struct inet_bind_bucket *tb) +{ + const u32 sk_rcv_saddr = inet_rcv_saddr(sk); + struct sock *sk2; + struct hlist_node *node; + int reuse = sk->sk_reuse; + + sk_for_each_bound(sk2, node, &tb->owners) { + if (sk != sk2 && + !inet_v6_ipv6only(sk2) && + (!sk->sk_bound_dev_if || + !sk2->sk_bound_dev_if || + sk->sk_bound_dev_if == sk2->sk_bound_dev_if)) { + if (!reuse || !sk2->sk_reuse || + sk2->sk_state == TCP_LISTEN) { + const u32 sk2_rcv_saddr = inet_rcv_saddr(sk2); + if (!sk2_rcv_saddr || !sk_rcv_saddr || + sk2_rcv_saddr == sk_rcv_saddr) + break; + } + } + } + return node != NULL; +} + +/* Obtain a reference to a local port for the given sock, + * if snum is zero it means select any available local port. 
+ */ +int inet_csk_get_port(struct inet_hashinfo *hashinfo, + struct sock *sk, unsigned short snum) +{ + struct inet_bind_hashbucket *head; + struct hlist_node *node; + struct inet_bind_bucket *tb; + int ret; + + local_bh_disable(); + if (!snum) { + int low = sysctl_local_port_range[0]; + int high = sysctl_local_port_range[1]; + int remaining = (high - low) + 1; + int rover; + + spin_lock(&hashinfo->portalloc_lock); + if (hashinfo->port_rover < low) + rover = low; + else + rover = hashinfo->port_rover; + do { + rover++; + if (rover > high) + rover = low; + head = &hashinfo->bhash[inet_bhashfn(rover, hashinfo->bhash_size)]; + spin_lock(&head->lock); + inet_bind_bucket_for_each(tb, node, &head->chain) + if (tb->port == rover) + goto next; + break; + next: + spin_unlock(&head->lock); + } while (--remaining > 0); + hashinfo->port_rover = rover; + spin_unlock(&hashinfo->portalloc_lock); + + /* Exhausted local port range during search? It is not + * possible for us to be holding one of the bind hash + * locks if this test triggers, because if 'remaining' + * drops to zero, we broke out of the do/while loop at + * the top level, not from the 'break;' statement. + */ + ret = 1; + if (remaining <= 0) + goto fail; + + /* OK, here is the one we will use. HEAD is + * non-NULL and we hold it's mutex. 
+ */ + snum = rover; + } else { + head = &hashinfo->bhash[inet_bhashfn(snum, hashinfo->bhash_size)]; + spin_lock(&head->lock); + inet_bind_bucket_for_each(tb, node, &head->chain) + if (tb->port == snum) + goto tb_found; + } + tb = NULL; + goto tb_not_found; +tb_found: + if (!hlist_empty(&tb->owners)) { + if (sk->sk_reuse > 1) + goto success; + if (tb->fastreuse > 0 && + sk->sk_reuse && sk->sk_state != TCP_LISTEN) { + goto success; + } else { + ret = 1; + if (inet_csk_bind_conflict(sk, tb)) + goto fail_unlock; + } + } +tb_not_found: + ret = 1; + if (!tb && (tb = inet_bind_bucket_create(hashinfo->bind_bucket_cachep, head, snum)) == NULL) + goto fail_unlock; + if (hlist_empty(&tb->owners)) { + if (sk->sk_reuse && sk->sk_state != TCP_LISTEN) + tb->fastreuse = 1; + else + tb->fastreuse = 0; + } else if (tb->fastreuse && + (!sk->sk_reuse || sk->sk_state == TCP_LISTEN)) + tb->fastreuse = 0; +success: + if (!inet_csk(sk)->icsk_bind_hash) + inet_bind_hash(sk, tb, snum); + BUG_TRAP(inet_csk(sk)->icsk_bind_hash == tb); + ret = 0; + +fail_unlock: + spin_unlock(&head->lock); +fail: + local_bh_enable(); + return ret; +} + +EXPORT_SYMBOL_GPL(inet_csk_get_port); + +/* + * Wait for an incoming connection, avoid race conditions. This must be called + * with the socket locked. + */ +static int inet_csk_wait_for_connect(struct sock *sk, long timeo) +{ + struct inet_connection_sock *icsk = inet_csk(sk); + DEFINE_WAIT(wait); + int err; + + /* + * True wake-one mechanism for incoming connections: only + * one process gets woken up, not the 'whole herd'. + * Since we do not 'race & poll' for established sockets + * anymore, the common case will execute the loop only once. + * + * Subtle issue: "add_wait_queue_exclusive()" will be added + * after any current non-exclusive waiters, and we know that + * it will always _stay_ after any new non-exclusive waiters + * because all non-exclusive waiters are added at the + * beginning of the wait-queue. 
As such, it's ok to "drop" + * our exclusiveness temporarily when we get woken up without + * having to remove and re-insert us on the wait queue. + */ + for (;;) { + prepare_to_wait_exclusive(sk->sk_sleep, &wait, + TASK_INTERRUPTIBLE); + release_sock(sk); + if (reqsk_queue_empty(&icsk->icsk_accept_queue)) + timeo = schedule_timeout(timeo); + lock_sock(sk); + err = 0; + if (!reqsk_queue_empty(&icsk->icsk_accept_queue)) + break; + err = -EINVAL; + if (sk->sk_state != TCP_LISTEN) + break; + err = sock_intr_errno(timeo); + if (signal_pending(current)) + break; + err = -EAGAIN; + if (!timeo) + break; + } + finish_wait(sk->sk_sleep, &wait); + return err; +} + +/* + * This will accept the next outstanding connection. + */ +struct sock *inet_csk_accept(struct sock *sk, int flags, int *err) +{ + struct inet_connection_sock *icsk = inet_csk(sk); + struct sock *newsk; + int error; + + lock_sock(sk); + + /* We need to make sure that this socket is listening, + * and that it has something pending. + */ + error = -EINVAL; + if (sk->sk_state != TCP_LISTEN) + goto out_err; + + /* Find already established connection */ + if (reqsk_queue_empty(&icsk->icsk_accept_queue)) { + long timeo = sock_rcvtimeo(sk, flags & O_NONBLOCK); + + /* If this is a non blocking socket don't sleep */ + error = -EAGAIN; + if (!timeo) + goto out_err; + + error = inet_csk_wait_for_connect(sk, timeo); + if (error) + goto out_err; + } + + newsk = reqsk_queue_get_child(&icsk->icsk_accept_queue, sk); + BUG_TRAP(newsk->sk_state != TCP_SYN_RECV); +out: + release_sock(sk); + return newsk; +out_err: + newsk = NULL; + *err = error; + goto out; +} + +EXPORT_SYMBOL(inet_csk_accept); + +/* + * Using different timers for retransmit, delayed acks and probes + * We may wish use just one timer maintaining a list of expire jiffies + * to optimize. 
+ */ +void inet_csk_init_xmit_timers(struct sock *sk, + void (*retransmit_handler)(unsigned long), + void (*delack_handler)(unsigned long), + void (*keepalive_handler)(unsigned long)) +{ + struct inet_connection_sock *icsk = inet_csk(sk); + + init_timer(&icsk->icsk_retransmit_timer); + init_timer(&icsk->icsk_delack_timer); + init_timer(&sk->sk_timer); + + icsk->icsk_retransmit_timer.function = retransmit_handler; + icsk->icsk_delack_timer.function = delack_handler; + sk->sk_timer.function = keepalive_handler; + + icsk->icsk_retransmit_timer.data = + icsk->icsk_delack_timer.data = + sk->sk_timer.data = (unsigned long)sk; + + icsk->icsk_pending = icsk->icsk_ack.pending = 0; +} + +EXPORT_SYMBOL(inet_csk_init_xmit_timers); + +void inet_csk_clear_xmit_timers(struct sock *sk) +{ + struct inet_connection_sock *icsk = inet_csk(sk); + + icsk->icsk_pending = icsk->icsk_ack.pending = icsk->icsk_ack.blocked = 0; + + sk_stop_timer(sk, &icsk->icsk_retransmit_timer); + sk_stop_timer(sk, &icsk->icsk_delack_timer); + sk_stop_timer(sk, &sk->sk_timer); +} + +EXPORT_SYMBOL(inet_csk_clear_xmit_timers); + +void inet_csk_delete_keepalive_timer(struct sock *sk) +{ + sk_stop_timer(sk, &sk->sk_timer); +} + +EXPORT_SYMBOL(inet_csk_delete_keepalive_timer); + +void inet_csk_reset_keepalive_timer(struct sock *sk, unsigned long len) +{ + sk_reset_timer(sk, &sk->sk_timer, jiffies + len); +} + +EXPORT_SYMBOL(inet_csk_reset_keepalive_timer); + +struct dst_entry* inet_csk_route_req(struct sock *sk, + const struct request_sock *req) +{ + struct rtable *rt; + const struct inet_request_sock *ireq = inet_rsk(req); + struct ip_options *opt = inet_rsk(req)->opt; + struct flowi fl = { .oif = sk->sk_bound_dev_if, + .nl_u = { .ip4_u = + { .daddr = ((opt && opt->srr) ? 
+ opt->faddr : + ireq->rmt_addr), + .saddr = ireq->loc_addr, + .tos = RT_CONN_FLAGS(sk) } }, + .proto = sk->sk_protocol, + .uli_u = { .ports = + { .sport = inet_sk(sk)->sport, + .dport = ireq->rmt_port } } }; + + if (ip_route_output_flow(&rt, &fl, sk, 0)) { + IP_INC_STATS_BH(IPSTATS_MIB_OUTNOROUTES); + return NULL; + } + if (opt && opt->is_strictroute && rt->rt_dst != rt->rt_gateway) { + ip_rt_put(rt); + IP_INC_STATS_BH(IPSTATS_MIB_OUTNOROUTES); + return NULL; + } + return &rt->u.dst; +} + +EXPORT_SYMBOL_GPL(inet_csk_route_req); + +static inline u32 inet_synq_hash(const u32 raddr, const u16 rport, + const u32 rnd, const u16 synq_hsize) +{ + return jhash_2words(raddr, (u32)rport, rnd) & (synq_hsize - 1); +} + +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) +#define AF_INET_FAMILY(fam) ((fam) == AF_INET) +#else +#define AF_INET_FAMILY(fam) 1 +#endif + +struct request_sock *inet_csk_search_req(const struct sock *sk, + struct request_sock ***prevp, + const __u16 rport, const __u32 raddr, + const __u32 laddr) +{ + const struct inet_connection_sock *icsk = inet_csk(sk); + struct listen_sock *lopt = icsk->icsk_accept_queue.listen_opt; + struct request_sock *req, **prev; + + for (prev = &lopt->syn_table[inet_synq_hash(raddr, rport, lopt->hash_rnd, + lopt->nr_table_entries)]; + (req = *prev) != NULL; + prev = &req->dl_next) { + const struct inet_request_sock *ireq = inet_rsk(req); + + if (ireq->rmt_port == rport && + ireq->rmt_addr == raddr && + ireq->loc_addr == laddr && + AF_INET_FAMILY(req->rsk_ops->family)) { + BUG_TRAP(!req->sk); + *prevp = prev; + break; + } + } + + return req; +} + +EXPORT_SYMBOL_GPL(inet_csk_search_req); + +void inet_csk_reqsk_queue_hash_add(struct sock *sk, struct request_sock *req, + const unsigned timeout) +{ + struct inet_connection_sock *icsk = inet_csk(sk); + struct listen_sock *lopt = icsk->icsk_accept_queue.listen_opt; + const u32 h = inet_synq_hash(inet_rsk(req)->rmt_addr, inet_rsk(req)->rmt_port, + lopt->hash_rnd, 
lopt->nr_table_entries); + + reqsk_queue_hash_req(&icsk->icsk_accept_queue, h, req, timeout); + inet_csk_reqsk_queue_added(sk, timeout); +} + +EXPORT_SYMBOL_GPL(inet_csk_reqsk_queue_hash_add); diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 8177b86570db..581016a6a93f 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -1804,98 +1804,6 @@ int tcp_disconnect(struct sock *sk, int flags) return err; } -/* - * Wait for an incoming connection, avoid race - * conditions. This must be called with the socket locked. - */ -static int wait_for_connect(struct sock *sk, long timeo) -{ - struct inet_connection_sock *icsk = inet_csk(sk); - DEFINE_WAIT(wait); - int err; - - /* - * True wake-one mechanism for incoming connections: only - * one process gets woken up, not the 'whole herd'. - * Since we do not 'race & poll' for established sockets - * anymore, the common case will execute the loop only once. - * - * Subtle issue: "add_wait_queue_exclusive()" will be added - * after any current non-exclusive waiters, and we know that - * it will always _stay_ after any new non-exclusive waiters - * because all non-exclusive waiters are added at the - * beginning of the wait-queue. As such, it's ok to "drop" - * our exclusiveness temporarily when we get woken up without - * having to remove and re-insert us on the wait queue. - */ - for (;;) { - prepare_to_wait_exclusive(sk->sk_sleep, &wait, - TASK_INTERRUPTIBLE); - release_sock(sk); - if (reqsk_queue_empty(&icsk->icsk_accept_queue)) - timeo = schedule_timeout(timeo); - lock_sock(sk); - err = 0; - if (!reqsk_queue_empty(&icsk->icsk_accept_queue)) - break; - err = -EINVAL; - if (sk->sk_state != TCP_LISTEN) - break; - err = sock_intr_errno(timeo); - if (signal_pending(current)) - break; - err = -EAGAIN; - if (!timeo) - break; - } - finish_wait(sk->sk_sleep, &wait); - return err; -} - -/* - * This will accept the next outstanding connection. 
- */ - -struct sock *inet_csk_accept(struct sock *sk, int flags, int *err) -{ - struct inet_connection_sock *icsk = inet_csk(sk); - struct sock *newsk; - int error; - - lock_sock(sk); - - /* We need to make sure that this socket is listening, - * and that it has something pending. - */ - error = -EINVAL; - if (sk->sk_state != TCP_LISTEN) - goto out_err; - - /* Find already established connection */ - if (reqsk_queue_empty(&icsk->icsk_accept_queue)) { - long timeo = sock_rcvtimeo(sk, flags & O_NONBLOCK); - - /* If this is a non blocking socket don't sleep */ - error = -EAGAIN; - if (!timeo) - goto out_err; - - error = wait_for_connect(sk, timeo); - if (error) - goto out_err; - } - - newsk = reqsk_queue_get_child(&icsk->icsk_accept_queue, sk); - BUG_TRAP(newsk->sk_state != TCP_SYN_RECV); -out: - release_sock(sk); - return newsk; -out_err: - newsk = NULL; - *err = error; - goto out; -} - /* * Socket option code for TCP. */ @@ -2344,7 +2252,6 @@ void __init tcp_init(void) tcp_register_congestion_control(&tcp_reno); } -EXPORT_SYMBOL(inet_csk_accept); EXPORT_SYMBOL(tcp_close); EXPORT_SYMBOL(tcp_destroy_sock); EXPORT_SYMBOL(tcp_disconnect); diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 8a8c5c2d90cb..b35badf53aa5 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -1278,7 +1278,7 @@ static int tcp_check_sack_reneging(struct sock *sk) inet_csk(sk)->icsk_retransmits++; tcp_retransmit_skb(sk, skb_peek(&sk->sk_write_queue)); inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, - inet_csk(sk)->icsk_rto); + inet_csk(sk)->icsk_rto, TCP_RTO_MAX); return 1; } return 0; @@ -1961,7 +1961,7 @@ static inline void tcp_ack_packets_out(struct sock *sk, struct tcp_sock *tp) if (!tp->packets_out) { inet_csk_clear_xmit_timer(sk, ICSK_TIME_RETRANS); } else { - inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, inet_csk(sk)->icsk_rto); + inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, inet_csk(sk)->icsk_rto, TCP_RTO_MAX); } } @@ -2147,7 +2147,8 @@ static void 
tcp_ack_probe(struct sock *sk) */ } else { inet_csk_reset_xmit_timer(sk, ICSK_TIME_PROBE0, - min(icsk->icsk_rto << icsk->icsk_backoff, TCP_RTO_MAX)); + min(icsk->icsk_rto << icsk->icsk_backoff, TCP_RTO_MAX), + TCP_RTO_MAX); } } @@ -3968,7 +3969,8 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb, inet_csk(sk)->icsk_ack.ato = TCP_ATO_MIN; tcp_incr_quickack(sk); tcp_enter_quickack_mode(sk); - inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK, TCP_DELACK_MAX); + inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK, + TCP_DELACK_MAX, TCP_RTO_MAX); discard: __kfree_skb(skb); diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 2cd41265d17f..2f605b9e6b67 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -97,138 +97,6 @@ struct inet_hashinfo __cacheline_aligned tcp_hashinfo = { .port_rover = 1024 - 1, }; -/* - * This array holds the first and last local port number. - * For high-usage systems, use sysctl to change this to - * 32768-61000 - */ -int sysctl_local_port_range[2] = { 1024, 4999 }; - -static inline int inet_csk_bind_conflict(struct sock *sk, struct inet_bind_bucket *tb) -{ - const u32 sk_rcv_saddr = inet_rcv_saddr(sk); - struct sock *sk2; - struct hlist_node *node; - int reuse = sk->sk_reuse; - - sk_for_each_bound(sk2, node, &tb->owners) { - if (sk != sk2 && - !inet_v6_ipv6only(sk2) && - (!sk->sk_bound_dev_if || - !sk2->sk_bound_dev_if || - sk->sk_bound_dev_if == sk2->sk_bound_dev_if)) { - if (!reuse || !sk2->sk_reuse || - sk2->sk_state == TCP_LISTEN) { - const u32 sk2_rcv_saddr = inet_rcv_saddr(sk2); - if (!sk2_rcv_saddr || !sk_rcv_saddr || - sk2_rcv_saddr == sk_rcv_saddr) - break; - } - } - } - return node != NULL; -} - -/* Obtain a reference to a local port for the given sock, - * if snum is zero it means select any available local port. 
- */ -int inet_csk_get_port(struct inet_hashinfo *hashinfo, - struct sock *sk, unsigned short snum) -{ - struct inet_bind_hashbucket *head; - struct hlist_node *node; - struct inet_bind_bucket *tb; - int ret; - - local_bh_disable(); - if (!snum) { - int low = sysctl_local_port_range[0]; - int high = sysctl_local_port_range[1]; - int remaining = (high - low) + 1; - int rover; - - spin_lock(&hashinfo->portalloc_lock); - if (hashinfo->port_rover < low) - rover = low; - else - rover = hashinfo->port_rover; - do { - rover++; - if (rover > high) - rover = low; - head = &hashinfo->bhash[inet_bhashfn(rover, hashinfo->bhash_size)]; - spin_lock(&head->lock); - inet_bind_bucket_for_each(tb, node, &head->chain) - if (tb->port == rover) - goto next; - break; - next: - spin_unlock(&head->lock); - } while (--remaining > 0); - hashinfo->port_rover = rover; - spin_unlock(&hashinfo->portalloc_lock); - - /* Exhausted local port range during search? It is not - * possible for us to be holding one of the bind hash - * locks if this test triggers, because if 'remaining' - * drops to zero, we broke out of the do/while loop at - * the top level, not from the 'break;' statement. - */ - ret = 1; - if (unlikely(remaining <= 0)) - goto fail; - - /* OK, here is the one we will use. HEAD is - * non-NULL and we hold it's mutex. 
- */ - snum = rover; - } else { - head = &hashinfo->bhash[inet_bhashfn(snum, hashinfo->bhash_size)]; - spin_lock(&head->lock); - inet_bind_bucket_for_each(tb, node, &head->chain) - if (tb->port == snum) - goto tb_found; - } - tb = NULL; - goto tb_not_found; -tb_found: - if (!hlist_empty(&tb->owners)) { - if (sk->sk_reuse > 1) - goto success; - if (tb->fastreuse > 0 && - sk->sk_reuse && sk->sk_state != TCP_LISTEN) { - goto success; - } else { - ret = 1; - if (inet_csk_bind_conflict(sk, tb)) - goto fail_unlock; - } - } -tb_not_found: - ret = 1; - if (!tb && (tb = inet_bind_bucket_create(hashinfo->bind_bucket_cachep, head, snum)) == NULL) - goto fail_unlock; - if (hlist_empty(&tb->owners)) { - if (sk->sk_reuse && sk->sk_state != TCP_LISTEN) - tb->fastreuse = 1; - else - tb->fastreuse = 0; - } else if (tb->fastreuse && - (!sk->sk_reuse || sk->sk_state == TCP_LISTEN)) - tb->fastreuse = 0; -success: - if (!inet_csk(sk)->icsk_bind_hash) - inet_bind_hash(sk, tb, snum); - BUG_TRAP(inet_csk(sk)->icsk_bind_hash == tb); - ret = 0; - -fail_unlock: - spin_unlock(&head->lock); -fail: - local_bh_enable(); - return ret; -} - static int tcp_v4_get_port(struct sock *sk, unsigned short snum) { return inet_csk_get_port(&tcp_hashinfo, sk, snum); @@ -568,52 +436,6 @@ static inline int inet_iif(const struct sk_buff *skb) return ((struct rtable *)skb->dst)->rt_iif; } -static inline u32 inet_synq_hash(const u32 raddr, const u16 rport, - const u32 rnd, const u16 synq_hsize) -{ - return jhash_2words(raddr, (u32)rport, rnd) & (synq_hsize - 1); -} - -struct request_sock *inet_csk_search_req(const struct sock *sk, - struct request_sock ***prevp, - const __u16 rport, const __u32 raddr, - const __u32 laddr) -{ - const struct inet_connection_sock *icsk = inet_csk(sk); - struct listen_sock *lopt = icsk->icsk_accept_queue.listen_opt; - struct request_sock *req, **prev; - - for (prev = &lopt->syn_table[inet_synq_hash(raddr, rport, lopt->hash_rnd, - lopt->nr_table_entries)]; - (req = *prev) != NULL; - 
prev = &req->dl_next) { - const struct inet_request_sock *ireq = inet_rsk(req); - - if (ireq->rmt_port == rport && - ireq->rmt_addr == raddr && - ireq->loc_addr == laddr && - AF_INET_FAMILY(req->rsk_ops->family)) { - BUG_TRAP(!req->sk); - *prevp = prev; - break; - } - } - - return req; -} - -static void tcp_v4_synq_add(struct sock *sk, struct request_sock *req) -{ - struct inet_connection_sock *icsk = inet_csk(sk); - struct listen_sock *lopt = icsk->icsk_accept_queue.listen_opt; - const u32 h = inet_synq_hash(inet_rsk(req)->rmt_addr, inet_rsk(req)->rmt_port, - lopt->hash_rnd, lopt->nr_table_entries); - - reqsk_queue_hash_req(&icsk->icsk_accept_queue, h, req, TCP_TIMEOUT_INIT); - inet_csk_reqsk_queue_added(sk, TCP_TIMEOUT_INIT); -} - - /* * This routine does path mtu discovery as defined in RFC1191. */ @@ -963,36 +785,6 @@ static void tcp_v4_reqsk_send_ack(struct sk_buff *skb, struct request_sock *req) req->ts_recent); } -struct dst_entry* inet_csk_route_req(struct sock *sk, - const struct request_sock *req) -{ - struct rtable *rt; - const struct inet_request_sock *ireq = inet_rsk(req); - struct ip_options *opt = inet_rsk(req)->opt; - struct flowi fl = { .oif = sk->sk_bound_dev_if, - .nl_u = { .ip4_u = - { .daddr = ((opt && opt->srr) ? - opt->faddr : - ireq->rmt_addr), - .saddr = ireq->loc_addr, - .tos = RT_CONN_FLAGS(sk) } }, - .proto = sk->sk_protocol, - .uli_u = { .ports = - { .sport = inet_sk(sk)->sport, - .dport = ireq->rmt_port } } }; - - if (ip_route_output_flow(&rt, &fl, sk, 0)) { - IP_INC_STATS_BH(IPSTATS_MIB_OUTNOROUTES); - return NULL; - } - if (opt && opt->is_strictroute && rt->rt_dst != rt->rt_gateway) { - ip_rt_put(rt); - IP_INC_STATS_BH(IPSTATS_MIB_OUTNOROUTES); - return NULL; - } - return &rt->u.dst; -} - /* * Send a SYN-ACK after having received an ACK. 
* This still operates on a request_sock only, not on a big @@ -1222,7 +1014,7 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) if (want_cookie) { reqsk_free(req); } else { - tcp_v4_synq_add(sk, req); + inet_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT); } return 0; diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 6f0a7e30ceac..f458eacb5ef2 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -1493,7 +1493,8 @@ void tcp_xmit_retransmit_queue(struct sock *sk) if (skb == skb_peek(&sk->sk_write_queue)) inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, - inet_csk(sk)->icsk_rto); + inet_csk(sk)->icsk_rto, + TCP_RTO_MAX); } packet_cnt -= tcp_skb_pcount(skb); @@ -1546,7 +1547,9 @@ void tcp_xmit_retransmit_queue(struct sock *sk) break; if (skb == skb_peek(&sk->sk_write_queue)) - inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, inet_csk(sk)->icsk_rto); + inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, + inet_csk(sk)->icsk_rto, + TCP_RTO_MAX); NET_INC_STATS_BH(LINUX_MIB_TCPFORWARDRETRANS); } @@ -1826,7 +1829,8 @@ int tcp_connect(struct sock *sk) TCP_INC_STATS(TCP_MIB_ACTIVEOPENS); /* Timer for repeating the SYN until an answer. 
*/ - inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, inet_csk(sk)->icsk_rto); + inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, + inet_csk(sk)->icsk_rto, TCP_RTO_MAX); return 0; } @@ -1901,7 +1905,8 @@ void tcp_send_ack(struct sock *sk) if (buff == NULL) { inet_csk_schedule_ack(sk); inet_csk(sk)->icsk_ack.ato = TCP_ATO_MIN; - inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK, TCP_DELACK_MAX); + inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK, + TCP_DELACK_MAX, TCP_RTO_MAX); return; } @@ -2033,7 +2038,8 @@ void tcp_send_probe0(struct sock *sk) icsk->icsk_backoff++; tp->probes_out++; inet_csk_reset_xmit_timer(sk, ICSK_TIME_PROBE0, - min(icsk->icsk_rto << icsk->icsk_backoff, TCP_RTO_MAX)); + min(icsk->icsk_rto << icsk->icsk_backoff, TCP_RTO_MAX), + TCP_RTO_MAX); } else { /* If packet was not sent due to local congestion, * do not backoff and do not remember probes_out. @@ -2045,7 +2051,8 @@ void tcp_send_probe0(struct sock *sk) tp->probes_out=1; inet_csk_reset_xmit_timer(sk, ICSK_TIME_PROBE0, min(icsk->icsk_rto << icsk->icsk_backoff, - TCP_RESOURCE_PROBE_INTERVAL)); + TCP_RESOURCE_PROBE_INTERVAL), + TCP_RTO_MAX); } } diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index 0b71380ee42f..c03930c48f42 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -36,55 +36,14 @@ static void tcp_write_timer(unsigned long); static void tcp_delack_timer(unsigned long); static void tcp_keepalive_timer (unsigned long data); -#ifdef INET_CSK_DEBUG -const char inet_csk_timer_bug_msg[] = "inet_csk BUG: unknown timer value\n"; -EXPORT_SYMBOL(inet_csk_timer_bug_msg); -#endif - -/* - * Using different timers for retransmit, delayed acks and probes - * We may wish use just one timer maintaining a list of expire jiffies - * to optimize. 
- */ -void inet_csk_init_xmit_timers(struct sock *sk, - void (*retransmit_handler)(unsigned long), - void (*delack_handler)(unsigned long), - void (*keepalive_handler)(unsigned long)) -{ - struct inet_connection_sock *icsk = inet_csk(sk); - - init_timer(&icsk->icsk_retransmit_timer); - init_timer(&icsk->icsk_delack_timer); - init_timer(&sk->sk_timer); - - icsk->icsk_retransmit_timer.function = retransmit_handler; - icsk->icsk_delack_timer.function = delack_handler; - sk->sk_timer.function = keepalive_handler; - - icsk->icsk_retransmit_timer.data = - icsk->icsk_delack_timer.data = - sk->sk_timer.data = (unsigned long)sk; - - icsk->icsk_pending = icsk->icsk_ack.pending = 0; -} - -void inet_csk_clear_xmit_timers(struct sock *sk) -{ - struct inet_connection_sock *icsk = inet_csk(sk); - - icsk->icsk_pending = icsk->icsk_ack.pending = icsk->icsk_ack.blocked = 0; - - sk_stop_timer(sk, &icsk->icsk_retransmit_timer); - sk_stop_timer(sk, &icsk->icsk_delack_timer); - sk_stop_timer(sk, &sk->sk_timer); -} - void tcp_init_xmit_timers(struct sock *sk) { inet_csk_init_xmit_timers(sk, &tcp_write_timer, &tcp_delack_timer, &tcp_keepalive_timer); } +EXPORT_SYMBOL(tcp_init_xmit_timers); + static void tcp_write_err(struct sock *sk) { sk->sk_err = sk->sk_err_soft ? 
: ETIMEDOUT; @@ -392,7 +351,8 @@ static void tcp_retransmit_timer(struct sock *sk) if (!icsk->icsk_retransmits) icsk->icsk_retransmits = 1; inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, - min(icsk->icsk_rto, TCP_RESOURCE_PROBE_INTERVAL)); + min(icsk->icsk_rto, TCP_RESOURCE_PROBE_INTERVAL), + TCP_RTO_MAX); goto out; } @@ -416,7 +376,7 @@ static void tcp_retransmit_timer(struct sock *sk) out_reset_timer: icsk->icsk_rto = min(icsk->icsk_rto << 1, TCP_RTO_MAX); - inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, icsk->icsk_rto); + inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, icsk->icsk_rto, TCP_RTO_MAX); if (icsk->icsk_retransmits > sysctl_tcp_retries1) __sk_dst_reset(sk); @@ -553,16 +513,6 @@ static void tcp_synack_timer(struct sock *sk) inet_csk_reset_keepalive_timer(sk, TCP_SYNQ_INTERVAL); } -void inet_csk_delete_keepalive_timer(struct sock *sk) -{ - sk_stop_timer(sk, &sk->sk_timer); -} - -void inet_csk_reset_keepalive_timer(struct sock *sk, unsigned long len) -{ - sk_reset_timer(sk, &sk->sk_timer, jiffies + len); -} - void tcp_set_keepalive(struct sock *sk, int val) { if ((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN)) @@ -653,8 +603,3 @@ out: bh_unlock_sock(sk); sock_put(sk); } - -EXPORT_SYMBOL(inet_csk_clear_xmit_timers); -EXPORT_SYMBOL(inet_csk_delete_keepalive_timer); -EXPORT_SYMBOL(tcp_init_xmit_timers); -EXPORT_SYMBOL(inet_csk_reset_keepalive_timer); From 9f1d2604c71498579609b1532fedc5a89276bb00 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 9 Aug 2005 20:11:24 -0700 Subject: [PATCH 324/584] [ICSK]: Introduce inet_csk_clone Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. 
Miller --- include/net/inet_connection_sock.h | 4 ++++ net/ipv4/inet_connection_sock.c | 25 +++++++++++++++++++++++++ net/ipv4/tcp_minisocks.c | 18 +++--------------- 3 files changed, 32 insertions(+), 15 deletions(-) diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h index 97e002001c1a..a50f4a4b7b4b 100644 --- a/include/net/inet_connection_sock.h +++ b/include/net/inet_connection_sock.h @@ -77,6 +77,10 @@ static inline struct inet_connection_sock *inet_csk(const struct sock *sk) return (struct inet_connection_sock *)sk; } +extern struct sock *inet_csk_clone(struct sock *sk, + const struct request_sock *req, + const unsigned int __nocast priority); + enum inet_csk_ack_state_t { ICSK_ACK_SCHED = 1, ICSK_ACK_TIMER = 2, diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index 2712400a8bb8..136ada050b63 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -399,3 +399,28 @@ void inet_csk_reqsk_queue_hash_add(struct sock *sk, struct request_sock *req, } EXPORT_SYMBOL_GPL(inet_csk_reqsk_queue_hash_add); + +struct sock *inet_csk_clone(struct sock *sk, const struct request_sock *req, + const unsigned int __nocast priority) +{ + struct sock *newsk = sk_clone(sk, priority); + + if (newsk != NULL) { + struct inet_connection_sock *newicsk = inet_csk(newsk); + + newsk->sk_state = TCP_SYN_RECV; + newicsk->icsk_bind_hash = NULL; + + inet_sk(newsk)->dport = inet_rsk(req)->rmt_port; + newsk->sk_write_space = sk_stream_write_space; + + newicsk->icsk_retransmits = 0; + newicsk->icsk_backoff = 0; + + /* Deinitialize accept_queue to trap illegal accesses. 
*/ + memset(&newicsk->icsk_accept_queue, 0, sizeof(newicsk->icsk_accept_queue)); + } + return newsk; +} + +EXPORT_SYMBOL_GPL(inet_csk_clone); diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index 56823704eb7d..4cfbe1d1c920 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -600,22 +600,14 @@ out: */ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req, struct sk_buff *skb) { - struct sock *newsk = sk_clone(sk, GFP_ATOMIC); + struct sock *newsk = inet_csk_clone(sk, req, GFP_ATOMIC); if (newsk != NULL) { - struct inet_request_sock *ireq = inet_rsk(req); + const struct inet_request_sock *ireq = inet_rsk(req); struct tcp_request_sock *treq = tcp_rsk(req); - struct inet_sock *newinet = inet_sk(newsk); - struct inet_connection_sock *newicsk = inet_csk(newsk); + struct inet_connection_sock *newicsk = inet_csk(sk); struct tcp_sock *newtp; - newsk->sk_state = TCP_SYN_RECV; - newicsk->icsk_bind_hash = NULL; - - /* Clone the TCP header template */ - newinet->dport = ireq->rmt_port; - newsk->sk_write_space = sk_stream_write_space; - /* Now setup tcp_sock */ newtp = tcp_sk(newsk); newtp->pred_flags = 0; @@ -626,8 +618,6 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req, tcp_init_wl(newtp, treq->snt_isn, treq->rcv_isn); - newicsk->icsk_retransmits = 0; - newicsk->icsk_backoff = 0; newtp->srtt = 0; newtp->mdev = TCP_TIMEOUT_INIT; newicsk->icsk_rto = TCP_TIMEOUT_INIT; @@ -668,8 +658,6 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req, newtp->probes_out = 0; newtp->rx_opt.num_sacks = 0; newtp->urg_data = 0; - /* Deinitialize accept_queue to trap illegal accesses. 
*/ - memset(&newicsk->icsk_accept_queue, 0, sizeof(newicsk->icsk_accept_queue)); if (sock_flag(newsk, SOCK_KEEPOPEN)) inet_csk_reset_keepalive_timer(newsk, From 0a5578cf8e5e045aaa68643c17ce885426697c6b Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 9 Aug 2005 20:11:41 -0700 Subject: [PATCH 325/584] [ICSK]: Generalise tcp_listen_{start,stop} This also moved inet_iif from tcp to inet_hashtables.h, as it is needed by the inet_lookup callers, perhaps this needs a bit of polishing, but for now seems fine. Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. Miller --- include/net/inet_hashtables.h | 6 ++++++ include/net/sock.h | 1 + include/net/tcp.h | 4 ++-- net/ipv4/af_inet.c | 2 +- net/ipv4/tcp.c | 38 +++++++++++++++++++---------------- net/ipv4/tcp_ipv4.c | 6 +----- net/ipv6/tcp_ipv6.c | 1 + 7 files changed, 33 insertions(+), 25 deletions(-) diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h index f0c21c07f894..646b6ea7fe26 100644 --- a/include/net/inet_hashtables.h +++ b/include/net/inet_hashtables.h @@ -26,6 +26,7 @@ #include #include +#include #include #include @@ -280,6 +281,11 @@ out: wake_up(&hashinfo->lhash_wait); } +static inline int inet_iif(const struct sk_buff *skb) +{ + return ((struct rtable *)skb->dst)->rt_iif; +} + extern struct sock *__inet_lookup_listener(const struct hlist_head *head, const u32 daddr, const unsigned short hnum, diff --git a/include/net/sock.h b/include/net/sock.h index 48cc337a6566..8678313a22b4 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -558,6 +558,7 @@ struct proto { kmem_cache_t *twsk_slab; unsigned int twsk_obj_size; + atomic_t *orphan_count; struct request_sock_ops *rsk_prot; diff --git a/include/net/tcp.h b/include/net/tcp.h index dd9a5a288f88..68f1ec1c583a 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -860,7 +860,7 @@ static __inline__ void tcp_update_wl(struct tcp_sock *tp, u32 ack, u32 seq) tp->snd_wl1 = seq; } -extern void 
tcp_destroy_sock(struct sock *sk); +extern void inet_csk_destroy_sock(struct sock *sk); /* @@ -987,7 +987,7 @@ static __inline__ void tcp_done(struct sock *sk) if (!sock_flag(sk, SOCK_DEAD)) sk->sk_state_change(sk); else - tcp_destroy_sock(sk); + inet_csk_destroy_sock(sk); } static __inline__ void tcp_sack_reset(struct tcp_options_received *rx_opt) diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 7137e6420d66..f691058cf599 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -202,7 +202,7 @@ int inet_listen(struct socket *sock, int backlog) * we can only allow the backlog to be adjusted. */ if (old_state != TCP_LISTEN) { - err = tcp_listen_start(sk); + err = inet_csk_listen_start(sk, TCP_SYNQ_HSIZE); if (err) goto out; } diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 581016a6a93f..a1f812159ced 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -273,6 +273,8 @@ DEFINE_SNMP_STAT(struct tcp_mib, tcp_statistics); atomic_t tcp_orphan_count = ATOMIC_INIT(0); +EXPORT_SYMBOL_GPL(tcp_orphan_count); + int sysctl_tcp_mem[3]; int sysctl_tcp_wmem[3] = { 4 * 1024, 16 * 1024, 128 * 1024 }; int sysctl_tcp_rmem[3] = { 4 * 1024, 87380, 87380 * 2 }; @@ -454,12 +456,11 @@ int tcp_ioctl(struct sock *sk, int cmd, unsigned long arg) return put_user(answ, (int __user *)arg); } - -int tcp_listen_start(struct sock *sk) +int inet_csk_listen_start(struct sock *sk, const int nr_table_entries) { struct inet_sock *inet = inet_sk(sk); struct inet_connection_sock *icsk = inet_csk(sk); - int rc = reqsk_queue_alloc(&icsk->icsk_accept_queue, TCP_SYNQ_HSIZE); + int rc = reqsk_queue_alloc(&icsk->icsk_accept_queue, nr_table_entries); if (rc != 0) return rc; @@ -488,12 +489,13 @@ int tcp_listen_start(struct sock *sk) return -EADDRINUSE; } +EXPORT_SYMBOL_GPL(inet_csk_listen_start); + /* * This routine closes sockets which have been at least partially * opened, but not yet accepted. 
*/ - -static void tcp_listen_stop (struct sock *sk) +static void inet_csk_listen_stop(struct sock *sk) { struct inet_connection_sock *icsk = inet_csk(sk); struct request_sock *acc_req; @@ -524,13 +526,13 @@ static void tcp_listen_stop (struct sock *sk) BUG_TRAP(!sock_owned_by_user(child)); sock_hold(child); - tcp_disconnect(child, O_NONBLOCK); + sk->sk_prot->disconnect(child, O_NONBLOCK); sock_orphan(child); - atomic_inc(&tcp_orphan_count); + atomic_inc(sk->sk_prot->orphan_count); - tcp_destroy_sock(child); + inet_csk_destroy_sock(child); bh_unlock_sock(child); local_bh_enable(); @@ -542,6 +544,8 @@ static void tcp_listen_stop (struct sock *sk) BUG_TRAP(!sk->sk_ack_backlog); } +EXPORT_SYMBOL_GPL(inet_csk_listen_stop); + static inline void tcp_mark_push(struct tcp_sock *tp, struct sk_buff *skb) { TCP_SKB_CB(skb)->flags |= TCPCB_FLAG_PSH; @@ -1561,7 +1565,7 @@ void tcp_shutdown(struct sock *sk, int how) * can assume the socket waitqueue is inactive and nobody will * try to jump onto it. */ -void tcp_destroy_sock(struct sock *sk) +void inet_csk_destroy_sock(struct sock *sk) { BUG_TRAP(sk->sk_state == TCP_CLOSE); BUG_TRAP(sock_flag(sk, SOCK_DEAD)); @@ -1580,7 +1584,7 @@ void tcp_destroy_sock(struct sock *sk) sk_refcnt_debug_release(sk); - atomic_dec(&tcp_orphan_count); + atomic_dec(sk->sk_prot->orphan_count); sock_put(sk); } @@ -1596,7 +1600,7 @@ void tcp_close(struct sock *sk, long timeout) tcp_set_state(sk, TCP_CLOSE); /* Special case. 
*/ - tcp_listen_stop(sk); + inet_csk_listen_stop(sk); goto adjudge_to_death; } @@ -1704,7 +1708,7 @@ adjudge_to_death: if (tmo > TCP_TIMEWAIT_LEN) { inet_csk_reset_keepalive_timer(sk, tcp_fin_time(sk)); } else { - atomic_inc(&tcp_orphan_count); + atomic_inc(sk->sk_prot->orphan_count); tcp_time_wait(sk, TCP_FIN_WAIT2, tmo); goto out; } @@ -1712,7 +1716,7 @@ adjudge_to_death: } if (sk->sk_state != TCP_CLOSE) { sk_stream_mem_reclaim(sk); - if (atomic_read(&tcp_orphan_count) > sysctl_tcp_max_orphans || + if (atomic_read(sk->sk_prot->orphan_count) > sysctl_tcp_max_orphans || (sk->sk_wmem_queued > SOCK_MIN_SNDBUF && atomic_read(&tcp_memory_allocated) > sysctl_tcp_mem[2])) { if (net_ratelimit()) @@ -1723,10 +1727,10 @@ adjudge_to_death: NET_INC_STATS_BH(LINUX_MIB_TCPABORTONMEMORY); } } - atomic_inc(&tcp_orphan_count); + atomic_inc(sk->sk_prot->orphan_count); if (sk->sk_state == TCP_CLOSE) - tcp_destroy_sock(sk); + inet_csk_destroy_sock(sk); /* Otherwise, socket is reprieved until protocol close. */ out: @@ -1757,7 +1761,7 @@ int tcp_disconnect(struct sock *sk, int flags) /* ABORT function of RFC793 */ if (old_state == TCP_LISTEN) { - tcp_listen_stop(sk); + inet_csk_listen_stop(sk); } else if (tcp_need_reset(old_state) || (tp->snd_nxt != tp->write_seq && (1 << old_state) & (TCPF_CLOSING | TCPF_LAST_ACK))) { @@ -2253,7 +2257,7 @@ void __init tcp_init(void) } EXPORT_SYMBOL(tcp_close); -EXPORT_SYMBOL(tcp_destroy_sock); +EXPORT_SYMBOL(inet_csk_destroy_sock); EXPORT_SYMBOL(tcp_disconnect); EXPORT_SYMBOL(tcp_getsockopt); EXPORT_SYMBOL(tcp_ioctl); diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 2f605b9e6b67..b966102b9f39 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -431,11 +431,6 @@ failure: return err; } -static inline int inet_iif(const struct sk_buff *skb) -{ - return ((struct rtable *)skb->dst)->rt_iif; -} - /* * This routine does path mtu discovery as defined in RFC1191. 
*/ @@ -1993,6 +1988,7 @@ struct proto tcp_prot = { .get_port = tcp_v4_get_port, .enter_memory_pressure = tcp_enter_memory_pressure, .sockets_allocated = &tcp_sockets_allocated, + .orphan_count = &tcp_orphan_count, .memory_allocated = &tcp_memory_allocated, .memory_pressure = &tcp_memory_pressure, .sysctl_mem = sysctl_tcp_mem, diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index b9c7003b7f8b..0b51ec310ebe 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -2248,6 +2248,7 @@ struct proto tcpv6_prot = { .sockets_allocated = &tcp_sockets_allocated, .memory_allocated = &tcp_memory_allocated, .memory_pressure = &tcp_memory_pressure, + .orphan_count = &tcp_orphan_count, .sysctl_mem = sysctl_tcp_mem, .sysctl_wmem = sysctl_tcp_wmem, .sysctl_rmem = sysctl_tcp_rmem, From 295f7324ff8d9ea58b4d3ec93b1aaa1d80e048a9 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 9 Aug 2005 20:11:56 -0700 Subject: [PATCH 326/584] [ICSK]: Introduce reqsk_queue_prune from code in tcp_synack_timer With this we're very close to getting all of the current TCP refactorings in my dccp-2.6 tree merged, next changeset will export some functions needed by the current DCCP code and then dccp-2.6.git will be born! Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. Miller --- include/linux/tcp.h | 2 +- include/net/inet_connection_sock.h | 2 ++ include/net/request_sock.h | 7 +++++ include/net/tcp.h | 3 +- net/core/request_sock.c | 1 + net/ipv4/af_inet.c | 1 + net/ipv4/tcp.c | 21 +++++++------- net/ipv4/tcp_input.c | 11 +++++-- net/ipv4/tcp_minisocks.c | 10 ++++--- net/ipv4/tcp_timer.c | 46 ++++++++++++++++++------------ 10 files changed, 67 insertions(+), 37 deletions(-) diff --git a/include/linux/tcp.h b/include/linux/tcp.h index 800930fac388..620096840744 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -270,7 +270,7 @@ struct tcp_sock { __u8 frto_counter; /* Number of new acks after RTO */ __u8 nonagle; /* Disable Nagle algorithm? 
*/ - __u8 defer_accept; /* User waits for some data after accept() */ + /* ONE BYTE HOLE, TRY TO PACK */ /* RTT measurement */ __u32 srtt; /* smoothed round trip time << 3 */ diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h index a50f4a4b7b4b..692825fc8135 100644 --- a/include/net/inet_connection_sock.h +++ b/include/net/inet_connection_sock.h @@ -239,4 +239,6 @@ static inline void inet_csk_reqsk_queue_drop(struct sock *sk, reqsk_free(req); } +extern void inet_csk_listen_stop(struct sock *sk); + #endif /* _INET_CONNECTION_SOCK_H */ diff --git a/include/net/request_sock.h b/include/net/request_sock.h index b7c7eecbe64d..447d287a38fd 100644 --- a/include/net/request_sock.h +++ b/include/net/request_sock.h @@ -97,6 +97,7 @@ struct listen_sock { * * @rskq_accept_head - FIFO head of established children * @rskq_accept_tail - FIFO tail of established children + * @rskq_defer_accept - User waits for some data after accept() * @syn_wait_lock - serializer * * %syn_wait_lock is necessary only to avoid proc interface having to grab the main @@ -112,6 +113,8 @@ struct request_sock_queue { struct request_sock *rskq_accept_head; struct request_sock *rskq_accept_tail; rwlock_t syn_wait_lock; + u8 rskq_defer_accept; + /* 3 bytes hole, try to pack */ struct listen_sock *listen_opt; }; @@ -255,4 +258,8 @@ static inline void reqsk_queue_hash_req(struct request_sock_queue *queue, write_unlock(&queue->syn_wait_lock); } +extern void reqsk_queue_prune(struct request_sock_queue *queue, struct sock *parent, + const unsigned long interval, const unsigned long timeout, + const unsigned long max_rto, int max_retries); + #endif /* _REQUEST_SOCK_H */ diff --git a/include/net/tcp.h b/include/net/tcp.h index 68f1ec1c583a..2423f059b62b 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -423,7 +423,8 @@ extern int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, size_t len, int nonblock, int flags, int *addr_len); -extern int tcp_listen_start(struct sock 
*sk); +extern int inet_csk_listen_start(struct sock *sk, + const int nr_table_entries); extern void tcp_parse_options(struct sk_buff *skb, struct tcp_options_received *opt_rx, diff --git a/net/core/request_sock.c b/net/core/request_sock.c index 98f0fc923f91..b8203de5ff07 100644 --- a/net/core/request_sock.c +++ b/net/core/request_sock.c @@ -52,6 +52,7 @@ int reqsk_queue_alloc(struct request_sock_queue *queue, get_random_bytes(&lopt->hash_rnd, sizeof(lopt->hash_rnd)); rwlock_init(&queue->syn_wait_lock); queue->rskq_accept_head = queue->rskq_accept_head = NULL; + queue->rskq_defer_accept = 0; lopt->nr_table_entries = nr_table_entries; write_lock_bh(&queue->syn_wait_lock); diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index f691058cf599..52f5ecc58c46 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -99,6 +99,7 @@ #include #include #include +#include #include #include #include diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index a1f812159ced..a4e9eec44895 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -495,7 +495,7 @@ EXPORT_SYMBOL_GPL(inet_csk_listen_start); * This routine closes sockets which have been at least partially * opened, but not yet accepted. 
*/ -static void inet_csk_listen_stop(struct sock *sk) +void inet_csk_listen_stop(struct sock *sk) { struct inet_connection_sock *icsk = inet_csk(sk); struct request_sock *acc_req; @@ -1947,15 +1947,15 @@ int tcp_setsockopt(struct sock *sk, int level, int optname, char __user *optval, break; case TCP_DEFER_ACCEPT: - tp->defer_accept = 0; + icsk->icsk_accept_queue.rskq_defer_accept = 0; if (val > 0) { /* Translate value in seconds to number of * retransmits */ - while (tp->defer_accept < 32 && + while (icsk->icsk_accept_queue.rskq_defer_accept < 32 && val > ((TCP_TIMEOUT_INIT / HZ) << - tp->defer_accept)) - tp->defer_accept++; - tp->defer_accept++; + icsk->icsk_accept_queue.rskq_defer_accept)) + icsk->icsk_accept_queue.rskq_defer_accept++; + icsk->icsk_accept_queue.rskq_defer_accept++; } break; @@ -2058,6 +2058,7 @@ EXPORT_SYMBOL_GPL(tcp_get_info); int tcp_getsockopt(struct sock *sk, int level, int optname, char __user *optval, int __user *optlen) { + struct inet_connection_sock *icsk = inet_csk(sk); struct tcp_sock *tp = tcp_sk(sk); int val, len; @@ -2095,7 +2096,7 @@ int tcp_getsockopt(struct sock *sk, int level, int optname, char __user *optval, val = tp->keepalive_probes ? : sysctl_tcp_keepalive_probes; break; case TCP_SYNCNT: - val = inet_csk(sk)->icsk_syn_retries ? : sysctl_tcp_syn_retries; + val = icsk->icsk_syn_retries ? : sysctl_tcp_syn_retries; break; case TCP_LINGER2: val = tp->linger2; @@ -2103,8 +2104,8 @@ int tcp_getsockopt(struct sock *sk, int level, int optname, char __user *optval, val = (val ? : sysctl_tcp_fin_timeout) / HZ; break; case TCP_DEFER_ACCEPT: - val = !tp->defer_accept ? 0 : ((TCP_TIMEOUT_INIT / HZ) << - (tp->defer_accept - 1)); + val = !icsk->icsk_accept_queue.rskq_defer_accept ? 
0 : + ((TCP_TIMEOUT_INIT / HZ) << (icsk->icsk_accept_queue.rskq_defer_accept - 1)); break; case TCP_WINDOW_CLAMP: val = tp->window_clamp; @@ -2125,7 +2126,7 @@ int tcp_getsockopt(struct sock *sk, int level, int optname, char __user *optval, return 0; } case TCP_QUICKACK: - val = !inet_csk(sk)->icsk_ack.pingpong; + val = !icsk->icsk_ack.pingpong; break; case TCP_CONGESTION: diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index b35badf53aa5..71d456148de7 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -3831,6 +3831,7 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb, tcp_parse_options(skb, &tp->rx_opt, 0); if (th->ack) { + struct inet_connection_sock *icsk; /* rfc793: * "If the state is SYN-SENT then * first check the ACK bit @@ -3956,7 +3957,11 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb, sk_wake_async(sk, 0, POLL_OUT); } - if (sk->sk_write_pending || tp->defer_accept || inet_csk(sk)->icsk_ack.pingpong) { + icsk = inet_csk(sk); + + if (sk->sk_write_pending || + icsk->icsk_accept_queue.rskq_defer_accept || + icsk->icsk_ack.pingpong) { /* Save one ACK. Data will be ready after * several ticks, if write_pending is set. 
* @@ -3965,8 +3970,8 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb, * to stand against the temptation 8) --ANK */ inet_csk_schedule_ack(sk); - inet_csk(sk)->icsk_ack.lrcvtime = tcp_time_stamp; - inet_csk(sk)->icsk_ack.ato = TCP_ATO_MIN; + icsk->icsk_ack.lrcvtime = tcp_time_stamp; + icsk->icsk_ack.ato = TCP_ATO_MIN; tcp_incr_quickack(sk); tcp_enter_quickack_mode(sk); inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK, diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index 4cfbe1d1c920..2d95afe5b393 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -787,9 +787,10 @@ struct sock *tcp_check_req(struct sock *sk,struct sk_buff *skb, does sequence test, SYN is truncated, and thus we consider it a bare ACK. - If tp->defer_accept, we silently drop this bare ACK. Otherwise, - we create an established connection. Both ends (listening sockets) - accept the new incoming connection and try to talk to each other. 8-) + If icsk->icsk_accept_queue.rskq_defer_accept, we silently drop this + bare ACK. Otherwise, we create an established connection. Both + ends (listening sockets) accept the new incoming connection and try + to talk to each other. 8-) Note: This case is both harmless, and rare. Possibility is about the same as us discovering intelligent life on another plant tomorrow. @@ -856,7 +857,8 @@ struct sock *tcp_check_req(struct sock *sk,struct sk_buff *skb, return NULL; /* If TCP_DEFER_ACCEPT is set, drop bare ACK. 
*/ - if (tp->defer_accept && TCP_SKB_CB(skb)->end_seq == tcp_rsk(req)->rcv_isn + 1) { + if (inet_csk(sk)->icsk_accept_queue.rskq_defer_accept && + TCP_SKB_CB(skb)->end_seq == tcp_rsk(req)->rcv_isn + 1) { inet_rsk(req)->acked = 1; return NULL; } diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index c03930c48f42..b614ad4d30c9 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -424,16 +424,12 @@ out_unlock: sock_put(sk); } -/* - * Timer for listening sockets - */ - -static void tcp_synack_timer(struct sock *sk) +void reqsk_queue_prune(struct request_sock_queue *queue, struct sock *parent, + const unsigned long interval, const unsigned long timeout, + const unsigned long max_rto, int max_retries) { - struct tcp_sock *tp = tcp_sk(sk); - struct inet_connection_sock *icsk = inet_csk(sk); - struct listen_sock *lopt = icsk->icsk_accept_queue.listen_opt; - int max_retries = icsk->icsk_syn_retries ? : sysctl_tcp_synack_retries; + struct inet_connection_sock *icsk = inet_csk(parent); + struct listen_sock *lopt = queue->listen_opt; int thresh = max_retries; unsigned long now = jiffies; struct request_sock **reqp, *req; @@ -470,10 +466,10 @@ static void tcp_synack_timer(struct sock *sk) } } - if (tp->defer_accept) - max_retries = tp->defer_accept; + if (queue->rskq_defer_accept) + max_retries = queue->rskq_defer_accept; - budget = 2*(TCP_SYNQ_HSIZE/(TCP_TIMEOUT_INIT/TCP_SYNQ_INTERVAL)); + budget = 2 * (lopt->nr_table_entries / (timeout / interval)); i = lopt->clock_hand; do { @@ -482,20 +478,19 @@ static void tcp_synack_timer(struct sock *sk) if (time_after_eq(now, req->expires)) { if ((req->retrans < thresh || (inet_rsk(req)->acked && req->retrans < max_retries)) - && !req->rsk_ops->rtx_syn_ack(sk, req, NULL)) { + && !req->rsk_ops->rtx_syn_ack(parent, req, NULL)) { unsigned long timeo; if (req->retrans++ == 0) lopt->qlen_young--; - timeo = min((TCP_TIMEOUT_INIT << req->retrans), - TCP_RTO_MAX); + timeo = min((timeout << req->retrans), max_rto); 
req->expires = now + timeo; reqp = &req->dl_next; continue; } /* Drop this request */ - inet_csk_reqsk_queue_unlink(sk, req, reqp); + inet_csk_reqsk_queue_unlink(parent, req, reqp); reqsk_queue_removed(&icsk->icsk_accept_queue, req); reqsk_free(req); continue; @@ -503,14 +498,29 @@ static void tcp_synack_timer(struct sock *sk) reqp = &req->dl_next; } - i = (i+1)&(TCP_SYNQ_HSIZE-1); + i = (i + 1) & (lopt->nr_table_entries - 1); } while (--budget > 0); lopt->clock_hand = i; if (lopt->qlen) - inet_csk_reset_keepalive_timer(sk, TCP_SYNQ_INTERVAL); + inet_csk_reset_keepalive_timer(parent, interval); +} + +EXPORT_SYMBOL_GPL(reqsk_queue_prune); + +/* + * Timer for listening sockets + */ + +static void tcp_synack_timer(struct sock *sk) +{ + struct inet_connection_sock *icsk = inet_csk(sk); + const int max_retries = icsk->icsk_syn_retries ? : sysctl_tcp_synack_retries; + + reqsk_queue_prune(&icsk->icsk_accept_queue, sk, TCP_SYNQ_INTERVAL, + TCP_TIMEOUT_INIT, TCP_RTO_MAX, max_retries); } void tcp_set_keepalive(struct sock *sk, int val) From d8c97a9451068dd9f7b838a240bb6db894133a5e Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 9 Aug 2005 20:12:12 -0700 Subject: [PATCH 327/584] [NET]: Export symbols needed by the current DCCP code Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. Miller --- net/ipv4/inet_timewait_sock.c | 2 ++ net/ipv4/ip_output.c | 2 ++ net/ipv4/ip_sockglue.c | 2 -- net/ipv4/route.c | 4 ++++ 4 files changed, 8 insertions(+), 2 deletions(-) diff --git a/net/ipv4/inet_timewait_sock.c b/net/ipv4/inet_timewait_sock.c index 5cba59b869fe..22882d95f646 100644 --- a/net/ipv4/inet_timewait_sock.c +++ b/net/ipv4/inet_timewait_sock.c @@ -47,6 +47,8 @@ void __inet_twsk_kill(struct inet_timewait_sock *tw, struct inet_hashinfo *hashi inet_twsk_put(tw); } +EXPORT_SYMBOL_GPL(__inet_twsk_kill); + /* * Enter the time wait state. This is called with locally disabled BH. 
* Essentially we whip up a timewait bucket, copy the relevant info into it diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index dd568b0b7062..633945d27ac2 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -158,6 +158,8 @@ int ip_build_and_send_pkt(struct sk_buff *skb, struct sock *sk, dst_output); } +EXPORT_SYMBOL_GPL(ip_build_and_send_pkt); + static inline int ip_finish_output2(struct sk_buff *skb) { struct dst_entry *dst = skb->dst; diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index ff4bd067b397..ddb1aedbdc6d 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c @@ -1090,7 +1090,5 @@ int ip_getsockopt(struct sock *sk, int level, int optname, char __user *optval, EXPORT_SYMBOL(ip_cmsg_recv); -#ifdef CONFIG_IP_SCTP_MODULE EXPORT_SYMBOL(ip_getsockopt); EXPORT_SYMBOL(ip_setsockopt); -#endif diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 3aef0e15460f..8c0b14e3beec 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -2602,6 +2602,8 @@ int __ip_route_output_key(struct rtable **rp, const struct flowi *flp) return ip_route_output_slow(rp, flp); } +EXPORT_SYMBOL_GPL(__ip_route_output_key); + int ip_route_output_flow(struct rtable **rp, struct flowi *flp, struct sock *sk, int flags) { int err; @@ -2620,6 +2622,8 @@ int ip_route_output_flow(struct rtable **rp, struct flowi *flp, struct sock *sk, return 0; } +EXPORT_SYMBOL_GPL(ip_route_output_flow); + int ip_route_output_key(struct rtable **rp, struct flowi *flp) { return ip_route_output_flow(rp, flp, NULL, 0); From c4365c9235f80128c3c3d5993074173941b1c1f0 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 9 Aug 2005 20:12:30 -0700 Subject: [PATCH 328/584] [RANDOM]: Introduce secure_dccp_sequence_number Code contributed by Stephen Hemminger. Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. 
Miller --- drivers/char/random.c | 34 ++++++++++++++++++++++++++++++++++ include/linux/random.h | 2 ++ 2 files changed, 36 insertions(+) diff --git a/drivers/char/random.c b/drivers/char/random.c index 6b11d6b2129f..7999da25fe40 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -1589,6 +1589,40 @@ u32 secure_tcpv6_port_ephemeral(const __u32 *saddr, const __u32 *daddr, __u16 dp EXPORT_SYMBOL(secure_tcpv6_port_ephemeral); #endif +#if defined(CONFIG_IP_DCCP) || defined(CONFIG_IP_DCCP_MODULE) +/* Similar to secure_tcp_sequence_number but generate a 48 bit value + * bits 32-47 increase every key exchange + * 0-31 hash(source, dest) + */ +u64 secure_dccp_sequence_number(__u32 saddr, __u32 daddr, + __u16 sport, __u16 dport) +{ + struct timeval tv; + u64 seq; + __u32 hash[4]; + struct keydata *keyptr = get_keyptr(); + + hash[0] = saddr; + hash[1] = daddr; + hash[2] = (sport << 16) + dport; + hash[3] = keyptr->secret[11]; + + seq = half_md4_transform(hash, keyptr->secret); + seq |= ((u64)keyptr->count) << (32 - HASH_BITS); + + do_gettimeofday(&tv); + seq += tv.tv_usec + tv.tv_sec * 1000000; + seq &= (1ull << 48) - 1; +#if 0 + printk("dccp init_seq(%lx, %lx, %d, %d) = %d\n", + saddr, daddr, sport, dport, seq); +#endif + return seq; +} + +EXPORT_SYMBOL(secure_dccp_sequence_number); +#endif + #endif /* CONFIG_INET */ diff --git a/include/linux/random.h b/include/linux/random.h index cc6703449916..7b2adb3322d5 100644 --- a/include/linux/random.h +++ b/include/linux/random.h @@ -59,6 +59,8 @@ extern __u32 secure_tcp_sequence_number(__u32 saddr, __u32 daddr, __u16 sport, __u16 dport); extern __u32 secure_tcpv6_sequence_number(__u32 *saddr, __u32 *daddr, __u16 sport, __u16 dport); +extern u64 secure_dccp_sequence_number(__u32 saddr, __u32 daddr, + __u16 sport, __u16 dport); #ifndef MODULE extern struct file_operations random_fops, urandom_fops; From 7c657876b63cb1d8a2ec06f8fc6c37bb8412e66c Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 9 Aug
2005 20:14:34 -0700 Subject: [PATCH 329/584] [DCCP]: Initial implementation Development to this point was done on a subversion repository at: http://oops.ghostprotocols.net:81/cgi-bin/viewcvs.cgi/dccp-2.6/ This repository will be kept at this site for the foreseeable future, so that interested parties can see the history of this code, attributions, etc. If I ever decide to take this offline I'll provide the full history at some other suitable place. Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. Miller --- include/linux/dccp.h | 432 ++++++++ include/linux/in.h | 1 + include/linux/net.h | 1 + include/linux/socket.h | 1 + net/Kconfig | 1 + net/Makefile | 1 + net/dccp/Kconfig | 24 + net/dccp/Makefile | 5 + net/dccp/ccid.c | 139 +++ net/dccp/ccid.h | 156 +++ net/dccp/ccids/Kconfig | 25 + net/dccp/ccids/Makefile | 3 + net/dccp/ccids/ccid3.c | 2164 +++++++++++++++++++++++++++++++++++++++ net/dccp/ccids/ccid3.h | 137 +++ net/dccp/dccp.h | 422 ++++++++ net/dccp/input.c | 510 +++++++++ net/dccp/ipv4.c | 1289 +++++++++++++++++++++++ net/dccp/minisocks.c | 199 ++++ net/dccp/options.c | 763 ++++++++++++++ net/dccp/output.c | 406 ++++++++ net/dccp/proto.c | 818 +++++++++++++++ net/dccp/timer.c | 249 +++++ 22 files changed, 7746 insertions(+) create mode 100644 include/linux/dccp.h create mode 100644 net/dccp/Kconfig create mode 100644 net/dccp/Makefile create mode 100644 net/dccp/ccid.c create mode 100644 net/dccp/ccid.h create mode 100644 net/dccp/ccids/Kconfig create mode 100644 net/dccp/ccids/Makefile create mode 100644 net/dccp/ccids/ccid3.c create mode 100644 net/dccp/ccids/ccid3.h create mode 100644 net/dccp/dccp.h create mode 100644 net/dccp/input.c create mode 100644 net/dccp/ipv4.c create mode 100644 net/dccp/minisocks.c create mode 100644 net/dccp/options.c create mode 100644 net/dccp/output.c create mode 100644 net/dccp/proto.c create mode 100644 net/dccp/timer.c diff --git a/include/linux/dccp.h b/include/linux/dccp.h new file mode 100644 index
000000000000..e3b4bf7346bb --- /dev/null +++ b/include/linux/dccp.h @@ -0,0 +1,432 @@ +#ifndef _LINUX_DCCP_H +#define _LINUX_DCCP_H + +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +/* FIXME: this is utterly wrong */ +struct sockaddr_dccp { + struct sockaddr_in in; + unsigned int service; +}; + +enum dccp_state { + DCCP_OPEN = TCP_ESTABLISHED, + DCCP_REQUESTING = TCP_SYN_SENT, + DCCP_PARTOPEN = TCP_FIN_WAIT1, /* FIXME: + This mapping is horrible, but TCP has + no matching state for DCCP_PARTOPEN, + as TCP_SYN_RECV is already used by + DCCP_RESPOND, why don't stop using TCP + mapping of states? OK, now we don't use + sk_stream_sendmsg anymore, so doesn't + seem to exist any reason for us to + do the TCP mapping here */ + DCCP_LISTEN = TCP_LISTEN, + DCCP_RESPOND = TCP_SYN_RECV, + DCCP_CLOSING = TCP_CLOSING, + DCCP_TIME_WAIT = TCP_TIME_WAIT, + DCCP_CLOSED = TCP_CLOSE, + DCCP_MAX_STATES = TCP_MAX_STATES, +}; + +#define DCCP_STATE_MASK 0xf +#define DCCP_ACTION_FIN (1<<7) + +enum { + DCCPF_OPEN = TCPF_ESTABLISHED, + DCCPF_REQUESTING = TCPF_SYN_SENT, + DCCPF_PARTOPEN = TCPF_FIN_WAIT1, + DCCPF_LISTEN = TCPF_LISTEN, + DCCPF_RESPOND = TCPF_SYN_RECV, + DCCPF_CLOSING = TCPF_CLOSING, + DCCPF_TIME_WAIT = TCPF_TIME_WAIT, + DCCPF_CLOSED = TCPF_CLOSE, +}; + +/** + * struct dccp_hdr - generic part of DCCP packet header + * + * @dccph_sport - Relevant port on the endpoint that sent this packet + * @dccph_dport - Relevant port on the other endpoint + * @dccph_doff - Data Offset from the start of the DCCP header, in 32-bit words + * @dccph_ccval - Used by the HC-Sender CCID + * @dccph_cscov - Parts of the packet that are covered by the Checksum field + * @dccph_checksum - Internet checksum, depends on dccph_cscov + * @dccph_x - 0 = 24 bit sequence number, 1 = 48 + * @dccph_type - packet type, see DCCP_PKT_ prefixed macros + * @dccph_seq - sequence number high or low order 24 bits, depends on dccph_x + */ +struct dccp_hdr { + __u16 
dccph_sport, + dccph_dport; + __u8 dccph_doff; +#if defined(__LITTLE_ENDIAN_BITFIELD) + __u8 dccph_cscov:4, + dccph_ccval:4; +#elif defined(__BIG_ENDIAN_BITFIELD) + __u8 dccph_ccval:4, + dccph_cscov:4; +#else +#error "Adjust your defines" +#endif + __u16 dccph_checksum; +#if defined(__LITTLE_ENDIAN_BITFIELD) + __u32 dccph_x:1, + dccph_type:4, + dccph_reserved:3, + dccph_seq:24; +#elif defined(__BIG_ENDIAN_BITFIELD) + __u32 dccph_reserved:3, + dccph_type:4, + dccph_x:1, + dccph_seq:24; +#else +#error "Adjust your defines" +#endif +}; + +static inline struct dccp_hdr *dccp_hdr(const struct sk_buff *skb) +{ + return (struct dccp_hdr *)skb->h.raw; +} + +/** + * struct dccp_hdr_ext - the low bits of a 48 bit seq packet + * + * @dccph_seq_low - low 24 bits of a 48 bit seq packet + */ +struct dccp_hdr_ext { + __u32 dccph_seq_low; +}; + +static inline struct dccp_hdr_ext *dccp_hdrx(const struct sk_buff *skb) +{ + return (struct dccp_hdr_ext *)(skb->h.raw + sizeof(struct dccp_hdr)); +} + +static inline unsigned int dccp_basic_hdr_len(const struct sk_buff *skb) +{ + const struct dccp_hdr *dh = dccp_hdr(skb); + return sizeof(*dh) + (dh->dccph_x ? 
sizeof(struct dccp_hdr_ext) : 0); +} + +static inline __u64 dccp_hdr_seq(const struct sk_buff *skb) +{ + const struct dccp_hdr *dh = dccp_hdr(skb); +#if defined(__LITTLE_ENDIAN_BITFIELD) + __u64 seq_nr = ntohl(dh->dccph_seq << 8); +#elif defined(__BIG_ENDIAN_BITFIELD) + __u64 seq_nr = ntohl(dh->dccph_seq); +#else +#error "Adjust your defines" +#endif + + if (dh->dccph_x != 0) + seq_nr = (seq_nr << 32) + ntohl(dccp_hdrx(skb)->dccph_seq_low); + + return seq_nr; +} + +/** + * struct dccp_hdr_request - Conection initiation request header + * + * @dccph_req_service - Service to which the client app wants to connect + * @dccph_req_options - list of options (must be a multiple of 32 bits + */ +struct dccp_hdr_request { + __u32 dccph_req_service; +}; + +static inline struct dccp_hdr_request *dccp_hdr_request(struct sk_buff *skb) +{ + return (struct dccp_hdr_request *)(skb->h.raw + dccp_basic_hdr_len(skb)); +} + +/** + * struct dccp_hdr_ack_bits - acknowledgment bits common to most packets + * + * @dccph_resp_ack_nr_high - 48 bit ack number high order bits, contains GSR + * @dccph_resp_ack_nr_low - 48 bit ack number low order bits, contains GSR + */ +struct dccp_hdr_ack_bits { + __u32 dccph_reserved1:8, + dccph_ack_nr_high:24; + __u32 dccph_ack_nr_low; +}; + +static inline struct dccp_hdr_ack_bits *dccp_hdr_ack_bits(const struct sk_buff *skb) +{ + return (struct dccp_hdr_ack_bits *)(skb->h.raw + dccp_basic_hdr_len(skb)); +} + +static inline u64 dccp_hdr_ack_seq(const struct sk_buff *skb) +{ + const struct dccp_hdr_ack_bits *dhack = dccp_hdr_ack_bits(skb); +#if defined(__LITTLE_ENDIAN_BITFIELD) + return (((u64)ntohl(dhack->dccph_ack_nr_high << 8)) << 32) + ntohl(dhack->dccph_ack_nr_low); +#elif defined(__BIG_ENDIAN_BITFIELD) + return (((u64)ntohl(dhack->dccph_ack_nr_high)) << 32) + ntohl(dhack->dccph_ack_nr_low); +#else +#error "Adjust your defines" +#endif +} + +/** + * struct dccp_hdr_response - Conection initiation response header + * + * @dccph_resp_ack_nr_high - 48 bit 
ack number high order bits, contains GSR + * @dccph_resp_ack_nr_low - 48 bit ack number low order bits, contains GSR + * @dccph_resp_service - Echoes the Service Code on a received DCCP-Request + * @dccph_resp_options - list of options (must be a multiple of 32 bits + */ +struct dccp_hdr_response { + struct dccp_hdr_ack_bits dccph_resp_ack; + __u32 dccph_resp_service; +}; + +static inline struct dccp_hdr_response *dccp_hdr_response(struct sk_buff *skb) +{ + return (struct dccp_hdr_response *)(skb->h.raw + dccp_basic_hdr_len(skb)); +} + +/** + * struct dccp_hdr_reset - Unconditionally shut down a connection + * + * @dccph_reset_service - Echoes the Service Code on a received DCCP-Request + * @dccph_reset_options - list of options (must be a multiple of 32 bits + */ +struct dccp_hdr_reset { + struct dccp_hdr_ack_bits dccph_reset_ack; + __u8 dccph_reset_code, + dccph_reset_data[3]; +}; + +static inline struct dccp_hdr_reset *dccp_hdr_reset(struct sk_buff *skb) +{ + return (struct dccp_hdr_reset *)(skb->h.raw + dccp_basic_hdr_len(skb)); +} + +enum dccp_pkt_type { + DCCP_PKT_REQUEST = 0, + DCCP_PKT_RESPONSE, + DCCP_PKT_DATA, + DCCP_PKT_ACK, + DCCP_PKT_DATAACK, + DCCP_PKT_CLOSEREQ, + DCCP_PKT_CLOSE, + DCCP_PKT_RESET, + DCCP_PKT_SYNC, + DCCP_PKT_SYNCACK, + DCCP_PKT_INVALID, +}; + +#define DCCP_NR_PKT_TYPES DCCP_PKT_INVALID + +static inline unsigned int dccp_packet_hdr_len(const __u8 type) +{ + if (type == DCCP_PKT_DATA) + return 0; + if (type == DCCP_PKT_DATAACK || + type == DCCP_PKT_ACK || + type == DCCP_PKT_SYNC || + type == DCCP_PKT_SYNCACK || + type == DCCP_PKT_CLOSE || + type == DCCP_PKT_CLOSEREQ) + return sizeof(struct dccp_hdr_ack_bits); + if (type == DCCP_PKT_REQUEST) + return sizeof(struct dccp_hdr_request); + if (type == DCCP_PKT_RESPONSE) + return sizeof(struct dccp_hdr_response); + return sizeof(struct dccp_hdr_reset); +} + +static inline unsigned int dccp_hdr_len(const struct sk_buff *skb) +{ + return dccp_basic_hdr_len(skb) + + 
dccp_packet_hdr_len(dccp_hdr(skb)->dccph_type); +} + +enum dccp_reset_codes { + DCCP_RESET_CODE_UNSPECIFIED = 0, + DCCP_RESET_CODE_CLOSED, + DCCP_RESET_CODE_ABORTED, + DCCP_RESET_CODE_NO_CONNECTION, + DCCP_RESET_CODE_PACKET_ERROR, + DCCP_RESET_CODE_OPTION_ERROR, + DCCP_RESET_CODE_MANDATORY_ERROR, + DCCP_RESET_CODE_CONNECTION_REFUSED, + DCCP_RESET_CODE_BAD_SERVICE_CODE, + DCCP_RESET_CODE_TOO_BUSY, + DCCP_RESET_CODE_BAD_INIT_COOKIE, + DCCP_RESET_CODE_AGGRESSION_PENALTY, +}; + +/* DCCP options */ +enum { + DCCPO_PADDING = 0, + DCCPO_MANDATORY = 1, + DCCPO_MIN_RESERVED = 3, + DCCPO_MAX_RESERVED = 31, + DCCPO_NDP_COUNT = 37, + DCCPO_ACK_VECTOR_0 = 38, + DCCPO_ACK_VECTOR_1 = 39, + DCCPO_TIMESTAMP = 41, + DCCPO_TIMESTAMP_ECHO = 42, + DCCPO_ELAPSED_TIME = 43, + DCCPO_MAX = 45, + DCCPO_MIN_CCID_SPECIFIC = 128, + DCCPO_MAX_CCID_SPECIFIC = 255, +}; + +/* DCCP features */ +enum { + DCCPF_RESERVED = 0, + DCCPF_SEQUENCE_WINDOW = 3, + DCCPF_SEND_ACK_VECTOR = 6, + DCCPF_SEND_NDP_COUNT = 7, + /* 10-127 reserved */ + DCCPF_MIN_CCID_SPECIFIC = 128, + DCCPF_MAX_CCID_SPECIFIC = 255, +}; + +/* initial values for each feature */ +#define DCCPF_INITIAL_SEQUENCE_WINDOW 100 +/* FIXME: for now we're using CCID 3 (TFRC) */ +#define DCCPF_INITIAL_CCID 3 +#define DCCPF_INITIAL_SEND_ACK_VECTOR 0 +/* FIXME: for now we're default to 1 but it should really be 0 */ +#define DCCPF_INITIAL_SEND_NDP_COUNT 1 + +#define DCCP_NDP_LIMIT 0xFFFFFF + +/** + * struct dccp_options - option values for a DCCP connection + * @dccpo_sequence_window - Sequence Window Feature (section 7.5.2) + * @dccpo_ccid - Congestion Control Id (CCID) (section 10) + * @dccpo_send_ack_vector - Send Ack Vector Feature (section 11.5) + * @dccpo_send_ndp_count - Send NDP Count Feature (7.7.2) + */ +struct dccp_options { + __u64 dccpo_sequence_window; + __u8 dccpo_ccid; + __u8 dccpo_send_ack_vector; + __u8 dccpo_send_ndp_count; +}; + +extern void __dccp_options_init(struct dccp_options *dccpo); +extern void dccp_options_init(struct 
dccp_options *dccpo); +extern int dccp_parse_options(struct sock *sk, struct sk_buff *skb); + +struct dccp_request_sock { + struct inet_request_sock dreq_inet_rsk; + __u64 dreq_iss; + __u64 dreq_isr; + __u32 dreq_service; +}; + +static inline struct dccp_request_sock *dccp_rsk(const struct request_sock *req) +{ + return (struct dccp_request_sock *)req; +} + +/* Read about the ECN nonce to see why it is 253 */ +#define DCCP_MAX_ACK_VECTOR_LEN 253 + +struct dccp_options_received { + u32 dccpor_ndp:24, + dccpor_ack_vector_len:8; + u32 dccpor_ack_vector_idx:10; + /* 22 bits hole, try to pack */ + u32 dccpor_timestamp; + u32 dccpor_timestamp_echo; + u32 dccpor_elapsed_time; +}; + +struct ccid; + +enum dccp_role { + DCCP_ROLE_UNDEFINED, + DCCP_ROLE_LISTEN, + DCCP_ROLE_CLIENT, + DCCP_ROLE_SERVER, +}; + +/** + * struct dccp_sock - DCCP socket state + * + * @dccps_swl - sequence number window low + * @dccps_swh - sequence number window high + * @dccps_awl - acknowledgement number window low + * @dccps_awh - acknowledgement number window high + * @dccps_iss - initial sequence number sent + * @dccps_isr - initial sequence number received + * @dccps_osr - first OPEN sequence number received + * @dccps_gss - greatest sequence number sent + * @dccps_gsr - greatest valid sequence number received + * @dccps_gar - greatest valid ack number received on a non-Sync; initialized to %dccps_iss + * @dccps_timestamp_time - time of latest TIMESTAMP option + * @dccps_timestamp_echo - latest timestamp received on a TIMESTAMP option + * @dccps_ext_header_len - network protocol overhead (IP/IPv6 options) + * @dccps_pmtu_cookie - Last pmtu seen by socket + * @dccps_avg_packet_size - FIXME: has to be set by the app thru some setsockopt or ioctl, CCID3 uses it + * @dccps_role - Role of this sock, one of %dccp_role + * @dccps_ndp_count - number of Non Data Packets since last data packet + * @dccps_hc_rx_ackpkts - receiver half connection acked packets + */ +struct dccp_sock { + /* 
inet_connection_sock has to be the first member of dccp_sock */ + struct inet_connection_sock dccps_inet_connection; + __u64 dccps_swl; + __u64 dccps_swh; + __u64 dccps_awl; + __u64 dccps_awh; + __u64 dccps_iss; + __u64 dccps_isr; + __u64 dccps_osr; + __u64 dccps_gss; + __u64 dccps_gsr; + __u64 dccps_gar; + unsigned long dccps_service; + unsigned long dccps_timestamp_time; + __u32 dccps_timestamp_echo; + __u32 dccps_avg_packet_size; + unsigned long dccps_ndp_count; + __u16 dccps_ext_header_len; + __u32 dccps_pmtu_cookie; + __u32 dccps_mss_cache; + struct dccp_options dccps_options; + struct dccp_ackpkts *dccps_hc_rx_ackpkts; + void *dccps_hc_rx_ccid_private; + void *dccps_hc_tx_ccid_private; + struct ccid *dccps_hc_rx_ccid; + struct ccid *dccps_hc_tx_ccid; + struct dccp_options_received dccps_options_received; + enum dccp_role dccps_role:2; +}; + +static inline struct dccp_sock *dccp_sk(const struct sock *sk) +{ + return (struct dccp_sock *)sk; +} + +static inline const char *dccp_role(const struct sock *sk) +{ + switch (dccp_sk(sk)->dccps_role) { + case DCCP_ROLE_UNDEFINED: return "undefined"; + case DCCP_ROLE_LISTEN: return "listen"; + case DCCP_ROLE_SERVER: return "server"; + case DCCP_ROLE_CLIENT: return "client"; + } + return NULL; +} + +#endif /* _LINUX_DCCP_H */ diff --git a/include/linux/in.h b/include/linux/in.h index fb88c66d748d..ba355384016a 100644 --- a/include/linux/in.h +++ b/include/linux/in.h @@ -32,6 +32,7 @@ enum { IPPROTO_PUP = 12, /* PUP protocol */ IPPROTO_UDP = 17, /* User Datagram Protocol */ IPPROTO_IDP = 22, /* XNS IDP protocol */ + IPPROTO_DCCP = 33, /* Datagram Congestion Control Protocol */ IPPROTO_RSVP = 46, /* RSVP protocol */ IPPROTO_GRE = 47, /* Cisco GRE tunnels (rfc 1701,1702) */ diff --git a/include/linux/net.h b/include/linux/net.h index 39906619b9d7..5f8b632ff653 100644 --- a/include/linux/net.h +++ b/include/linux/net.h @@ -84,6 +84,7 @@ enum sock_type { SOCK_RAW = 3, SOCK_RDM = 4, SOCK_SEQPACKET = 5, + SOCK_DCCP = 6, 
SOCK_PACKET = 10, }; diff --git a/include/linux/socket.h b/include/linux/socket.h index a5c7d96e4d2e..ddf22559f484 100644 --- a/include/linux/socket.h +++ b/include/linux/socket.h @@ -271,6 +271,7 @@ struct ucred { #define SOL_IRDA 266 #define SOL_NETBEUI 267 #define SOL_LLC 268 +#define SOL_DCCP 269 /* IPX options */ #define IPX_TYPE 1 diff --git a/net/Kconfig b/net/Kconfig index 02877ac0f2f4..c07aafb59a0f 100644 --- a/net/Kconfig +++ b/net/Kconfig @@ -147,6 +147,7 @@ source "net/bridge/netfilter/Kconfig" endif +source "net/dccp/Kconfig" source "net/sctp/Kconfig" source "net/atm/Kconfig" source "net/bridge/Kconfig" diff --git a/net/Makefile b/net/Makefile index 4a01be8d3e1e..7e6eff206c81 100644 --- a/net/Makefile +++ b/net/Makefile @@ -42,6 +42,7 @@ obj-$(CONFIG_ATM) += atm/ obj-$(CONFIG_DECNET) += decnet/ obj-$(CONFIG_ECONET) += econet/ obj-$(CONFIG_VLAN_8021Q) += 8021q/ +obj-$(CONFIG_IP_DCCP) += dccp/ obj-$(CONFIG_IP_SCTP) += sctp/ ifeq ($(CONFIG_NET),y) diff --git a/net/dccp/Kconfig b/net/dccp/Kconfig new file mode 100644 index 000000000000..90460bc629b3 --- /dev/null +++ b/net/dccp/Kconfig @@ -0,0 +1,24 @@ +menu "DCCP Configuration (EXPERIMENTAL)" + depends on INET && EXPERIMENTAL + +config IP_DCCP + tristate "The DCCP Protocol (EXPERIMENTAL)" + ---help--- + Datagram Congestion Control Protocol + + From draft-ietf-dccp-spec-11 . + + The Datagram Congestion Control Protocol (DCCP) is a transport + protocol that implements bidirectional, unicast connections of + congestion-controlled, unreliable datagrams. It should be suitable + for use by applications such as streaming media, Internet telephony, + and on-line games + + To compile this protocol support as a module, choose M here: the + module will be called dccp. + + If in doubt, say N. 
+ +source "net/dccp/ccids/Kconfig" + +endmenu diff --git a/net/dccp/Makefile b/net/dccp/Makefile new file mode 100644 index 000000000000..c6e6ba55c36b --- /dev/null +++ b/net/dccp/Makefile @@ -0,0 +1,5 @@ +obj-$(CONFIG_IP_DCCP) += dccp.o + +dccp-y := ccid.o input.o ipv4.o minisocks.o options.o output.o proto.o timer.o + +obj-y += ccids/ diff --git a/net/dccp/ccid.c b/net/dccp/ccid.c new file mode 100644 index 000000000000..9d8fc0e289ea --- /dev/null +++ b/net/dccp/ccid.c @@ -0,0 +1,139 @@ +/* + * net/dccp/ccid.c + * + * An implementation of the DCCP protocol + * Arnaldo Carvalho de Melo + * + * CCID infrastructure + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include "ccid.h" + +static struct ccid *ccids[CCID_MAX]; +#if defined(CONFIG_SMP) || defined(CONFIG_PREEMPT) +static atomic_t ccids_lockct = ATOMIC_INIT(0); +static DEFINE_SPINLOCK(ccids_lock); + +/* + * The strategy is: modifications ccids vector are short, do not sleep and + * veeery rare, but read access should be free of any exclusive locks. 
+ */ +static void ccids_write_lock(void) +{ + spin_lock(&ccids_lock); + while (atomic_read(&ccids_lockct) != 0) { + spin_unlock(&ccids_lock); + yield(); + spin_lock(&ccids_lock); + } +} + +static inline void ccids_write_unlock(void) +{ + spin_unlock(&ccids_lock); +} + +static inline void ccids_read_lock(void) +{ + atomic_inc(&ccids_lockct); + spin_unlock_wait(&ccids_lock); +} + +static inline void ccids_read_unlock(void) +{ + atomic_dec(&ccids_lockct); +} + +#else +#define ccids_write_lock() do { } while(0) +#define ccids_write_unlock() do { } while(0) +#define ccids_read_lock() do { } while(0) +#define ccids_read_unlock() do { } while(0) +#endif + +int ccid_register(struct ccid *ccid) +{ + int err; + + if (ccid->ccid_init == NULL) + return -1; + + ccids_write_lock(); + err = -EEXIST; + if (ccids[ccid->ccid_id] == NULL) { + ccids[ccid->ccid_id] = ccid; + err = 0; + } + ccids_write_unlock(); + if (err == 0) + pr_info("CCID: Registered CCID %d (%s)\n", + ccid->ccid_id, ccid->ccid_name); + return err; +} + +EXPORT_SYMBOL_GPL(ccid_register); + +int ccid_unregister(struct ccid *ccid) +{ + ccids_write_lock(); + ccids[ccid->ccid_id] = NULL; + ccids_write_unlock(); + pr_info("CCID: Unregistered CCID %d (%s)\n", + ccid->ccid_id, ccid->ccid_name); + return 0; +} + +EXPORT_SYMBOL_GPL(ccid_unregister); + +struct ccid *ccid_init(unsigned char id, struct sock *sk) +{ + struct ccid *ccid; + +#ifdef CONFIG_KMOD + if (ccids[id] == NULL) + request_module("net-dccp-ccid-%d", id); +#endif + ccids_read_lock(); + + ccid = ccids[id]; + if (ccid == NULL) + goto out; + + if (!try_module_get(ccid->ccid_owner)) + goto out_err; + + if (ccid->ccid_init(sk) != 0) + goto out_module_put; +out: + ccids_read_unlock(); + return ccid; +out_module_put: + module_put(ccid->ccid_owner); +out_err: + ccid = NULL; + goto out; +} + +EXPORT_SYMBOL_GPL(ccid_init); + +void ccid_exit(struct ccid *ccid, struct sock *sk) +{ + if (ccid == NULL) + return; + + ccids_read_lock(); + + if (ccids[ccid->ccid_id] != NULL) 
{ + if (ccid->ccid_exit != NULL) + ccid->ccid_exit(sk); + module_put(ccid->ccid_owner); + } + + ccids_read_unlock(); +} + +EXPORT_SYMBOL_GPL(ccid_exit); diff --git a/net/dccp/ccid.h b/net/dccp/ccid.h new file mode 100644 index 000000000000..06105b2a613c --- /dev/null +++ b/net/dccp/ccid.h @@ -0,0 +1,156 @@ +#ifndef _CCID_H +#define _CCID_H +/* + * net/dccp/ccid.h + * + * An implementation of the DCCP protocol + * Arnaldo Carvalho de Melo + * + * CCID infrastructure + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include +#include +#include +#include + +#define CCID_MAX 255 + +struct ccid { + unsigned char ccid_id; + const char *ccid_name; + struct module *ccid_owner; + int (*ccid_init)(struct sock *sk); + void (*ccid_exit)(struct sock *sk); + int (*ccid_hc_rx_init)(struct sock *sk); + int (*ccid_hc_tx_init)(struct sock *sk); + void (*ccid_hc_rx_exit)(struct sock *sk); + void (*ccid_hc_tx_exit)(struct sock *sk); + void (*ccid_hc_rx_packet_recv)(struct sock *sk, struct sk_buff *skb); + int (*ccid_hc_rx_parse_options)(struct sock *sk, + unsigned char option, + unsigned char len, u16 idx, + unsigned char* value); + void (*ccid_hc_rx_insert_options)(struct sock *sk, struct sk_buff *skb); + void (*ccid_hc_tx_insert_options)(struct sock *sk, struct sk_buff *skb); + void (*ccid_hc_tx_packet_recv)(struct sock *sk, struct sk_buff *skb); + int (*ccid_hc_tx_parse_options)(struct sock *sk, + unsigned char option, + unsigned char len, u16 idx, + unsigned char* value); + int (*ccid_hc_tx_send_packet)(struct sock *sk, + struct sk_buff *skb, int len, + long *delay); + void (*ccid_hc_tx_packet_sent)(struct sock *sk, int more, int len); +}; + +extern int ccid_register(struct ccid *ccid); +extern int ccid_unregister(struct ccid *ccid); + +extern struct ccid *ccid_init(unsigned char id, struct sock *sk); +extern void ccid_exit(struct 
ccid *ccid, struct sock *sk); + +static inline void __ccid_get(struct ccid *ccid) +{ + __module_get(ccid->ccid_owner); +} + +static inline int ccid_hc_tx_send_packet(struct ccid *ccid, struct sock *sk, + struct sk_buff *skb, int len, + long *delay) +{ + int rc = 0; + if (ccid->ccid_hc_tx_send_packet != NULL) + rc = ccid->ccid_hc_tx_send_packet(sk, skb, len, delay); + return rc; +} + +static inline void ccid_hc_tx_packet_sent(struct ccid *ccid, struct sock *sk, + int more, int len) +{ + if (ccid->ccid_hc_tx_packet_sent != NULL) + ccid->ccid_hc_tx_packet_sent(sk, more, len); +} + +static inline int ccid_hc_rx_init(struct ccid *ccid, struct sock *sk) +{ + int rc = 0; + if (ccid->ccid_hc_rx_init != NULL) + rc = ccid->ccid_hc_rx_init(sk); + return rc; +} + +static inline int ccid_hc_tx_init(struct ccid *ccid, struct sock *sk) +{ + int rc = 0; + if (ccid->ccid_hc_tx_init != NULL) + rc = ccid->ccid_hc_tx_init(sk); + return rc; +} + +static inline void ccid_hc_rx_exit(struct ccid *ccid, struct sock *sk) +{ + if (ccid->ccid_hc_rx_exit != NULL) + ccid->ccid_hc_rx_exit(sk); +} + +static inline void ccid_hc_tx_exit(struct ccid *ccid, struct sock *sk) +{ + if (ccid->ccid_hc_tx_exit != NULL) + ccid->ccid_hc_tx_exit(sk); +} + +static inline void ccid_hc_rx_packet_recv(struct ccid *ccid, struct sock *sk, + struct sk_buff *skb) +{ + if (ccid->ccid_hc_rx_packet_recv != NULL) + ccid->ccid_hc_rx_packet_recv(sk, skb); +} + +static inline void ccid_hc_tx_packet_recv(struct ccid *ccid, struct sock *sk, + struct sk_buff *skb) +{ + if (ccid->ccid_hc_tx_packet_recv != NULL) + ccid->ccid_hc_tx_packet_recv(sk, skb); +} + +static inline int ccid_hc_tx_parse_options(struct ccid *ccid, struct sock *sk, + unsigned char option, + unsigned char len, u16 idx, + unsigned char* value) +{ + int rc = 0; + if (ccid->ccid_hc_tx_parse_options != NULL) + rc = ccid->ccid_hc_tx_parse_options(sk, option, len, idx, value); + return rc; +} + +static inline int ccid_hc_rx_parse_options(struct ccid *ccid, struct 
sock *sk, + unsigned char option, + unsigned char len, u16 idx, + unsigned char* value) +{ + int rc = 0; + if (ccid->ccid_hc_rx_parse_options != NULL) + rc = ccid->ccid_hc_rx_parse_options(sk, option, len, idx, value); + return rc; +} + +static inline void ccid_hc_tx_insert_options(struct ccid *ccid, struct sock *sk, + struct sk_buff *skb) +{ + if (ccid->ccid_hc_tx_insert_options != NULL) + ccid->ccid_hc_tx_insert_options(sk, skb); +} + +static inline void ccid_hc_rx_insert_options(struct ccid *ccid, struct sock *sk, + struct sk_buff *skb) +{ + if (ccid->ccid_hc_rx_insert_options != NULL) + ccid->ccid_hc_rx_insert_options(sk, skb); +} +#endif /* _CCID_H */ diff --git a/net/dccp/ccids/Kconfig b/net/dccp/ccids/Kconfig new file mode 100644 index 000000000000..67f9c06bd179 --- /dev/null +++ b/net/dccp/ccids/Kconfig @@ -0,0 +1,25 @@ +menu "DCCP CCIDs Configuration (EXPERIMENTAL)" + depends on IP_DCCP && EXPERIMENTAL + +config IP_DCCP_CCID3 + tristate "CCID3 (TFRC) (EXPERIMENTAL)" + depends on IP_DCCP + ---help--- + CCID 3 denotes TCP-Friendly Rate Control (TFRC), an equation-based + rate-controlled congestion control mechanism. TFRC is designed to + be reasonably fair when competing for bandwidth with TCP-like flows, + where a flow is "reasonably fair" if its sending rate is generally + within a factor of two of the sending rate of a TCP flow under the + same conditions. However, TFRC has a much lower variation of + throughput over time compared with TCP, which makes CCID 3 more + suitable than CCID 2 for applications such streaming media where a + relatively smooth sending rate is of importance. + + CCID 3 is further described in [CCID 3 PROFILE]. The TFRC + congestion control algorithms were initially described in RFC 3448. + + This text was extracted from draft-ietf-dccp-spec-11.txt. + + If in doubt, say M. 
+ +endmenu diff --git a/net/dccp/ccids/Makefile b/net/dccp/ccids/Makefile new file mode 100644 index 000000000000..1c720131c5db --- /dev/null +++ b/net/dccp/ccids/Makefile @@ -0,0 +1,3 @@ +obj-$(CONFIG_IP_DCCP_CCID3) += dccp_ccid3.o + +dccp_ccid3-y := ccid3.o diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c new file mode 100644 index 000000000000..4f45902cb55e --- /dev/null +++ b/net/dccp/ccids/ccid3.c @@ -0,0 +1,2164 @@ +/* + * net/dccp/ccids/ccid3.c + * + * Copyright (c) 2005 The University of Waikato, Hamilton, New Zealand. + * + * An implementation of the DCCP protocol + * + * This code has been developed by the University of Waikato WAND + * research group. For further information please see http://www.wand.net.nz/ + * or e-mail Ian McDonald - iam4@cs.waikato.ac.nz + * + * This code also uses code from Lulea University, rereleased as GPL by its + * authors: + * Copyright (c) 2003 Nils-Erik Mattsson, Joacim Haggmark, Magnus Erixzon + * + * Changes to meet Linux coding standards, to make it meet latest ccid3 draft + * and to make it work as a loadable module in the DCCP stack written by + * Arnaldo Carvalho de Melo . + * + * Copyright (c) 2005 Arnaldo Carvalho de Melo + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
+ */ + +#include "../ccid.h" +#include "../dccp.h" +#include "ccid3.h" + +#ifdef CCID3_DEBUG +extern int ccid3_debug; + +#define ccid3_pr_debug(format, a...) \ + do { if (ccid3_debug) \ + printk(KERN_DEBUG "%s: " format, __FUNCTION__, ##a); \ + } while (0) +#else +#define ccid3_pr_debug(format, a...) +#endif + +#define TFRC_MIN_PACKET_SIZE 16 +#define TFRC_STD_PACKET_SIZE 256 +#define TFRC_MAX_PACKET_SIZE 65535 + +#define USEC_IN_SEC 1000000 + +#define TFRC_INITIAL_TIMEOUT (2 * USEC_IN_SEC) +/* two seconds as per CCID3 spec 11 */ + +#define TFRC_OPSYS_HALF_TIME_GRAN (USEC_IN_SEC / (2 * HZ)) +/* above is in usecs - half the scheduling granularity as per RFC3448 4.6 */ + +#define TFRC_WIN_COUNT_PER_RTT 4 +#define TFRC_WIN_COUNT_LIMIT 16 + +#define TFRC_MAX_BACK_OFF_TIME 64 +/* above is in seconds */ + +#define TFRC_SMALLEST_P 40 + +#define TFRC_RECV_IVAL_F_LENGTH 8 /* length(w[]) */ + +/* Number of later packets received before one is considered lost */ +#define TFRC_RECV_NUM_LATE_LOSS 3 + +enum ccid3_options { + TFRC_OPT_LOSS_EVENT_RATE = 192, + TFRC_OPT_LOSS_INTERVALS = 193, + TFRC_OPT_RECEIVE_RATE = 194, +}; + +static int ccid3_debug; + +static kmem_cache_t *ccid3_tx_hist_slab; +static kmem_cache_t *ccid3_rx_hist_slab; +static kmem_cache_t *ccid3_loss_interval_hist_slab; + +static inline struct ccid3_tx_hist_entry *ccid3_tx_hist_entry_new(int prio) +{ + struct ccid3_tx_hist_entry *entry = kmem_cache_alloc(ccid3_tx_hist_slab, prio); + + if (entry != NULL) + entry->ccid3htx_sent = 0; + + return entry; +} + +static inline void ccid3_tx_hist_entry_delete(struct ccid3_tx_hist_entry *entry) +{ + if (entry != NULL) + kmem_cache_free(ccid3_tx_hist_slab, entry); +} + +static inline struct ccid3_rx_hist_entry *ccid3_rx_hist_entry_new(struct sock *sk, + struct sk_buff *skb, + int prio) +{ + struct ccid3_rx_hist_entry *entry = kmem_cache_alloc(ccid3_rx_hist_slab, prio); + + if (entry != NULL) { + const struct dccp_hdr *dh = dccp_hdr(skb); + + entry->ccid3hrx_seqno = 
DCCP_SKB_CB(skb)->dccpd_seq; + entry->ccid3hrx_win_count = dh->dccph_ccval; + entry->ccid3hrx_type = dh->dccph_type; + entry->ccid3hrx_ndp = dccp_sk(sk)->dccps_options_received.dccpor_ndp; + do_gettimeofday(&(entry->ccid3hrx_tstamp)); + } + + return entry; +} + +static inline void ccid3_rx_hist_entry_delete(struct ccid3_rx_hist_entry *entry) +{ + if (entry != NULL) + kmem_cache_free(ccid3_rx_hist_slab, entry); +} + +static void ccid3_rx_history_delete(struct list_head *hist) +{ + struct ccid3_rx_hist_entry *entry, *next; + + list_for_each_entry_safe(entry, next, hist, ccid3hrx_node) { + list_del_init(&entry->ccid3hrx_node); + kmem_cache_free(ccid3_rx_hist_slab, entry); + } +} + +static inline struct ccid3_loss_interval_hist_entry *ccid3_loss_interval_hist_entry_new(int prio) +{ + return kmem_cache_alloc(ccid3_loss_interval_hist_slab, prio); +} + +static inline void ccid3_loss_interval_hist_entry_delete(struct ccid3_loss_interval_hist_entry *entry) +{ + if (entry != NULL) + kmem_cache_free(ccid3_loss_interval_hist_slab, entry); +} + +static void ccid3_loss_interval_history_delete(struct list_head *hist) +{ + struct ccid3_loss_interval_hist_entry *entry, *next; + + list_for_each_entry_safe(entry, next, hist, ccid3lih_node) { + list_del_init(&entry->ccid3lih_node); + kmem_cache_free(ccid3_loss_interval_hist_slab, entry); + } +} + +static int ccid3_init(struct sock *sk) +{ + ccid3_pr_debug("%s, sk=%p\n", dccp_role(sk), sk); + return 0; +} + +static void ccid3_exit(struct sock *sk) +{ + ccid3_pr_debug("%s, sk=%p\n", dccp_role(sk), sk); +} + +/* TFRC sender states */ +enum ccid3_hc_tx_states { + TFRC_SSTATE_NO_SENT = 1, + TFRC_SSTATE_NO_FBACK, + TFRC_SSTATE_FBACK, + TFRC_SSTATE_TERM, +}; + +#ifdef CCID3_DEBUG +static const char *ccid3_tx_state_name(enum ccid3_hc_tx_states state) +{ + static char *ccid3_state_names[] = { + [TFRC_SSTATE_NO_SENT] = "NO_SENT", + [TFRC_SSTATE_NO_FBACK] = "NO_FBACK", + [TFRC_SSTATE_FBACK] = "FBACK", + [TFRC_SSTATE_TERM] = "TERM", + }; + + 
return ccid3_state_names[state]; +} +#endif + +static inline void ccid3_hc_tx_set_state(struct sock *sk, enum ccid3_hc_tx_states state) +{ + struct dccp_sock *dp = dccp_sk(sk); + struct ccid3_hc_tx_sock *hctx = dp->dccps_hc_tx_ccid_private; + enum ccid3_hc_tx_states oldstate = hctx->ccid3hctx_state; + + ccid3_pr_debug("%s(%p) %-8.8s -> %s\n", + dccp_role(sk), sk, ccid3_tx_state_name(oldstate), ccid3_tx_state_name(state)); + WARN_ON(state == oldstate); + hctx->ccid3hctx_state = state; +} + +static void timeval_sub(struct timeval large, struct timeval small, struct timeval *result) { + + result->tv_sec = large.tv_sec-small.tv_sec; + if (large.tv_usec < small.tv_usec) { + (result->tv_sec)--; + result->tv_usec = USEC_IN_SEC+large.tv_usec-small.tv_usec; + } else + result->tv_usec = large.tv_usec-small.tv_usec; +} + +static inline void timeval_fix(struct timeval *tv) { + if (tv->tv_usec >= USEC_IN_SEC) { + tv->tv_sec++; + tv->tv_usec -= USEC_IN_SEC; + } +} + +/* returns the difference in usecs between timeval passed in and current time */ +static inline u32 now_delta(struct timeval tv) { + struct timeval now; + + do_gettimeofday(&now); + return ((now.tv_sec-tv.tv_sec)*1000000+now.tv_usec-tv.tv_usec); +} + +#define CALCX_ARRSIZE 500 + +#define CALCX_SPLIT 50000 +/* equivalent to 0.05 */ + +static const u32 calcx_lookup[CALCX_ARRSIZE][2] = { + { 37172 , 8172 }, + { 53499 , 11567 }, + { 66664 , 14180 }, + { 78298 , 16388 }, + { 89021 , 18339 }, + { 99147 , 20108 }, + { 108858 , 21738 }, + { 118273 , 23260 }, + { 127474 , 24693 }, + { 136520 , 26052 }, + { 145456 , 27348 }, + { 154316 , 28589 }, + { 163130 , 29783 }, + { 171919 , 30935 }, + { 180704 , 32049 }, + { 189502 , 33130 }, + { 198328 , 34180 }, + { 207194 , 35202 }, + { 216114 , 36198 }, + { 225097 , 37172 }, + { 234153 , 38123 }, + { 243294 , 39055 }, + { 252527 , 39968 }, + { 261861 , 40864 }, + { 271305 , 41743 }, + { 280866 , 42607 }, + { 290553 , 43457 }, + { 300372 , 44293 }, + { 310333 , 45117 }, + { 320441 , 
45929 }, + { 330705 , 46729 }, + { 341131 , 47518 }, + { 351728 , 48297 }, + { 362501 , 49066 }, + { 373460 , 49826 }, + { 384609 , 50577 }, + { 395958 , 51320 }, + { 407513 , 52054 }, + { 419281 , 52780 }, + { 431270 , 53499 }, + { 443487 , 54211 }, + { 455940 , 54916 }, + { 468635 , 55614 }, + { 481581 , 56306 }, + { 494785 , 56991 }, + { 508254 , 57671 }, + { 521996 , 58345 }, + { 536019 , 59014 }, + { 550331 , 59677 }, + { 564939 , 60335 }, + { 579851 , 60988 }, + { 595075 , 61636 }, + { 610619 , 62279 }, + { 626491 , 62918 }, + { 642700 , 63553 }, + { 659253 , 64183 }, + { 676158 , 64809 }, + { 693424 , 65431 }, + { 711060 , 66050 }, + { 729073 , 66664 }, + { 747472 , 67275 }, + { 766266 , 67882 }, + { 785464 , 68486 }, + { 805073 , 69087 }, + { 825103 , 69684 }, + { 845562 , 70278 }, + { 866460 , 70868 }, + { 887805 , 71456 }, + { 909606 , 72041 }, + { 931873 , 72623 }, + { 954614 , 73202 }, + { 977839 , 73778 }, + { 1001557 , 74352 }, + { 1025777 , 74923 }, + { 1050508 , 75492 }, + { 1075761 , 76058 }, + { 1101544 , 76621 }, + { 1127867 , 77183 }, + { 1154739 , 77741 }, + { 1182172 , 78298 }, + { 1210173 , 78852 }, + { 1238753 , 79405 }, + { 1267922 , 79955 }, + { 1297689 , 80503 }, + { 1328066 , 81049 }, + { 1359060 , 81593 }, + { 1390684 , 82135 }, + { 1422947 , 82675 }, + { 1455859 , 83213 }, + { 1489430 , 83750 }, + { 1523671 , 84284 }, + { 1558593 , 84817 }, + { 1594205 , 85348 }, + { 1630518 , 85878 }, + { 1667543 , 86406 }, + { 1705290 , 86932 }, + { 1743770 , 87457 }, + { 1782994 , 87980 }, + { 1822973 , 88501 }, + { 1863717 , 89021 }, + { 1905237 , 89540 }, + { 1947545 , 90057 }, + { 1990650 , 90573 }, + { 2034566 , 91087 }, + { 2079301 , 91600 }, + { 2124869 , 92111 }, + { 2171279 , 92622 }, + { 2218543 , 93131 }, + { 2266673 , 93639 }, + { 2315680 , 94145 }, + { 2365575 , 94650 }, + { 2416371 , 95154 }, + { 2468077 , 95657 }, + { 2520707 , 96159 }, + { 2574271 , 96660 }, + { 2628782 , 97159 }, + { 2684250 , 97658 }, + { 2740689 , 98155 }, + { 
2798110 , 98651 }, + { 2856524 , 99147 }, + { 2915944 , 99641 }, + { 2976382 , 100134 }, + { 3037850 , 100626 }, + { 3100360 , 101117 }, + { 3163924 , 101608 }, + { 3228554 , 102097 }, + { 3294263 , 102586 }, + { 3361063 , 103073 }, + { 3428966 , 103560 }, + { 3497984 , 104045 }, + { 3568131 , 104530 }, + { 3639419 , 105014 }, + { 3711860 , 105498 }, + { 3785467 , 105980 }, + { 3860253 , 106462 }, + { 3936229 , 106942 }, + { 4013410 , 107422 }, + { 4091808 , 107902 }, + { 4171435 , 108380 }, + { 4252306 , 108858 }, + { 4334431 , 109335 }, + { 4417825 , 109811 }, + { 4502501 , 110287 }, + { 4588472 , 110762 }, + { 4675750 , 111236 }, + { 4764349 , 111709 }, + { 4854283 , 112182 }, + { 4945564 , 112654 }, + { 5038206 , 113126 }, + { 5132223 , 113597 }, + { 5227627 , 114067 }, + { 5324432 , 114537 }, + { 5422652 , 115006 }, + { 5522299 , 115474 }, + { 5623389 , 115942 }, + { 5725934 , 116409 }, + { 5829948 , 116876 }, + { 5935446 , 117342 }, + { 6042439 , 117808 }, + { 6150943 , 118273 }, + { 6260972 , 118738 }, + { 6372538 , 119202 }, + { 6485657 , 119665 }, + { 6600342 , 120128 }, + { 6716607 , 120591 }, + { 6834467 , 121053 }, + { 6953935 , 121514 }, + { 7075025 , 121976 }, + { 7197752 , 122436 }, + { 7322131 , 122896 }, + { 7448175 , 123356 }, + { 7575898 , 123815 }, + { 7705316 , 124274 }, + { 7836442 , 124733 }, + { 7969291 , 125191 }, + { 8103877 , 125648 }, + { 8240216 , 126105 }, + { 8378321 , 126562 }, + { 8518208 , 127018 }, + { 8659890 , 127474 }, + { 8803384 , 127930 }, + { 8948702 , 128385 }, + { 9095861 , 128840 }, + { 9244875 , 129294 }, + { 9395760 , 129748 }, + { 9548529 , 130202 }, + { 9703198 , 130655 }, + { 9859782 , 131108 }, + { 10018296 , 131561 }, + { 10178755 , 132014 }, + { 10341174 , 132466 }, + { 10505569 , 132917 }, + { 10671954 , 133369 }, + { 10840345 , 133820 }, + { 11010757 , 134271 }, + { 11183206 , 134721 }, + { 11357706 , 135171 }, + { 11534274 , 135621 }, + { 11712924 , 136071 }, + { 11893673 , 136520 }, + { 12076536 , 136969 }, + 
{ 12261527 , 137418 }, + { 12448664 , 137867 }, + { 12637961 , 138315 }, + { 12829435 , 138763 }, + { 13023101 , 139211 }, + { 13218974 , 139658 }, + { 13417071 , 140106 }, + { 13617407 , 140553 }, + { 13819999 , 140999 }, + { 14024862 , 141446 }, + { 14232012 , 141892 }, + { 14441465 , 142339 }, + { 14653238 , 142785 }, + { 14867346 , 143230 }, + { 15083805 , 143676 }, + { 15302632 , 144121 }, + { 15523842 , 144566 }, + { 15747453 , 145011 }, + { 15973479 , 145456 }, + { 16201939 , 145900 }, + { 16432847 , 146345 }, + { 16666221 , 146789 }, + { 16902076 , 147233 }, + { 17140429 , 147677 }, + { 17381297 , 148121 }, + { 17624696 , 148564 }, + { 17870643 , 149007 }, + { 18119154 , 149451 }, + { 18370247 , 149894 }, + { 18623936 , 150336 }, + { 18880241 , 150779 }, + { 19139176 , 151222 }, + { 19400759 , 151664 }, + { 19665007 , 152107 }, + { 19931936 , 152549 }, + { 20201564 , 152991 }, + { 20473907 , 153433 }, + { 20748982 , 153875 }, + { 21026807 , 154316 }, + { 21307399 , 154758 }, + { 21590773 , 155199 }, + { 21876949 , 155641 }, + { 22165941 , 156082 }, + { 22457769 , 156523 }, + { 22752449 , 156964 }, + { 23049999 , 157405 }, + { 23350435 , 157846 }, + { 23653774 , 158287 }, + { 23960036 , 158727 }, + { 24269236 , 159168 }, + { 24581392 , 159608 }, + { 24896521 , 160049 }, + { 25214642 , 160489 }, + { 25535772 , 160929 }, + { 25859927 , 161370 }, + { 26187127 , 161810 }, + { 26517388 , 162250 }, + { 26850728 , 162690 }, + { 27187165 , 163130 }, + { 27526716 , 163569 }, + { 27869400 , 164009 }, + { 28215234 , 164449 }, + { 28564236 , 164889 }, + { 28916423 , 165328 }, + { 29271815 , 165768 }, + { 29630428 , 166208 }, + { 29992281 , 166647 }, + { 30357392 , 167087 }, + { 30725779 , 167526 }, + { 31097459 , 167965 }, + { 31472452 , 168405 }, + { 31850774 , 168844 }, + { 32232445 , 169283 }, + { 32617482 , 169723 }, + { 33005904 , 170162 }, + { 33397730 , 170601 }, + { 33792976 , 171041 }, + { 34191663 , 171480 }, + { 34593807 , 171919 }, + { 34999428 , 172358 }, + 
{ 35408544 , 172797 }, + { 35821174 , 173237 }, + { 36237335 , 173676 }, + { 36657047 , 174115 }, + { 37080329 , 174554 }, + { 37507197 , 174993 }, + { 37937673 , 175433 }, + { 38371773 , 175872 }, + { 38809517 , 176311 }, + { 39250924 , 176750 }, + { 39696012 , 177190 }, + { 40144800 , 177629 }, + { 40597308 , 178068 }, + { 41053553 , 178507 }, + { 41513554 , 178947 }, + { 41977332 , 179386 }, + { 42444904 , 179825 }, + { 42916290 , 180265 }, + { 43391509 , 180704 }, + { 43870579 , 181144 }, + { 44353520 , 181583 }, + { 44840352 , 182023 }, + { 45331092 , 182462 }, + { 45825761 , 182902 }, + { 46324378 , 183342 }, + { 46826961 , 183781 }, + { 47333531 , 184221 }, + { 47844106 , 184661 }, + { 48358706 , 185101 }, + { 48877350 , 185541 }, + { 49400058 , 185981 }, + { 49926849 , 186421 }, + { 50457743 , 186861 }, + { 50992759 , 187301 }, + { 51531916 , 187741 }, + { 52075235 , 188181 }, + { 52622735 , 188622 }, + { 53174435 , 189062 }, + { 53730355 , 189502 }, + { 54290515 , 189943 }, + { 54854935 , 190383 }, + { 55423634 , 190824 }, + { 55996633 , 191265 }, + { 56573950 , 191706 }, + { 57155606 , 192146 }, + { 57741621 , 192587 }, + { 58332014 , 193028 }, + { 58926806 , 193470 }, + { 59526017 , 193911 }, + { 60129666 , 194352 }, + { 60737774 , 194793 }, + { 61350361 , 195235 }, + { 61967446 , 195677 }, + { 62589050 , 196118 }, + { 63215194 , 196560 }, + { 63845897 , 197002 }, + { 64481179 , 197444 }, + { 65121061 , 197886 }, + { 65765563 , 198328 }, + { 66414705 , 198770 }, + { 67068508 , 199213 }, + { 67726992 , 199655 }, + { 68390177 , 200098 }, + { 69058085 , 200540 }, + { 69730735 , 200983 }, + { 70408147 , 201426 }, + { 71090343 , 201869 }, + { 71777343 , 202312 }, + { 72469168 , 202755 }, + { 73165837 , 203199 }, + { 73867373 , 203642 }, + { 74573795 , 204086 }, + { 75285124 , 204529 }, + { 76001380 , 204973 }, + { 76722586 , 205417 }, + { 77448761 , 205861 }, + { 78179926 , 206306 }, + { 78916102 , 206750 }, + { 79657310 , 207194 }, + { 80403571 , 207639 }, + 
{ 81154906 , 208084 }, + { 81911335 , 208529 }, + { 82672880 , 208974 }, + { 83439562 , 209419 }, + { 84211402 , 209864 }, + { 84988421 , 210309 }, + { 85770640 , 210755 }, + { 86558080 , 211201 }, + { 87350762 , 211647 }, + { 88148708 , 212093 }, + { 88951938 , 212539 }, + { 89760475 , 212985 }, + { 90574339 , 213432 }, + { 91393551 , 213878 }, + { 92218133 , 214325 }, + { 93048107 , 214772 }, + { 93883493 , 215219 }, + { 94724314 , 215666 }, + { 95570590 , 216114 }, + { 96422343 , 216561 }, + { 97279594 , 217009 }, + { 98142366 , 217457 }, + { 99010679 , 217905 }, + { 99884556 , 218353 }, + { 100764018 , 218801 }, + { 101649086 , 219250 }, + { 102539782 , 219698 }, + { 103436128 , 220147 }, + { 104338146 , 220596 }, + { 105245857 , 221046 }, + { 106159284 , 221495 }, + { 107078448 , 221945 }, + { 108003370 , 222394 }, + { 108934074 , 222844 }, + { 109870580 , 223294 }, + { 110812910 , 223745 }, + { 111761087 , 224195 }, + { 112715133 , 224646 }, + { 113675069 , 225097 }, + { 114640918 , 225548 }, + { 115612702 , 225999 }, + { 116590442 , 226450 }, + { 117574162 , 226902 }, + { 118563882 , 227353 }, + { 119559626 , 227805 }, + { 120561415 , 228258 }, + { 121569272 , 228710 }, + { 122583219 , 229162 }, + { 123603278 , 229615 }, + { 124629471 , 230068 }, + { 125661822 , 230521 }, + { 126700352 , 230974 }, + { 127745083 , 231428 }, + { 128796039 , 231882 }, + { 129853241 , 232336 }, + { 130916713 , 232790 }, + { 131986475 , 233244 }, + { 133062553 , 233699 }, + { 134144966 , 234153 }, + { 135233739 , 234608 }, + { 136328894 , 235064 }, + { 137430453 , 235519 }, + { 138538440 , 235975 }, + { 139652876 , 236430 }, + { 140773786 , 236886 }, + { 141901190 , 237343 }, + { 143035113 , 237799 }, + { 144175576 , 238256 }, + { 145322604 , 238713 }, + { 146476218 , 239170 }, + { 147636442 , 239627 }, + { 148803298 , 240085 }, + { 149976809 , 240542 }, + { 151156999 , 241000 }, + { 152343890 , 241459 }, + { 153537506 , 241917 }, + { 154737869 , 242376 }, + { 155945002 , 242835 
}, + { 157158929 , 243294 }, + { 158379673 , 243753 }, + { 159607257 , 244213 }, + { 160841704 , 244673 }, + { 162083037 , 245133 }, + { 163331279 , 245593 }, + { 164586455 , 246054 }, + { 165848586 , 246514 }, + { 167117696 , 246975 }, + { 168393810 , 247437 }, + { 169676949 , 247898 }, + { 170967138 , 248360 }, + { 172264399 , 248822 }, + { 173568757 , 249284 }, + { 174880235 , 249747 }, + { 176198856 , 250209 }, + { 177524643 , 250672 }, + { 178857621 , 251136 }, + { 180197813 , 251599 }, + { 181545242 , 252063 }, + { 182899933 , 252527 }, + { 184261908 , 252991 }, + { 185631191 , 253456 }, + { 187007807 , 253920 }, + { 188391778 , 254385 }, + { 189783129 , 254851 }, + { 191181884 , 255316 }, + { 192588065 , 255782 }, + { 194001698 , 256248 }, + { 195422805 , 256714 }, + { 196851411 , 257181 }, + { 198287540 , 257648 }, + { 199731215 , 258115 }, + { 201182461 , 258582 }, + { 202641302 , 259050 }, + { 204107760 , 259518 }, + { 205581862 , 259986 }, + { 207063630 , 260454 }, + { 208553088 , 260923 }, + { 210050262 , 261392 }, + { 211555174 , 261861 }, + { 213067849 , 262331 }, + { 214588312 , 262800 }, + { 216116586 , 263270 }, + { 217652696 , 263741 }, + { 219196666 , 264211 }, + { 220748520 , 264682 }, + { 222308282 , 265153 }, + { 223875978 , 265625 }, + { 225451630 , 266097 }, + { 227035265 , 266569 }, + { 228626905 , 267041 }, + { 230226576 , 267514 }, + { 231834302 , 267986 }, + { 233450107 , 268460 }, + { 235074016 , 268933 }, + { 236706054 , 269407 }, + { 238346244 , 269881 }, + { 239994613 , 270355 }, + { 241651183 , 270830 }, + { 243315981 , 271305 } +}; + +/* Calculate the send rate as per section 3.1 of RFC3448 + +Returns send rate in bytes per second + +Integer maths and lookups are used as not allowed floating point in kernel + +The function for Xcalc as per section 3.1 of RFC3448 is: + +X = s + ------------------------------------------------------------- + R*sqrt(2*b*p/3) + (t_RTO * (3*sqrt(3*b*p/8) * p * (1+32*p^2))) + +where +X is the trasmit 
rate in bytes/second +s is the packet size in bytes +R is the round trip time in seconds +p is the loss event rate, between 0 and 1.0, of the number of loss events + as a fraction of the number of packets transmitted +t_RTO is the TCP retransmission timeout value in seconds +b is the number of packets acknowledged by a single TCP acknowledgement + +we can assume that b = 1 and t_RTO is 4 * R. With this the equation becomes: + +X = s + ----------------------------------------------------------------------- + R * sqrt(2 * p / 3) + (12 * R * (sqrt(3 * p / 8) * p * (1 + 32 * p^2))) + + +which we can break down into: + +X = s + -------- + R * f(p) + +where f(p) = sqrt(2 * p / 3) + (12 * sqrt(3 * p / 8) * p * (1 + 32 * p * p)) + +Function parameters: +s - bytes +R - RTT in usecs +p - loss rate (decimal fraction multiplied by 1,000,000) + +Returns Xcalc in bytes per second + +DON'T alter this code unless you run test cases against it as the code +has been manipulated to stop underflow/overlow. 
+ +*/ +static u32 ccid3_calc_x(u16 s, u32 R, u32 p) +{ + int index; + u32 f; + u64 tmp1, tmp2; + + if (p < CALCX_SPLIT) + index = (p / (CALCX_SPLIT / CALCX_ARRSIZE)) - 1; + else + index = (p / (1000000 / CALCX_ARRSIZE)) - 1; + + if (index < 0) + /* p should be 0 unless there is a bug in my code */ + index = 0; + + if (R == 0) + R = 1; /* RTT can't be zero or else divide by zero */ + + BUG_ON(index >= CALCX_ARRSIZE); + + if (p >= CALCX_SPLIT) + f = calcx_lookup[index][0]; + else + f = calcx_lookup[index][1]; + + tmp1 = ((u64)s * 100000000); + tmp2 = ((u64)R * (u64)f); + do_div(tmp2,10000); + do_div(tmp1,tmp2); + /* don't alter above math unless you test due to overflow on 32 bit */ + + return (u32)tmp1; +} + +/* Calculate new t_ipi (inter packet interval) by t_ipi = s / X_inst */ +static inline void ccid3_calc_new_t_ipi(struct ccid3_hc_tx_sock *hctx) +{ + if (hctx->ccid3hctx_state == TFRC_SSTATE_NO_FBACK) + return; + /* if no feedback spec says t_ipi is 1 second (set elsewhere and then + * doubles after every no feedback timer (separate function) */ + + if (hctx->ccid3hctx_x < 10) { + ccid3_pr_debug("ccid3_calc_new_t_ipi - ccid3hctx_x < 10\n"); + hctx->ccid3hctx_x = 10; + } + hctx->ccid3hctx_t_ipi = (hctx->ccid3hctx_s * 100000) + / (hctx->ccid3hctx_x / 10); + /* reason for above maths with 10 in there is to avoid 32 bit + * overflow for jumbo packets */ + +} + +/* Calculate new delta by delta = min(t_ipi / 2, t_gran / 2) */ +static inline void ccid3_calc_new_delta(struct ccid3_hc_tx_sock *hctx) +{ + hctx->ccid3hctx_delta = min_t(u32, hctx->ccid3hctx_t_ipi / 2, TFRC_OPSYS_HALF_TIME_GRAN); + +} + +/* + * Update X by + * If (p > 0) + * x_calc = calcX(s, R, p); + * X = max(min(X_calc, 2 * X_recv), s / t_mbi); + * Else + * If (now - tld >= R) + * X = max(min(2 * X, 2 * X_recv), s / R); + * tld = now; + */ +static void ccid3_hc_tx_update_x(struct sock *sk) +{ + struct dccp_sock *dp = dccp_sk(sk); + struct ccid3_hc_tx_sock *hctx = dp->dccps_hc_tx_ccid_private; + + if 
(hctx->ccid3hctx_p >= TFRC_SMALLEST_P) { /* to avoid large error in calcX */ + hctx->ccid3hctx_x_calc = ccid3_calc_x(hctx->ccid3hctx_s, + hctx->ccid3hctx_rtt, + hctx->ccid3hctx_p); + hctx->ccid3hctx_x = max_t(u32, min_t(u32, hctx->ccid3hctx_x_calc, 2 * hctx->ccid3hctx_x_recv), + hctx->ccid3hctx_s / TFRC_MAX_BACK_OFF_TIME); + } else if (now_delta(hctx->ccid3hctx_t_ld) >= hctx->ccid3hctx_rtt) { + u32 rtt = hctx->ccid3hctx_rtt; + if (rtt < 10) { + rtt = 10; + } /* avoid divide by zero below */ + + hctx->ccid3hctx_x = max_t(u32, min_t(u32, 2 * hctx->ccid3hctx_x_recv, 2 * hctx->ccid3hctx_x), + (hctx->ccid3hctx_s * 100000) / (rtt / 10)); + /* Using 100000 and 10 to avoid 32 bit overflow for jumbo frames */ + do_gettimeofday(&hctx->ccid3hctx_t_ld); + } + + if (hctx->ccid3hctx_x == 0) { + ccid3_pr_debug("ccid3hctx_x = 0!\n"); + hctx->ccid3hctx_x = 1; + } +} + +static void ccid3_hc_tx_no_feedback_timer(unsigned long data) +{ + struct sock *sk = (struct sock *)data; + struct dccp_sock *dp = dccp_sk(sk); + unsigned long next_tmout = 0; + struct ccid3_hc_tx_sock *hctx = dp->dccps_hc_tx_ccid_private; + u32 rtt; + + bh_lock_sock(sk); + if (sock_owned_by_user(sk)) { + /* Try again later. 
*/ + /* XXX: set some sensible MIB */ + sk_reset_timer(sk, &hctx->ccid3hctx_no_feedback_timer, jiffies + HZ / 5); + goto out; + } + + ccid3_pr_debug("%s, sk=%p, state=%s\n", dccp_role(sk), sk, + ccid3_tx_state_name(hctx->ccid3hctx_state)); + + if (hctx->ccid3hctx_x < 10) { + ccid3_pr_debug("TFRC_SSTATE_NO_FBACK ccid3hctx_x < 10\n"); + hctx->ccid3hctx_x = 10; + } + + switch (hctx->ccid3hctx_state) { + case TFRC_SSTATE_TERM: + goto out; + case TFRC_SSTATE_NO_FBACK: + /* Halve send rate */ + hctx->ccid3hctx_x /= 2; + if (hctx->ccid3hctx_x < (hctx->ccid3hctx_s / TFRC_MAX_BACK_OFF_TIME)) + hctx->ccid3hctx_x = hctx->ccid3hctx_s / TFRC_MAX_BACK_OFF_TIME; + + ccid3_pr_debug("%s, sk=%p, state=%s, updated tx rate to %d bytes/s\n", + dccp_role(sk), sk, ccid3_tx_state_name(hctx->ccid3hctx_state), + hctx->ccid3hctx_x); + next_tmout = max_t(u32, 2 * (hctx->ccid3hctx_s * 100000) + / (hctx->ccid3hctx_x / 10), TFRC_INITIAL_TIMEOUT); + /* do above maths with 100000 and 10 to prevent overflow on 32 bit */ + /* FIXME - not sure above calculation is correct. See section 5 of CCID3 11 + * should adjust tx_t_ipi and double that to achieve it really */ + break; + case TFRC_SSTATE_FBACK: + /* Check if IDLE since last timeout and recv rate is less than 4 packets per RTT */ + rtt = hctx->ccid3hctx_rtt; + if (rtt < 10) + rtt = 10; + /* stop divide by zero below */ + if (!hctx->ccid3hctx_idle || (hctx->ccid3hctx_x_recv >= + 4 * (hctx->ccid3hctx_s * 100000) / (rtt / 10))) { + ccid3_pr_debug("%s, sk=%p, state=%s, not idle\n", dccp_role(sk), sk, + ccid3_tx_state_name(hctx->ccid3hctx_state)); + /* Halve sending rate */ + + /* If (X_calc > 2 * X_recv) + * X_recv = max(X_recv / 2, s / (2 * t_mbi)); + * Else + * X_recv = X_calc / 4; + */ + BUG_ON(hctx->ccid3hctx_p >= TFRC_SMALLEST_P && hctx->ccid3hctx_x_calc == 0); + + /* check also if p is zero -> x_calc is infinity? 
*/ + if (hctx->ccid3hctx_p < TFRC_SMALLEST_P || + hctx->ccid3hctx_x_calc > 2 * hctx->ccid3hctx_x_recv) + hctx->ccid3hctx_x_recv = max_t(u32, hctx->ccid3hctx_x_recv / 2, + hctx->ccid3hctx_s / (2 * TFRC_MAX_BACK_OFF_TIME)); + else + hctx->ccid3hctx_x_recv = hctx->ccid3hctx_x_calc / 4; + + /* Update sending rate */ + ccid3_hc_tx_update_x(sk); + } + if (hctx->ccid3hctx_x == 0) { + ccid3_pr_debug("TFRC_SSTATE_FBACK ccid3hctx_x = 0!\n"); + hctx->ccid3hctx_x = 10; + } + /* Schedule no feedback timer to expire in max(4 * R, 2 * s / X) */ + next_tmout = max_t(u32, inet_csk(sk)->icsk_rto, + 2 * (hctx->ccid3hctx_s * 100000) / (hctx->ccid3hctx_x / 10)); + break; + default: + printk(KERN_CRIT "%s: %s, sk=%p, Illegal state (%d)!\n", + __FUNCTION__, dccp_role(sk), sk, hctx->ccid3hctx_state); + dump_stack(); + goto out; + } + + sk_reset_timer(sk, &hctx->ccid3hctx_no_feedback_timer, + jiffies + max_t(u32, 1, usecs_to_jiffies(next_tmout))); + hctx->ccid3hctx_idle = 1; +out: + bh_unlock_sock(sk); + sock_put(sk); +} + +static int ccid3_hc_tx_send_packet(struct sock *sk, struct sk_buff *skb, + int len, long *delay) +{ + struct dccp_sock *dp = dccp_sk(sk); + struct ccid3_hc_tx_sock *hctx = dp->dccps_hc_tx_ccid_private; + struct ccid3_tx_hist_entry *new_packet = NULL; + struct timeval now; + int rc = -ENOTCONN; + +// ccid3_pr_debug("%s, sk=%p, skb=%p, len=%d\n", dccp_role(sk), sk, skb, len); + /* + * check if pure ACK or Terminating */ + /* XXX: We only call this function for DATA and DATAACK, on, these packets can have + * zero length, but why the comment about "pure ACK"? 
+ */ + if (hctx == NULL || len == 0 || hctx->ccid3hctx_state == TFRC_SSTATE_TERM) + goto out; + + /* See if last packet allocated was not sent */ + if (!list_empty(&hctx->ccid3hctx_hist)) + new_packet = list_entry(hctx->ccid3hctx_hist.next, + struct ccid3_tx_hist_entry, ccid3htx_node); + + if (new_packet == NULL || new_packet->ccid3htx_sent) { + new_packet = ccid3_tx_hist_entry_new(SLAB_ATOMIC); + + rc = -ENOBUFS; + if (new_packet == NULL) { + ccid3_pr_debug("%s, sk=%p, not enough mem to add " + "to history, send refused\n", dccp_role(sk), sk); + goto out; + } + + list_add(&new_packet->ccid3htx_node, &hctx->ccid3hctx_hist); + } + + do_gettimeofday(&now); + + switch (hctx->ccid3hctx_state) { + case TFRC_SSTATE_NO_SENT: + ccid3_pr_debug("%s, sk=%p, first packet(%llu)\n", dccp_role(sk), sk, + dp->dccps_gss); + + hctx->ccid3hctx_no_feedback_timer.function = ccid3_hc_tx_no_feedback_timer; + hctx->ccid3hctx_no_feedback_timer.data = (unsigned long)sk; + sk_reset_timer(sk, &hctx->ccid3hctx_no_feedback_timer, jiffies + usecs_to_jiffies(TFRC_INITIAL_TIMEOUT)); + hctx->ccid3hctx_last_win_count = 0; + hctx->ccid3hctx_t_last_win_count = now; + ccid3_hc_tx_set_state(sk, TFRC_SSTATE_NO_FBACK); + hctx->ccid3hctx_t_ipi = TFRC_INITIAL_TIMEOUT; + + /* Set nominal send time for initial packet */ + hctx->ccid3hctx_t_nom = now; + (hctx->ccid3hctx_t_nom).tv_usec += hctx->ccid3hctx_t_ipi; + timeval_fix(&(hctx->ccid3hctx_t_nom)); + ccid3_calc_new_delta(hctx); + rc = 0; + break; + case TFRC_SSTATE_NO_FBACK: + case TFRC_SSTATE_FBACK: + *delay = (now_delta(hctx->ccid3hctx_t_nom) - hctx->ccid3hctx_delta); + ccid3_pr_debug("send_packet delay=%ld\n",*delay); + *delay /= -1000; + /* divide by -1000 is to convert to ms and get sign right */ + rc = *delay > 0 ? -EAGAIN : 0; + break; + default: + printk(KERN_CRIT "%s: %s, sk=%p, Illegal state (%d)!\n", + __FUNCTION__, dccp_role(sk), sk, hctx->ccid3hctx_state); + dump_stack(); + rc = -EINVAL; + break; + } + + /* Can we send? 
if so add options and add to packet history */ + if (rc == 0) + new_packet->ccid3htx_win_count = DCCP_SKB_CB(skb)->dccpd_ccval = hctx->ccid3hctx_last_win_count; +out: + return rc; +} + +static void ccid3_hc_tx_packet_sent(struct sock *sk, int more, int len) +{ + struct dccp_sock *dp = dccp_sk(sk); + struct ccid3_hc_tx_sock *hctx = dp->dccps_hc_tx_ccid_private; + struct ccid3_tx_hist_entry *packet = NULL; + struct timeval now; + +// ccid3_pr_debug("%s, sk=%p, more=%d, len=%d\n", dccp_role(sk), sk, more, len); + BUG_ON(hctx == NULL); + + if (hctx->ccid3hctx_state == TFRC_SSTATE_TERM) { + ccid3_pr_debug("%s, sk=%p, while state is TFRC_SSTATE_TERM!\n", + dccp_role(sk), sk); + return; + } + + do_gettimeofday(&now); + + /* check if we have sent a data packet */ + if (len > 0) { + unsigned long quarter_rtt; + + if (list_empty(&hctx->ccid3hctx_hist)) { + printk(KERN_CRIT "%s: packet doesn't exists in history!\n", __FUNCTION__); + return; + } + packet = list_entry(hctx->ccid3hctx_hist.next, struct ccid3_tx_hist_entry, ccid3htx_node); + if (packet->ccid3htx_sent) { + printk(KERN_CRIT "%s: no unsent packet in history!\n", __FUNCTION__); + return; + } + packet->ccid3htx_tstamp = now; + packet->ccid3htx_seqno = dp->dccps_gss; + // ccid3_pr_debug("%s, sk=%p, seqno=%llu inserted!\n", dccp_role(sk), sk, packet->ccid3htx_seqno); + + /* + * Check if win_count have changed */ + /* COMPLIANCE_BEGIN + * Algorithm in "8.1. 
Window Counter Valuer" in draft-ietf-dccp-ccid3-11.txt + */ + quarter_rtt = now_delta(hctx->ccid3hctx_t_last_win_count) / (hctx->ccid3hctx_rtt / 4); + if (quarter_rtt > 0) { + hctx->ccid3hctx_t_last_win_count = now; + hctx->ccid3hctx_last_win_count = (hctx->ccid3hctx_last_win_count + + min_t(unsigned long, quarter_rtt, 5)) % 16; + ccid3_pr_debug("%s, sk=%p, window changed from %u to %u!\n", + dccp_role(sk), sk, + packet->ccid3htx_win_count, + hctx->ccid3hctx_last_win_count); + } + /* COMPLIANCE_END */ +#if 0 + ccid3_pr_debug("%s, sk=%p, packet sent (%llu,%u)\n", + dccp_role(sk), sk, + packet->ccid3htx_seqno, + packet->ccid3htx_win_count); +#endif + hctx->ccid3hctx_idle = 0; + packet->ccid3htx_sent = 1; + } else + ccid3_pr_debug("%s, sk=%p, seqno=%llu NOT inserted!\n", + dccp_role(sk), sk, dp->dccps_gss); + + switch (hctx->ccid3hctx_state) { + case TFRC_SSTATE_NO_SENT: + /* if first wasn't pure ack */ + if (len != 0) + printk(KERN_CRIT "%s: %s, First packet sent is noted as a data packet\n", + __FUNCTION__, dccp_role(sk)); + return; + case TFRC_SSTATE_NO_FBACK: + case TFRC_SSTATE_FBACK: + if (len > 0) { + hctx->ccid3hctx_t_nom = now; + ccid3_calc_new_t_ipi(hctx); + ccid3_calc_new_delta(hctx); + (hctx->ccid3hctx_t_nom).tv_usec += hctx->ccid3hctx_t_ipi; + timeval_fix(&(hctx->ccid3hctx_t_nom)); + } + break; + default: + printk(KERN_CRIT "%s: %s, sk=%p, Illegal state (%d)!\n", + __FUNCTION__, dccp_role(sk), sk, hctx->ccid3hctx_state); + dump_stack(); + break; + } +} + +static void ccid3_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb) +{ + struct dccp_sock *dp = dccp_sk(sk); + struct ccid3_hc_tx_sock *hctx = dp->dccps_hc_tx_ccid_private; + struct ccid3_options_received *opt_recv; + struct ccid3_tx_hist_entry *entry, *next, *packet; + unsigned long next_tmout; + u16 t_elapsed; + u32 pinv; + u32 x_recv; + u32 r_sample; +#if 0 + ccid3_pr_debug("%s, sk=%p(%s), skb=%p(%s)\n", + dccp_role(sk), sk, dccp_state_name(sk->sk_state), + skb, 
dccp_packet_name(DCCP_SKB_CB(skb)->dccpd_type)); +#endif + if (hctx == NULL) + return; + + if (hctx->ccid3hctx_state == TFRC_SSTATE_TERM) { + ccid3_pr_debug("%s, sk=%p, received a packet when terminating!\n", dccp_role(sk), sk); + return; + } + + /* we are only interested in ACKs */ + if (!(DCCP_SKB_CB(skb)->dccpd_type == DCCP_PKT_ACK || + DCCP_SKB_CB(skb)->dccpd_type == DCCP_PKT_DATAACK)) + return; + + opt_recv = &hctx->ccid3hctx_options_received; + + t_elapsed = dp->dccps_options_received.dccpor_elapsed_time; + x_recv = opt_recv->ccid3or_receive_rate; + pinv = opt_recv->ccid3or_loss_event_rate; + + switch (hctx->ccid3hctx_state) { + case TFRC_SSTATE_NO_SENT: + /* FIXME: what to do here? */ + return; + case TFRC_SSTATE_NO_FBACK: + case TFRC_SSTATE_FBACK: + /* Calculate new round trip sample by + * R_sample = (now - t_recvdata) - t_delay */ + /* get t_recvdata from history */ + packet = NULL; + list_for_each_entry_safe(entry, next, &hctx->ccid3hctx_hist, ccid3htx_node) + if (entry->ccid3htx_seqno == DCCP_SKB_CB(skb)->dccpd_ack_seq) { + packet = entry; + break; + } + + if (packet == NULL) { + ccid3_pr_debug("%s, sk=%p, seqno %llu(%s) does't exist in history!\n", + dccp_role(sk), sk, DCCP_SKB_CB(skb)->dccpd_ack_seq, + dccp_packet_name(DCCP_SKB_CB(skb)->dccpd_type)); + return; + } + + /* Update RTT */ + r_sample = now_delta(packet->ccid3htx_tstamp); + /* FIXME: */ + // r_sample -= usecs_to_jiffies(t_elapsed * 10); + + /* Update RTT estimate by + * If (No feedback recv) + * R = R_sample; + * Else + * R = q * R + (1 - q) * R_sample; + * + * q is a constant, RFC 3448 recomments 0.9 + */ + if (hctx->ccid3hctx_state == TFRC_SSTATE_NO_FBACK) { + ccid3_hc_tx_set_state(sk, TFRC_SSTATE_FBACK); + hctx->ccid3hctx_rtt = r_sample; + } else + hctx->ccid3hctx_rtt = (hctx->ccid3hctx_rtt * 9) / 10 + r_sample / 10; + + /* + * XXX: this is to avoid a division by zero in ccid3_hc_tx_packet_sent + * implemention of the new window count. 
+ */ + if (hctx->ccid3hctx_rtt < 4) + hctx->ccid3hctx_rtt = 4; + + ccid3_pr_debug("%s, sk=%p, New RTT estimate=%uus, r_sample=%us\n", + dccp_role(sk), sk, + hctx->ccid3hctx_rtt, + r_sample); + + /* Update timeout interval */ + inet_csk(sk)->icsk_rto = max_t(u32, 4 * hctx->ccid3hctx_rtt, USEC_IN_SEC); + + /* Update receive rate */ + hctx->ccid3hctx_x_recv = x_recv; /* x_recv in bytes per second */ + + /* Update loss event rate */ + if (pinv == ~0 || pinv == 0) + hctx->ccid3hctx_p = 0; + else { + hctx->ccid3hctx_p = 1000000 / pinv; + + if (hctx->ccid3hctx_p < TFRC_SMALLEST_P) { + hctx->ccid3hctx_p = TFRC_SMALLEST_P; + ccid3_pr_debug("%s, sk=%p, Smallest p used!\n", dccp_role(sk), sk); + } + } + + /* unschedule no feedback timer */ + sk_stop_timer(sk, &hctx->ccid3hctx_no_feedback_timer); + + /* Update sending rate */ + ccid3_hc_tx_update_x(sk); + + /* Update next send time */ + if (hctx->ccid3hctx_t_ipi > (hctx->ccid3hctx_t_nom).tv_usec) { + (hctx->ccid3hctx_t_nom).tv_usec += USEC_IN_SEC; + (hctx->ccid3hctx_t_nom).tv_sec--; + } + /* FIXME - if no feedback then t_ipi can go > 1 second */ + (hctx->ccid3hctx_t_nom).tv_usec -= hctx->ccid3hctx_t_ipi; + ccid3_calc_new_t_ipi(hctx); + (hctx->ccid3hctx_t_nom).tv_usec += hctx->ccid3hctx_t_ipi; + timeval_fix(&(hctx->ccid3hctx_t_nom)); + ccid3_calc_new_delta(hctx); + + /* remove all packets older than the one acked from history */ +#if 0 + FIXME! 
+ list_for_each_entry_safe_continue(entry, next, &hctx->ccid3hctx_hist, ccid3htx_node) { + list_del_init(&entry->ccid3htx_node); + ccid3_tx_hist_entry_delete(entry); + } +#endif + if (hctx->ccid3hctx_x < 10) { + ccid3_pr_debug("ccid3_hc_tx_packet_recv hctx->ccid3hctx_x < 10\n"); + hctx->ccid3hctx_x = 10; + } + /* to prevent divide by zero below */ + + /* Schedule no feedback timer to expire in max(4 * R, 2 * s / X) */ + next_tmout = max(inet_csk(sk)->icsk_rto, + 2 * (hctx->ccid3hctx_s * 100000) / (hctx->ccid3hctx_x/10)); + /* maths with 100000 and 10 is to prevent overflow with 32 bit */ + + ccid3_pr_debug("%s, sk=%p, Scheduled no feedback timer to expire in %lu jiffies (%luus)\n", + dccp_role(sk), sk, usecs_to_jiffies(next_tmout), next_tmout); + + sk_reset_timer(sk, &hctx->ccid3hctx_no_feedback_timer, + jiffies + max_t(u32,1,usecs_to_jiffies(next_tmout))); + + /* set idle flag */ + hctx->ccid3hctx_idle = 1; + break; + default: + printk(KERN_CRIT "%s: %s, sk=%p, Illegal state (%d)!\n", + __FUNCTION__, dccp_role(sk), sk, hctx->ccid3hctx_state); + dump_stack(); + break; + } +} + +static void ccid3_hc_tx_insert_options(struct sock *sk, struct sk_buff *skb) +{ + const struct dccp_sock *dp = dccp_sk(sk); + struct ccid3_hc_tx_sock *hctx = dp->dccps_hc_tx_ccid_private; + + if (hctx == NULL || !(sk->sk_state == DCCP_OPEN || sk->sk_state == DCCP_PARTOPEN)) + return; + + DCCP_SKB_CB(skb)->dccpd_ccval = hctx->ccid3hctx_last_win_count; +} + +static int ccid3_hc_tx_parse_options(struct sock *sk, unsigned char option, + unsigned char len, u16 idx, unsigned char *value) +{ + int rc = 0; + struct dccp_sock *dp = dccp_sk(sk); + struct ccid3_hc_tx_sock *hctx = dp->dccps_hc_tx_ccid_private; + struct ccid3_options_received *opt_recv; + + if (hctx == NULL) + return 0; + + opt_recv = &hctx->ccid3hctx_options_received; + + if (opt_recv->ccid3or_seqno != dp->dccps_gsr) { + opt_recv->ccid3or_seqno = dp->dccps_gsr; + opt_recv->ccid3or_loss_event_rate = ~0; + 
opt_recv->ccid3or_loss_intervals_idx = 0; + opt_recv->ccid3or_loss_intervals_len = 0; + opt_recv->ccid3or_receive_rate = 0; + } + + switch (option) { + case TFRC_OPT_LOSS_EVENT_RATE: + if (len != 4) { + ccid3_pr_debug("%s, sk=%p, invalid len for TFRC_OPT_LOSS_EVENT_RATE\n", + dccp_role(sk), sk); + rc = -EINVAL; + } else { + opt_recv->ccid3or_loss_event_rate = ntohl(*(u32 *)value); + ccid3_pr_debug("%s, sk=%p, LOSS_EVENT_RATE=%u\n", + dccp_role(sk), sk, + opt_recv->ccid3or_loss_event_rate); + } + break; + case TFRC_OPT_LOSS_INTERVALS: + opt_recv->ccid3or_loss_intervals_idx = idx; + opt_recv->ccid3or_loss_intervals_len = len; + ccid3_pr_debug("%s, sk=%p, LOSS_INTERVALS=(%u, %u)\n", + dccp_role(sk), sk, + opt_recv->ccid3or_loss_intervals_idx, + opt_recv->ccid3or_loss_intervals_len); + break; + case TFRC_OPT_RECEIVE_RATE: + if (len != 4) { + ccid3_pr_debug("%s, sk=%p, invalid len for TFRC_OPT_RECEIVE_RATE\n", + dccp_role(sk), sk); + rc = -EINVAL; + } else { + opt_recv->ccid3or_receive_rate = ntohl(*(u32 *)value); + ccid3_pr_debug("%s, sk=%p, RECEIVE_RATE=%u\n", + dccp_role(sk), sk, + opt_recv->ccid3or_receive_rate); + } + break; + } + + return rc; +} + +static int ccid3_hc_tx_init(struct sock *sk) +{ + struct dccp_sock *dp = dccp_sk(sk); + struct ccid3_hc_tx_sock *hctx; + + ccid3_pr_debug("%s, sk=%p\n", dccp_role(sk), sk); + + hctx = dp->dccps_hc_tx_ccid_private = kmalloc(sizeof(*hctx), gfp_any()); + if (hctx == NULL) + return -ENOMEM; + + memset(hctx, 0, sizeof(*hctx)); + + if (dp->dccps_avg_packet_size >= TFRC_MIN_PACKET_SIZE && + dp->dccps_avg_packet_size <= TFRC_MAX_PACKET_SIZE) + hctx->ccid3hctx_s = (u16)dp->dccps_avg_packet_size; + else + hctx->ccid3hctx_s = TFRC_STD_PACKET_SIZE; + + hctx->ccid3hctx_x = hctx->ccid3hctx_s; /* set transmission rate to 1 packet per second */ + hctx->ccid3hctx_rtt = 4; /* See ccid3_hc_tx_packet_sent win_count calculatation */ + inet_csk(sk)->icsk_rto = USEC_IN_SEC; + hctx->ccid3hctx_state = TFRC_SSTATE_NO_SENT; + 
INIT_LIST_HEAD(&hctx->ccid3hctx_hist); + init_timer(&hctx->ccid3hctx_no_feedback_timer); + + return 0; +} + +static void ccid3_hc_tx_exit(struct sock *sk) +{ + struct dccp_sock *dp = dccp_sk(sk); + struct ccid3_hc_tx_sock *hctx = dp->dccps_hc_tx_ccid_private; + struct ccid3_tx_hist_entry *entry, *next; + + ccid3_pr_debug("%s, sk=%p\n", dccp_role(sk), sk); + BUG_ON(hctx == NULL); + + ccid3_hc_tx_set_state(sk, TFRC_SSTATE_TERM); + sk_stop_timer(sk, &hctx->ccid3hctx_no_feedback_timer); + + /* Empty packet history */ + list_for_each_entry_safe(entry, next, &hctx->ccid3hctx_hist, ccid3htx_node) { + list_del_init(&entry->ccid3htx_node); + ccid3_tx_hist_entry_delete(entry); + } + + kfree(dp->dccps_hc_tx_ccid_private); + dp->dccps_hc_tx_ccid_private = NULL; +} + +/* + * RX Half Connection methods + */ + +/* TFRC receiver states */ +enum ccid3_hc_rx_states { + TFRC_RSTATE_NO_DATA = 1, + TFRC_RSTATE_DATA, + TFRC_RSTATE_TERM = 127, +}; + +#ifdef CCID3_DEBUG +static const char *ccid3_rx_state_name(enum ccid3_hc_rx_states state) +{ + static char *ccid3_rx_state_names[] = { + [TFRC_RSTATE_NO_DATA] = "NO_DATA", + [TFRC_RSTATE_DATA] = "DATA", + [TFRC_RSTATE_TERM] = "TERM", + }; + + return ccid3_rx_state_names[state]; +} +#endif + +static inline void ccid3_hc_rx_set_state(struct sock *sk, enum ccid3_hc_rx_states state) +{ + struct dccp_sock *dp = dccp_sk(sk); + struct ccid3_hc_rx_sock *hcrx = dp->dccps_hc_rx_ccid_private; + enum ccid3_hc_rx_states oldstate = hcrx->ccid3hcrx_state; + + ccid3_pr_debug("%s(%p) %-8.8s -> %s\n", + dccp_role(sk), sk, ccid3_rx_state_name(oldstate), ccid3_rx_state_name(state)); + WARN_ON(state == oldstate); + hcrx->ccid3hcrx_state = state; +} + +static int ccid3_hc_rx_add_hist(struct sock *sk, struct ccid3_rx_hist_entry *packet) +{ + struct dccp_sock *dp = dccp_sk(sk); + struct ccid3_hc_rx_sock *hcrx = dp->dccps_hc_rx_ccid_private; + struct ccid3_rx_hist_entry *entry, *next; + u8 num_later = 0; + + if (list_empty(&hcrx->ccid3hcrx_hist)) + 
list_add(&packet->ccid3hrx_node, &hcrx->ccid3hcrx_hist); + else { + u64 seqno = packet->ccid3hrx_seqno; + struct ccid3_rx_hist_entry *iter = list_entry(hcrx->ccid3hcrx_hist.next, + struct ccid3_rx_hist_entry, + ccid3hrx_node); + if (after48(seqno, iter->ccid3hrx_seqno)) + list_add(&packet->ccid3hrx_node, &hcrx->ccid3hcrx_hist); + else { + if (iter->ccid3hrx_type == DCCP_PKT_DATA || + iter->ccid3hrx_type == DCCP_PKT_DATAACK) + num_later = 1; + + list_for_each_entry_continue(iter, &hcrx->ccid3hcrx_hist, ccid3hrx_node) { + if (after48(seqno, iter->ccid3hrx_seqno)) { + list_add(&packet->ccid3hrx_node, &iter->ccid3hrx_node); + goto trim_history; + } + + if (iter->ccid3hrx_type == DCCP_PKT_DATA || + iter->ccid3hrx_type == DCCP_PKT_DATAACK) + num_later++; + + if (num_later == TFRC_RECV_NUM_LATE_LOSS) { + ccid3_rx_hist_entry_delete(packet); + ccid3_pr_debug("%s, sk=%p, packet(%llu) already lost!\n", + dccp_role(sk), sk, seqno); + return 1; + } + } + + if (num_later < TFRC_RECV_NUM_LATE_LOSS) + list_add_tail(&packet->ccid3hrx_node, &hcrx->ccid3hcrx_hist); + /* FIXME: else what? should we destroy the packet like above? */ + } + } + +trim_history: + /* Trim history (remove all packets after the NUM_LATE_LOSS + 1 data packets) */ + num_later = TFRC_RECV_NUM_LATE_LOSS + 1; + + if (!list_empty(&hcrx->ccid3hcrx_loss_interval_hist)) { + list_for_each_entry_safe(entry, next, &hcrx->ccid3hcrx_hist, ccid3hrx_node) { + if (num_later == 0) { + list_del_init(&entry->ccid3hrx_node); + ccid3_rx_hist_entry_delete(entry); + } else if (entry->ccid3hrx_type == DCCP_PKT_DATA || + entry->ccid3hrx_type == DCCP_PKT_DATAACK) + --num_later; + } + } else { + int step = 0; + u8 win_count = 0; /* Not needed, but lets shut up gcc */ + int tmp; + /* + * We have no loss interval history so we need at least one + * rtt:s of data packets to approximate rtt. 
+ */ + list_for_each_entry_safe(entry, next, &hcrx->ccid3hcrx_hist, ccid3hrx_node) { + if (num_later == 0) { + switch (step) { + case 0: + step = 1; + /* OK, find next data packet */ + num_later = 1; + break; + case 1: + step = 2; + /* OK, find next data packet */ + num_later = 1; + win_count = entry->ccid3hrx_win_count; + break; + case 2: + tmp = win_count - entry->ccid3hrx_win_count; + if (tmp < 0) + tmp += TFRC_WIN_COUNT_LIMIT; + if (tmp > TFRC_WIN_COUNT_PER_RTT + 1) { + /* we have found a packet older than one rtt + * remove the rest */ + step = 3; + } else /* OK, find next data packet */ + num_later = 1; + break; + case 3: + list_del_init(&entry->ccid3hrx_node); + ccid3_rx_hist_entry_delete(entry); + break; + } + } else if (entry->ccid3hrx_type == DCCP_PKT_DATA || + entry->ccid3hrx_type == DCCP_PKT_DATAACK) + --num_later; + } + } + + return 0; +} + +static void ccid3_hc_rx_send_feedback(struct sock *sk) +{ + struct dccp_sock *dp = dccp_sk(sk); + struct ccid3_hc_rx_sock *hcrx = dp->dccps_hc_rx_ccid_private; + struct ccid3_rx_hist_entry *entry, *packet; + + ccid3_pr_debug("%s, sk=%p\n", dccp_role(sk), sk); + + switch (hcrx->ccid3hcrx_state) { + case TFRC_RSTATE_NO_DATA: + hcrx->ccid3hcrx_x_recv = 0; + break; + case TFRC_RSTATE_DATA: { + u32 delta = now_delta(hcrx->ccid3hcrx_tstamp_last_feedback); + + if (delta == 0) + delta = 1; /* to prevent divide by zero */ + hcrx->ccid3hcrx_x_recv = (hcrx->ccid3hcrx_bytes_recv * USEC_IN_SEC) / delta; + } + break; + default: + printk(KERN_CRIT "%s: %s, sk=%p, Illegal state (%d)!\n", + __FUNCTION__, dccp_role(sk), sk, hcrx->ccid3hcrx_state); + dump_stack(); + return; + } + + packet = NULL; + list_for_each_entry(entry, &hcrx->ccid3hcrx_hist, ccid3hrx_node) + if (entry->ccid3hrx_type == DCCP_PKT_DATA || + entry->ccid3hrx_type == DCCP_PKT_DATAACK) { + packet = entry; + break; + } + + if (packet == NULL) { + printk(KERN_CRIT "%s: %s, sk=%p, no data packet in history!\n", + __FUNCTION__, dccp_role(sk), sk); + dump_stack(); + 
return; + } + + do_gettimeofday(&(hcrx->ccid3hcrx_tstamp_last_feedback)); + hcrx->ccid3hcrx_last_counter = packet->ccid3hrx_win_count; + hcrx->ccid3hcrx_seqno_last_counter = packet->ccid3hrx_seqno; + hcrx->ccid3hcrx_bytes_recv = 0; + + /* Convert to multiples of 10us */ + hcrx->ccid3hcrx_elapsed_time = now_delta(packet->ccid3hrx_tstamp) / 10; + if (hcrx->ccid3hcrx_p == 0) + hcrx->ccid3hcrx_pinv = ~0; + else + hcrx->ccid3hcrx_pinv = 1000000 / hcrx->ccid3hcrx_p; + dccp_send_ack(sk); +} + +static void ccid3_hc_rx_insert_options(struct sock *sk, struct sk_buff *skb) +{ + const struct dccp_sock *dp = dccp_sk(sk); + struct ccid3_hc_rx_sock *hcrx = dp->dccps_hc_rx_ccid_private; + + if (hcrx == NULL || !(sk->sk_state == DCCP_OPEN || sk->sk_state == DCCP_PARTOPEN)) + return; + + if (hcrx->ccid3hcrx_elapsed_time != 0 && !dccp_packet_without_ack(skb)) + dccp_insert_option_elapsed_time(sk, skb, hcrx->ccid3hcrx_elapsed_time); + + if (DCCP_SKB_CB(skb)->dccpd_type != DCCP_PKT_DATA) { + const u32 x_recv = htonl(hcrx->ccid3hcrx_x_recv); + const u32 pinv = htonl(hcrx->ccid3hcrx_pinv); + + dccp_insert_option(sk, skb, TFRC_OPT_LOSS_EVENT_RATE, &pinv, sizeof(pinv)); + dccp_insert_option(sk, skb, TFRC_OPT_RECEIVE_RATE, &x_recv, sizeof(x_recv)); + } + + DCCP_SKB_CB(skb)->dccpd_ccval = hcrx->ccid3hcrx_last_counter; +} + +/* Weights used to calculate loss event rate */ +/* + * These are integers as per section 8 of RFC3448. We can then divide by 4 * + * when we use it. 
+ */
+const int ccid3_hc_rx_w[TFRC_RECV_IVAL_F_LENGTH] = { 4, 4, 4, 4, 3, 2, 1, 1, };
+
+/*
+ * calcx_reverse_lookup - find the loss event rate matching a function value
+ *
+ * args: fvalue - function value to match
+ * returns: p closest to that value
+ *
+ * both fvalue and p are multiplied by 1,000,000 to use ints
+ *
+ * Column 1 of calcx_lookup is searched when fvalue does not exceed its last
+ * entry (result scaled to CALCX_SPLIT); otherwise column 0 is searched
+ * (result scaled to 1000000). Values below calcx_lookup[0][1] map to 0 and
+ * values above the last column 0 entry map to 1000000.
+ */
+u32 calcx_reverse_lookup(u32 fvalue)
+{
+	int ctr = 0;
+	int small;
+
+	if (fvalue < calcx_lookup[0][1])
+		return 0;
+	if (fvalue <= calcx_lookup[CALCX_ARRSIZE-1][1])
+		small = 1;
+	else if (fvalue > calcx_lookup[CALCX_ARRSIZE-1][0])
+		return 1000000;
+	else
+		small = 0;
+	/* the range checks above guarantee the scan stops inside the table */
+	while (fvalue > calcx_lookup[ctr][small])
+		ctr++;
+	if (small)
+		return (CALCX_SPLIT * ctr / CALCX_ARRSIZE);
+	else
+		return (1000000 * ctr / CALCX_ARRSIZE);
+}
+
+/*
+ * ccid3_hc_rx_calc_first_li - calculate first loss interval
+ *
+ * Approximates the RTT from the timestamps and window counters of the data
+ * packets in the receive history, derives the receive rate since the last
+ * feedback, and reverse-looks-up the loss event rate p implied by them.
+ *
+ * returns 1000000 / p, or ~0 when the history holds no data packet, when
+ * the rate * RTT product is too small to evaluate, or when p is 0.
+ */
+static u32 ccid3_hc_rx_calc_first_li(struct sock *sk)
+{
+	struct dccp_sock *dp = dccp_sk(sk);
+	struct ccid3_hc_rx_sock *hcrx = dp->dccps_hc_rx_ccid_private;
+	struct ccid3_rx_hist_entry *entry, *tail = NULL;
+	u32 rtt, delta, x_recv, fval, p, tmp2;
+	struct timeval tstamp, tmp_tv;
+	int interval = 0;
+	int win_count = 0;
+	int step = 0;
+	u64 tmp1;
+
+	/* nothing is removed while walking, so the plain iterator suffices */
+	list_for_each_entry(entry, &hcrx->ccid3hcrx_hist, ccid3hrx_node) {
+		if (entry->ccid3hrx_type == DCCP_PKT_DATA ||
+		    entry->ccid3hrx_type == DCCP_PKT_DATAACK) {
+			tail = entry;
+
+			switch (step) {
+			case 0:
+				/* first data packet: reference timestamp and counter */
+				tstamp = entry->ccid3hrx_tstamp;
+				win_count = entry->ccid3hrx_win_count;
+				step = 1;
+				break;
+			case 1:
+				interval = win_count - entry->ccid3hrx_win_count;
+				if (interval < 0)
+					interval += TFRC_WIN_COUNT_LIMIT;
+				if (interval > 4)
+					goto found;
+				break;
+			}
+		}
+	}
+
+	if (step == 0) {
+		printk(KERN_CRIT "%s: %s, sk=%p, packet history contains no data packets!\n",
+		       __FUNCTION__, dccp_role(sk), sk);
+		return ~0;
+	}
+
+	if (interval == 0) {
+		ccid3_pr_debug("%s, sk=%p, Could not find a win_count interval > 0. Defaulting to 1\n",
+			       dccp_role(sk), sk);
+		interval = 1;
+	}
+found:
+	timeval_sub(tstamp,tail->ccid3hrx_tstamp,&tmp_tv);
+	rtt = (tmp_tv.tv_sec * USEC_IN_SEC + tmp_tv.tv_usec) * 4 / interval;
+	ccid3_pr_debug("%s, sk=%p, approximated RTT to %uus\n",
+		       dccp_role(sk), sk, rtt);
+	if (rtt == 0)
+		rtt = 1;
+
+	delta = now_delta(hcrx->ccid3hcrx_tstamp_last_feedback);
+	if (delta == 0)
+		delta = 1;
+
+	x_recv = (hcrx->ccid3hcrx_bytes_recv * USEC_IN_SEC) / delta;
+
+	tmp1 = (u64)x_recv * (u64)rtt;
+	do_div(tmp1,10000000);
+	tmp2 = (u32)tmp1;
+	if (tmp2 == 0) {
+		/*
+		 * x_recv * rtt < 10000000 (e.g. no bytes received yet):
+		 * dividing by tmp2 below would be a division by zero.
+		 */
+		ccid3_pr_debug("%s, sk=%p, x_recv=%u, rtt=%u too small to estimate loss rate\n",
+			       dccp_role(sk), sk, x_recv, rtt);
+		return ~0;
+	}
+	fval = (hcrx->ccid3hcrx_s * 100000) / tmp2;
+	/* do not alter order above or you will get overflow on 32 bit */
+	p = calcx_reverse_lookup(fval);
+	ccid3_pr_debug("%s, sk=%p, receive rate=%u bytes/s, implied loss rate=%u\n",
+		       dccp_role(sk), sk, x_recv, p);
+
+	if (p == 0)
+		return ~0;
+	else
+		return 1000000 / p;
+}
+
+static void ccid3_hc_rx_update_li(struct sock *sk, u64 seq_loss, u8 win_loss)
+{
+	struct dccp_sock *dp = dccp_sk(sk);
+	struct ccid3_hc_rx_sock *hcrx = dp->dccps_hc_rx_ccid_private;
+	struct ccid3_loss_interval_hist_entry *li_entry;
+
+	if (seq_loss != DCCP_MAX_SEQNO + 1) {
+		ccid3_pr_debug("%s, sk=%p, seq_loss=%llu, win_loss=%u, packet loss detected\n",
+			       dccp_role(sk), sk, seq_loss, win_loss);
+
+		if (list_empty(&hcrx->ccid3hcrx_loss_interval_hist)) {
+			struct ccid3_loss_interval_hist_entry *li_tail = NULL;
+			int i;
+
+			ccid3_pr_debug("%s, sk=%p, first loss event detected, creating history\n", dccp_role(sk), sk);
+			for (i = 0; i <= TFRC_RECV_IVAL_F_LENGTH; ++i) {
+				li_entry = ccid3_loss_interval_hist_entry_new(SLAB_ATOMIC);
+				if (li_entry == NULL) {
+					ccid3_loss_interval_history_delete(&hcrx->ccid3hcrx_loss_interval_hist);
+					ccid3_pr_debug("%s, sk=%p, not enough mem for creating history\n",
+						       dccp_role(sk), sk);
+					return;
+				}
+				if (li_tail == NULL)
+					li_tail = li_entry;
+				list_add(&li_entry->ccid3lih_node, &hcrx->ccid3hcrx_loss_interval_hist);
+			}
+
+			li_entry->ccid3lih_seqno = seq_loss;
+
li_entry->ccid3lih_win_count = win_loss; + + li_tail->ccid3lih_interval = ccid3_hc_rx_calc_first_li(sk); + } + } + /* FIXME: find end of interval */ +} + +static void ccid3_hc_rx_detect_loss(struct sock *sk) +{ + struct dccp_sock *dp = dccp_sk(sk); + struct ccid3_hc_rx_sock *hcrx = dp->dccps_hc_rx_ccid_private; + struct ccid3_rx_hist_entry *entry, *a_next, *b_next, *packet; + struct ccid3_rx_hist_entry *a_loss = NULL; + struct ccid3_rx_hist_entry *b_loss = NULL; + u64 seq_loss = DCCP_MAX_SEQNO + 1; + u8 win_loss = 0; + u8 num_later = TFRC_RECV_NUM_LATE_LOSS; + + list_for_each_entry_safe(entry, b_next, &hcrx->ccid3hcrx_hist, ccid3hrx_node) { + if (num_later == 0) { + b_loss = entry; + break; + } else if (entry->ccid3hrx_type == DCCP_PKT_DATA || + entry->ccid3hrx_type == DCCP_PKT_DATAACK) + --num_later; + } + + if (b_loss == NULL) + goto out_update_li; + + a_next = b_next; + num_later = 1; +#if 0 + FIXME MERGE GIT! + list_for_each_entry_safe_continue(entry, a_next, &hcrx->ccid3hcrx_hist, ccid3hrx_node) { + if (num_later == 0) { + a_loss = entry; + break; + } else if (entry->ccid3hrx_type == DCCP_PKT_DATA || + entry->ccid3hrx_type == DCCP_PKT_DATAACK) + --num_later; + } +#endif + + if (a_loss == NULL) { + if (list_empty(&hcrx->ccid3hcrx_loss_interval_hist)) { + /* no loss event have occured yet */ + ccid3_pr_debug("%s, sk=%p, TODO: find a lost data " + "packet by comparing to initial seqno\n", + dccp_role(sk), sk); + goto out_update_li; + } else { + pr_info("%s: %s, sk=%p, ERROR! Less than 4 data packets in history", + __FUNCTION__, dccp_role(sk), sk); + return; + } + } + + /* Locate a lost data packet */ + entry = packet = b_loss; +#if 0 + FIXME MERGE GIT! 
+ list_for_each_entry_safe_continue(entry, b_next, &hcrx->ccid3hcrx_hist, ccid3hrx_node) { + u64 delta = dccp_delta_seqno(entry->ccid3hrx_seqno, packet->ccid3hrx_seqno); + + if (delta != 0) { + if (packet->ccid3hrx_type == DCCP_PKT_DATA || + packet->ccid3hrx_type == DCCP_PKT_DATAACK) + --delta; + /* + * FIXME: check this, probably this % usage is because + * in earlier drafts the ndp count was just 8 bits + * long, but now it cam be up to 24 bits long. + */ +#if 0 + if (delta % DCCP_NDP_LIMIT != + (packet->ccid3hrx_ndp - entry->ccid3hrx_ndp) % DCCP_NDP_LIMIT) +#endif + if (delta != packet->ccid3hrx_ndp - entry->ccid3hrx_ndp) { + seq_loss = entry->ccid3hrx_seqno; + dccp_inc_seqno(&seq_loss); + } + } + packet = entry; + if (packet == a_loss) + break; + } +#endif + + if (seq_loss != DCCP_MAX_SEQNO + 1) + win_loss = a_loss->ccid3hrx_win_count; + +out_update_li: + ccid3_hc_rx_update_li(sk, seq_loss, win_loss); +} + +static u32 ccid3_hc_rx_calc_i_mean(struct sock *sk) +{ + struct dccp_sock *dp = dccp_sk(sk); + struct ccid3_hc_rx_sock *hcrx = dp->dccps_hc_rx_ccid_private; + struct ccid3_loss_interval_hist_entry *li_entry, *li_next; + int i = 0; + u32 i_tot; + u32 i_tot0 = 0; + u32 i_tot1 = 0; + u32 w_tot = 0; + + list_for_each_entry_safe(li_entry, li_next, &hcrx->ccid3hcrx_loss_interval_hist, ccid3lih_node) { + if (i < TFRC_RECV_IVAL_F_LENGTH) { + i_tot0 += li_entry->ccid3lih_interval * ccid3_hc_rx_w[i]; + w_tot += ccid3_hc_rx_w[i]; + } + + if (i != 0) + i_tot1 += li_entry->ccid3lih_interval * ccid3_hc_rx_w[i - 1]; + + if (++i > TFRC_RECV_IVAL_F_LENGTH) + break; + } + + if (i != TFRC_RECV_IVAL_F_LENGTH) { + pr_info("%s: %s, sk=%p, ERROR! Missing entry in interval history!\n", + __FUNCTION__, dccp_role(sk), sk); + return 0; + } + + i_tot = max(i_tot0, i_tot1); + + /* FIXME: Why do we do this? 
-Ian McDonald */ + if (i_tot * 4 < w_tot) + i_tot = w_tot * 4; + + return i_tot * 4 / w_tot; +} + +static void ccid3_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb) +{ + struct dccp_sock *dp = dccp_sk(sk); + struct ccid3_hc_rx_sock *hcrx = dp->dccps_hc_rx_ccid_private; + struct ccid3_rx_hist_entry *packet; + struct timeval now; + u8 win_count; + u32 p_prev; + int ins; +#if 0 + ccid3_pr_debug("%s, sk=%p(%s), skb=%p(%s)\n", + dccp_role(sk), sk, dccp_state_name(sk->sk_state), + skb, dccp_packet_name(DCCP_SKB_CB(skb)->dccpd_type)); +#endif + if (hcrx == NULL) + return; + + BUG_ON(!(hcrx->ccid3hcrx_state == TFRC_RSTATE_NO_DATA || + hcrx->ccid3hcrx_state == TFRC_RSTATE_DATA)); + + switch (DCCP_SKB_CB(skb)->dccpd_type) { + case DCCP_PKT_ACK: + if (hcrx->ccid3hcrx_state == TFRC_RSTATE_NO_DATA) + return; + case DCCP_PKT_DATAACK: + if (dp->dccps_options_received.dccpor_timestamp_echo == 0) + break; + p_prev = hcrx->ccid3hcrx_rtt; + do_gettimeofday(&now); + /* hcrx->ccid3hcrx_rtt = now - dp->dccps_options_received.dccpor_timestamp_echo - + usecs_to_jiffies(dp->dccps_options_received.dccpor_elapsed_time * 10); + FIXME - I think above code is broken - have to look at options more, will also need + to fix pr_debug below */ + if (p_prev != hcrx->ccid3hcrx_rtt) + ccid3_pr_debug("%s, sk=%p, New RTT estimate=%lu jiffies, tstamp_echo=%u, elapsed time=%u\n", + dccp_role(sk), sk, hcrx->ccid3hcrx_rtt, + dp->dccps_options_received.dccpor_timestamp_echo, + dp->dccps_options_received.dccpor_elapsed_time); + break; + case DCCP_PKT_DATA: + break; + default: + ccid3_pr_debug("%s, sk=%p, not DATA/DATAACK/ACK packet(%s)\n", + dccp_role(sk), sk, + dccp_packet_name(DCCP_SKB_CB(skb)->dccpd_type)); + return; + } + + packet = ccid3_rx_hist_entry_new(sk, skb, SLAB_ATOMIC); + if (packet == NULL) { + ccid3_pr_debug("%s, sk=%p, Not enough mem to add rx packet to history (consider it lost)!", + dccp_role(sk), sk); + return; + } + + win_count = packet->ccid3hrx_win_count; + + ins = 
ccid3_hc_rx_add_hist(sk, packet); + + if (DCCP_SKB_CB(skb)->dccpd_type == DCCP_PKT_ACK) + return; + + switch (hcrx->ccid3hcrx_state) { + case TFRC_RSTATE_NO_DATA: + ccid3_pr_debug("%s, sk=%p(%s), skb=%p, sending initial feedback\n", + dccp_role(sk), sk, dccp_state_name(sk->sk_state), skb); + ccid3_hc_rx_send_feedback(sk); + ccid3_hc_rx_set_state(sk, TFRC_RSTATE_DATA); + return; + case TFRC_RSTATE_DATA: + hcrx->ccid3hcrx_bytes_recv += skb->len - dccp_hdr(skb)->dccph_doff * 4; + if (ins == 0) { + do_gettimeofday(&now); + if ((now_delta(hcrx->ccid3hcrx_tstamp_last_ack)) >= hcrx->ccid3hcrx_rtt) { + hcrx->ccid3hcrx_tstamp_last_ack = now; + ccid3_hc_rx_send_feedback(sk); + } + return; + } + break; + default: + printk(KERN_CRIT "%s: %s, sk=%p, Illegal state (%d)!\n", + __FUNCTION__, dccp_role(sk), sk, hcrx->ccid3hcrx_state); + dump_stack(); + return; + } + + /* Dealing with packet loss */ + ccid3_pr_debug("%s, sk=%p(%s), skb=%p, data loss! Reacting...\n", + dccp_role(sk), sk, dccp_state_name(sk->sk_state), skb); + + ccid3_hc_rx_detect_loss(sk); + p_prev = hcrx->ccid3hcrx_p; + + /* Calculate loss event rate */ + if (!list_empty(&hcrx->ccid3hcrx_loss_interval_hist)) + /* Scaling up by 1000000 as fixed decimal */ + hcrx->ccid3hcrx_p = 1000000 / ccid3_hc_rx_calc_i_mean(sk); + + if (hcrx->ccid3hcrx_p > p_prev) { + ccid3_hc_rx_send_feedback(sk); + return; + } +} + +static int ccid3_hc_rx_init(struct sock *sk) +{ + struct dccp_sock *dp = dccp_sk(sk); + struct ccid3_hc_rx_sock *hcrx; + + ccid3_pr_debug("%s, sk=%p\n", dccp_role(sk), sk); + + hcrx = dp->dccps_hc_rx_ccid_private = kmalloc(sizeof(*hcrx), gfp_any()); + if (hcrx == NULL) + return -ENOMEM; + + memset(hcrx, 0, sizeof(*hcrx)); + + if (dp->dccps_avg_packet_size >= TFRC_MIN_PACKET_SIZE && + dp->dccps_avg_packet_size <= TFRC_MAX_PACKET_SIZE) + hcrx->ccid3hcrx_s = (u16)dp->dccps_avg_packet_size; + else + hcrx->ccid3hcrx_s = TFRC_STD_PACKET_SIZE; + + hcrx->ccid3hcrx_state = TFRC_RSTATE_NO_DATA; + 
INIT_LIST_HEAD(&hcrx->ccid3hcrx_hist); + INIT_LIST_HEAD(&hcrx->ccid3hcrx_loss_interval_hist); + + return 0; +} + +static void ccid3_hc_rx_exit(struct sock *sk) +{ + struct dccp_sock *dp = dccp_sk(sk); + struct ccid3_hc_rx_sock *hcrx = dp->dccps_hc_rx_ccid_private; + + ccid3_pr_debug("%s, sk=%p\n", dccp_role(sk), sk); + + if (hcrx == NULL) + return; + + ccid3_hc_rx_set_state(sk, TFRC_RSTATE_TERM); + + /* Empty packet history */ + ccid3_rx_history_delete(&hcrx->ccid3hcrx_hist); + + /* Empty loss interval history */ + ccid3_loss_interval_history_delete(&hcrx->ccid3hcrx_loss_interval_hist); + + kfree(dp->dccps_hc_rx_ccid_private); + dp->dccps_hc_rx_ccid_private = NULL; +} + +static struct ccid ccid3 = { + .ccid_id = 3, + .ccid_name = "ccid3", + .ccid_owner = THIS_MODULE, + .ccid_init = ccid3_init, + .ccid_exit = ccid3_exit, + .ccid_hc_tx_init = ccid3_hc_tx_init, + .ccid_hc_tx_exit = ccid3_hc_tx_exit, + .ccid_hc_tx_send_packet = ccid3_hc_tx_send_packet, + .ccid_hc_tx_packet_sent = ccid3_hc_tx_packet_sent, + .ccid_hc_tx_packet_recv = ccid3_hc_tx_packet_recv, + .ccid_hc_tx_insert_options = ccid3_hc_tx_insert_options, + .ccid_hc_tx_parse_options = ccid3_hc_tx_parse_options, + .ccid_hc_rx_init = ccid3_hc_rx_init, + .ccid_hc_rx_exit = ccid3_hc_rx_exit, + .ccid_hc_rx_insert_options = ccid3_hc_rx_insert_options, + .ccid_hc_rx_packet_recv = ccid3_hc_rx_packet_recv, +}; + +module_param(ccid3_debug, int, 0444); +MODULE_PARM_DESC(ccid3_debug, "Enable debug messages"); + +static __init int ccid3_module_init(void) +{ + int rc = -ENOMEM; + + ccid3_tx_hist_slab = kmem_cache_create("dccp_ccid3_tx_history", + sizeof(struct ccid3_tx_hist_entry), 0, + SLAB_HWCACHE_ALIGN, NULL, NULL); + if (ccid3_tx_hist_slab == NULL) + goto out; + + ccid3_rx_hist_slab = kmem_cache_create("dccp_ccid3_rx_history", + sizeof(struct ccid3_rx_hist_entry), 0, + SLAB_HWCACHE_ALIGN, NULL, NULL); + if (ccid3_rx_hist_slab == NULL) + goto out_free_tx_history; + + ccid3_loss_interval_hist_slab = 
kmem_cache_create("dccp_ccid3_loss_interval_history", + sizeof(struct ccid3_loss_interval_hist_entry), 0, + SLAB_HWCACHE_ALIGN, NULL, NULL); + if (ccid3_loss_interval_hist_slab == NULL) + goto out_free_rx_history; + + rc = ccid_register(&ccid3); + if (rc != 0) + goto out_free_loss_interval_history; + +out: + return rc; +out_free_loss_interval_history: + kmem_cache_destroy(ccid3_loss_interval_hist_slab); + ccid3_loss_interval_hist_slab = NULL; +out_free_rx_history: + kmem_cache_destroy(ccid3_rx_hist_slab); + ccid3_rx_hist_slab = NULL; +out_free_tx_history: + kmem_cache_destroy(ccid3_tx_hist_slab); + ccid3_tx_hist_slab = NULL; + goto out; +} +module_init(ccid3_module_init); + +static __exit void ccid3_module_exit(void) +{ + ccid_unregister(&ccid3); + + if (ccid3_tx_hist_slab != NULL) { + kmem_cache_destroy(ccid3_tx_hist_slab); + ccid3_tx_hist_slab = NULL; + } + if (ccid3_rx_hist_slab != NULL) { + kmem_cache_destroy(ccid3_rx_hist_slab); + ccid3_rx_hist_slab = NULL; + } + if (ccid3_loss_interval_hist_slab != NULL) { + kmem_cache_destroy(ccid3_loss_interval_hist_slab); + ccid3_loss_interval_hist_slab = NULL; + } +} +module_exit(ccid3_module_exit); + +MODULE_AUTHOR("Ian McDonald & Arnaldo Carvalho de Melo "); +MODULE_DESCRIPTION("DCCP TFRC CCID3 CCID"); +MODULE_LICENSE("GPL"); +MODULE_ALIAS("net-dccp-ccid-3"); diff --git a/net/dccp/ccids/ccid3.h b/net/dccp/ccids/ccid3.h new file mode 100644 index 000000000000..5d6b623e64da --- /dev/null +++ b/net/dccp/ccids/ccid3.h @@ -0,0 +1,137 @@ +/* + * net/dccp/ccids/ccid3.h + * + * Copyright (c) 2005 The University of Waikato, Hamilton, New Zealand. + * + * An implementation of the DCCP protocol + * + * This code has been developed by the University of Waikato WAND + * research group. 
For further information please see http://www.wand.net.nz/ + * or e-mail Ian McDonald - iam4@cs.waikato.ac.nz + * + * This code also uses code from Lulea University, rereleased as GPL by its + * authors: + * Copyright (c) 2003 Nils-Erik Mattsson, Joacim Haggmark, Magnus Erixzon + * + * Changes to meet Linux coding standards, to make it meet latest ccid3 draft + * and to make it work as a loadable module in the DCCP stack written by + * Arnaldo Carvalho de Melo . + * + * Copyright (c) 2005 Arnaldo Carvalho de Melo + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ +#ifndef _DCCP_CCID3_H_ +#define _DCCP_CCID3_H_ + +#include +#include +#include + +struct ccid3_tx_hist_entry { + struct list_head ccid3htx_node; + u64 ccid3htx_seqno:48, + ccid3htx_win_count:8, + ccid3htx_sent:1; + struct timeval ccid3htx_tstamp; +}; + +struct ccid3_options_received { + u64 ccid3or_seqno:48, + ccid3or_loss_intervals_idx:16; + u16 ccid3or_loss_intervals_len; + u32 ccid3or_loss_event_rate; + u32 ccid3or_receive_rate; +}; + +/** struct ccid3_hc_tx_sock - CCID3 sender half connection congestion control block + * + * @ccid3hctx_state - Sender state + * @ccid3hctx_x - Current sending rate + * @ccid3hctx_x_recv - Receive rate + * @ccid3hctx_x_calc - Calculated send (?) 
rate
+ * @ccid3hctx_s - Packet size
+ * @ccid3hctx_rtt - Estimate of current round trip time in usecs
+ * @ccid3hctx_p - Current loss event rate (0-1) scaled by 1000000
+ * @ccid3hctx_last_win_count - Last window counter sent
+ * @ccid3hctx_t_last_win_count - Timestamp of earliest packet with last_win_count value sent
+ * @ccid3hctx_no_feedback_timer - Handle to no feedback timer
+ * @ccid3hctx_idle - Idle flag; set to 1 in ccid3_hc_tx_packet_recv when the no feedback timer is rescheduled (FIXME: document where it is cleared)
+ * @ccid3hctx_t_ld - Time last doubled during slow start
+ * @ccid3hctx_t_nom - Nominal send time of next packet
+ * @ccid3hctx_t_ipi - Interpacket (send) interval
+ * @ccid3hctx_delta - Send timer delta
+ * @ccid3hctx_hist - Packet history
+ * @ccid3hctx_options_received - TFRC options filled in by ccid3_hc_tx_parse_options (loss event rate, loss intervals, receive rate)
+ */
+struct ccid3_hc_tx_sock {
+	u32				ccid3hctx_x;
+	u32				ccid3hctx_x_recv;
+	u32				ccid3hctx_x_calc;
+	u16				ccid3hctx_s;
+	u32				ccid3hctx_rtt;
+	u32				ccid3hctx_p;
+	u8				ccid3hctx_state;
+	u8				ccid3hctx_last_win_count;
+	u8				ccid3hctx_idle;
+	struct timeval			ccid3hctx_t_last_win_count;
+	struct timer_list		ccid3hctx_no_feedback_timer;
+	struct timeval			ccid3hctx_t_ld;
+	struct timeval			ccid3hctx_t_nom;
+	u32				ccid3hctx_t_ipi;
+	u32				ccid3hctx_delta;
+	struct list_head		ccid3hctx_hist;
+	struct ccid3_options_received	ccid3hctx_options_received;
+};
+
+struct ccid3_loss_interval_hist_entry {
+	struct list_head	ccid3lih_node; /* link in ccid3hcrx_loss_interval_hist */
+	u64			ccid3lih_seqno:48, /* set from seq_loss in ccid3_hc_rx_update_li */
+				ccid3lih_win_count:4; /* set from win_loss in ccid3_hc_rx_update_li */
+	u32			ccid3lih_interval; /* interval length, weighted by ccid3_hc_rx_w in ccid3_hc_rx_calc_i_mean */
+};
+
+struct ccid3_rx_hist_entry {
+	struct list_head	ccid3hrx_node; /* link in ccid3hcrx_hist */
+	u64			ccid3hrx_seqno:48, /* sequence number, ordered with after48() */
+				ccid3hrx_win_count:4, /* window counter value of the packet */
+				ccid3hrx_type:4; /* packet type, compared against DCCP_PKT_DATA / DCCP_PKT_DATAACK */
+	u32			ccid3hrx_ndp; /* In fact it is from 8 to 24 bits */
+	struct timeval		ccid3hrx_tstamp; /* presumably the receive time; read via now_delta()/timeval_sub() - TODO confirm */
+};
+
+struct ccid3_hc_rx_sock {
+	u64			ccid3hcrx_seqno_last_counter:48,
+				ccid3hcrx_state:8,
+				ccid3hcrx_last_counter:4;
+	unsigned long		ccid3hcrx_rtt;
+	u32			ccid3hcrx_p;
+	u32			ccid3hcrx_bytes_recv;
+	struct timeval		ccid3hcrx_tstamp_last_feedback;
+	struct timeval		ccid3hcrx_tstamp_last_ack;
+	struct list_head	ccid3hcrx_hist;
+	struct list_head	ccid3hcrx_loss_interval_hist;
+	u16			ccid3hcrx_s;
+	u32			ccid3hcrx_pinv;
+	u32
ccid3hcrx_elapsed_time; + u32 ccid3hcrx_x_recv; +}; + +#define ccid3_hc_tx_field(s,field) (s->dccps_hc_tx_ccid_private == NULL ? 0 : \ + ((struct ccid3_hc_tx_sock *)s->dccps_hc_tx_ccid_private)->ccid3hctx_##field) + +#define ccid3_hc_rx_field(s,field) (s->dccps_hc_rx_ccid_private == NULL ? 0 : \ + ((struct ccid3_hc_rx_sock *)s->dccps_hc_rx_ccid_private)->ccid3hcrx_##field) + +#endif /* _DCCP_CCID3_H_ */ diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h new file mode 100644 index 000000000000..fb83454102c1 --- /dev/null +++ b/net/dccp/dccp.h @@ -0,0 +1,422 @@ +#ifndef _DCCP_H +#define _DCCP_H +/* + * net/dccp/dccp.h + * + * An implementation of the DCCP protocol + * Arnaldo Carvalho de Melo + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include +#include +#include +#include + +#define DCCP_DEBUG + +#ifdef DCCP_DEBUG +extern int dccp_debug; + +#define dccp_pr_debug(format, a...) \ + do { if (dccp_debug) \ + printk(KERN_DEBUG "%s: " format, __FUNCTION__ , ##a); \ + } while (0) +#define dccp_pr_debug_cat(format, a...) do { if (dccp_debug) printk(format, ##a); } while (0) +#else +#define dccp_pr_debug(format, a...) +#define dccp_pr_debug_cat(format, a...) 
+#endif + +extern struct inet_hashinfo dccp_hashinfo; + +extern atomic_t dccp_orphan_count; +extern int dccp_tw_count; +extern void dccp_tw_deschedule(struct inet_timewait_sock *tw); + +extern void dccp_time_wait(struct sock *sk, int state, int timeo); + +/* FIXME: Right size this */ +#define DCCP_MAX_OPT_LEN 128 + +#define DCCP_MAX_PACKET_HDR 32 + +#define MAX_DCCP_HEADER (DCCP_MAX_PACKET_HDR + DCCP_MAX_OPT_LEN + MAX_HEADER) + +#define DCCP_TIMEWAIT_LEN (60 * HZ) /* how long to wait to destroy TIME-WAIT + * state, about 60 seconds */ + +/* draft-ietf-dccp-spec-11.txt initial RTO value */ +#define DCCP_TIMEOUT_INIT ((unsigned)(3 * HZ)) + +/* Maximal interval between probes for local resources. */ +#define DCCP_RESOURCE_PROBE_INTERVAL ((unsigned)(HZ / 2U)) + +#define DCCP_RTO_MAX ((unsigned)(120 * HZ)) /* FIXME: using TCP value */ + +extern struct proto dccp_v4_prot; + +/* is seq1 < seq2 ? */ +static inline const int before48(const u64 seq1, const u64 seq2) +{ + return (const s64)((seq1 << 16) - (seq2 << 16)) < 0; +} + +/* is seq1 > seq2 ? */ +static inline const int after48(const u64 seq1, const u64 seq2) +{ + return (const s64)((seq2 << 16) - (seq1 << 16)) < 0; +} + +/* is seq2 <= seq1 <= seq3 ? */ +static inline const int between48(const u64 seq1, const u64 seq2, const u64 seq3) +{ + return (seq3 << 16) - (seq2 << 16) >= (seq1 << 16) - (seq2 << 16); +} + +static inline u64 max48(const u64 seq1, const u64 seq2) +{ + return after48(seq1, seq2) ? 
seq1 : seq2; +} + +enum { + DCCP_MIB_NUM = 0, + DCCP_MIB_ACTIVEOPENS, /* ActiveOpens */ + DCCP_MIB_ESTABRESETS, /* EstabResets */ + DCCP_MIB_CURRESTAB, /* CurrEstab */ + DCCP_MIB_OUTSEGS, /* OutSegs */ + DCCP_MIB_OUTRSTS, + DCCP_MIB_ABORTONTIMEOUT, + DCCP_MIB_TIMEOUTS, + DCCP_MIB_ABORTFAILED, + DCCP_MIB_PASSIVEOPENS, + DCCP_MIB_ATTEMPTFAILS, + DCCP_MIB_OUTDATAGRAMS, + DCCP_MIB_INERRS, + DCCP_MIB_OPTMANDATORYERROR, + DCCP_MIB_INVALIDOPT, + __DCCP_MIB_MAX +}; + +#define DCCP_MIB_MAX __DCCP_MIB_MAX +struct dccp_mib { + unsigned long mibs[DCCP_MIB_MAX]; +} __SNMP_MIB_ALIGN__; + +DECLARE_SNMP_STAT(struct dccp_mib, dccp_statistics); +#define DCCP_INC_STATS(field) SNMP_INC_STATS(dccp_statistics, field) +#define DCCP_INC_STATS_BH(field) SNMP_INC_STATS_BH(dccp_statistics, field) +#define DCCP_INC_STATS_USER(field) SNMP_INC_STATS_USER(dccp_statistics, field) +#define DCCP_DEC_STATS(field) SNMP_DEC_STATS(dccp_statistics, field) +#define DCCP_ADD_STATS_BH(field, val) SNMP_ADD_STATS_BH(dccp_statistics, field, val) +#define DCCP_ADD_STATS_USER(field, val) SNMP_ADD_STATS_USER(dccp_statistics, field, val) + +extern int dccp_transmit_skb(struct sock *sk, struct sk_buff *skb); +extern int dccp_retransmit_skb(struct sock *sk, struct sk_buff *skb); + +extern int dccp_send_response(struct sock *sk); +extern void dccp_send_ack(struct sock *sk); +extern void dccp_send_delayed_ack(struct sock *sk); +extern void dccp_send_sync(struct sock *sk, u64 seq); + +extern void dccp_init_xmit_timers(struct sock *sk); +static inline void dccp_clear_xmit_timers(struct sock *sk) +{ + inet_csk_clear_xmit_timers(sk); +} + +extern unsigned int dccp_sync_mss(struct sock *sk, u32 pmtu); + +extern const char *dccp_packet_name(const int type); +extern const char *dccp_state_name(const int state); + +static inline void dccp_set_state(struct sock *sk, const int state) +{ + const int oldstate = sk->sk_state; + + dccp_pr_debug("%s(%p) %-10.10s -> %s\n", + dccp_role(sk), sk, + dccp_state_name(oldstate), 
dccp_state_name(state)); + WARN_ON(state == oldstate); + + switch (state) { + case DCCP_OPEN: + if (oldstate != DCCP_OPEN) + DCCP_INC_STATS(DCCP_MIB_CURRESTAB); + break; + + case DCCP_CLOSED: + if (oldstate == DCCP_CLOSING || oldstate == DCCP_OPEN) + DCCP_INC_STATS(DCCP_MIB_ESTABRESETS); + + sk->sk_prot->unhash(sk); + if (inet_csk(sk)->icsk_bind_hash != NULL && + !(sk->sk_userlocks & SOCK_BINDPORT_LOCK)) + inet_put_port(&dccp_hashinfo, sk); + /* fall through */ + default: + if (oldstate == DCCP_OPEN) + DCCP_DEC_STATS(DCCP_MIB_CURRESTAB); + } + + /* Change state AFTER socket is unhashed to avoid closed + * socket sitting in hash tables. + */ + sk->sk_state = state; +} + +static inline void dccp_done(struct sock *sk) +{ + dccp_set_state(sk, DCCP_CLOSED); + dccp_clear_xmit_timers(sk); + + sk->sk_shutdown = SHUTDOWN_MASK; + + if (!sock_flag(sk, SOCK_DEAD)) + sk->sk_state_change(sk); + else + inet_csk_destroy_sock(sk); +} + +static inline void dccp_openreq_init(struct request_sock *req, + struct dccp_sock *dp, + struct sk_buff *skb) +{ + /* + * FIXME: fill in the other req fields from the DCCP options + * received + */ + inet_rsk(req)->rmt_port = dccp_hdr(skb)->dccph_sport; + inet_rsk(req)->acked = 0; + req->rcv_wnd = 0; +} + +extern void dccp_v4_send_check(struct sock *sk, struct dccp_hdr *dh, int len, + struct sk_buff *skb); +extern int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb); + +extern struct sock *dccp_create_openreq_child(struct sock *sk, + const struct request_sock *req, + const struct sk_buff *skb); + +extern int dccp_v4_do_rcv(struct sock *sk, struct sk_buff *skb); + +extern void dccp_v4_err(struct sk_buff *skb, u32); + +extern int dccp_v4_rcv(struct sk_buff *skb); + +extern struct sock *dccp_v4_request_recv_sock(struct sock *sk, + struct sk_buff *skb, + struct request_sock *req, + struct dst_entry *dst); +extern struct sock *dccp_check_req(struct sock *sk, struct sk_buff *skb, + struct request_sock *req, + struct request_sock **prev); + 
+extern int dccp_child_process(struct sock *parent, struct sock *child, + struct sk_buff *skb); +extern int dccp_rcv_state_process(struct sock *sk, struct sk_buff *skb, + struct dccp_hdr *dh, unsigned len); +extern int dccp_rcv_established(struct sock *sk, struct sk_buff *skb, + const struct dccp_hdr *dh, const unsigned len); + +extern void dccp_close(struct sock *sk, long timeout); +extern struct sk_buff *dccp_make_response(struct sock *sk, + struct dst_entry *dst, + struct request_sock *req); + +extern int dccp_connect(struct sock *sk); +extern int dccp_disconnect(struct sock *sk, int flags); +extern int dccp_getsockopt(struct sock *sk, int level, int optname, + char *optval, int *optlen); +extern int dccp_ioctl(struct sock *sk, int cmd, unsigned long arg); +extern int dccp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, + size_t size); +extern int dccp_recvmsg(struct kiocb *iocb, struct sock *sk, + struct msghdr *msg, size_t len, int nonblock, + int flags, int *addr_len); +extern int dccp_setsockopt(struct sock *sk, int level, int optname, + char *optval, int optlen); +extern void dccp_shutdown(struct sock *sk, int how); + +extern int dccp_v4_checksum(struct sk_buff *skb); + +extern int dccp_v4_send_reset(struct sock *sk, enum dccp_reset_codes code); +extern void dccp_send_close(struct sock *sk); + +struct dccp_skb_cb { + __u8 dccpd_type; + __u8 dccpd_reset_code; + __u8 dccpd_service; + __u8 dccpd_ccval; + __u64 dccpd_seq; + __u64 dccpd_ack_seq; + int dccpd_opt_len; +}; + +#define DCCP_SKB_CB(__skb) ((struct dccp_skb_cb *)&((__skb)->cb[0])) + +static inline int dccp_non_data_packet(const struct sk_buff *skb) +{ + const __u8 type = DCCP_SKB_CB(skb)->dccpd_type; + + return type == DCCP_PKT_ACK || + type == DCCP_PKT_CLOSE || + type == DCCP_PKT_CLOSEREQ || + type == DCCP_PKT_RESET || + type == DCCP_PKT_SYNC || + type == DCCP_PKT_SYNCACK; +} + +static inline int dccp_packet_without_ack(const struct sk_buff *skb) +{ + const __u8 type = 
DCCP_SKB_CB(skb)->dccpd_type; + + return type == DCCP_PKT_DATA || type == DCCP_PKT_REQUEST; +} + +#define DCCP_MAX_SEQNO ((((u64)1) << 48) - 1) +#define DCCP_PKT_WITHOUT_ACK_SEQ (DCCP_MAX_SEQNO << 2) + +static inline void dccp_set_seqno(u64 *seqno, u64 value) +{ + /* Seqnos are 48 bits wide: store value modulo 2^48. Masking also copes + * with caller u64 arithmetic that wrapped below zero (GSR + 1 - W/4). */ + *seqno = value & DCCP_MAX_SEQNO; +} + +static inline u64 dccp_delta_seqno(u64 seqno1, u64 seqno2) +{ + return ((seqno2 << 16) - (seqno1 << 16)) >> 16; +} + +static inline void dccp_inc_seqno(u64 *seqno) +{ + if (++*seqno > DCCP_MAX_SEQNO) + *seqno = 0; +} + +static inline void dccp_hdr_set_seq(struct dccp_hdr *dh, const u64 gss) +{ + struct dccp_hdr_ext *dhx = (struct dccp_hdr_ext *)((void *)dh + sizeof(*dh)); + +#if defined(__LITTLE_ENDIAN_BITFIELD) + dh->dccph_seq = htonl((gss >> 32)) >> 8; +#elif defined(__BIG_ENDIAN_BITFIELD) + dh->dccph_seq = htonl((gss >> 32)); +#else +#error "Adjust your defines" +#endif + dhx->dccph_seq_low = htonl(gss & 0xffffffff); +} + +static inline void dccp_hdr_set_ack(struct dccp_hdr_ack_bits *dhack, const u64 gsr) +{ +#if defined(__LITTLE_ENDIAN_BITFIELD) + dhack->dccph_ack_nr_high = htonl((gsr >> 32)) >> 8; +#elif defined(__BIG_ENDIAN_BITFIELD) + dhack->dccph_ack_nr_high = htonl((gsr >> 32)); +#else +#error "Adjust your defines" +#endif + dhack->dccph_ack_nr_low = htonl(gsr & 0xffffffff); +} + +static inline void dccp_update_gsr(struct sock *sk, u64 seq) +{ + struct dccp_sock *dp = dccp_sk(sk); + u64 tmp_gsr; + + dccp_set_seqno(&tmp_gsr, dp->dccps_gsr + 1 - (dp->dccps_options.dccpo_sequence_window / 4)); + dp->dccps_gsr = seq; + dccp_set_seqno(&dp->dccps_swl, max48(tmp_gsr, dp->dccps_isr)); + dccp_set_seqno(&dp->dccps_swh, + dp->dccps_gsr + (3 * dp->dccps_options.dccpo_sequence_window) / 4); +} + +static inline void dccp_update_gss(struct sock *sk, u64 seq) +{ + struct dccp_sock *dp = dccp_sk(sk); + u64 tmp_gss; + + dccp_set_seqno(&tmp_gss, dp->dccps_gss - dp->dccps_options.dccpo_sequence_window + 1); + dp->dccps_awl =
max48(tmp_gss, dp->dccps_iss); + dp->dccps_awh = dp->dccps_gss = seq; +} + +extern void dccp_insert_options(struct sock *sk, struct sk_buff *skb); +extern void dccp_insert_option_elapsed_time(struct sock *sk, + struct sk_buff *skb, + u32 elapsed_time); +extern void dccp_insert_option(struct sock *sk, struct sk_buff *skb, + unsigned char option, + const void *value, unsigned char len); + +extern struct socket *dccp_ctl_socket; + +#define DCCP_ACKPKTS_STATE_RECEIVED 0 +#define DCCP_ACKPKTS_STATE_ECN_MARKED (1 << 6) +#define DCCP_ACKPKTS_STATE_NOT_RECEIVED (3 << 6) + +#define DCCP_ACKPKTS_STATE_MASK 0xC0 /* 11000000 */ +#define DCCP_ACKPKTS_LEN_MASK 0x3F /* 00111111 */ + +/** struct dccp_ackpkts - acknowledgeable packets + * + * This data structure is the one defined in the DCCP draft + * Appendix A. + * + * @dccpap_buf_head - circular buffer head + * @dccpap_buf_tail - circular buffer tail + * @dccpap_buf_ackno - ack # of the most recent acknowledgeable packet in the buffer (i.e. %dccpap_buf_head) + * @dccpap_buf_nonce - the one-bit sum of the ECN Nonces on all packets acked by the buffer with State 0 + * + * Additionally, the HC-Receiver must keep some information about the + * Ack Vectors it has recently sent. For each packet sent carrying an + * Ack Vector, it remembers four variables: + * + * @dccpap_ack_seqno - the Sequence Number used for the packet (HC-Receiver seqno) + * @dccpap_ack_ptr - the value of buf_head at the time of acknowledgement. + * @dccpap_ack_ackno - the Acknowledgement Number used for the packet (HC-Sender seqno) + * @dccpap_ack_nonce - the one-bit sum of the ECN Nonces for all State 0.
+ * + * @dccpap_buf_len - circular buffer length + * @dccpap_buf - circular buffer of acknowledgeable packets + */ +struct dccp_ackpkts { + unsigned int dccpap_buf_head; + unsigned int dccpap_buf_tail; + u64 dccpap_buf_ackno; + u64 dccpap_ack_seqno; + u64 dccpap_ack_ackno; + unsigned int dccpap_ack_ptr; + unsigned int dccpap_buf_vector_len; + unsigned int dccpap_ack_vector_len; + unsigned int dccpap_buf_len; + unsigned long dccpap_time; + u8 dccpap_buf_nonce; + u8 dccpap_ack_nonce; + u8 dccpap_buf[0]; +}; + +extern struct dccp_ackpkts *dccp_ackpkts_alloc(unsigned int len, int priority); +extern void dccp_ackpkts_free(struct dccp_ackpkts *ap); +extern int dccp_ackpkts_add(struct dccp_ackpkts *ap, u64 ackno, u8 state); +extern void dccp_ackpkts_check_rcv_ackno(struct dccp_ackpkts *ap, + struct sock *sk, u64 ackno); + +#ifdef DCCP_DEBUG +extern void dccp_ackvector_print(const u64 ackno, + const unsigned char *vector, int len); +extern void dccp_ackpkts_print(const struct dccp_ackpkts *ap); +#else +static inline void dccp_ackvector_print(const u64 ackno, + const unsigned char *vector, + int len) { } +static inline void dccp_ackpkts_print(const struct dccp_ackpkts *ap) { } +#endif + +#endif /* _DCCP_H */ diff --git a/net/dccp/input.c b/net/dccp/input.c new file mode 100644 index 000000000000..622e976a51fe --- /dev/null +++ b/net/dccp/input.c @@ -0,0 +1,510 @@ +/* + * net/dccp/input.c + * + * An implementation of the DCCP protocol + * Arnaldo Carvalho de Melo + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
+ */ + +#include +#include +#include + +#include + +#include "ccid.h" +#include "dccp.h" + +static void dccp_fin(struct sock *sk, struct sk_buff *skb) +{ + sk->sk_shutdown |= RCV_SHUTDOWN; + sock_set_flag(sk, SOCK_DONE); + __skb_pull(skb, dccp_hdr(skb)->dccph_doff * 4); + __skb_queue_tail(&sk->sk_receive_queue, skb); + skb_set_owner_r(skb, sk); + sk->sk_data_ready(sk, 0); +} + +static void dccp_rcv_close(struct sock *sk, struct sk_buff *skb) +{ + switch (sk->sk_state) { + case DCCP_PARTOPEN: + case DCCP_OPEN: + dccp_v4_send_reset(sk, DCCP_RESET_CODE_CLOSED); + dccp_fin(sk, skb); + dccp_set_state(sk, DCCP_CLOSED); + break; + } +} + +static void dccp_rcv_closereq(struct sock *sk, struct sk_buff *skb) +{ + /* + * Step 7: Check for unexpected packet types + * If (S.is_server and P.type == CloseReq) + * Send Sync packet acknowledging P.seqno + * Drop packet and return + */ + if (dccp_sk(sk)->dccps_role != DCCP_ROLE_CLIENT) { + dccp_send_sync(sk, DCCP_SKB_CB(skb)->dccpd_seq); + return; + } + + switch (sk->sk_state) { + case DCCP_PARTOPEN: + case DCCP_OPEN: + dccp_set_state(sk, DCCP_CLOSING); + dccp_send_close(sk); + break; + } +} + +static inline void dccp_event_ack_recv(struct sock *sk, struct sk_buff *skb) +{ + struct dccp_sock *dp = dccp_sk(sk); + + if (dp->dccps_options.dccpo_send_ack_vector) + dccp_ackpkts_check_rcv_ackno(dp->dccps_hc_rx_ackpkts, sk, + DCCP_SKB_CB(skb)->dccpd_ack_seq); +} + +static int dccp_check_seqno(struct sock *sk, struct sk_buff *skb) +{ + const struct dccp_hdr *dh = dccp_hdr(skb); + struct dccp_sock *dp = dccp_sk(sk); + u64 lswl = dp->dccps_swl; + u64 lawl = dp->dccps_awl; + + /* + * Step 5: Prepare sequence numbers for Sync + * If P.type == Sync or P.type == SyncAck, + * If S.AWL <= P.ackno <= S.AWH and P.seqno >= S.SWL, + * / * P is valid, so update sequence number variables + * accordingly. After this update, P will pass the tests + * in Step 6. 
A SyncAck is generated if necessary in + * Step 15 * / + * Update S.GSR, S.SWL, S.SWH + * Otherwise, + * Drop packet and return + */ + if (dh->dccph_type == DCCP_PKT_SYNC || + dh->dccph_type == DCCP_PKT_SYNCACK) { + if (between48(DCCP_SKB_CB(skb)->dccpd_ack_seq, dp->dccps_awl, dp->dccps_awh) && + !before48(DCCP_SKB_CB(skb)->dccpd_seq, dp->dccps_swl)) + dccp_update_gsr(sk, DCCP_SKB_CB(skb)->dccpd_seq); + else + return -1; + /* + * Step 6: Check sequence numbers + * Let LSWL = S.SWL and LAWL = S.AWL + * If P.type == CloseReq or P.type == Close or P.type == Reset, + * LSWL := S.GSR + 1, LAWL := S.GAR + * If LSWL <= P.seqno <= S.SWH + * and (P.ackno does not exist or LAWL <= P.ackno <= S.AWH), + * Update S.GSR, S.SWL, S.SWH + * If P.type != Sync, + * Update S.GAR + * Otherwise, + * Send Sync packet acknowledging P.seqno + * Drop packet and return + */ + } else if (dh->dccph_type == DCCP_PKT_CLOSEREQ || + dh->dccph_type == DCCP_PKT_CLOSE || + dh->dccph_type == DCCP_PKT_RESET) { + lswl = dp->dccps_gsr; + dccp_inc_seqno(&lswl); + lawl = dp->dccps_gar; + } + + if (between48(DCCP_SKB_CB(skb)->dccpd_seq, lswl, dp->dccps_swh) && + (DCCP_SKB_CB(skb)->dccpd_ack_seq == DCCP_PKT_WITHOUT_ACK_SEQ || + between48(DCCP_SKB_CB(skb)->dccpd_ack_seq, lawl, dp->dccps_awh))) { + dccp_update_gsr(sk, DCCP_SKB_CB(skb)->dccpd_seq); + + if (dh->dccph_type != DCCP_PKT_SYNC && + DCCP_SKB_CB(skb)->dccpd_ack_seq != DCCP_PKT_WITHOUT_ACK_SEQ) + dp->dccps_gar = DCCP_SKB_CB(skb)->dccpd_ack_seq; + } else { + dccp_pr_debug("Step 6 failed, sending SYNC...\n"); + dccp_send_sync(sk, DCCP_SKB_CB(skb)->dccpd_seq); + return -1; + } + + return 0; +} + +int dccp_rcv_established(struct sock *sk, struct sk_buff *skb, + const struct dccp_hdr *dh, const unsigned len) +{ + struct dccp_sock *dp = dccp_sk(sk); + + if (dccp_check_seqno(sk, skb)) + goto discard; + + if (dccp_parse_options(sk, skb)) + goto discard; + + if (DCCP_SKB_CB(skb)->dccpd_ack_seq != DCCP_PKT_WITHOUT_ACK_SEQ) + dccp_event_ack_recv(sk, skb); + + /* + 
* FIXME: check ECN to see if we should use + * DCCP_ACKPKTS_STATE_ECN_MARKED + */ + if (dp->dccps_options.dccpo_send_ack_vector) { + struct dccp_ackpkts *ap = dp->dccps_hc_rx_ackpkts; + + if (dccp_ackpkts_add(dp->dccps_hc_rx_ackpkts, + DCCP_SKB_CB(skb)->dccpd_seq, + DCCP_ACKPKTS_STATE_RECEIVED)) { + LIMIT_NETDEBUG(pr_info("DCCP: acknowledgeable packets buffer full!\n")); + ap->dccpap_ack_seqno = DCCP_MAX_SEQNO + 1; + inet_csk_schedule_ack(sk); + inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK, TCP_DELACK_MIN, TCP_RTO_MAX); + goto discard; + } + + /* + * FIXME: this activation is probably wrong, have to study more + * TCP delack machinery and how it fits into DCCP draft, but + * for now it kinda "works" 8) + */ + if (!inet_csk_ack_scheduled(sk)) { + inet_csk_schedule_ack(sk); + inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK, 5 * HZ, TCP_RTO_MAX); + } + } + + ccid_hc_rx_packet_recv(dp->dccps_hc_rx_ccid, sk, skb); + ccid_hc_tx_packet_recv(dp->dccps_hc_tx_ccid, sk, skb); + + switch (dccp_hdr(skb)->dccph_type) { + case DCCP_PKT_DATAACK: + case DCCP_PKT_DATA: + /* + * FIXME: check if sk_receive_queue is full, schedule DATA_DROPPED option + * if it is. 
+ */ + __skb_pull(skb, dh->dccph_doff * 4); + __skb_queue_tail(&sk->sk_receive_queue, skb); + skb_set_owner_r(skb, sk); + sk->sk_data_ready(sk, 0); + return 0; + case DCCP_PKT_ACK: + goto discard; + case DCCP_PKT_RESET: + /* + * Step 9: Process Reset + * If P.type == Reset, + * Tear down connection + * S.state := TIMEWAIT + * Set TIMEWAIT timer + * Drop packet and return + */ + dccp_fin(sk, skb); + dccp_time_wait(sk, DCCP_TIME_WAIT, 0); + return 0; + case DCCP_PKT_CLOSEREQ: + dccp_rcv_closereq(sk, skb); + goto discard; + case DCCP_PKT_CLOSE: + dccp_rcv_close(sk, skb); + return 0; + case DCCP_PKT_REQUEST: + /* Step 7 + * or (S.is_server and P.type == Response) + * or (S.is_client and P.type == Request) + * or (S.state >= OPEN and P.type == Request + * and P.seqno >= S.OSR) + * or (S.state >= OPEN and P.type == Response + * and P.seqno >= S.OSR) + * or (S.state == RESPOND and P.type == Data), + * Send Sync packet acknowledging P.seqno + * Drop packet and return + */ + if (dp->dccps_role != DCCP_ROLE_LISTEN) + goto send_sync; + goto check_seq; + case DCCP_PKT_RESPONSE: + if (dp->dccps_role != DCCP_ROLE_CLIENT) + goto send_sync; +check_seq: + if (!before48(DCCP_SKB_CB(skb)->dccpd_seq, dp->dccps_osr)) { +send_sync: + dccp_send_sync(sk, DCCP_SKB_CB(skb)->dccpd_seq); + } + break; + } + + DCCP_INC_STATS_BH(DCCP_MIB_INERRS); +discard: + __kfree_skb(skb); + return 0; +} + +static int dccp_rcv_request_sent_state_process(struct sock *sk, + struct sk_buff *skb, + const struct dccp_hdr *dh, + const unsigned len) +{ + /* + * Step 4: Prepare sequence numbers in REQUEST + * If S.state == REQUEST, + * If (P.type == Response or P.type == Reset) + * and S.AWL <= P.ackno <= S.AWH, + * / * Set sequence number variables corresponding to the + * other endpoint, so P will pass the tests in Step 6 * / + * Set S.GSR, S.ISR, S.SWL, S.SWH + * / * Response processing continues in Step 10; Reset + * processing continues in Step 9 * / + */ + if (dh->dccph_type == DCCP_PKT_RESPONSE) { + const 
struct inet_connection_sock *icsk = inet_csk(sk); + struct dccp_sock *dp = dccp_sk(sk); + + if (!between48(DCCP_SKB_CB(skb)->dccpd_ack_seq, dp->dccps_awl, dp->dccps_awh)) { + dccp_pr_debug("invalid ackno: S.AWL=%llu, P.ackno=%llu, S.AWH=%llu \n", + dp->dccps_awl, DCCP_SKB_CB(skb)->dccpd_ack_seq, dp->dccps_awh); + goto out_invalid_packet; + } + + /* Stop the REQUEST timer, but only once P.ackno has passed the Step 4 check */ + inet_csk_clear_xmit_timer(sk, ICSK_TIME_RETRANS); + BUG_TRAP(sk->sk_send_head != NULL); + __kfree_skb(sk->sk_send_head); + sk->sk_send_head = NULL; + + dp->dccps_isr = DCCP_SKB_CB(skb)->dccpd_seq; + dccp_update_gsr(sk, DCCP_SKB_CB(skb)->dccpd_seq); + + if (ccid_hc_rx_init(dp->dccps_hc_rx_ccid, sk) != 0 || + ccid_hc_tx_init(dp->dccps_hc_tx_ccid, sk) != 0) { + ccid_hc_rx_exit(dp->dccps_hc_rx_ccid, sk); + ccid_hc_tx_exit(dp->dccps_hc_tx_ccid, sk); + /* FIXME: send appropriate RESET code */ + goto out_invalid_packet; + } + + dccp_sync_mss(sk, dp->dccps_pmtu_cookie); + + /* + * Step 10: Process REQUEST state (second part) + * If S.state == REQUEST, + * / * If we get here, P is a valid Response from the server (see + * Step 4), and we should move to PARTOPEN state. PARTOPEN + * means send an Ack, don't send Data packets, retransmit + * Acks periodically, and always include any Init Cookie from + * the Response * / + * S.state := PARTOPEN + * Set PARTOPEN timer + * Continue with S.state == PARTOPEN + * / * Step 12 will send the Ack completing the three-way + * handshake * / + */ + dccp_set_state(sk, DCCP_PARTOPEN); + + /* Make sure socket is routed, for correct metrics. */ + inet_sk_rebuild_header(sk); + + if (!sock_flag(sk, SOCK_DEAD)) { + sk->sk_state_change(sk); + sk_wake_async(sk, 0, POLL_OUT); + } + + if (sk->sk_write_pending || icsk->icsk_ack.pingpong || + icsk->icsk_accept_queue.rskq_defer_accept) { + /* Save one ACK. Data will be ready after + * several ticks, if write_pending is set.
+ * + * It may be deleted, but with this feature tcpdumps + * look so _wonderfully_ clever, that I was not able + * to stand against the temptation 8) --ANK + */ + /* + * OK, in DCCP we can as well do a similar trick, its + * even in the draft, but there is no need for us to + * schedule an ack here, as dccp_sendmsg does this for + * us, also stated in the draft. -acme + */ + __kfree_skb(skb); + return 0; + } + dccp_send_ack(sk); + return -1; + } + +out_invalid_packet: + return 1; /* dccp_v4_do_rcv will send a reset, but... + FIXME: the reset code should be DCCP_RESET_CODE_PACKET_ERROR */ +} + +static int dccp_rcv_respond_partopen_state_process(struct sock *sk, + struct sk_buff *skb, + const struct dccp_hdr *dh, + const unsigned len) +{ + int queued = 0; + + switch (dh->dccph_type) { + case DCCP_PKT_RESET: + inet_csk_clear_xmit_timer(sk, ICSK_TIME_DACK); + break; + case DCCP_PKT_DATAACK: + case DCCP_PKT_ACK: + /* + * FIXME: we should be reseting the PARTOPEN (DELACK) timer here, + * but only if we haven't used the DELACK timer for something else, + * like sending a delayed ack for a TIMESTAMP echo, etc, for now + * were not clearing it, sending an extra ACK when there is nothing + * else to do in DELACK is not a big deal after all. 
+ */ + + /* Stop the PARTOPEN timer */ + if (sk->sk_state == DCCP_PARTOPEN) + inet_csk_clear_xmit_timer(sk, ICSK_TIME_DACK); + + dccp_sk(sk)->dccps_osr = DCCP_SKB_CB(skb)->dccpd_seq; + dccp_set_state(sk, DCCP_OPEN); + + if (dh->dccph_type == DCCP_PKT_DATAACK) { + dccp_rcv_established(sk, skb, dh, len); + queued = 1; /* packet was queued (by dccp_rcv_established) */ + } + break; + } + + return queued; +} + +int dccp_rcv_state_process(struct sock *sk, struct sk_buff *skb, + struct dccp_hdr *dh, unsigned len) +{ + struct dccp_sock *dp = dccp_sk(sk); + const int old_state = sk->sk_state; + int queued = 0; + + if (sk->sk_state != DCCP_LISTEN && sk->sk_state != DCCP_REQUESTING) { + if (dccp_check_seqno(sk, skb)) + goto discard; + + /* + * Step 8: Process options and mark acknowledgeable + */ + if (dccp_parse_options(sk, skb)) + goto discard; + + if (DCCP_SKB_CB(skb)->dccpd_ack_seq != DCCP_PKT_WITHOUT_ACK_SEQ) + dccp_event_ack_recv(sk, skb); + + ccid_hc_rx_packet_recv(dp->dccps_hc_rx_ccid, sk, skb); + ccid_hc_tx_packet_recv(dp->dccps_hc_tx_ccid, sk, skb); + + /* + * FIXME: check ECN to see if we should use + * DCCP_ACKPKTS_STATE_ECN_MARKED + */ + if (dp->dccps_options.dccpo_send_ack_vector) { + if (dccp_ackpkts_add(dp->dccps_hc_rx_ackpkts, + DCCP_SKB_CB(skb)->dccpd_seq, + DCCP_ACKPKTS_STATE_RECEIVED)) + goto discard; + /* + * FIXME: this activation is probably wrong, have to study more + * TCP delack machinery and how it fits into DCCP draft, but + * for now it kinda "works" 8) + */ + if (dp->dccps_hc_rx_ackpkts->dccpap_ack_seqno == DCCP_MAX_SEQNO + 1 && + !inet_csk_ack_scheduled(sk)) { + inet_csk_schedule_ack(sk); + inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK, TCP_DELACK_MIN, TCP_RTO_MAX); + } + } + } + + /* + * Step 9: Process Reset + * If P.type == Reset, + * Tear down connection + * S.state := TIMEWAIT + * Set TIMEWAIT timer + * Drop packet and return + */ + if (dh->dccph_type == DCCP_PKT_RESET) { + /* Queue the equivalent of TCP fin so that dccp_recvmsg exits the 
loop */ + dccp_fin(sk, skb); + dccp_time_wait(sk, DCCP_TIME_WAIT, 0); + return 0; + /* + * Step 7: Check for unexpected packet types + * If (S.is_server and P.type == CloseReq) + * or (S.is_server and P.type == Response) + * or (S.is_client and P.type == Request) + * or (S.state == RESPOND and P.type == Data), + * Send Sync packet acknowledging P.seqno + * Drop packet and return + */ + } else if ((dp->dccps_role != DCCP_ROLE_CLIENT && + (dh->dccph_type == DCCP_PKT_RESPONSE || dh->dccph_type == DCCP_PKT_CLOSEREQ)) || + (dp->dccps_role == DCCP_ROLE_CLIENT && + dh->dccph_type == DCCP_PKT_REQUEST) || + (sk->sk_state == DCCP_RESPOND && dh->dccph_type == DCCP_PKT_DATA)) { + dccp_send_sync(sk, DCCP_SKB_CB(skb)->dccpd_seq); + goto discard; + } + + switch (sk->sk_state) { + case DCCP_CLOSED: + return 1; + + case DCCP_LISTEN: + if (dh->dccph_type == DCCP_PKT_ACK || + dh->dccph_type == DCCP_PKT_DATAACK) + return 1; + + if (dh->dccph_type == DCCP_PKT_RESET) + goto discard; + + if (dh->dccph_type == DCCP_PKT_REQUEST) { + if (dccp_v4_conn_request(sk, skb) < 0) + return 1; + + /* FIXME: do congestion control initialization */ + goto discard; + } + goto discard; + + case DCCP_REQUESTING: + /* FIXME: do congestion control initialization */ + + queued = dccp_rcv_request_sent_state_process(sk, skb, dh, len); + if (queued >= 0) + return queued; + + __kfree_skb(skb); + return 0; + + case DCCP_RESPOND: + case DCCP_PARTOPEN: + queued = dccp_rcv_respond_partopen_state_process(sk, skb, dh, len); + break; + } + + if (dh->dccph_type == DCCP_PKT_ACK || dh->dccph_type == DCCP_PKT_DATAACK) { + switch (old_state) { + case DCCP_PARTOPEN: + sk->sk_state_change(sk); + sk_wake_async(sk, 0, POLL_OUT); + break; + } + } + + if (!queued) { +discard: + __kfree_skb(skb); + } + return 0; +} diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c new file mode 100644 index 000000000000..083bacaecb3b --- /dev/null +++ b/net/dccp/ipv4.c @@ -0,0 +1,1289 @@ +/* + * net/dccp/ipv4.c + * + * An implementation of the DCCP 
protocol + * Arnaldo Carvalho de Melo + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#include "ccid.h" +#include "dccp.h" + +struct inet_hashinfo __cacheline_aligned dccp_hashinfo = { + .lhash_lock = RW_LOCK_UNLOCKED, + .lhash_users = ATOMIC_INIT(0), + .lhash_wait = __WAIT_QUEUE_HEAD_INITIALIZER(dccp_hashinfo.lhash_wait), + .portalloc_lock = SPIN_LOCK_UNLOCKED, + .port_rover = 1024 - 1, +}; + +static int dccp_v4_get_port(struct sock *sk, const unsigned short snum) +{ + return inet_csk_get_port(&dccp_hashinfo, sk, snum); +} + +static void dccp_v4_hash(struct sock *sk) +{ + inet_hash(&dccp_hashinfo, sk); +} + +static void dccp_v4_unhash(struct sock *sk) +{ + inet_unhash(&dccp_hashinfo, sk); +} + +/* called with local bh disabled */ +static int __dccp_v4_check_established(struct sock *sk, const __u16 lport, + struct inet_timewait_sock **twp) +{ + struct inet_sock *inet = inet_sk(sk); + const u32 daddr = inet->rcv_saddr; + const u32 saddr = inet->daddr; + const int dif = sk->sk_bound_dev_if; + INET_ADDR_COOKIE(acookie, saddr, daddr) + const __u32 ports = INET_COMBINED_PORTS(inet->dport, lport); + const int hash = inet_ehashfn(daddr, lport, saddr, inet->dport, dccp_hashinfo.ehash_size); + struct inet_ehash_bucket *head = &dccp_hashinfo.ehash[hash]; + const struct sock *sk2; + const struct hlist_node *node; + struct inet_timewait_sock *tw; + + write_lock(&head->lock); + + /* Check TIME-WAIT sockets first. */ + sk_for_each(sk2, node, &(head + dccp_hashinfo.ehash_size)->chain) { + tw = inet_twsk(sk2); + + if (INET_TW_MATCH(sk2, acookie, saddr, daddr, ports, dif)) + goto not_unique; + } + tw = NULL; + + /* And established part... 
*/ + sk_for_each(sk2, node, &head->chain) { + if (INET_MATCH(sk2, acookie, saddr, daddr, ports, dif)) + goto not_unique; + } + + /* Must record num and sport now. Otherwise we will see + * in hash table socket with a funny identity. */ + inet->num = lport; + inet->sport = htons(lport); + sk->sk_hashent = hash; + BUG_TRAP(sk_unhashed(sk)); + __sk_add_node(sk, &head->chain); + sock_prot_inc_use(sk->sk_prot); + write_unlock(&head->lock); + + if (twp != NULL) { + *twp = tw; + NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED); + } else if (tw != NULL) { + /* Silly. Should hash-dance instead... */ + dccp_tw_deschedule(tw); + NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED); + + inet_twsk_put(tw); + } + + return 0; + +not_unique: + write_unlock(&head->lock); + return -EADDRNOTAVAIL; +} + +/* + * Bind a port for a connect operation and hash it. + */ +static int dccp_v4_hash_connect(struct sock *sk) +{ + const unsigned short snum = inet_sk(sk)->num; + struct inet_bind_hashbucket *head; + struct inet_bind_bucket *tb; + int ret; + + if (snum == 0) { + int rover; + int low = sysctl_local_port_range[0]; + int high = sysctl_local_port_range[1]; + int remaining = (high - low) + 1; + struct hlist_node *node; + struct inet_timewait_sock *tw = NULL; + + local_bh_disable(); + + /* TODO. Actually it is not so bad idea to remove + * dccp_hashinfo.portalloc_lock before next submission to Linus. + * As soon as we touch this place at all it is time to think. + * + * Now it protects single _advisory_ variable dccp_hashinfo.port_rover, + * hence it is mostly useless. + * Code will work nicely if we just delete it, but + * I am afraid in contented case it will work not better or + * even worse: another cpu just will hit the same bucket + * and spin there. + * So some cpu salt could remove both contention and + * memory pingpong. Any ideas how to do this in a nice way? 
+ */ + spin_lock(&dccp_hashinfo.portalloc_lock); + rover = dccp_hashinfo.port_rover; + + do { + rover++; + if ((rover < low) || (rover > high)) + rover = low; + head = &dccp_hashinfo.bhash[inet_bhashfn(rover, dccp_hashinfo.bhash_size)]; + spin_lock(&head->lock); + + /* Does not bother with rcv_saddr checks, + * because the established check is already + * unique enough. + */ + inet_bind_bucket_for_each(tb, node, &head->chain) { + if (tb->port == rover) { + BUG_TRAP(!hlist_empty(&tb->owners)); + if (tb->fastreuse >= 0) + goto next_port; + if (!__dccp_v4_check_established(sk, + rover, + &tw)) + goto ok; + goto next_port; + } + } + + tb = inet_bind_bucket_create(dccp_hashinfo.bind_bucket_cachep, head, rover); + if (tb == NULL) { + spin_unlock(&head->lock); + break; + } + tb->fastreuse = -1; + goto ok; + + next_port: + spin_unlock(&head->lock); + } while (--remaining > 0); + dccp_hashinfo.port_rover = rover; + spin_unlock(&dccp_hashinfo.portalloc_lock); + + local_bh_enable(); + + return -EADDRNOTAVAIL; + +ok: + /* All locks still held and bhs disabled */ + dccp_hashinfo.port_rover = rover; + spin_unlock(&dccp_hashinfo.portalloc_lock); + + inet_bind_hash(sk, tb, rover); + if (sk_unhashed(sk)) { + inet_sk(sk)->sport = htons(rover); + __inet_hash(&dccp_hashinfo, sk, 0); + } + spin_unlock(&head->lock); + + if (tw != NULL) { + dccp_tw_deschedule(tw); + inet_twsk_put(tw); + } + + ret = 0; + goto out; + } + + head = &dccp_hashinfo.bhash[inet_bhashfn(snum, dccp_hashinfo.bhash_size)]; + tb = inet_csk(sk)->icsk_bind_hash; + spin_lock_bh(&head->lock); + if (sk_head(&tb->owners) == sk && sk->sk_bind_node.next == NULL) { + __inet_hash(&dccp_hashinfo, sk, 0); + spin_unlock_bh(&head->lock); + return 0; + } else { + spin_unlock(&head->lock); + /* No definite answer... 
Walk to established hash table */ + ret = __dccp_v4_check_established(sk, snum, NULL); +out: + local_bh_enable(); + return ret; + } +} + +static int dccp_v4_connect(struct sock *sk, struct sockaddr *uaddr, + int addr_len) +{ + struct inet_sock *inet = inet_sk(sk); + struct dccp_sock *dp = dccp_sk(sk); + const struct sockaddr_in *usin = (struct sockaddr_in *)uaddr; + struct rtable *rt; + u32 daddr, nexthop; + int tmp; + int err; + + dp->dccps_role = DCCP_ROLE_CLIENT; + + if (addr_len < sizeof(struct sockaddr_in)) + return -EINVAL; + + if (usin->sin_family != AF_INET) + return -EAFNOSUPPORT; + + nexthop = daddr = usin->sin_addr.s_addr; + if (inet->opt != NULL && inet->opt->srr) { + if (daddr == 0) + return -EINVAL; + nexthop = inet->opt->faddr; + } + + tmp = ip_route_connect(&rt, nexthop, inet->saddr, + RT_CONN_FLAGS(sk), sk->sk_bound_dev_if, + IPPROTO_DCCP, + inet->sport, usin->sin_port, sk); + if (tmp < 0) + return tmp; + + if (rt->rt_flags & (RTCF_MULTICAST | RTCF_BROADCAST)) { + ip_rt_put(rt); + return -ENETUNREACH; + } + + if (inet->opt == NULL || !inet->opt->srr) + daddr = rt->rt_dst; + + if (inet->saddr == 0) + inet->saddr = rt->rt_src; + inet->rcv_saddr = inet->saddr; + + inet->dport = usin->sin_port; + inet->daddr = daddr; + + dp->dccps_ext_header_len = 0; + if (inet->opt != NULL) + dp->dccps_ext_header_len = inet->opt->optlen; + /* + * Socket identity is still unknown (sport may be zero). + * However we set state to DCCP_REQUESTING and not releasing socket + * lock select source port, enter ourselves into the hash tables and + * complete initialization after this. + */ + dccp_set_state(sk, DCCP_REQUESTING); + err = dccp_v4_hash_connect(sk); + if (err != 0) + goto failure; + + err = ip_route_newports(&rt, inet->sport, inet->dport, sk); + if (err != 0) + goto failure; + + /* OK, now commit destination to socket. 
*/ + sk_setup_caps(sk, &rt->u.dst); + + dp->dccps_gar = + dp->dccps_iss = secure_dccp_sequence_number(inet->saddr, + inet->daddr, + inet->sport, + usin->sin_port); + dccp_update_gss(sk, dp->dccps_iss); + + inet->id = dp->dccps_iss ^ jiffies; + + err = dccp_connect(sk); + rt = NULL; + if (err != 0) + goto failure; +out: + return err; +failure: + /* This unhashes the socket and releases the local port, if necessary. */ + dccp_set_state(sk, DCCP_CLOSED); + ip_rt_put(rt); + sk->sk_route_caps = 0; + inet->dport = 0; + goto out; +} + +/* + * This routine does path mtu discovery as defined in RFC1191. + */ +static inline void dccp_do_pmtu_discovery(struct sock *sk, + const struct iphdr *iph, + u32 mtu) +{ + struct dst_entry *dst; + const struct inet_sock *inet = inet_sk(sk); + const struct dccp_sock *dp = dccp_sk(sk); + + /* We are not interested in DCCP_LISTEN and request_socks (RESPONSEs + * send out by Linux are always < 576bytes so they should go through + * unfragmented). + */ + if (sk->sk_state == DCCP_LISTEN) + return; + + /* We don't check in the destentry if pmtu discovery is forbidden + * on this route. We just assume that no packet_to_big packets + * are send back when pmtu discovery is not active. + * There is a small race when the user changes this flag in the + * route, but I think that's acceptable. + */ + if ((dst = __sk_dst_check(sk, 0)) == NULL) + return; + + dst->ops->update_pmtu(dst, mtu); + + /* Something is about to be wrong... Remember soft error + * for the case, if this connection will not able to recover. + */ + if (mtu < dst_mtu(dst) && ip_dont_fragment(sk, dst)) + sk->sk_err_soft = EMSGSIZE; + + mtu = dst_mtu(dst); + + if (inet->pmtudisc != IP_PMTUDISC_DONT && + dp->dccps_pmtu_cookie > mtu) { + dccp_sync_mss(sk, mtu); + + /* + * From: draft-ietf-dccp-spec-11.txt + * + * DCCP-Sync packets are the best choice for upward probing, + * since DCCP-Sync probes do not risk application data loss. 
+ */ + dccp_send_sync(sk, dp->dccps_gsr); + } /* else let the usual retransmit timer handle it */ +} + +static void dccp_v4_ctl_send_ack(struct sk_buff *rxskb) +{ + int err; + struct dccp_hdr *rxdh = dccp_hdr(rxskb), *dh; + const int dccp_hdr_ack_len = sizeof(struct dccp_hdr) + + sizeof(struct dccp_hdr_ext) + + sizeof(struct dccp_hdr_ack_bits); + struct sk_buff *skb; + + if (((struct rtable *)rxskb->dst)->rt_type != RTN_LOCAL) + return; + + skb = alloc_skb(MAX_DCCP_HEADER + 15, GFP_ATOMIC); + if (skb == NULL) + return; + + /* Reserve space for headers. */ + skb_reserve(skb, MAX_DCCP_HEADER); + + skb->dst = dst_clone(rxskb->dst); + + skb->h.raw = skb_push(skb, dccp_hdr_ack_len); + dh = dccp_hdr(skb); + memset(dh, 0, dccp_hdr_ack_len); + + /* Build DCCP header and checksum it. */ + dh->dccph_type = DCCP_PKT_ACK; + dh->dccph_sport = rxdh->dccph_dport; + dh->dccph_dport = rxdh->dccph_sport; + dh->dccph_doff = dccp_hdr_ack_len / 4; + dh->dccph_x = 1; + + dccp_hdr_set_seq(dh, DCCP_SKB_CB(rxskb)->dccpd_ack_seq); + dccp_hdr_set_ack(dccp_hdr_ack_bits(skb), DCCP_SKB_CB(rxskb)->dccpd_seq); + + bh_lock_sock(dccp_ctl_socket->sk); + err = ip_build_and_send_pkt(skb, dccp_ctl_socket->sk, + rxskb->nh.iph->daddr, rxskb->nh.iph->saddr, NULL); + bh_unlock_sock(dccp_ctl_socket->sk); + + if (err == NET_XMIT_CN || err == 0) { + DCCP_INC_STATS_BH(DCCP_MIB_OUTSEGS); + /* This packet is an Ack, not a Reset: DCCP_MIB_OUTRSTS is not bumped. */ + } +} + +static void dccp_v4_reqsk_send_ack(struct sk_buff *skb, struct request_sock *req) +{ + dccp_v4_ctl_send_ack(skb); +} + +static int dccp_v4_send_response(struct sock *sk, struct request_sock *req, + struct dst_entry *dst) +{ + int err = -1; + struct sk_buff *skb; + + /* First, grab a route.
*/ + + if (dst == NULL && (dst = inet_csk_route_req(sk, req)) == NULL) + goto out; + + skb = dccp_make_response(sk, dst, req); + if (skb != NULL) { + const struct inet_request_sock *ireq = inet_rsk(req); + + err = ip_build_and_send_pkt(skb, sk, ireq->loc_addr, + ireq->rmt_addr, + ireq->opt); + if (err == NET_XMIT_CN) + err = 0; + } + +out: + dst_release(dst); + return err; +} + +/* + * This routine is called by the ICMP module when it gets some sort of error + * condition. If err < 0 then the socket should be closed and the error + * returned to the user. If err > 0 it's just the icmp type << 8 | icmp code. + * After adjustment header points to the first 8 bytes of the DCCP header. We + * need to find the appropriate port. + * + * The locking strategy used here is very "optimistic". When someone else + * accesses the socket the ICMP is just dropped and for some paths there is no + * check at all. A more general error queue to queue errors for later handling + * is probably better. + */ +void dccp_v4_err(struct sk_buff *skb, u32 info) +{ + const struct iphdr *iph = (struct iphdr *)skb->data; + const struct dccp_hdr *dh = (struct dccp_hdr *)(skb->data + (iph->ihl << 2)); + struct dccp_sock *dp; + struct inet_sock *inet; + const int type = skb->h.icmph->type; + const int code = skb->h.icmph->code; + struct sock *sk; + __u64 seq; + int err; + + if (skb->len < (iph->ihl << 2) + 8) { + ICMP_INC_STATS_BH(ICMP_MIB_INERRORS); + return; + } + + sk = inet_lookup(&dccp_hashinfo, iph->daddr, dh->dccph_dport, + iph->saddr, dh->dccph_sport, inet_iif(skb)); + if (sk == NULL) { + ICMP_INC_STATS_BH(ICMP_MIB_INERRORS); + return; + } + + if (sk->sk_state == DCCP_TIME_WAIT) { + inet_twsk_put((struct inet_timewait_sock *)sk); + return; + } + + bh_lock_sock(sk); + /* If too many ICMPs get dropped on busy + * servers this needs to be solved differently.
+ */ + if (sock_owned_by_user(sk)) + NET_INC_STATS_BH(LINUX_MIB_LOCKDROPPEDICMPS); + + if (sk->sk_state == DCCP_CLOSED) + goto out; + + dp = dccp_sk(sk); + seq = dccp_hdr_seq(skb); + if (sk->sk_state != DCCP_LISTEN && + !between48(seq, dp->dccps_swl, dp->dccps_swh)) { + NET_INC_STATS(LINUX_MIB_OUTOFWINDOWICMPS); + goto out; + } + + switch (type) { + case ICMP_SOURCE_QUENCH: + /* Just silently ignore these. */ + goto out; + case ICMP_PARAMETERPROB: + err = EPROTO; + break; + case ICMP_DEST_UNREACH: + if (code > NR_ICMP_UNREACH) + goto out; + + if (code == ICMP_FRAG_NEEDED) { /* PMTU discovery (RFC1191) */ + if (!sock_owned_by_user(sk)) + dccp_do_pmtu_discovery(sk, iph, info); + goto out; + } + + err = icmp_err_convert[code].errno; + break; + case ICMP_TIME_EXCEEDED: + err = EHOSTUNREACH; + break; + default: + goto out; + } + + switch (sk->sk_state) { + struct request_sock *req , **prev; + case DCCP_LISTEN: + if (sock_owned_by_user(sk)) + goto out; + req = inet_csk_search_req(sk, &prev, dh->dccph_dport, + iph->daddr, iph->saddr); + if (!req) + goto out; + + /* + * ICMPs are not backlogged, hence we cannot get an established + * socket here. + */ + BUG_TRAP(!req->sk); + + if (seq != dccp_rsk(req)->dreq_iss) { + NET_INC_STATS_BH(LINUX_MIB_OUTOFWINDOWICMPS); + goto out; + } + /* + * Still in RESPOND, just remove it silently. + * There is no good way to pass the error to the newly + * created socket, and POSIX does not want network + * errors returned from accept(). + */ + inet_csk_reqsk_queue_drop(sk, req, prev); + goto out; + + case DCCP_REQUESTING: + case DCCP_RESPOND: + if (!sock_owned_by_user(sk)) { + DCCP_INC_STATS_BH(DCCP_MIB_ATTEMPTFAILS); + sk->sk_err = err; + + sk->sk_error_report(sk); + + dccp_done(sk); + } else + sk->sk_err_soft = err; + goto out; + } + + /* If we've already connected we will keep trying + * until we time out, or the user gives up. 
+ * + * rfc1122 4.2.3.9 allows to consider as hard errors + * only PROTO_UNREACH and PORT_UNREACH (well, FRAG_FAILED too, + * but it is obsoleted by pmtu discovery). + * + * Note, that in modern internet, where routing is unreliable + * and in each dark corner broken firewalls sit, sending random + * errors ordered by their masters even this two messages finally lose + * their original sense (even Linux sends invalid PORT_UNREACHs) + * + * Now we are in compliance with RFCs. + * --ANK (980905) + */ + + inet = inet_sk(sk); + if (!sock_owned_by_user(sk) && inet->recverr) { + sk->sk_err = err; + sk->sk_error_report(sk); + } else /* Only an error on timeout */ + sk->sk_err_soft = err; +out: + bh_unlock_sock(sk); + sock_put(sk); +} + +extern struct sk_buff *dccp_make_reset(struct sock *sk, struct dst_entry *dst, enum dccp_reset_codes code); + +int dccp_v4_send_reset(struct sock *sk, enum dccp_reset_codes code) +{ + struct sk_buff *skb; + /* + * FIXME: what if rebuild_header fails? + * Should we be doing a rebuild_header here? 
+ */ + int err = inet_sk_rebuild_header(sk); + + if (err != 0) + return err; + + skb = dccp_make_reset(sk, sk->sk_dst_cache, code); + if (skb != NULL) { + const struct dccp_sock *dp = dccp_sk(sk); + const struct inet_sock *inet = inet_sk(sk); + + err = ip_build_and_send_pkt(skb, sk, + inet->saddr, inet->daddr, NULL); + if (err == NET_XMIT_CN) + err = 0; + + ccid_hc_rx_exit(dp->dccps_hc_rx_ccid, sk); + ccid_hc_tx_exit(dp->dccps_hc_tx_ccid, sk); + } + + return err; +} + +static inline u64 dccp_v4_init_sequence(const struct sock *sk, + const struct sk_buff *skb) +{ + return secure_dccp_sequence_number(skb->nh.iph->daddr, + skb->nh.iph->saddr, + dccp_hdr(skb)->dccph_dport, + dccp_hdr(skb)->dccph_sport); +} + +int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb) +{ + struct inet_request_sock *ireq; + struct dccp_sock dp; + struct request_sock *req; + struct dccp_request_sock *dreq; + const __u32 saddr = skb->nh.iph->saddr; + const __u32 daddr = skb->nh.iph->daddr; + struct dst_entry *dst = NULL; + + /* Never answer to DCCP_PKT_REQUESTs send to broadcast or multicast */ + if (((struct rtable *)skb->dst)->rt_flags & + (RTCF_BROADCAST | RTCF_MULTICAST)) + goto drop; + + /* + * TW buckets are converted to open requests without + * limitations, they conserve resources and peer is + * evidently real one. + */ + if (inet_csk_reqsk_queue_is_full(sk)) + goto drop; + + /* + * Accept backlog is full. If we have already queued enough + * of warm entries in syn queue, drop request. It is better than + * clogging syn queue with openreqs with exponentially increasing + * timeout. 
+ */ + if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1) + goto drop; + + req = reqsk_alloc(sk->sk_prot->rsk_prot); + if (req == NULL) + goto drop; + + /* FIXME: process options */ + + dccp_openreq_init(req, &dp, skb); + + ireq = inet_rsk(req); + ireq->loc_addr = daddr; + ireq->rmt_addr = saddr; + /* FIXME: Merge Aristeu's option parsing code when ready */ + req->rcv_wnd = 100; /* Fake, option parsing will get the right value */ + ireq->opt = NULL; + + /* + * Step 3: Process LISTEN state + * + * Set S.ISR, S.GSR, S.SWL, S.SWH from packet or Init Cookie + * + * In fact we defer setting S.GSR, S.SWL, S.SWH to + * dccp_create_openreq_child. + */ + dreq = dccp_rsk(req); + dreq->dreq_isr = DCCP_SKB_CB(skb)->dccpd_seq; + dreq->dreq_iss = dccp_v4_init_sequence(sk, skb); + dreq->dreq_service = dccp_hdr_request(skb)->dccph_req_service; + + if (dccp_v4_send_response(sk, req, dst)) + goto drop_and_free; + + inet_csk_reqsk_queue_hash_add(sk, req, DCCP_TIMEOUT_INIT); + return 0; + +drop_and_free: + /* + * FIXME: should be reqsk_free after implementing req->rsk_ops + */ + __reqsk_free(req); +drop: + DCCP_INC_STATS_BH(DCCP_MIB_ATTEMPTFAILS); + return -1; +} + +/* + * The three way handshake has completed - we got a valid ACK or DATAACK - + * now create the new socket. 
+ * + * This is the equivalent of TCP's tcp_v4_syn_recv_sock + */ +struct sock *dccp_v4_request_recv_sock(struct sock *sk, struct sk_buff *skb, + struct request_sock *req, + struct dst_entry *dst) +{ + struct inet_request_sock *ireq; + struct inet_sock *newinet; + struct dccp_sock *newdp; + struct sock *newsk; + + if (sk_acceptq_is_full(sk)) + goto exit_overflow; + + if (dst == NULL && (dst = inet_csk_route_req(sk, req)) == NULL) + goto exit; + + newsk = dccp_create_openreq_child(sk, req, skb); + if (newsk == NULL) + goto exit; + + sk_setup_caps(newsk, dst); + + newdp = dccp_sk(newsk); + newinet = inet_sk(newsk); + ireq = inet_rsk(req); + newinet->daddr = ireq->rmt_addr; + newinet->rcv_saddr = ireq->loc_addr; + newinet->saddr = ireq->loc_addr; + newinet->opt = ireq->opt; + ireq->opt = NULL; + newinet->mc_index = inet_iif(skb); + newinet->mc_ttl = skb->nh.iph->ttl; + newinet->id = jiffies; + + dccp_sync_mss(newsk, dst_mtu(dst)); + + __inet_hash(&dccp_hashinfo, newsk, 0); + __inet_inherit_port(&dccp_hashinfo, sk, newsk); + + return newsk; + +exit_overflow: + NET_INC_STATS_BH(LINUX_MIB_LISTENOVERFLOWS); +exit: + NET_INC_STATS_BH(LINUX_MIB_LISTENDROPS); + dst_release(dst); + return NULL; +} + +static struct sock *dccp_v4_hnd_req(struct sock *sk, struct sk_buff *skb) +{ + const struct dccp_hdr *dh = dccp_hdr(skb); + const struct iphdr *iph = skb->nh.iph; + struct sock *nsk; + struct request_sock **prev; + /* Find possible connection requests. 
*/ + struct request_sock *req = inet_csk_search_req(sk, &prev, + dh->dccph_sport, + iph->saddr, iph->daddr); + if (req != NULL) + return dccp_check_req(sk, skb, req, prev); + + nsk = __inet_lookup_established(&dccp_hashinfo, + iph->saddr, dh->dccph_sport, + iph->daddr, ntohs(dh->dccph_dport), + inet_iif(skb)); + if (nsk != NULL) { + if (nsk->sk_state != DCCP_TIME_WAIT) { + bh_lock_sock(nsk); + return nsk; + } + inet_twsk_put((struct inet_timewait_sock *)nsk); + return NULL; + } + + return sk; +} + +int dccp_v4_checksum(struct sk_buff *skb) +{ + struct dccp_hdr* dh = dccp_hdr(skb); + int checksum_len; + u32 tmp; + + if (dh->dccph_cscov == 0) + checksum_len = skb->len; + else { + checksum_len = (dh->dccph_cscov + dh->dccph_x) * sizeof(u32); + checksum_len = checksum_len < skb->len ? checksum_len : skb->len; + } + + tmp = csum_partial((unsigned char *)dh, checksum_len, 0); + return csum_fold(tmp); +} + +static int dccp_v4_verify_checksum(struct sk_buff *skb) +{ + struct dccp_hdr *th = dccp_hdr(skb); + const u16 remote_checksum = th->dccph_checksum; + u16 local_checksum; + + /* FIXME: don't mess with skb payload */ + th->dccph_checksum = 0; /* zero it for computation */ + + local_checksum = dccp_v4_checksum(skb); + + /* FIXME: don't mess with skb payload */ + th->dccph_checksum = remote_checksum; /* put it back */ + + return remote_checksum == local_checksum ? 
0 : -1; +} + +static struct dst_entry* dccp_v4_route_skb(struct sock *sk, + struct sk_buff *skb) +{ + struct rtable *rt; + struct flowi fl = { .oif = ((struct rtable *)skb->dst)->rt_iif, + .nl_u = { .ip4_u = + { .daddr = skb->nh.iph->saddr, + .saddr = skb->nh.iph->daddr, + .tos = RT_CONN_FLAGS(sk) } }, + .proto = sk->sk_protocol, + .uli_u = { .ports = + { .sport = dccp_hdr(skb)->dccph_dport, + .dport = dccp_hdr(skb)->dccph_sport } } }; + + if (ip_route_output_flow(&rt, &fl, sk, 0)) { + IP_INC_STATS_BH(IPSTATS_MIB_OUTNOROUTES); + return NULL; + } + + return &rt->u.dst; +} + +void dccp_v4_ctl_send_reset(struct sk_buff *rxskb) +{ + int err; + struct dccp_hdr *rxdh = dccp_hdr(rxskb), *dh; + const int dccp_hdr_reset_len = sizeof(struct dccp_hdr) + + sizeof(struct dccp_hdr_ext) + + sizeof(struct dccp_hdr_reset); + struct sk_buff *skb; + struct dst_entry *dst; + + /* Never send a reset in response to a reset. */ + if (rxdh->dccph_type == DCCP_PKT_RESET) + return; + + if (((struct rtable *)rxskb->dst)->rt_type != RTN_LOCAL) + return; + + dst = dccp_v4_route_skb(dccp_ctl_socket->sk, rxskb); + if (dst == NULL) + return; + + skb = alloc_skb(MAX_DCCP_HEADER + 15, GFP_ATOMIC); + if (skb == NULL) + goto out; + + /* Reserve space for headers. */ + skb_reserve(skb, MAX_DCCP_HEADER); + skb->dst = dst_clone(dst); + + skb->h.raw = skb_push(skb, dccp_hdr_reset_len); + dh = dccp_hdr(skb); + memset(dh, 0, dccp_hdr_reset_len); + + /* Build DCCP header and checksum it. 
*/ + dh->dccph_type = DCCP_PKT_RESET; + dh->dccph_sport = rxdh->dccph_dport; + dh->dccph_dport = rxdh->dccph_sport; + dh->dccph_doff = dccp_hdr_reset_len / 4; + dh->dccph_x = 1; + dccp_hdr_reset(skb)->dccph_reset_code = DCCP_SKB_CB(rxskb)->dccpd_reset_code; + + dccp_hdr_set_seq(dh, DCCP_SKB_CB(rxskb)->dccpd_ack_seq); + dccp_hdr_set_ack(dccp_hdr_ack_bits(skb), DCCP_SKB_CB(rxskb)->dccpd_seq); + + dh->dccph_checksum = dccp_v4_checksum(skb); + + bh_lock_sock(dccp_ctl_socket->sk); + err = ip_build_and_send_pkt(skb, dccp_ctl_socket->sk, + rxskb->nh.iph->daddr, rxskb->nh.iph->saddr, NULL); + bh_unlock_sock(dccp_ctl_socket->sk); + + if (err == NET_XMIT_CN || err == 0) { + DCCP_INC_STATS_BH(DCCP_MIB_OUTSEGS); + DCCP_INC_STATS_BH(DCCP_MIB_OUTRSTS); + } +out: + dst_release(dst); +} + +int dccp_v4_do_rcv(struct sock *sk, struct sk_buff *skb) +{ + struct dccp_hdr *dh = dccp_hdr(skb); + + if (sk->sk_state == DCCP_OPEN) { /* Fast path */ + if (dccp_rcv_established(sk, skb, dh, skb->len)) + goto reset; + return 0; + } + + /* + * Step 3: Process LISTEN state + * If S.state == LISTEN, + * If P.type == Request or P contains a valid Init Cookie option, + * * Must scan the packet's options to check for an Init + * Cookie. Only the Init Cookie is processed here, + * however; other options are processed in Step 8. 
This + * scan need only be performed if the endpoint uses Init + * Cookies * + * * Generate a new socket and switch to that socket * + * Set S := new socket for this port pair + * S.state = RESPOND + * Choose S.ISS (initial seqno) or set from Init Cookie + * Set S.ISR, S.GSR, S.SWL, S.SWH from packet or Init Cookie + * Continue with S.state == RESPOND + * * A Response packet will be generated in Step 11 * + * Otherwise, + * Generate Reset(No Connection) unless P.type == Reset + * Drop packet and return + * + * NOTE: the check for the packet types is done in dccp_rcv_state_process + */ + if (sk->sk_state == DCCP_LISTEN) { + struct sock *nsk = dccp_v4_hnd_req(sk, skb); + + if (nsk == NULL) + goto discard; + + if (nsk != sk) { + if (dccp_child_process(sk, nsk, skb)) + goto reset; + return 0; + } + } + + if (dccp_rcv_state_process(sk, skb, dh, skb->len)) + goto reset; + return 0; + +reset: + DCCP_SKB_CB(skb)->dccpd_reset_code = DCCP_RESET_CODE_NO_CONNECTION; + dccp_v4_ctl_send_reset(skb); +discard: + kfree_skb(skb); + return 0; +} + +static inline int dccp_invalid_packet(struct sk_buff *skb) +{ + const struct dccp_hdr *dh; + + if (skb->pkt_type != PACKET_HOST) + return 1; + + if (!pskb_may_pull(skb, sizeof(struct dccp_hdr))) { + dccp_pr_debug("pskb_may_pull failed\n"); + return 1; + } + + dh = dccp_hdr(skb); + + /* If the packet type is not understood, drop packet and return */ + if (dh->dccph_type >= DCCP_PKT_INVALID) { + dccp_pr_debug("invalid packet type\n"); + return 1; + } + + /* + * If P.Data Offset is too small for packet type, or too large for + * packet, drop packet and return + */ + if (dh->dccph_doff < dccp_hdr_len(skb) / sizeof(u32)) { + dccp_pr_debug("Offset(%u) too small 1\n", dh->dccph_doff); + return 1; + } + + if (!pskb_may_pull(skb, dh->dccph_doff * sizeof(u32))) { + dccp_pr_debug("P.Data Offset(%u) too small 2\n", dh->dccph_doff); + return 1; + } + + dh = dccp_hdr(skb); + + /* + * If P.type is not Data, Ack, or DataAck and P.X == 0 (the packet + * 
has short sequence numbers), drop packet and return + */ + if (dh->dccph_x == 0 && + dh->dccph_type != DCCP_PKT_DATA && + dh->dccph_type != DCCP_PKT_ACK && + dh->dccph_type != DCCP_PKT_DATAACK) { + dccp_pr_debug("P.type (%s) not Data, Ack nor DataAck and P.X == 0\n", + dccp_packet_name(dh->dccph_type)); + return 1; + } + + /* If the header checksum is incorrect, drop packet and return */ + if (dccp_v4_verify_checksum(skb) < 0) { + dccp_pr_debug("header checksum is incorrect\n"); + return 1; + } + + return 0; +} + +/* this is called when real data arrives */ +int dccp_v4_rcv(struct sk_buff *skb) +{ + const struct dccp_hdr *dh; + struct sock *sk; + int rc; + + /* Step 1: Check header basics: */ + + if (dccp_invalid_packet(skb)) + goto discard_it; + + dh = dccp_hdr(skb); +#if 0 + /* + * Use something like this to simulate some DATA/DATAACK loss to test + * dccp_ackpkts_add, you'll get something like this on a session that + * sends 10 DATA/DATAACK packets: + * + * dccp_ackpkts_print: 281473596467422 |0,0|3,0|0,0|3,0|0,0|3,0|0,0|3,0|0,1| + * + * 0, 0 means: DCCP_ACKPKTS_STATE_RECEIVED, RLE == just this packet + * 0, 1 means: DCCP_ACKPKTS_STATE_RECEIVED, RLE == two adjacent packets with the same state + * 3, 0 means: DCCP_ACKPKTS_STATE_NOT_RECEIVED, RLE == just this packet + * + * So... 
+ * + * 281473596467422 was received + * 281473596467421 was not received + * 281473596467420 was received + * 281473596467419 was not received + * 281473596467418 was received + * 281473596467417 was not received + * 281473596467416 was received + * 281473596467415 was not received + * 281473596467414 was received + * 281473596467413 was received (this one was the 3way handshake RESPONSE) + * + */ + if (dh->dccph_type == DCCP_PKT_DATA || dh->dccph_type == DCCP_PKT_DATAACK) { + static int discard = 0; + + if (discard) { + discard = 0; + goto discard_it; + } + discard = 1; + } +#endif + DCCP_SKB_CB(skb)->dccpd_seq = dccp_hdr_seq(skb); + DCCP_SKB_CB(skb)->dccpd_type = dh->dccph_type; + + dccp_pr_debug("%8.8s " + "src=%u.%u.%u.%u@%-5d " + "dst=%u.%u.%u.%u@%-5d seq=%llu", + dccp_packet_name(dh->dccph_type), + NIPQUAD(skb->nh.iph->saddr), ntohs(dh->dccph_sport), + NIPQUAD(skb->nh.iph->daddr), ntohs(dh->dccph_dport), + DCCP_SKB_CB(skb)->dccpd_seq); + + if (dccp_packet_without_ack(skb)) { + DCCP_SKB_CB(skb)->dccpd_ack_seq = DCCP_PKT_WITHOUT_ACK_SEQ; + dccp_pr_debug_cat("\n"); + } else { + DCCP_SKB_CB(skb)->dccpd_ack_seq = dccp_hdr_ack_seq(skb); + dccp_pr_debug_cat(", ack=%llu\n", DCCP_SKB_CB(skb)->dccpd_ack_seq); + } + + /* Step 2: + * Look up flow ID in table and get corresponding socket */ + sk = __inet_lookup(&dccp_hashinfo, + skb->nh.iph->saddr, dh->dccph_sport, + skb->nh.iph->daddr, ntohs(dh->dccph_dport), + inet_iif(skb)); + + /* + * Step 2: + * If no socket ... + * Generate Reset(No Connection) unless P.type == Reset + * Drop packet and return + */ + if (sk == NULL) { + dccp_pr_debug("failed to look up flow ID in table and " + "get corresponding socket\n"); + goto no_dccp_socket; + } + + /* + * Step 2: + * ... 
or S.state == TIMEWAIT, + * Generate Reset(No Connection) unless P.type == Reset + * Drop packet and return + */ + + if (sk->sk_state == DCCP_TIME_WAIT) { + dccp_pr_debug("sk->sk_state == DCCP_TIME_WAIT: discard_and_relse\n"); + goto discard_and_relse; + } + + if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb)) { + dccp_pr_debug("xfrm4_policy_check failed\n"); + goto discard_and_relse; + } + + if (sk_filter(sk, skb, 0)) { + dccp_pr_debug("sk_filter failed\n"); + goto discard_and_relse; + } + + skb->dev = NULL; + + bh_lock_sock(sk); + rc = 0; + if (!sock_owned_by_user(sk)) + rc = dccp_v4_do_rcv(sk, skb); + else + sk_add_backlog(sk, skb); + bh_unlock_sock(sk); + + sock_put(sk); + return rc; + +no_dccp_socket: + if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) + goto discard_it; + /* + * Step 2: + * Generate Reset(No Connection) unless P.type == Reset + * Drop packet and return + */ + if (dh->dccph_type != DCCP_PKT_RESET) { + DCCP_SKB_CB(skb)->dccpd_reset_code = DCCP_RESET_CODE_NO_CONNECTION; + dccp_v4_ctl_send_reset(skb); + } + +discard_it: + /* Discard frame. */ + kfree_skb(skb); + return 0; + +discard_and_relse: + sock_put(sk); + goto discard_it; +} + +static int dccp_v4_init_sock(struct sock *sk) +{ + struct dccp_sock *dp = dccp_sk(sk); + static int dccp_ctl_socket_init = 1; + + dccp_options_init(&dp->dccps_options); + + if (dp->dccps_options.dccpo_send_ack_vector) { + dp->dccps_hc_rx_ackpkts = dccp_ackpkts_alloc(DCCP_MAX_ACK_VECTOR_LEN, + GFP_KERNEL); + + if (dp->dccps_hc_rx_ackpkts == NULL) + return -ENOMEM; + } + + /* + * FIXME: We're hardcoding the CCID, and doing this at this point makes + * the listening (master) sock get CCID control blocks, which is not + * necessary, but for now, to not mess with the test userspace apps, + * lets leave it here, later the real solution is to do this in a + * setsockopt(CCIDs-I-want/accept). 
-acme + */ + if (likely(!dccp_ctl_socket_init)) { + dp->dccps_hc_rx_ccid = ccid_init(dp->dccps_options.dccpo_ccid, sk); + dp->dccps_hc_tx_ccid = ccid_init(dp->dccps_options.dccpo_ccid, sk); + if (dp->dccps_hc_rx_ccid == NULL || + dp->dccps_hc_tx_ccid == NULL) { + ccid_exit(dp->dccps_hc_rx_ccid, sk); + ccid_exit(dp->dccps_hc_tx_ccid, sk); + dccp_ackpkts_free(dp->dccps_hc_rx_ackpkts); + dp->dccps_hc_rx_ackpkts = NULL; + dp->dccps_hc_rx_ccid = dp->dccps_hc_tx_ccid = NULL; + return -ENOMEM; + } + } else + dccp_ctl_socket_init = 0; + + dccp_init_xmit_timers(sk); + sk->sk_state = DCCP_CLOSED; + dp->dccps_mss_cache = 536; + dp->dccps_role = DCCP_ROLE_UNDEFINED; + + return 0; +} + +int dccp_v4_destroy_sock(struct sock *sk) +{ + struct dccp_sock *dp = dccp_sk(sk); + + /* + * DCCP doesn't use sk_write_queue, just sk_send_head + * for retransmissions + */ + if (sk->sk_send_head != NULL) { + kfree_skb(sk->sk_send_head); + sk->sk_send_head = NULL; + } + + /* Clean up a referenced DCCP bind bucket. */ + if (inet_csk(sk)->icsk_bind_hash != NULL) + inet_put_port(&dccp_hashinfo, sk); + + dccp_ackpkts_free(dp->dccps_hc_rx_ackpkts); + dp->dccps_hc_rx_ackpkts = NULL; + ccid_exit(dp->dccps_hc_rx_ccid, sk); + ccid_exit(dp->dccps_hc_tx_ccid, sk); + dp->dccps_hc_rx_ccid = dp->dccps_hc_tx_ccid = NULL; + + return 0; +} + +static void dccp_v4_reqsk_destructor(struct request_sock *req) +{ + kfree(inet_rsk(req)->opt); +} + +static struct request_sock_ops dccp_request_sock_ops = { + .family = PF_INET, + .obj_size = sizeof(struct dccp_request_sock), + .rtx_syn_ack = dccp_v4_send_response, + .send_ack = dccp_v4_reqsk_send_ack, + .destructor = dccp_v4_reqsk_destructor, + .send_reset = dccp_v4_ctl_send_reset, +}; + +struct proto dccp_v4_prot = { + .name = "DCCP", + .owner = THIS_MODULE, + .close = dccp_close, + .connect = dccp_v4_connect, + .disconnect = dccp_disconnect, + .ioctl = dccp_ioctl, + .init = dccp_v4_init_sock, + .setsockopt = dccp_setsockopt, + .getsockopt = dccp_getsockopt, + .sendmsg
= dccp_sendmsg, + .recvmsg = dccp_recvmsg, + .backlog_rcv = dccp_v4_do_rcv, + .hash = dccp_v4_hash, + .unhash = dccp_v4_unhash, + .accept = inet_csk_accept, + .get_port = dccp_v4_get_port, + .shutdown = dccp_shutdown, + .destroy = dccp_v4_destroy_sock, + .orphan_count = &dccp_orphan_count, + .max_header = MAX_DCCP_HEADER, + .obj_size = sizeof(struct dccp_sock), + .rsk_prot = &dccp_request_sock_ops, + .twsk_obj_size = sizeof(struct inet_timewait_sock), /* FIXME! create dccp_timewait_sock */ +}; diff --git a/net/dccp/minisocks.c b/net/dccp/minisocks.c new file mode 100644 index 000000000000..810f0c293b85 --- /dev/null +++ b/net/dccp/minisocks.c @@ -0,0 +1,199 @@ +/* + * net/dccp/minisocks.c + * + * An implementation of the DCCP protocol + * Arnaldo Carvalho de Melo + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include +#include +#include +#include + +#include +#include +#include + +#include "ccid.h" +#include "dccp.h" + +void dccp_time_wait(struct sock *sk, int state, int timeo) +{ + /* FIXME: Implement */ + dccp_pr_debug("Want to help? Start here\n"); + dccp_set_state(sk, state); +} + +/* This is for handling early-kills of TIME_WAIT sockets. */ +void dccp_tw_deschedule(struct inet_timewait_sock *tw) +{ + dccp_pr_debug("Want to help? 
Start here\n"); + __inet_twsk_kill(tw, &dccp_hashinfo); +} + +struct sock *dccp_create_openreq_child(struct sock *sk, + const struct request_sock *req, + const struct sk_buff *skb) +{ + /* + * Step 3: Process LISTEN state + * + * // Generate a new socket and switch to that socket + * Set S := new socket for this port pair + */ + struct sock *newsk = inet_csk_clone(sk, req, GFP_ATOMIC); + + if (newsk != NULL) { + const struct dccp_request_sock *dreq = dccp_rsk(req); + struct inet_connection_sock *newicsk = inet_csk(newsk); + struct dccp_sock *newdp = dccp_sk(newsk); + + newdp->dccps_hc_rx_ackpkts = NULL; + newdp->dccps_role = DCCP_ROLE_SERVER; + newicsk->icsk_rto = TCP_TIMEOUT_INIT; + + if (newdp->dccps_options.dccpo_send_ack_vector) { + newdp->dccps_hc_rx_ackpkts = dccp_ackpkts_alloc(DCCP_MAX_ACK_VECTOR_LEN, + GFP_ATOMIC); + /* + * XXX: We're using the same CCIDs set on the parent, i.e. sk_clone + * copied the master sock and left the CCID pointers for this child, + * that is why we do the __ccid_get calls.
+ */ + if (unlikely(newdp->dccps_hc_rx_ackpkts == NULL)) + goto out_free; + } + + if (unlikely(ccid_hc_rx_init(newdp->dccps_hc_rx_ccid, newsk) != 0 || + ccid_hc_tx_init(newdp->dccps_hc_tx_ccid, newsk) != 0)) { + dccp_ackpkts_free(newdp->dccps_hc_rx_ackpkts); + ccid_hc_rx_exit(newdp->dccps_hc_rx_ccid, newsk); + ccid_hc_tx_exit(newdp->dccps_hc_tx_ccid, newsk); +out_free: + /* It is still raw copy of parent, so invalidate + * destructor and make plain sk_free() */ + newsk->sk_destruct = NULL; + sk_free(newsk); + return NULL; + } + + __ccid_get(newdp->dccps_hc_rx_ccid); + __ccid_get(newdp->dccps_hc_tx_ccid); + + /* + * Step 3: Process LISTEN state + * + * Choose S.ISS (initial seqno) or set from Init Cookie + * Set S.ISR, S.GSR, S.SWL, S.SWH from packet or Init Cookie + */ + + /* See dccp_v4_conn_request */ + newdp->dccps_options.dccpo_sequence_window = req->rcv_wnd; + + newdp->dccps_gar = newdp->dccps_isr = dreq->dreq_isr; + dccp_update_gsr(newsk, dreq->dreq_isr); + + newdp->dccps_iss = dreq->dreq_iss; + dccp_update_gss(newsk, dreq->dreq_iss); + + dccp_init_xmit_timers(newsk); + + DCCP_INC_STATS_BH(DCCP_MIB_PASSIVEOPENS); + } + return newsk; +} + +/* + * Process an incoming packet for RESPOND sockets represented + * as an request_sock. 
+ */ +struct sock *dccp_check_req(struct sock *sk, struct sk_buff *skb, + struct request_sock *req, + struct request_sock **prev) +{ + struct sock *child = NULL; + + /* Check for retransmitted REQUEST */ + if (dccp_hdr(skb)->dccph_type == DCCP_PKT_REQUEST) { + if (after48(DCCP_SKB_CB(skb)->dccpd_seq, dccp_rsk(req)->dreq_isr)) { + struct dccp_request_sock *dreq = dccp_rsk(req); + + dccp_pr_debug("Retransmitted REQUEST\n"); + /* Send another RESPONSE packet */ + dccp_set_seqno(&dreq->dreq_iss, dreq->dreq_iss + 1); + dccp_set_seqno(&dreq->dreq_isr, DCCP_SKB_CB(skb)->dccpd_seq); + req->rsk_ops->rtx_syn_ack(sk, req, NULL); + } + /* Network Duplicate, discard packet */ + return NULL; + } + + DCCP_SKB_CB(skb)->dccpd_reset_code = DCCP_RESET_CODE_PACKET_ERROR; + + if (dccp_hdr(skb)->dccph_type != DCCP_PKT_ACK && + dccp_hdr(skb)->dccph_type != DCCP_PKT_DATAACK) + goto drop; + + /* Invalid ACK */ + if (DCCP_SKB_CB(skb)->dccpd_ack_seq != dccp_rsk(req)->dreq_iss) { + dccp_pr_debug("Invalid ACK number: ack_seq=%llu, dreq_iss=%llu\n", + DCCP_SKB_CB(skb)->dccpd_ack_seq, dccp_rsk(req)->dreq_iss); + goto drop; + } + + child = dccp_v4_request_recv_sock(sk, skb, req, NULL); + if (child == NULL) + goto listen_overflow; + + /* FIXME: deal with options */ + + inet_csk_reqsk_queue_unlink(sk, req, prev); + inet_csk_reqsk_queue_removed(sk, req); + inet_csk_reqsk_queue_add(sk, req, child); +out: + return child; +listen_overflow: + dccp_pr_debug("listen_overflow!\n"); + DCCP_SKB_CB(skb)->dccpd_reset_code = DCCP_RESET_CODE_TOO_BUSY; +drop: + if (dccp_hdr(skb)->dccph_type != DCCP_PKT_RESET) + req->rsk_ops->send_reset(skb); + + inet_csk_reqsk_queue_drop(sk, req, prev); + goto out; +} + +/* + * Queue segment on the new socket if the new socket is active, + * otherwise we just shortcircuit this and continue with + * the new socket. 
+ */ +int dccp_child_process(struct sock *parent, struct sock *child, + struct sk_buff *skb) +{ + int ret = 0; + const int state = child->sk_state; + + if (!sock_owned_by_user(child)) { + ret = dccp_rcv_state_process(child, skb, dccp_hdr(skb), skb->len); + + /* Wakeup parent, send SIGIO */ + if (state == DCCP_RESPOND && child->sk_state != state) + parent->sk_data_ready(parent, 0); + } else { + /* Alas, it is possible again, because we do lookup + * in main socket hash table and lock on listening + * socket does not protect us more. + */ + sk_add_backlog(child, skb); + } + + bh_unlock_sock(child); + sock_put(child); + return ret; +} diff --git a/net/dccp/options.c b/net/dccp/options.c new file mode 100644 index 000000000000..e1867767946c --- /dev/null +++ b/net/dccp/options.c @@ -0,0 +1,763 @@ +/* + * net/dccp/options.c + * + * An implementation of the DCCP protocol + * Aristeu Sergio Rozanski Filho + * Arnaldo Carvalho de Melo + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ +#include +#include +#include +#include +#include +#include + +#include "ccid.h" +#include "dccp.h" + +static void dccp_ackpkts_check_rcv_ackvector(struct dccp_ackpkts *ap, + struct sock *sk, + const u64 ackno, + const unsigned char len, + const unsigned char *vector); + +/* stores the default values for new connection. 
may be changed with sysctl */ +static const struct dccp_options dccpo_default_values = { + .dccpo_sequence_window = DCCPF_INITIAL_SEQUENCE_WINDOW, + .dccpo_ccid = DCCPF_INITIAL_CCID, + .dccpo_send_ack_vector = DCCPF_INITIAL_SEND_ACK_VECTOR, + .dccpo_send_ndp_count = DCCPF_INITIAL_SEND_NDP_COUNT, +}; + +void dccp_options_init(struct dccp_options *dccpo) +{ + memcpy(dccpo, &dccpo_default_values, sizeof(*dccpo)); +} + +static u32 dccp_decode_value_var(const unsigned char *bf, const u8 len) +{ + u32 value = 0; + + if (len > 3) + value += *bf++ << 24; + if (len > 2) + value += *bf++ << 16; + if (len > 1) + value += *bf++ << 8; + if (len > 0) + value += *bf; + + return value; +} + +int dccp_parse_options(struct sock *sk, struct sk_buff *skb) +{ + struct dccp_sock *dp = dccp_sk(sk); +#ifdef DCCP_DEBUG + const char *debug_prefix = dp->dccps_role == DCCP_ROLE_CLIENT ? "CLIENT rx opt: " : + "server rx opt: "; +#endif + const struct dccp_hdr *dh = dccp_hdr(skb); + const u8 pkt_type = DCCP_SKB_CB(skb)->dccpd_type; + unsigned char *options = (unsigned char *)dh + dccp_hdr_len(skb); + unsigned char *opt_ptr = options; + const unsigned char *opt_end = (unsigned char *)dh + (dh->dccph_doff * 4); + struct dccp_options_received *opt_recv = &dp->dccps_options_received; + unsigned char opt, len; + unsigned char *value; + + memset(opt_recv, 0, sizeof(*opt_recv)); + + while (opt_ptr != opt_end) { + opt = *opt_ptr++; + len = 0; + value = NULL; + + /* Check if this isn't a single byte option */ + if (opt > DCCPO_MAX_RESERVED) { + if (opt_ptr == opt_end) + goto out_invalid_option; + + len = *opt_ptr++; + if (len < 3) + goto out_invalid_option; + /* + * Remove the type and len fields, leaving + * just the value size + */ + len -= 2; + value = opt_ptr; + opt_ptr += len; + + if (opt_ptr > opt_end) + goto out_invalid_option; + } + + switch (opt) { + case DCCPO_PADDING: + break; + case DCCPO_NDP_COUNT: + if (len > 3) + goto out_invalid_option; + + opt_recv->dccpor_ndp = 
dccp_decode_value_var(value, len); + dccp_pr_debug("%sNDP count=%d\n", debug_prefix, opt_recv->dccpor_ndp); + break; + case DCCPO_ACK_VECTOR_0: + if (len > DCCP_MAX_ACK_VECTOR_LEN) + goto out_invalid_option; + + if (pkt_type == DCCP_PKT_DATA) + continue; + + opt_recv->dccpor_ack_vector_len = len; + opt_recv->dccpor_ack_vector_idx = value - options; + + dccp_pr_debug("%sACK vector 0, len=%d, ack_ackno=%llu\n", + debug_prefix, len, DCCP_SKB_CB(skb)->dccpd_ack_seq); + dccp_ackvector_print(DCCP_SKB_CB(skb)->dccpd_ack_seq, + value, len); + dccp_ackpkts_check_rcv_ackvector(dp->dccps_hc_rx_ackpkts, sk, + DCCP_SKB_CB(skb)->dccpd_ack_seq, + len, value); + break; + case DCCPO_TIMESTAMP: + if (len != 4) + goto out_invalid_option; + + opt_recv->dccpor_timestamp = ntohl(*(u32 *)value); + + dp->dccps_timestamp_echo = opt_recv->dccpor_timestamp; + dp->dccps_timestamp_time = jiffies; + + dccp_pr_debug("%sTIMESTAMP=%u, ackno=%llu\n", + debug_prefix, opt_recv->dccpor_timestamp, + DCCP_SKB_CB(skb)->dccpd_ack_seq); + break; + case DCCPO_TIMESTAMP_ECHO: + if (len < 4 || len > 8) + goto out_invalid_option; + + opt_recv->dccpor_timestamp_echo = ntohl(*(u32 *)value); + + dccp_pr_debug("%sTIMESTAMP_ECHO=%u, len=%d, ackno=%llu, diff=%u\n", + debug_prefix, opt_recv->dccpor_timestamp_echo, + len + 2, DCCP_SKB_CB(skb)->dccpd_ack_seq, + tcp_time_stamp - opt_recv->dccpor_timestamp_echo); + + opt_recv->dccpor_elapsed_time = dccp_decode_value_var(value + 4, len - 4); + dccp_pr_debug("%sTIMESTAMP_ECHO ELAPSED_TIME=%d\n", debug_prefix, + opt_recv->dccpor_elapsed_time); + break; + case DCCPO_ELAPSED_TIME: + if (len > 4) + goto out_invalid_option; + + if (pkt_type == DCCP_PKT_DATA) + continue; + opt_recv->dccpor_elapsed_time = dccp_decode_value_var(value, len); + dccp_pr_debug("%sELAPSED_TIME=%d\n", debug_prefix, + opt_recv->dccpor_elapsed_time); + break; + /* + * From draft-ietf-dccp-spec-11.txt: + * + * Option numbers 128 through 191 are for options sent from the HC- + * Sender to the HC-Receiver; 
option numbers 192 through 255 are for + * options sent from the HC-Receiver to the HC-Sender. + */ + case 128 ... 191: { + const u16 idx = value - options; + + if (ccid_hc_rx_parse_options(dp->dccps_hc_rx_ccid, sk, opt, len, idx, value) != 0) + goto out_invalid_option; + } + break; + case 192 ... 255: { + const u16 idx = value - options; + + if (ccid_hc_tx_parse_options(dp->dccps_hc_tx_ccid, sk, opt, len, idx, value) != 0) + goto out_invalid_option; + } + break; + default: + pr_info("DCCP(%p): option %d(len=%d) not implemented, ignoring\n", + sk, opt, len); + break; + } + } + + return 0; + +out_invalid_option: + DCCP_INC_STATS_BH(DCCP_MIB_INVALIDOPT); + DCCP_SKB_CB(skb)->dccpd_reset_code = DCCP_RESET_CODE_OPTION_ERROR; + pr_info("DCCP(%p): invalid option %d, len=%d\n", sk, opt, len); + return -1; +} + +static void dccp_encode_value_var(const u32 value, unsigned char *to, + const unsigned int len) +{ + if (len > 3) + *to++ = (value & 0xFF000000) >> 24; + if (len > 2) + *to++ = (value & 0xFF0000) >> 16; + if (len > 1) + *to++ = (value & 0xFF00) >> 8; + if (len > 0) + *to++ = (value & 0xFF); +} + +static inline int dccp_ndp_len(const int ndp) +{ + return likely(ndp <= 0xFF) ? 1 : ndp <= 0xFFFF ? 
2 : 3; +} + +void dccp_insert_option(struct sock *sk, struct sk_buff *skb, + const unsigned char option, + const void *value, const unsigned char len) +{ + unsigned char *to; + + if (DCCP_SKB_CB(skb)->dccpd_opt_len + len + 2 > DCCP_MAX_OPT_LEN) { + LIMIT_NETDEBUG(pr_info("DCCP: packet too small to insert %d option!\n", option)); + return; + } + + DCCP_SKB_CB(skb)->dccpd_opt_len += len + 2; + + to = skb_push(skb, len + 2); + *to++ = option; + *to++ = len + 2; + + memcpy(to, value, len); +} + +EXPORT_SYMBOL_GPL(dccp_insert_option); + +static void dccp_insert_option_ndp(struct sock *sk, struct sk_buff *skb) +{ + struct dccp_sock *dp = dccp_sk(sk); + int ndp = dp->dccps_ndp_count; + + if (dccp_non_data_packet(skb)) + ++dp->dccps_ndp_count; + else + dp->dccps_ndp_count = 0; + + if (ndp > 0) { + unsigned char *ptr; + const int ndp_len = dccp_ndp_len(ndp); + const int len = ndp_len + 2; + + if (DCCP_SKB_CB(skb)->dccpd_opt_len + len > DCCP_MAX_OPT_LEN) + return; + + DCCP_SKB_CB(skb)->dccpd_opt_len += len; + + ptr = skb_push(skb, len); + *ptr++ = DCCPO_NDP_COUNT; + *ptr++ = len; + dccp_encode_value_var(ndp, ptr, ndp_len); + } +} + +static inline int dccp_elapsed_time_len(const u32 elapsed_time) +{ + return elapsed_time == 0 ? 0 : + elapsed_time <= 0xFF ? 1 : + elapsed_time <= 0xFFFF ? 2 : + elapsed_time <= 0xFFFFFF ? 3 : 4; +} + +void dccp_insert_option_elapsed_time(struct sock *sk, + struct sk_buff *skb, + u32 elapsed_time) +{ +#ifdef DCCP_DEBUG + struct dccp_sock *dp = dccp_sk(sk); + const char *debug_prefix = dp->dccps_role == DCCP_ROLE_CLIENT ? "CLIENT TX opt: " : + "server TX opt: "; +#endif + const int elapsed_time_len = dccp_elapsed_time_len(elapsed_time); + const int len = 2 + elapsed_time_len; + unsigned char *to; + + /* If elapsed_time == 0... 
*/ + if (elapsed_time_len == 2) + return; + + if (DCCP_SKB_CB(skb)->dccpd_opt_len + len > DCCP_MAX_OPT_LEN) { + LIMIT_NETDEBUG(pr_info("DCCP: packet too small to insert elapsed time!\n")); + return; + } + + DCCP_SKB_CB(skb)->dccpd_opt_len += len; + + to = skb_push(skb, len); + *to++ = DCCPO_ELAPSED_TIME; + *to++ = len; + + dccp_encode_value_var(elapsed_time, to, elapsed_time_len); + + dccp_pr_debug("%sELAPSED_TIME=%u, len=%d, seqno=%llu\n", + debug_prefix, elapsed_time, + len, DCCP_SKB_CB(skb)->dccpd_seq); +} + +EXPORT_SYMBOL(dccp_insert_option_elapsed_time); + +static void dccp_insert_option_ack_vector(struct sock *sk, struct sk_buff *skb) +{ + struct dccp_sock *dp = dccp_sk(sk); +#ifdef DCCP_DEBUG + const char *debug_prefix = dp->dccps_role == DCCP_ROLE_CLIENT ? "CLIENT TX opt: " : + "server TX opt: "; +#endif + struct dccp_ackpkts *ap = dp->dccps_hc_rx_ackpkts; + int len = ap->dccpap_buf_vector_len + 2; + const u32 elapsed_time = jiffies_to_usecs(jiffies - ap->dccpap_time) / 10; + unsigned char *to, *from; + + if (elapsed_time != 0) + dccp_insert_option_elapsed_time(sk, skb, elapsed_time); + + if (DCCP_SKB_CB(skb)->dccpd_opt_len + len > DCCP_MAX_OPT_LEN) { + LIMIT_NETDEBUG(pr_info("DCCP: packet too small to insert ACK Vector!\n")); + return; + } + + /* + * XXX: now we have just one ack vector sent record, so + * we have to wait for it to be cleared. + * + * Of course this is not acceptable, but this is just for + * basic testing now. 
+ */ + if (ap->dccpap_ack_seqno != DCCP_MAX_SEQNO + 1) + return; + + DCCP_SKB_CB(skb)->dccpd_opt_len += len; + + to = skb_push(skb, len); + *to++ = DCCPO_ACK_VECTOR_0; + *to++ = len; + + len = ap->dccpap_buf_vector_len; + from = ap->dccpap_buf + ap->dccpap_buf_head; + + /* Check if buf_head wraps */ + if (ap->dccpap_buf_head + len > ap->dccpap_buf_len) { + const unsigned int tailsize = ap->dccpap_buf_len - ap->dccpap_buf_head; + + memcpy(to, from, tailsize); + to += tailsize; + len -= tailsize; + from = ap->dccpap_buf; + } + + memcpy(to, from, len); + /* + * From draft-ietf-dccp-spec-11.txt: + * + * For each acknowledgement it sends, the HC-Receiver will add an + * acknowledgement record. ack_seqno will equal the HC-Receiver + * sequence number it used for the ack packet; ack_ptr will equal + * buf_head; ack_ackno will equal buf_ackno; and ack_nonce will equal + * buf_nonce. + * + * This implemention uses just one ack record for now. + */ + ap->dccpap_ack_seqno = DCCP_SKB_CB(skb)->dccpd_seq; + ap->dccpap_ack_ptr = ap->dccpap_buf_head; + ap->dccpap_ack_ackno = ap->dccpap_buf_ackno; + ap->dccpap_ack_nonce = ap->dccpap_buf_nonce; + ap->dccpap_ack_vector_len = ap->dccpap_buf_vector_len; + + dccp_pr_debug("%sACK Vector 0, len=%d, ack_seqno=%llu, ack_ackno=%llu\n", + debug_prefix, ap->dccpap_ack_vector_len, + ap->dccpap_ack_seqno, ap->dccpap_ack_ackno); +} + +static inline void dccp_insert_option_timestamp(struct sock *sk, struct sk_buff *skb) +{ + const u32 now = htonl(tcp_time_stamp); + dccp_insert_option(sk, skb, DCCPO_TIMESTAMP, &now, sizeof(now)); +} + +static void dccp_insert_option_timestamp_echo(struct sock *sk, struct sk_buff *skb) +{ + struct dccp_sock *dp = dccp_sk(sk); +#ifdef DCCP_DEBUG + const char *debug_prefix = dp->dccps_role == DCCP_ROLE_CLIENT ? 
"CLIENT TX opt: " : + "server TX opt: "; +#endif + u32 tstamp_echo; + const u32 elapsed_time = jiffies_to_usecs(jiffies - dp->dccps_timestamp_time) / 10; + const int elapsed_time_len = dccp_elapsed_time_len(elapsed_time); + const int len = 6 + elapsed_time_len; + unsigned char *to; + + if (DCCP_SKB_CB(skb)->dccpd_opt_len + len > DCCP_MAX_OPT_LEN) { + LIMIT_NETDEBUG(pr_info("DCCP: packet too small to insert timestamp echo!\n")); + return; + } + + DCCP_SKB_CB(skb)->dccpd_opt_len += len; + + to = skb_push(skb, len); + *to++ = DCCPO_TIMESTAMP_ECHO; + *to++ = len; + + tstamp_echo = htonl(dp->dccps_timestamp_echo); + memcpy(to, &tstamp_echo, 4); + to += 4; + dccp_encode_value_var(elapsed_time, to, elapsed_time_len); + + dccp_pr_debug("%sTIMESTAMP_ECHO=%u, len=%d, seqno=%llu\n", + debug_prefix, dp->dccps_timestamp_echo, + len, DCCP_SKB_CB(skb)->dccpd_seq); + + dp->dccps_timestamp_echo = 0; + dp->dccps_timestamp_time = 0; +} + +void dccp_insert_options(struct sock *sk, struct sk_buff *skb) +{ + struct dccp_sock *dp = dccp_sk(sk); + + DCCP_SKB_CB(skb)->dccpd_opt_len = 0; + + if (dp->dccps_options.dccpo_send_ndp_count) + dccp_insert_option_ndp(sk, skb); + + if (!dccp_packet_without_ack(skb)) { + if (dp->dccps_options.dccpo_send_ack_vector && + dp->dccps_hc_rx_ackpkts->dccpap_buf_ackno != DCCP_MAX_SEQNO + 1) + dccp_insert_option_ack_vector(sk, skb); + + dccp_insert_option_timestamp(sk, skb); + if (dp->dccps_timestamp_echo != 0) + dccp_insert_option_timestamp_echo(sk, skb); + } + + ccid_hc_rx_insert_options(dp->dccps_hc_rx_ccid, sk, skb); + ccid_hc_tx_insert_options(dp->dccps_hc_tx_ccid, sk, skb); + + /* XXX: insert other options when appropriate */ + + if (DCCP_SKB_CB(skb)->dccpd_opt_len != 0) { + /* The length of all options has to be a multiple of 4 */ + int padding = DCCP_SKB_CB(skb)->dccpd_opt_len % 4; + + if (padding != 0) { + padding = 4 - padding; + memset(skb_push(skb, padding), 0, padding); + DCCP_SKB_CB(skb)->dccpd_opt_len += padding; + } + } +} + +struct 
dccp_ackpkts *dccp_ackpkts_alloc(unsigned int len, int priority) +{ + struct dccp_ackpkts *ap = kmalloc(sizeof(*ap) + len, priority); + + if (ap != NULL) { +#ifdef DCCP_DEBUG + memset(ap->dccpap_buf, 0xFF, len); +#endif + ap->dccpap_buf_len = len; + ap->dccpap_buf_head = ap->dccpap_buf_tail = ap->dccpap_buf_len - 1; + ap->dccpap_buf_ackno = ap->dccpap_ack_ackno = ap->dccpap_ack_seqno = DCCP_MAX_SEQNO + 1; + ap->dccpap_buf_nonce = ap->dccpap_buf_nonce = 0; + ap->dccpap_ack_ptr = 0; + ap->dccpap_time = 0; + ap->dccpap_buf_vector_len = ap->dccpap_ack_vector_len = 0; + } + + return ap; +} + +void dccp_ackpkts_free(struct dccp_ackpkts *ap) +{ + if (ap != NULL) { +#ifdef DCCP_DEBUG + memset(ap, 0xFF, sizeof(*ap) + ap->dccpap_buf_len); +#endif + kfree(ap); + } +} + +static inline u8 dccp_ackpkts_state(const struct dccp_ackpkts *ap, + const unsigned int index) +{ + return ap->dccpap_buf[index] & DCCP_ACKPKTS_STATE_MASK; +} + +static inline u8 dccp_ackpkts_len(const struct dccp_ackpkts *ap, + const unsigned int index) +{ + return ap->dccpap_buf[index] & DCCP_ACKPKTS_LEN_MASK; +} + +/* + * If several packets are missing, the HC-Receiver may prefer to enter multiple + * bytes with run length 0, rather than a single byte with a larger run length; + * this simplifies table updates if one of the missing packets arrives. 
+ */ +static inline int dccp_ackpkts_set_buf_head_state(struct dccp_ackpkts *ap, + const unsigned int packets, + const unsigned char state) +{ + unsigned int gap; + signed long new_head; + + if (ap->dccpap_buf_vector_len + packets > ap->dccpap_buf_len) + return -ENOBUFS; + + gap = packets - 1; + new_head = ap->dccpap_buf_head - packets; + + if (new_head < 0) { + if (gap > 0) { + memset(ap->dccpap_buf, DCCP_ACKPKTS_STATE_NOT_RECEIVED, + gap + new_head + 1); + gap = -new_head; + } + new_head += ap->dccpap_buf_len; + } + + ap->dccpap_buf_head = new_head; + + if (gap > 0) + memset(ap->dccpap_buf + ap->dccpap_buf_head + 1, + DCCP_ACKPKTS_STATE_NOT_RECEIVED, gap); + + ap->dccpap_buf[ap->dccpap_buf_head] = state; + ap->dccpap_buf_vector_len += packets; + return 0; +} + +/* + * Implements the draft-ietf-dccp-spec-11.txt Appendix A + */ +int dccp_ackpkts_add(struct dccp_ackpkts *ap, u64 ackno, u8 state) +{ + /* + * Check at the right places if the buffer is full, if it is, tell the + * caller to start dropping packets till the HC-Sender acks our ACK + * vectors, when we will free up space in dccpap_buf. + * + * We may well decide to do buffer compression, etc, but for now lets + * just drop. + * + * From Appendix A: + * + * Of course, the circular buffer may overflow, either when the HC- + * Sender is sending data at a very high rate, when the HC-Receiver's + * acknowledgements are not reaching the HC-Sender, or when the HC- + * Sender is forgetting to acknowledge those acks (so the HC-Receiver + * is unable to clean up old state). In this case, the HC-Receiver + * should either compress the buffer (by increasing run lengths when + * possible), transfer its state to a larger buffer, or, as a last + * resort, drop all received packets, without processing them + * whatsoever, until its buffer shrinks again. 
+ */ + + /* See if this is the first ackno being inserted */ + if (ap->dccpap_buf_vector_len == 0) { + ap->dccpap_buf[ap->dccpap_buf_head] = state; + ap->dccpap_buf_vector_len = 1; + } else if (after48(ackno, ap->dccpap_buf_ackno)) { + const u64 delta = dccp_delta_seqno(ap->dccpap_buf_ackno, ackno); + + /* + * Look if the state of this packet is the same as the previous ackno + * and if so if we can bump the head len. + */ + if (delta == 1 && + dccp_ackpkts_state(ap, ap->dccpap_buf_head) == state && + dccp_ackpkts_len(ap, ap->dccpap_buf_head) < DCCP_ACKPKTS_LEN_MASK) + ap->dccpap_buf[ap->dccpap_buf_head]++; + else if (dccp_ackpkts_set_buf_head_state(ap, delta, state)) + return -ENOBUFS; + } else { + /* + * A.1.2. Old Packets + * + * When a packet with Sequence Number S arrives, and S <= buf_ackno, + * the HC-Receiver will scan the table for the byte corresponding to S. + * (Indexing structures could reduce the complexity of this scan.) + */ + u64 delta = dccp_delta_seqno(ackno, ap->dccpap_buf_ackno); + unsigned int index = ap->dccpap_buf_head; + + while (1) { + const u8 len = dccp_ackpkts_len(ap, index); + const u8 state = dccp_ackpkts_state(ap, index); + /* + * valid packets not yet in dccpap_buf have a reserved entry, with + * a len equal to 0 + */ + if (state == DCCP_ACKPKTS_STATE_NOT_RECEIVED && + len == 0 && delta == 0) { /* Found our reserved seat! 
*/ + dccp_pr_debug("Found %llu reserved seat!\n", ackno); + ap->dccpap_buf[index] = state; + goto out; + } + /* len == 0 means one packet */ + if (delta < len + 1) + goto out_duplicate; + + delta -= len + 1; + if (++index == ap->dccpap_buf_len) + index = 0; + } + } + + ap->dccpap_buf_ackno = ackno; + ap->dccpap_time = jiffies; +out: + dccp_pr_debug(""); + dccp_ackpkts_print(ap); + return 0; + +out_duplicate: + /* Duplicate packet */ + dccp_pr_debug("Received a dup or already considered lost packet: %llu\n", ackno); + return -EILSEQ; +} + +#ifdef DCCP_DEBUG +void dccp_ackvector_print(const u64 ackno, const unsigned char *vector, int len) +{ + if (!dccp_debug) + return; + + printk("ACK vector len=%d, ackno=%llu |", len, ackno); + + while (len--) { + const u8 state = (*vector & DCCP_ACKPKTS_STATE_MASK) >> 6; + const u8 rl = (*vector & DCCP_ACKPKTS_LEN_MASK); + + printk("%d,%d|", state, rl); + ++vector; + } + + printk("\n"); +} + +void dccp_ackpkts_print(const struct dccp_ackpkts *ap) +{ + dccp_ackvector_print(ap->dccpap_buf_ackno, + ap->dccpap_buf + ap->dccpap_buf_head, + ap->dccpap_buf_vector_len); +} +#endif + +static void dccp_ackpkts_trow_away_ack_record(struct dccp_ackpkts *ap) +{ + /* + * As we're keeping track of the ack vector size + * (dccpap_buf_vector_len) and the sent ack vector size + * (dccpap_ack_vector_len) we don't need dccpap_buf_tail at all, but + * keep this code here as in the future we'll implement a vector of ack + * records, as suggested in draft-ietf-dccp-spec-11.txt Appendix A. 
-acme + */ +#if 0 + ap->dccpap_buf_tail = ap->dccpap_ack_ptr + 1; + if (ap->dccpap_buf_tail >= ap->dccpap_buf_len) + ap->dccpap_buf_tail -= ap->dccpap_buf_len; +#endif + ap->dccpap_buf_vector_len -= ap->dccpap_ack_vector_len; +} + +void dccp_ackpkts_check_rcv_ackno(struct dccp_ackpkts *ap, struct sock *sk, + u64 ackno) +{ + /* Check if we actually sent an ACK vector */ + if (ap->dccpap_ack_seqno == DCCP_MAX_SEQNO + 1) + return; + + if (ackno == ap->dccpap_ack_seqno) { +#ifdef DCCP_DEBUG + struct dccp_sock *dp = dccp_sk(sk); + const char *debug_prefix = dp->dccps_role == DCCP_ROLE_CLIENT ? "CLIENT rx ack: " : + "server rx ack: "; +#endif + dccp_pr_debug("%sACK packet 0, len=%d, ack_seqno=%llu, ack_ackno=%llu, ACKED!\n", + debug_prefix, 1, + ap->dccpap_ack_seqno, ap->dccpap_ack_ackno); + dccp_ackpkts_trow_away_ack_record(ap); + ap->dccpap_ack_seqno = DCCP_MAX_SEQNO + 1; + } +} + +static void dccp_ackpkts_check_rcv_ackvector(struct dccp_ackpkts *ap, + struct sock *sk, u64 ackno, + const unsigned char len, + const unsigned char *vector) +{ + unsigned char i; + + /* Check if we actually sent an ACK vector */ + if (ap->dccpap_ack_seqno == DCCP_MAX_SEQNO + 1) + return; + /* + * We're in the receiver half connection, so if the received an ACK vector + * ackno (e.g. 50) before dccpap_ack_seqno (e.g. 52), we're not interested. + * + * Extra explanation with example: + * + * if we received an ACK vector with ackno 50, it can only be acking + * 50, 49, 48, etc, not 52 (the seqno for the ACK vector we sent). + */ + // dccp_pr_debug("is %llu < %llu? ", ackno, ap->dccpap_ack_seqno); + if (before48(ackno, ap->dccpap_ack_seqno)) { + // dccp_pr_debug_cat("yes\n"); + return; + } + // dccp_pr_debug_cat("no\n"); + + i = len; + while (i--) { + const u8 rl = (*vector & DCCP_ACKPKTS_LEN_MASK); + u64 ackno_end_rl; + + dccp_set_seqno(&ackno_end_rl, ackno - rl); + + // dccp_pr_debug("is %llu <= %llu <= %llu? 
", ackno_end_rl, ap->dccpap_ack_seqno, ackno); + if (between48(ap->dccpap_ack_seqno, ackno_end_rl, ackno)) { + const u8 state = (*vector & DCCP_ACKPKTS_STATE_MASK) >> 6; + // dccp_pr_debug_cat("yes\n"); + + if (state != DCCP_ACKPKTS_STATE_NOT_RECEIVED) { +#ifdef DCCP_DEBUG + struct dccp_sock *dp = dccp_sk(sk); + const char *debug_prefix = dp->dccps_role == DCCP_ROLE_CLIENT ? "CLIENT rx ack: " : + "server rx ack: "; +#endif + dccp_pr_debug("%sACK vector 0, len=%d, ack_seqno=%llu, ack_ackno=%llu, ACKED!\n", + debug_prefix, len, + ap->dccpap_ack_seqno, ap->dccpap_ack_ackno); + dccp_ackpkts_trow_away_ack_record(ap); + } + /* + * If dccpap_ack_seqno was not received, no problem we'll + * send another ACK vector. + */ + ap->dccpap_ack_seqno = DCCP_MAX_SEQNO + 1; + break; + } + // dccp_pr_debug_cat("no\n"); + + dccp_set_seqno(&ackno, ackno_end_rl - 1); + ++vector; + } +} diff --git a/net/dccp/output.c b/net/dccp/output.c new file mode 100644 index 000000000000..22ca2910d4f2 --- /dev/null +++ b/net/dccp/output.c @@ -0,0 +1,406 @@ +/* + * net/dccp/output.c + * + * An implementation of the DCCP protocol + * Arnaldo Carvalho de Melo + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include +#include +#include + +#include + +#include "ccid.h" +#include "dccp.h" + +static inline void dccp_event_ack_sent(struct sock *sk) +{ + inet_csk_clear_xmit_timer(sk, ICSK_TIME_DACK); +} + +/* + * All SKB's seen here are completely headerless. It is our + * job to build the DCCP header, and pass the packet down to + * IP so it can do the same plus pass the packet off to the + * device. 
+ */ +int dccp_transmit_skb(struct sock *sk, struct sk_buff *skb) +{ + if (likely(skb != NULL)) { + const struct inet_sock *inet = inet_sk(sk); + struct dccp_sock *dp = dccp_sk(sk); + struct dccp_skb_cb *dcb = DCCP_SKB_CB(skb); + struct dccp_hdr *dh; + /* XXX For now we're using only 48 bits sequence numbers */ + const int dccp_header_size = sizeof(*dh) + + sizeof(struct dccp_hdr_ext) + + dccp_packet_hdr_len(dcb->dccpd_type); + int err, set_ack = 1; + u64 ackno = dp->dccps_gsr; + + /* + * FIXME: study DCCP_PKT_SYNC[ACK] to see what is the right thing + * to do here... + */ + dccp_inc_seqno(&dp->dccps_gss); + + dcb->dccpd_seq = dp->dccps_gss; + dccp_insert_options(sk, skb); + + switch (dcb->dccpd_type) { + case DCCP_PKT_DATA: + set_ack = 0; + break; + case DCCP_PKT_SYNC: + case DCCP_PKT_SYNCACK: + ackno = dcb->dccpd_seq; + break; + } + + skb->h.raw = skb_push(skb, dccp_header_size); + dh = dccp_hdr(skb); + /* Data packets are not cloned as they are never retransmitted */ + if (skb_cloned(skb)) + skb_set_owner_w(skb, sk); + + /* Build DCCP header and checksum it. 
*/ + memset(dh, 0, dccp_header_size); + dh->dccph_type = dcb->dccpd_type; + dh->dccph_sport = inet->sport; + dh->dccph_dport = inet->dport; + dh->dccph_doff = (dccp_header_size + dcb->dccpd_opt_len) / 4; + dh->dccph_ccval = dcb->dccpd_ccval; + /* XXX For now we're using only 48 bits sequence numbers */ + dh->dccph_x = 1; + + dp->dccps_awh = dp->dccps_gss; + dccp_hdr_set_seq(dh, dp->dccps_gss); + if (set_ack) + dccp_hdr_set_ack(dccp_hdr_ack_bits(skb), ackno); + + switch (dcb->dccpd_type) { + case DCCP_PKT_REQUEST: + dccp_hdr_request(skb)->dccph_req_service = dcb->dccpd_service; + break; + case DCCP_PKT_RESET: + dccp_hdr_reset(skb)->dccph_reset_code = dcb->dccpd_reset_code; + break; + } + + dh->dccph_checksum = dccp_v4_checksum(skb); + + if (dcb->dccpd_type == DCCP_PKT_ACK || + dcb->dccpd_type == DCCP_PKT_DATAACK) + dccp_event_ack_sent(sk); + + DCCP_INC_STATS(DCCP_MIB_OUTSEGS); + + err = ip_queue_xmit(skb, 0); + if (err <= 0) + return err; + + /* NET_XMIT_CN is special. It does not guarantee, + * that this packet is lost. It tells that device + * is about to start to drop packets or already + * drops some packets of the same priority and + * invokes us to send less aggressively. + */ + return err == NET_XMIT_CN ? 0 : err; + } + return -ENOBUFS; +} + +unsigned int dccp_sync_mss(struct sock *sk, u32 pmtu) +{ + struct dccp_sock *dp = dccp_sk(sk); + int mss_now; + + /* + * FIXME: we really should be using the af_specific thing to support IPv6. 
+ * mss_now = pmtu - tp->af_specific->net_header_len - sizeof(struct dccp_hdr) - sizeof(struct dccp_hdr_ext); + */ + mss_now = pmtu - sizeof(struct iphdr) - sizeof(struct dccp_hdr) - sizeof(struct dccp_hdr_ext); + + /* Now subtract optional transport overhead */ + mss_now -= dp->dccps_ext_header_len; + + /* + * FIXME: this should come from the CCID infrastructure, where, say, + * TFRC will say it wants TIMESTAMPS, ELAPSED time, etc, for now lets + * put a rough estimate for NDP + TIMESTAMP + TIMESTAMP_ECHO + ELAPSED + * TIME + TFRC_OPT_LOSS_EVENT_RATE + TFRC_OPT_RECEIVE_RATE + padding to + * make it a multiple of 4 + */ + + mss_now -= ((5 + 6 + 10 + 6 + 6 + 6 + 3) / 4) * 4; + + /* And store cached results */ + dp->dccps_pmtu_cookie = pmtu; + dp->dccps_mss_cache = mss_now; + + return mss_now; +} + +int dccp_retransmit_skb(struct sock *sk, struct sk_buff *skb) +{ + if (inet_sk_rebuild_header(sk) != 0) + return -EHOSTUNREACH; /* Routing failure or similar. */ + + return dccp_transmit_skb(sk, (skb_cloned(skb) ? + pskb_copy(skb, GFP_ATOMIC): + skb_clone(skb, GFP_ATOMIC))); +} + +struct sk_buff *dccp_make_response(struct sock *sk, struct dst_entry *dst, + struct request_sock *req) +{ + struct dccp_hdr *dh; + const int dccp_header_size = sizeof(struct dccp_hdr) + + sizeof(struct dccp_hdr_ext) + + sizeof(struct dccp_hdr_response); + struct sk_buff *skb = sock_wmalloc(sk, MAX_HEADER + DCCP_MAX_OPT_LEN + + dccp_header_size, 1, + GFP_ATOMIC); + if (skb == NULL) + return NULL; + + /* Reserve space for headers. 
*/ + skb_reserve(skb, MAX_HEADER + DCCP_MAX_OPT_LEN + dccp_header_size); + + skb->dst = dst_clone(dst); + skb->csum = 0; + + DCCP_SKB_CB(skb)->dccpd_type = DCCP_PKT_RESPONSE; + DCCP_SKB_CB(skb)->dccpd_seq = dccp_rsk(req)->dreq_iss; + dccp_insert_options(sk, skb); + + skb->h.raw = skb_push(skb, dccp_header_size); + + dh = dccp_hdr(skb); + memset(dh, 0, dccp_header_size); + + dh->dccph_sport = inet_sk(sk)->sport; + dh->dccph_dport = inet_rsk(req)->rmt_port; + dh->dccph_doff = (dccp_header_size + DCCP_SKB_CB(skb)->dccpd_opt_len) / 4; + dh->dccph_type = DCCP_PKT_RESPONSE; + dh->dccph_x = 1; + dccp_hdr_set_seq(dh, dccp_rsk(req)->dreq_iss); + dccp_hdr_set_ack(dccp_hdr_ack_bits(skb), dccp_rsk(req)->dreq_isr); + + dh->dccph_checksum = dccp_v4_checksum(skb); + + DCCP_INC_STATS(DCCP_MIB_OUTSEGS); + return skb; +} + +struct sk_buff *dccp_make_reset(struct sock *sk, struct dst_entry *dst, + const enum dccp_reset_codes code) + +{ + struct dccp_hdr *dh; + struct dccp_sock *dp = dccp_sk(sk); + const int dccp_header_size = sizeof(struct dccp_hdr) + + sizeof(struct dccp_hdr_ext) + + sizeof(struct dccp_hdr_reset); + struct sk_buff *skb = sock_wmalloc(sk, MAX_HEADER + DCCP_MAX_OPT_LEN + + dccp_header_size, 1, + GFP_ATOMIC); + if (skb == NULL) + return NULL; + + /* Reserve space for headers. 
*/ + skb_reserve(skb, MAX_HEADER + DCCP_MAX_OPT_LEN + dccp_header_size); + + skb->dst = dst_clone(dst); + skb->csum = 0; + + dccp_inc_seqno(&dp->dccps_gss); + + DCCP_SKB_CB(skb)->dccpd_reset_code = code; + DCCP_SKB_CB(skb)->dccpd_type = DCCP_PKT_RESET; + DCCP_SKB_CB(skb)->dccpd_seq = dp->dccps_gss; + dccp_insert_options(sk, skb); + + skb->h.raw = skb_push(skb, dccp_header_size); + + dh = dccp_hdr(skb); + memset(dh, 0, dccp_header_size); + + dh->dccph_sport = inet_sk(sk)->sport; + dh->dccph_dport = inet_sk(sk)->dport; + dh->dccph_doff = (dccp_header_size + DCCP_SKB_CB(skb)->dccpd_opt_len) / 4; + dh->dccph_type = DCCP_PKT_RESET; + dh->dccph_x = 1; + dccp_hdr_set_seq(dh, dp->dccps_gss); + dccp_hdr_set_ack(dccp_hdr_ack_bits(skb), dp->dccps_gsr); + + dccp_hdr_reset(skb)->dccph_reset_code = code; + + dh->dccph_checksum = dccp_v4_checksum(skb); + + DCCP_INC_STATS(DCCP_MIB_OUTSEGS); + return skb; +} + +/* + * Do all connect socket setups that can be done AF independent. + */ +static inline void dccp_connect_init(struct sock *sk) +{ + struct dst_entry *dst = __sk_dst_get(sk); + struct inet_connection_sock *icsk = inet_csk(sk); + + sk->sk_err = 0; + sock_reset_flag(sk, SOCK_DONE); + + dccp_sync_mss(sk, dst_mtu(dst)); + + /* + * FIXME: set dp->{dccps_swh,dccps_swl}, with + * something like dccp_inc_seq + */ + + icsk->icsk_retransmits = 0; +} + +int dccp_connect(struct sock *sk) +{ + struct sk_buff *skb; + struct inet_connection_sock *icsk = inet_csk(sk); + + dccp_connect_init(sk); + + skb = alloc_skb(MAX_DCCP_HEADER + 15, sk->sk_allocation); + if (unlikely(skb == NULL)) + return -ENOBUFS; + + /* Reserve space for headers. 
*/ + skb_reserve(skb, MAX_DCCP_HEADER); + + DCCP_SKB_CB(skb)->dccpd_type = DCCP_PKT_REQUEST; + /* FIXME: set service to something meaningful, coming + * from userspace*/ + DCCP_SKB_CB(skb)->dccpd_service = 0; + skb->csum = 0; + skb_set_owner_w(skb, sk); + + BUG_TRAP(sk->sk_send_head == NULL); + sk->sk_send_head = skb; + dccp_transmit_skb(sk, skb_clone(skb, GFP_KERNEL)); + DCCP_INC_STATS(DCCP_MIB_ACTIVEOPENS); + + /* Timer for repeating the REQUEST until an answer. */ + inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, icsk->icsk_rto, TCP_RTO_MAX); + return 0; +} + +void dccp_send_ack(struct sock *sk) +{ + /* If we have been reset, we may not send again. */ + if (sk->sk_state != DCCP_CLOSED) { + struct sk_buff *skb = alloc_skb(MAX_DCCP_HEADER, GFP_ATOMIC); + + if (skb == NULL) { + inet_csk_schedule_ack(sk); + inet_csk(sk)->icsk_ack.ato = TCP_ATO_MIN; + inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK, TCP_DELACK_MAX, TCP_RTO_MAX); + return; + } + + /* Reserve space for headers */ + skb_reserve(skb, MAX_DCCP_HEADER); + skb->csum = 0; + DCCP_SKB_CB(skb)->dccpd_type = DCCP_PKT_ACK; + skb_set_owner_w(skb, sk); + dccp_transmit_skb(sk, skb); + } +} + +EXPORT_SYMBOL_GPL(dccp_send_ack); + +void dccp_send_delayed_ack(struct sock *sk) +{ + struct inet_connection_sock *icsk = inet_csk(sk); + /* + * FIXME: tune this timer. elapsed time fixes the skew, so no problem + * with using 2s, and active senders also piggyback the ACK into a + * DATAACK packet, so this is really for quiescent senders. + */ + unsigned long timeout = jiffies + 2 * HZ; + + /* Use new timeout only if there wasn't a older one earlier. */ + if (icsk->icsk_ack.pending & ICSK_ACK_TIMER) { + /* If delack timer was blocked or is about to expire, + * send ACK now. 
+ * + * FIXME: check the "about to expire" part + */ + if (icsk->icsk_ack.blocked) { + dccp_send_ack(sk); + return; + } + + if (!time_before(timeout, icsk->icsk_ack.timeout)) + timeout = icsk->icsk_ack.timeout; + } + icsk->icsk_ack.pending |= ICSK_ACK_SCHED | ICSK_ACK_TIMER; + icsk->icsk_ack.timeout = timeout; + sk_reset_timer(sk, &icsk->icsk_delack_timer, timeout); +} + +void dccp_send_sync(struct sock *sk, u64 seq) +{ + /* + * We are not putting this on the write queue, so + * dccp_transmit_skb() will set the ownership to this + * sock. + */ + struct sk_buff *skb = alloc_skb(MAX_DCCP_HEADER, GFP_ATOMIC); + + if (skb == NULL) + /* FIXME: how to make sure the sync is sent? */ + return; + + /* Reserve space for headers and prepare control bits. */ + skb_reserve(skb, MAX_DCCP_HEADER); + skb->csum = 0; + DCCP_SKB_CB(skb)->dccpd_type = DCCP_PKT_SYNC; + DCCP_SKB_CB(skb)->dccpd_seq = seq; + + skb_set_owner_w(skb, sk); + dccp_transmit_skb(sk, skb); +} + +/* Send a DCCP_PKT_CLOSE/CLOSEREQ. The caller locks the socket for us. This cannot be + * allowed to fail queueing a DCCP_PKT_CLOSE/CLOSEREQ frame under any circumstances. + */ +void dccp_send_close(struct sock *sk) +{ + struct dccp_sock *dp = dccp_sk(sk); + struct sk_buff *skb; + + /* Socket is locked, keep trying until memory is available. */ + for (;;) { + skb = alloc_skb(sk->sk_prot->max_header, GFP_KERNEL); + if (skb != NULL) + break; + yield(); + } + + /* Reserve space for headers and prepare control bits. */ + skb_reserve(skb, sk->sk_prot->max_header); + skb->csum = 0; + DCCP_SKB_CB(skb)->dccpd_type = dp->dccps_role == DCCP_ROLE_CLIENT ? 
DCCP_PKT_CLOSE : DCCP_PKT_CLOSEREQ; + + skb_set_owner_w(skb, sk); + dccp_transmit_skb(sk, skb); + + ccid_hc_rx_exit(dp->dccps_hc_rx_ccid, sk); + ccid_hc_tx_exit(dp->dccps_hc_tx_ccid, sk); +} diff --git a/net/dccp/proto.c b/net/dccp/proto.c new file mode 100644 index 000000000000..70284e6afe05 --- /dev/null +++ b/net/dccp/proto.c @@ -0,0 +1,818 @@ +/* + * net/dccp/proto.c + * + * An implementation of the DCCP protocol + * Arnaldo Carvalho de Melo + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#include "ccid.h" +#include "dccp.h" + +DEFINE_SNMP_STAT(struct dccp_mib, dccp_statistics); + +atomic_t dccp_orphan_count = ATOMIC_INIT(0); + +static struct net_protocol dccp_protocol = { + .handler = dccp_v4_rcv, + .err_handler = dccp_v4_err, +}; + +const char *dccp_packet_name(const int type) +{ + static const char *dccp_packet_names[] = { + [DCCP_PKT_REQUEST] = "REQUEST", + [DCCP_PKT_RESPONSE] = "RESPONSE", + [DCCP_PKT_DATA] = "DATA", + [DCCP_PKT_ACK] = "ACK", + [DCCP_PKT_DATAACK] = "DATAACK", + [DCCP_PKT_CLOSEREQ] = "CLOSEREQ", + [DCCP_PKT_CLOSE] = "CLOSE", + [DCCP_PKT_RESET] = "RESET", + [DCCP_PKT_SYNC] = "SYNC", + [DCCP_PKT_SYNCACK] = "SYNCACK", + }; + + if (type >= DCCP_NR_PKT_TYPES) + return "INVALID"; + else + return dccp_packet_names[type]; +} + +EXPORT_SYMBOL_GPL(dccp_packet_name); + +const char *dccp_state_name(const int state) +{ + static char *dccp_state_names[] = { + [DCCP_OPEN] = "OPEN", + [DCCP_REQUESTING] = "REQUESTING", + [DCCP_PARTOPEN] = "PARTOPEN", + [DCCP_LISTEN] = "LISTEN", + [DCCP_RESPOND] = "RESPOND", + [DCCP_CLOSING] = "CLOSING", + [DCCP_TIME_WAIT] = 
"TIME_WAIT", + [DCCP_CLOSED] = "CLOSED", + }; + + if (state >= DCCP_MAX_STATES) + return "INVALID STATE!"; + else + return dccp_state_names[state]; +} + +EXPORT_SYMBOL_GPL(dccp_state_name); + +static inline int dccp_listen_start(struct sock *sk) +{ + dccp_sk(sk)->dccps_role = DCCP_ROLE_LISTEN; + return inet_csk_listen_start(sk, TCP_SYNQ_HSIZE); +} + +int dccp_disconnect(struct sock *sk, int flags) +{ + struct inet_connection_sock *icsk = inet_csk(sk); + struct inet_sock *inet = inet_sk(sk); + int err = 0; + const int old_state = sk->sk_state; + + if (old_state != DCCP_CLOSED) + dccp_set_state(sk, DCCP_CLOSED); + + /* ABORT function of RFC793 */ + if (old_state == DCCP_LISTEN) { + inet_csk_listen_stop(sk); + /* FIXME: do the active reset thing */ + } else if (old_state == DCCP_REQUESTING) + sk->sk_err = ECONNRESET; + + dccp_clear_xmit_timers(sk); + __skb_queue_purge(&sk->sk_receive_queue); + if (sk->sk_send_head != NULL) { + __kfree_skb(sk->sk_send_head); + sk->sk_send_head = NULL; + } + + inet->dport = 0; + + if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK)) + inet_reset_saddr(sk); + + sk->sk_shutdown = 0; + sock_reset_flag(sk, SOCK_DONE); + + icsk->icsk_backoff = 0; + inet_csk_delack_init(sk); + __sk_dst_reset(sk); + + BUG_TRAP(!inet->num || icsk->icsk_bind_hash); + + sk->sk_error_report(sk); + return err; +} + +int dccp_ioctl(struct sock *sk, int cmd, unsigned long arg) +{ + dccp_pr_debug("entry\n"); + return -ENOIOCTLCMD; +} + +int dccp_setsockopt(struct sock *sk, int level, int optname, + char *optval, int optlen) +{ + dccp_pr_debug("entry\n"); + + if (level != SOL_DCCP) + return ip_setsockopt(sk, level, optname, optval, optlen); + + return -EOPNOTSUPP; +} + +int dccp_getsockopt(struct sock *sk, int level, int optname, + char *optval, int *optlen) +{ + dccp_pr_debug("entry\n"); + + if (level != SOL_DCCP) + return ip_getsockopt(sk, level, optname, optval, optlen); + + return -EOPNOTSUPP; +} + +int dccp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, 
+ size_t len) +{ + const struct dccp_sock *dp = dccp_sk(sk); + const int flags = msg->msg_flags; + const int noblock = flags & MSG_DONTWAIT; + struct sk_buff *skb; + int rc, size; + long timeo; + + if (len > dp->dccps_mss_cache) + return -EMSGSIZE; + + lock_sock(sk); + + timeo = sock_sndtimeo(sk, flags & MSG_DONTWAIT); + + /* + * We have to use sk_stream_wait_connect here to set sk_write_pending, + * so that the trick in dccp_rcv_request_sent_state_process. + */ + /* Wait for a connection to finish. */ + if ((1 << sk->sk_state) & ~(DCCPF_OPEN | DCCPF_PARTOPEN | DCCPF_CLOSING)) + if ((rc = sk_stream_wait_connect(sk, &timeo)) != 0) + goto out_err; + + size = sk->sk_prot->max_header + len; + release_sock(sk); + skb = sock_alloc_send_skb(sk, size, noblock, &rc); + lock_sock(sk); + + if (skb == NULL) + goto out_release; + + skb_reserve(skb, sk->sk_prot->max_header); + rc = memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len); + if (rc == 0) { + struct dccp_skb_cb *dcb = DCCP_SKB_CB(skb); + const struct dccp_ackpkts *ap = dp->dccps_hc_rx_ackpkts; + long delay; + + /* + * XXX: This is just to match the Waikato tree CA interaction + * points, after the CCID3 code is stable and I have a better + * understanding of behaviour I'll change this to look more like + * TCP. + */ + while (1) { + rc = ccid_hc_tx_send_packet(dp->dccps_hc_tx_ccid, sk, + skb, len, &delay); + if (rc == 0) + break; + if (rc != -EAGAIN) + goto out_discard; + if (delay > timeo) + goto out_discard; + release_sock(sk); + delay = schedule_timeout(delay); + lock_sock(sk); + timeo -= delay; + if (signal_pending(current)) + goto out_interrupted; + rc = -EPIPE; + if (!(sk->sk_state == DCCP_PARTOPEN || sk->sk_state == DCCP_OPEN)) + goto out_discard; + } + + if (sk->sk_state == DCCP_PARTOPEN) { + /* See 8.1.5. 
Handshake Completion */ + inet_csk_schedule_ack(sk); + inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK, inet_csk(sk)->icsk_rto, TCP_RTO_MAX); + dcb->dccpd_type = DCCP_PKT_DATAACK; + /* FIXME: we really should have a dccps_ack_pending or use icsk */ + } else if (inet_csk_ack_scheduled(sk) || + (dp->dccps_options.dccpo_send_ack_vector && + ap->dccpap_buf_ackno != DCCP_MAX_SEQNO + 1 && + ap->dccpap_ack_seqno == DCCP_MAX_SEQNO + 1)) + dcb->dccpd_type = DCCP_PKT_DATAACK; + else + dcb->dccpd_type = DCCP_PKT_DATA; + dccp_transmit_skb(sk, skb); + ccid_hc_tx_packet_sent(dp->dccps_hc_tx_ccid, sk, 0, len); + } else { +out_discard: + kfree_skb(skb); + } +out_release: + release_sock(sk); + return rc ? : len; +out_err: + rc = sk_stream_error(sk, flags, rc); + goto out_release; +out_interrupted: + rc = sock_intr_errno(timeo); + goto out_discard; +} + +EXPORT_SYMBOL(dccp_sendmsg); + +int dccp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, + size_t len, int nonblock, int flags, int *addr_len) +{ + const struct dccp_hdr *dh; + int copied = 0; + unsigned long used; + int err; + int target; /* Read at least this many bytes */ + long timeo; + + lock_sock(sk); + + err = -ENOTCONN; + if (sk->sk_state == DCCP_LISTEN) + goto out; + + timeo = sock_rcvtimeo(sk, nonblock); + + /* Urgent data needs to be handled specially. */ + if (flags & MSG_OOB) + goto recv_urg; + + /* FIXME */ +#if 0 + seq = &tp->copied_seq; + if (flags & MSG_PEEK) { + peek_seq = tp->copied_seq; + seq = &peek_seq; + } +#endif + + target = sock_rcvlowat(sk, flags & MSG_WAITALL, len); + + do { + struct sk_buff *skb; + u32 offset; + + /* FIXME */ +#if 0 + /* Are we at urgent data? Stop if we have read anything or have SIGURG pending. */ + if (tp->urg_data && tp->urg_seq == *seq) { + if (copied) + break; + if (signal_pending(current)) { + copied = timeo ? sock_intr_errno(timeo) : -EAGAIN; + break; + } + } +#endif + + /* Next get a buffer. 
*/ + + skb = skb_peek(&sk->sk_receive_queue); + do { + if (!skb) + break; + + offset = 0; + dh = dccp_hdr(skb); + + if (dh->dccph_type == DCCP_PKT_DATA || + dh->dccph_type == DCCP_PKT_DATAACK) + goto found_ok_skb; + + if (dh->dccph_type == DCCP_PKT_RESET || + dh->dccph_type == DCCP_PKT_CLOSE) { + dccp_pr_debug("found fin ok!\n"); + goto found_fin_ok; + } + dccp_pr_debug("packet_type=%s\n", dccp_packet_name(dh->dccph_type)); + BUG_TRAP(flags & MSG_PEEK); + skb = skb->next; + } while (skb != (struct sk_buff *)&sk->sk_receive_queue); + + /* Well, if we have backlog, try to process it now yet. */ + if (copied >= target && !sk->sk_backlog.tail) + break; + + if (copied) { + if (sk->sk_err || + sk->sk_state == DCCP_CLOSED || + (sk->sk_shutdown & RCV_SHUTDOWN) || + !timeo || + signal_pending(current) || + (flags & MSG_PEEK)) + break; + } else { + if (sock_flag(sk, SOCK_DONE)) + break; + + if (sk->sk_err) { + copied = sock_error(sk); + break; + } + + if (sk->sk_shutdown & RCV_SHUTDOWN) + break; + + if (sk->sk_state == DCCP_CLOSED) { + if (!sock_flag(sk, SOCK_DONE)) { + /* This occurs when user tries to read + * from never connected socket. + */ + copied = -ENOTCONN; + break; + } + break; + } + + if (!timeo) { + copied = -EAGAIN; + break; + } + + if (signal_pending(current)) { + copied = sock_intr_errno(timeo); + break; + } + } + + /* FIXME: cleanup_rbuf(sk, copied); */ + + if (copied >= target) { + /* Do not sleep, just process backlog. */ + release_sock(sk); + lock_sock(sk); + } else + sk_wait_data(sk, &timeo); + + continue; + + found_ok_skb: + /* Ok so how much can we use? */ + used = skb->len - offset; + if (len < used) + used = len; + + if (!(flags & MSG_TRUNC)) { + err = skb_copy_datagram_iovec(skb, offset, + msg->msg_iov, used); + if (err) { + /* Exception. Bailout! 
*/ + if (!copied) + copied = -EFAULT; + break; + } + } + + copied += used; + len -= used; + + /* FIXME: tcp_rcv_space_adjust(sk); */ + +//skip_copy: + if (used + offset < skb->len) + continue; + + if (!(flags & MSG_PEEK)) + sk_eat_skb(sk, skb); + continue; + found_fin_ok: + if (!(flags & MSG_PEEK)) + sk_eat_skb(sk, skb); + break; + + } while (len > 0); + + /* According to UNIX98, msg_name/msg_namelen are ignored + * on connected socket. I was just happy when found this 8) --ANK + */ + + /* Clean up data we have read: This will do ACK frames. */ + /* FIXME: cleanup_rbuf(sk, copied); */ + + release_sock(sk); + return copied; + +out: + release_sock(sk); + return err; + +recv_urg: + /* FIXME: err = tcp_recv_urg(sk, timeo, msg, len, flags, addr_len); */ + goto out; +} + +static int inet_dccp_listen(struct socket *sock, int backlog) +{ + struct sock *sk = sock->sk; + unsigned char old_state; + int err; + + lock_sock(sk); + + err = -EINVAL; + if (sock->state != SS_UNCONNECTED || sock->type != SOCK_DCCP) + goto out; + + old_state = sk->sk_state; + if (!((1 << old_state) & (DCCPF_CLOSED | DCCPF_LISTEN))) + goto out; + + /* Really, if the socket is already in listen state + * we can only allow the backlog to be adjusted. 
+ */ + if (old_state != DCCP_LISTEN) { + /* + * FIXME: here it probably should be sk->sk_prot->listen_start + * see tcp_listen_start + */ + err = dccp_listen_start(sk); + if (err) + goto out; + } + sk->sk_max_ack_backlog = backlog; + err = 0; + +out: + release_sock(sk); + return err; +} + +static const unsigned char dccp_new_state[] = { + /* current state: new state: action: */ + [0] = DCCP_CLOSED, + [DCCP_OPEN] = DCCP_CLOSING | DCCP_ACTION_FIN, + [DCCP_REQUESTING] = DCCP_CLOSED, + [DCCP_PARTOPEN] = DCCP_CLOSING | DCCP_ACTION_FIN, + [DCCP_LISTEN] = DCCP_CLOSED, + [DCCP_RESPOND] = DCCP_CLOSED, + [DCCP_CLOSING] = DCCP_CLOSED, + [DCCP_TIME_WAIT] = DCCP_CLOSED, + [DCCP_CLOSED] = DCCP_CLOSED, +}; + +static int dccp_close_state(struct sock *sk) +{ + const int next = dccp_new_state[sk->sk_state]; + const int ns = next & DCCP_STATE_MASK; + + if (ns != sk->sk_state) + dccp_set_state(sk, ns); + + return next & DCCP_ACTION_FIN; +} + +void dccp_close(struct sock *sk, long timeout) +{ + struct sk_buff *skb; + + lock_sock(sk); + + sk->sk_shutdown = SHUTDOWN_MASK; + + if (sk->sk_state == DCCP_LISTEN) { + dccp_set_state(sk, DCCP_CLOSED); + + /* Special case. */ + inet_csk_listen_stop(sk); + + goto adjudge_to_death; + } + + /* + * We need to flush the recv. buffs. We do this only on the + * descriptor close, not protocol-sourced closes, because the + *reader process may not have drained the data yet! + */ + /* FIXME: check for unread data */ + while ((skb = __skb_dequeue(&sk->sk_receive_queue)) != NULL) { + __kfree_skb(skb); + } + + if (sock_flag(sk, SOCK_LINGER) && !sk->sk_lingertime) { + /* Check zero linger _after_ checking for unread data. */ + sk->sk_prot->disconnect(sk, 0); + } else if (dccp_close_state(sk)) { + dccp_send_close(sk); + } + + sk_stream_wait_close(sk, timeout); + +adjudge_to_death: + release_sock(sk); + /* + * Now socket is owned by kernel and we acquire BH lock + * to finish close. No need to check for user refs. 
+ */ + local_bh_disable(); + bh_lock_sock(sk); + BUG_TRAP(!sock_owned_by_user(sk)); + + sock_hold(sk); + sock_orphan(sk); + + if (sk->sk_state != DCCP_CLOSED) + dccp_set_state(sk, DCCP_CLOSED); + + atomic_inc(&dccp_orphan_count); + if (sk->sk_state == DCCP_CLOSED) + inet_csk_destroy_sock(sk); + + /* Otherwise, socket is reprieved until protocol close. */ + + bh_unlock_sock(sk); + local_bh_enable(); + sock_put(sk); +} + +void dccp_shutdown(struct sock *sk, int how) +{ + dccp_pr_debug("entry\n"); +} + +struct proto_ops inet_dccp_ops = { + .family = PF_INET, + .owner = THIS_MODULE, + .release = inet_release, + .bind = inet_bind, + .connect = inet_stream_connect, + .socketpair = sock_no_socketpair, + .accept = inet_accept, + .getname = inet_getname, + .poll = sock_no_poll, + .ioctl = inet_ioctl, + .listen = inet_dccp_listen, /* FIXME: work on inet_listen to rename it to sock_common_listen */ + .shutdown = inet_shutdown, + .setsockopt = sock_common_setsockopt, + .getsockopt = sock_common_getsockopt, + .sendmsg = inet_sendmsg, + .recvmsg = sock_common_recvmsg, + .mmap = sock_no_mmap, + .sendpage = sock_no_sendpage, +}; + +extern struct net_proto_family inet_family_ops; + +static struct inet_protosw dccp_v4_protosw = { + .type = SOCK_DCCP, + .protocol = IPPROTO_DCCP, + .prot = &dccp_v4_prot, + .ops = &inet_dccp_ops, + .capability = -1, + .no_check = 0, + .flags = 0, +}; + +/* + * This is the global socket data structure used for responding to + * the Out-of-the-blue (OOTB) packets. A control sock will be created + * for this socket at the initialization time. 
+ */ +struct socket *dccp_ctl_socket; + +static char dccp_ctl_socket_err_msg[] __initdata = + KERN_ERR "DCCP: Failed to create the control socket.\n"; + +static int __init dccp_ctl_sock_init(void) +{ + int rc = sock_create_kern(PF_INET, SOCK_DCCP, IPPROTO_DCCP, + &dccp_ctl_socket); + if (rc < 0) + printk(dccp_ctl_socket_err_msg); + else { + dccp_ctl_socket->sk->sk_allocation = GFP_ATOMIC; + inet_sk(dccp_ctl_socket->sk)->uc_ttl = -1; + + /* Unhash it so that IP input processing does not even + * see it, we do not wish this socket to see incoming + * packets. + */ + dccp_ctl_socket->sk->sk_prot->unhash(dccp_ctl_socket->sk); + } + + return rc; +} + +static void __exit dccp_ctl_sock_exit(void) +{ + if (dccp_ctl_socket != NULL) + sock_release(dccp_ctl_socket); +} + +static int __init init_dccp_v4_mibs(void) +{ + int rc = -ENOMEM; + + dccp_statistics[0] = alloc_percpu(struct dccp_mib); + if (dccp_statistics[0] == NULL) + goto out; + + dccp_statistics[1] = alloc_percpu(struct dccp_mib); + if (dccp_statistics[1] == NULL) + goto out_free_one; + + rc = 0; +out: + return rc; +out_free_one: + free_percpu(dccp_statistics[0]); + dccp_statistics[0] = NULL; + goto out; + +} + +static int thash_entries; +module_param(thash_entries, int, 0444); +MODULE_PARM_DESC(thash_entries, "Number of ehash buckets"); + +int dccp_debug; +module_param(dccp_debug, int, 0444); +MODULE_PARM_DESC(dccp_debug, "Enable debug messages"); + +static int __init dccp_init(void) +{ + unsigned long goal; + int ehash_order, bhash_order, i; + int rc = proto_register(&dccp_v4_prot, 1); + + if (rc) + goto out; + + dccp_hashinfo.bind_bucket_cachep = kmem_cache_create("dccp_bind_bucket", + sizeof(struct inet_bind_bucket), + 0, SLAB_HWCACHE_ALIGN, + NULL, NULL); + if (!dccp_hashinfo.bind_bucket_cachep) + goto out_proto_unregister; + + /* + * Size and allocate the main established and bind bucket + * hash tables. + * + * The methodology is similar to that of the buffer cache. 
+ */ + if (num_physpages >= (128 * 1024)) + goal = num_physpages >> (21 - PAGE_SHIFT); + else + goal = num_physpages >> (23 - PAGE_SHIFT); + + if (thash_entries) + goal = (thash_entries * sizeof(struct inet_ehash_bucket)) >> PAGE_SHIFT; + for (ehash_order = 0; (1UL << ehash_order) < goal; ehash_order++) + ; + do { + dccp_hashinfo.ehash_size = (1UL << ehash_order) * PAGE_SIZE / + sizeof(struct inet_ehash_bucket); + dccp_hashinfo.ehash_size >>= 1; + while (dccp_hashinfo.ehash_size & (dccp_hashinfo.ehash_size - 1)) + dccp_hashinfo.ehash_size--; + dccp_hashinfo.ehash = (struct inet_ehash_bucket *) + __get_free_pages(GFP_ATOMIC, ehash_order); + } while (!dccp_hashinfo.ehash && --ehash_order > 0); + + if (!dccp_hashinfo.ehash) { + printk(KERN_CRIT "Failed to allocate DCCP " + "established hash table\n"); + goto out_free_bind_bucket_cachep; + } + + for (i = 0; i < (dccp_hashinfo.ehash_size << 1); i++) { + rwlock_init(&dccp_hashinfo.ehash[i].lock); + INIT_HLIST_HEAD(&dccp_hashinfo.ehash[i].chain); + } + + bhash_order = ehash_order; + + do { + dccp_hashinfo.bhash_size = (1UL << bhash_order) * PAGE_SIZE / + sizeof(struct inet_bind_hashbucket); + if ((dccp_hashinfo.bhash_size > (64 * 1024)) && bhash_order > 0) + continue; + dccp_hashinfo.bhash = (struct inet_bind_hashbucket *) + __get_free_pages(GFP_ATOMIC, bhash_order); + } while (!dccp_hashinfo.bhash && --bhash_order >= 0); + + if (!dccp_hashinfo.bhash) { + printk(KERN_CRIT "Failed to allocate DCCP bind hash table\n"); + goto out_free_dccp_ehash; + } + + for (i = 0; i < dccp_hashinfo.bhash_size; i++) { + spin_lock_init(&dccp_hashinfo.bhash[i].lock); + INIT_HLIST_HEAD(&dccp_hashinfo.bhash[i].chain); + } + + if (init_dccp_v4_mibs()) + goto out_free_dccp_bhash; + + rc = -EAGAIN; + if (inet_add_protocol(&dccp_protocol, IPPROTO_DCCP)) + goto out_free_dccp_v4_mibs; + + inet_register_protosw(&dccp_v4_protosw); + + rc = dccp_ctl_sock_init(); + if (rc) + goto out_unregister_protosw; +out: + return rc; +out_unregister_protosw: + 
inet_unregister_protosw(&dccp_v4_protosw); + inet_del_protocol(&dccp_protocol, IPPROTO_DCCP); +out_free_dccp_v4_mibs: + free_percpu(dccp_statistics[0]); + free_percpu(dccp_statistics[1]); + dccp_statistics[0] = dccp_statistics[1] = NULL; +out_free_dccp_bhash: + free_pages((unsigned long)dccp_hashinfo.bhash, bhash_order); + dccp_hashinfo.bhash = NULL; +out_free_dccp_ehash: + free_pages((unsigned long)dccp_hashinfo.ehash, ehash_order); + dccp_hashinfo.ehash = NULL; +out_free_bind_bucket_cachep: + kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep); + dccp_hashinfo.bind_bucket_cachep = NULL; +out_proto_unregister: + proto_unregister(&dccp_v4_prot); + goto out; +} + +static const char dccp_del_proto_err_msg[] __exitdata = + KERN_ERR "can't remove dccp net_protocol\n"; + +static void __exit dccp_fini(void) +{ + dccp_ctl_sock_exit(); + + inet_unregister_protosw(&dccp_v4_protosw); + + if (inet_del_protocol(&dccp_protocol, IPPROTO_DCCP) < 0) + printk(dccp_del_proto_err_msg); + + /* Free the control endpoint. */ + sock_release(dccp_ctl_socket); + + proto_unregister(&dccp_v4_prot); + + kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep); +} + +module_init(dccp_init); +module_exit(dccp_fini); + +/* __stringify doesn't likes enums, so use SOCK_DCCP (6) value directly */ +MODULE_ALIAS("net-pf-" __stringify(PF_INET) "-6"); +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Arnaldo Carvalho de Melo "); +MODULE_DESCRIPTION("DCCP - Datagram Congestion Controlled Protocol"); diff --git a/net/dccp/timer.c b/net/dccp/timer.c new file mode 100644 index 000000000000..8c396ee01aac --- /dev/null +++ b/net/dccp/timer.c @@ -0,0 +1,249 @@ +/* + * net/dccp/timer.c + * + * An implementation of the DCCP protocol + * Arnaldo Carvalho de Melo + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
+ */ + +#include +#include +#include + +#include "dccp.h" + +static void dccp_write_timer(unsigned long data); +static void dccp_keepalive_timer(unsigned long data); +static void dccp_delack_timer(unsigned long data); + +void dccp_init_xmit_timers(struct sock *sk) +{ + inet_csk_init_xmit_timers(sk, &dccp_write_timer, &dccp_delack_timer, + &dccp_keepalive_timer); +} + +static void dccp_write_err(struct sock *sk) +{ + sk->sk_err = sk->sk_err_soft ? : ETIMEDOUT; + sk->sk_error_report(sk); + + dccp_v4_send_reset(sk, DCCP_RESET_CODE_ABORTED); + dccp_done(sk); + DCCP_INC_STATS_BH(DCCP_MIB_ABORTONTIMEOUT); +} + +/* A write timeout has occurred. Process the after effects. */ +static int dccp_write_timeout(struct sock *sk) +{ + const struct inet_connection_sock *icsk = inet_csk(sk); + int retry_until; + + if (sk->sk_state == DCCP_REQUESTING || sk->sk_state == DCCP_PARTOPEN) { + if (icsk->icsk_retransmits != 0) + dst_negative_advice(&sk->sk_dst_cache); + retry_until = icsk->icsk_syn_retries ? : /* FIXME! */ 3 /* FIXME! sysctl_tcp_syn_retries */; + } else { + if (icsk->icsk_retransmits >= /* FIXME! sysctl_tcp_retries1 */ 5 /* FIXME! */) { + /* NOTE. draft-ietf-tcpimpl-pmtud-01.txt requires pmtu black + hole detection. :-( + + It is place to make it. It is not made. I do not want + to make it. It is disguisting. It does not work in any + case. Let me to cite the same draft, which requires for + us to implement this: + + "The one security concern raised by this memo is that ICMP black holes + are often caused by over-zealous security administrators who block + all ICMP messages. It is vitally important that those who design and + deploy security systems understand the impact of strict filtering on + upper-layer protocols. The safest web site in the world is worthless + if most TCP implementations cannot transfer data from it. It would + be far nicer to have all of the black holes fixed rather than fixing + all of the TCP implementations." + + Golden words :-). 
+ */ + + dst_negative_advice(&sk->sk_dst_cache); + } + + retry_until = /* FIXME! */ 15 /* FIXME! sysctl_tcp_retries2 */; + /* + * FIXME: see tcp_write_timout and tcp_out_of_resources + */ + } + + if (icsk->icsk_retransmits >= retry_until) { + /* Has it gone just too far? */ + dccp_write_err(sk); + return 1; + } + return 0; +} + +/* This is the same as tcp_delack_timer, sans prequeue & mem_reclaim stuff */ +static void dccp_delack_timer(unsigned long data) +{ + struct sock *sk = (struct sock *)data; + struct inet_connection_sock *icsk = inet_csk(sk); + + bh_lock_sock(sk); + if (sock_owned_by_user(sk)) { + /* Try again later. */ + icsk->icsk_ack.blocked = 1; + NET_INC_STATS_BH(LINUX_MIB_DELAYEDACKLOCKED); + sk_reset_timer(sk, &icsk->icsk_delack_timer, jiffies + TCP_DELACK_MIN); + goto out; + } + + if (sk->sk_state == DCCP_CLOSED || !(icsk->icsk_ack.pending & ICSK_ACK_TIMER)) + goto out; + if (time_after(icsk->icsk_ack.timeout, jiffies)) { + sk_reset_timer(sk, &icsk->icsk_delack_timer, icsk->icsk_ack.timeout); + goto out; + } + + icsk->icsk_ack.pending &= ~ICSK_ACK_TIMER; + + if (inet_csk_ack_scheduled(sk)) { + if (!icsk->icsk_ack.pingpong) { + /* Delayed ACK missed: inflate ATO. */ + icsk->icsk_ack.ato = min(icsk->icsk_ack.ato << 1, icsk->icsk_rto); + } else { + /* Delayed ACK missed: leave pingpong mode and + * deflate ATO. + */ + icsk->icsk_ack.pingpong = 0; + icsk->icsk_ack.ato = TCP_ATO_MIN; + } + dccp_send_ack(sk); + NET_INC_STATS_BH(LINUX_MIB_DELAYEDACKS); + } +out: + bh_unlock_sock(sk); + sock_put(sk); +} + +/* + * The DCCP retransmit timer. + */ +static void dccp_retransmit_timer(struct sock *sk) +{ + struct inet_connection_sock *icsk = inet_csk(sk); + + /* + * sk->sk_send_head has to have one skb with + * DCCP_SKB_CB(skb)->dccpd_type set to one of the retransmittable DCCP + * packet types (REQUEST, RESPONSE, the ACK in the 3way hanshake + * (PARTOPEN timer), etc). 
+ */ + BUG_TRAP(sk->sk_send_head != NULL); + + /* + * More than than 4MSL (8 minutes) has passed, a RESET(aborted) was + * sent, no need to retransmit, this sock is dead. + */ + if (dccp_write_timeout(sk)) + goto out; + + /* + * We want to know the number of packets retransmitted, not the + * total number of retransmissions of clones of original packets. + */ + if (icsk->icsk_retransmits == 0) + DCCP_INC_STATS_BH(DCCP_MIB_TIMEOUTS); + + if (dccp_retransmit_skb(sk, sk->sk_send_head) < 0) { + /* + * Retransmission failed because of local congestion, + * do not backoff. + */ + if (icsk->icsk_retransmits == 0) + icsk->icsk_retransmits = 1; + inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, + min(icsk->icsk_rto, + TCP_RESOURCE_PROBE_INTERVAL), + TCP_RTO_MAX); + goto out; + } + + icsk->icsk_backoff++; + icsk->icsk_retransmits++; + + icsk->icsk_rto = min(icsk->icsk_rto << 1, DCCP_RTO_MAX); + inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, icsk->icsk_rto, TCP_RTO_MAX); + if (icsk->icsk_retransmits > 3 /* FIXME: sysctl_dccp_retries1 */) + __sk_dst_reset(sk); +out:; +} + +static void dccp_write_timer(unsigned long data) +{ + struct sock *sk = (struct sock *)data; + struct inet_connection_sock *icsk = inet_csk(sk); + int event = 0; + + bh_lock_sock(sk); + if (sock_owned_by_user(sk)) { + /* Try again later */ + sk_reset_timer(sk, &icsk->icsk_retransmit_timer, jiffies + (HZ / 20)); + goto out; + } + + if (sk->sk_state == DCCP_CLOSED || !icsk->icsk_pending) + goto out; + + if (time_after(icsk->icsk_timeout, jiffies)) { + sk_reset_timer(sk, &icsk->icsk_retransmit_timer, icsk->icsk_timeout); + goto out; + } + + event = icsk->icsk_pending; + icsk->icsk_pending = 0; + + switch (event) { + case ICSK_TIME_RETRANS: + dccp_retransmit_timer(sk); + break; + } +out: + bh_unlock_sock(sk); + sock_put(sk); +} + +/* + * Timer for listening sockets + */ +static void dccp_response_timer(struct sock *sk) +{ + struct inet_connection_sock *icsk = inet_csk(sk); + const int max_retries = 
icsk->icsk_syn_retries ? : TCP_SYNACK_RETRIES /* FIXME sysctl_tcp_synack_retries */; + + reqsk_queue_prune(&icsk->icsk_accept_queue, sk, TCP_SYNQ_INTERVAL, + DCCP_TIMEOUT_INIT, DCCP_RTO_MAX, max_retries); +} + +static void dccp_keepalive_timer(unsigned long data) +{ + struct sock *sk = (struct sock *)data; + + /* Only process if socket is not in use. */ + bh_lock_sock(sk); + if (sock_owned_by_user(sk)) { + /* Try again later. */ + inet_csk_reset_keepalive_timer(sk, HZ / 20); + goto out; + } + + if (sk->sk_state == DCCP_LISTEN) { + dccp_response_timer(sk); + goto out; + } +out: + bh_unlock_sock(sk); + sock_put(sk); +} From a019d6fe2b9da68ea4ba6cf3c4e86fc1dbf554c3 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 9 Aug 2005 20:15:09 -0700 Subject: [PATCH 330/584] [ICSK]: Move generalised functions from tcp to inet_connection_sock This also improves reqsk_queue_prune and renames it to inet_csk_reqsk_queue_prune, as it deals with both inet_connection_sock and inet_request_sock objects, not just with request_sock ones thus belonging to inet_request_sock. Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. 
Miller --- include/net/inet_connection_sock.h | 7 + include/net/request_sock.h | 4 - include/net/tcp.h | 6 - net/dccp/timer.c | 6 +- net/ipv4/inet_connection_sock.c | 214 +++++++++++++++++++++++++++++ net/ipv4/tcp.c | 120 ---------------- net/ipv4/tcp_timer.c | 93 +------------ 7 files changed, 224 insertions(+), 226 deletions(-) diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h index 692825fc8135..bec19d5cff26 100644 --- a/include/net/inet_connection_sock.h +++ b/include/net/inet_connection_sock.h @@ -239,6 +239,13 @@ static inline void inet_csk_reqsk_queue_drop(struct sock *sk, reqsk_free(req); } +extern void inet_csk_reqsk_queue_prune(struct sock *parent, + const unsigned long interval, + const unsigned long timeout, + const unsigned long max_rto); + +extern void inet_csk_destroy_sock(struct sock *sk); +extern int inet_csk_listen_start(struct sock *sk, const int nr_table_entries); extern void inet_csk_listen_stop(struct sock *sk); #endif /* _INET_CONNECTION_SOCK_H */ diff --git a/include/net/request_sock.h b/include/net/request_sock.h index 447d287a38fd..b52cc52ffe39 100644 --- a/include/net/request_sock.h +++ b/include/net/request_sock.h @@ -258,8 +258,4 @@ static inline void reqsk_queue_hash_req(struct request_sock_queue *queue, write_unlock(&queue->syn_wait_lock); } -extern void reqsk_queue_prune(struct request_sock_queue *queue, struct sock *parent, - const unsigned long interval, const unsigned long timeout, - const unsigned long max_rto, int max_retries); - #endif /* _REQUEST_SOCK_H */ diff --git a/include/net/tcp.h b/include/net/tcp.h index 2423f059b62b..077db859ae0d 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -423,9 +423,6 @@ extern int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, size_t len, int nonblock, int flags, int *addr_len); -extern int inet_csk_listen_start(struct sock *sk, - const int nr_table_entries); - extern void tcp_parse_options(struct sk_buff *skb, struct tcp_options_received *opt_rx, 
int estab); @@ -861,9 +858,6 @@ static __inline__ void tcp_update_wl(struct tcp_sock *tp, u32 ack, u32 seq) tp->snd_wl1 = seq; } -extern void inet_csk_destroy_sock(struct sock *sk); - - /* * Calculate(/check) TCP checksum */ diff --git a/net/dccp/timer.c b/net/dccp/timer.c index 8c396ee01aac..9f1f1ab9e2b4 100644 --- a/net/dccp/timer.c +++ b/net/dccp/timer.c @@ -220,11 +220,7 @@ out: */ static void dccp_response_timer(struct sock *sk) { - struct inet_connection_sock *icsk = inet_csk(sk); - const int max_retries = icsk->icsk_syn_retries ? : TCP_SYNACK_RETRIES /* FIXME sysctl_tcp_synack_retries */; - - reqsk_queue_prune(&icsk->icsk_accept_queue, sk, TCP_SYNQ_INTERVAL, - DCCP_TIMEOUT_INIT, DCCP_RTO_MAX, max_retries); + inet_csk_reqsk_queue_prune(sk, TCP_SYNQ_INTERVAL, DCCP_TIMEOUT_INIT, DCCP_RTO_MAX); } static void dccp_keepalive_timer(unsigned long data) diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index 136ada050b63..026630a15ea0 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -23,6 +23,7 @@ #include #include #include +#include #ifdef INET_CSK_DEBUG const char inet_csk_timer_bug_msg[] = "inet_csk BUG: unknown timer value\n"; @@ -398,8 +399,100 @@ void inet_csk_reqsk_queue_hash_add(struct sock *sk, struct request_sock *req, inet_csk_reqsk_queue_added(sk, timeout); } +/* Only thing we need from tcp.h */ +extern int sysctl_tcp_synack_retries; + EXPORT_SYMBOL_GPL(inet_csk_reqsk_queue_hash_add); +void inet_csk_reqsk_queue_prune(struct sock *parent, + const unsigned long interval, + const unsigned long timeout, + const unsigned long max_rto) +{ + struct inet_connection_sock *icsk = inet_csk(parent); + struct request_sock_queue *queue = &icsk->icsk_accept_queue; + struct listen_sock *lopt = queue->listen_opt; + int max_retries = icsk->icsk_syn_retries ? 
: sysctl_tcp_synack_retries; + int thresh = max_retries; + unsigned long now = jiffies; + struct request_sock **reqp, *req; + int i, budget; + + if (lopt == NULL || lopt->qlen == 0) + return; + + /* Normally all the openreqs are young and become mature + * (i.e. converted to established socket) for first timeout. + * If synack was not acknowledged for 3 seconds, it means + * one of the following things: synack was lost, ack was lost, + * rtt is high or nobody planned to ack (i.e. synflood). + * When server is a bit loaded, queue is populated with old + * open requests, reducing effective size of queue. + * When server is well loaded, queue size reduces to zero + * after several minutes of work. It is not synflood, + * it is normal operation. The solution is pruning + * too old entries overriding normal timeout, when + * situation becomes dangerous. + * + * Essentially, we reserve half of room for young + * embrions; and abort old ones without pity, if old + * ones are about to clog our table. 
+ */ + if (lopt->qlen>>(lopt->max_qlen_log-1)) { + int young = (lopt->qlen_young<<1); + + while (thresh > 2) { + if (lopt->qlen < young) + break; + thresh--; + young <<= 1; + } + } + + if (queue->rskq_defer_accept) + max_retries = queue->rskq_defer_accept; + + budget = 2 * (lopt->nr_table_entries / (timeout / interval)); + i = lopt->clock_hand; + + do { + reqp=&lopt->syn_table[i]; + while ((req = *reqp) != NULL) { + if (time_after_eq(now, req->expires)) { + if ((req->retrans < thresh || + (inet_rsk(req)->acked && req->retrans < max_retries)) + && !req->rsk_ops->rtx_syn_ack(parent, req, NULL)) { + unsigned long timeo; + + if (req->retrans++ == 0) + lopt->qlen_young--; + timeo = min((timeout << req->retrans), max_rto); + req->expires = now + timeo; + reqp = &req->dl_next; + continue; + } + + /* Drop this request */ + inet_csk_reqsk_queue_unlink(parent, req, reqp); + reqsk_queue_removed(queue, req); + reqsk_free(req); + continue; + } + reqp = &req->dl_next; + } + + i = (i + 1) & (lopt->nr_table_entries - 1); + + } while (--budget > 0); + + lopt->clock_hand = i; + + if (lopt->qlen) + inet_csk_reset_keepalive_timer(parent, interval); +} + +EXPORT_SYMBOL_GPL(inet_csk_reqsk_queue_prune); + struct sock *inet_csk_clone(struct sock *sk, const struct request_sock *req, const unsigned int __nocast priority) { @@ -424,3 +517,124 @@ struct sock *inet_csk_clone(struct sock *sk, const struct request_sock *req, } EXPORT_SYMBOL_GPL(inet_csk_clone); + +/* + * At this point, there should be no process reference to this + * socket, and thus no user references at all. Therefore we + * can assume the socket waitqueue is inactive and nobody will + * try to jump onto it. + */ +void inet_csk_destroy_sock(struct sock *sk) +{ + BUG_TRAP(sk->sk_state == TCP_CLOSE); + BUG_TRAP(sock_flag(sk, SOCK_DEAD)); + + /* It cannot be in hash table! 
*/ + BUG_TRAP(sk_unhashed(sk)); + + /* If it has not 0 inet_sk(sk)->num, it must be bound */ + BUG_TRAP(!inet_sk(sk)->num || inet_csk(sk)->icsk_bind_hash); + + sk->sk_prot->destroy(sk); + + sk_stream_kill_queues(sk); + + xfrm_sk_free_policy(sk); + + sk_refcnt_debug_release(sk); + + atomic_dec(sk->sk_prot->orphan_count); + sock_put(sk); +} + +EXPORT_SYMBOL(inet_csk_destroy_sock); + +int inet_csk_listen_start(struct sock *sk, const int nr_table_entries) +{ + struct inet_sock *inet = inet_sk(sk); + struct inet_connection_sock *icsk = inet_csk(sk); + int rc = reqsk_queue_alloc(&icsk->icsk_accept_queue, nr_table_entries); + + if (rc != 0) + return rc; + + sk->sk_max_ack_backlog = 0; + sk->sk_ack_backlog = 0; + inet_csk_delack_init(sk); + + /* There is race window here: we announce ourselves listening, + * but this transition is still not validated by get_port(). + * It is OK, because this socket enters to hash table only + * after validation is complete. + */ + sk->sk_state = TCP_LISTEN; + if (!sk->sk_prot->get_port(sk, inet->num)) { + inet->sport = htons(inet->num); + + sk_dst_reset(sk); + sk->sk_prot->hash(sk); + + return 0; + } + + sk->sk_state = TCP_CLOSE; + __reqsk_queue_destroy(&icsk->icsk_accept_queue); + return -EADDRINUSE; +} + +EXPORT_SYMBOL_GPL(inet_csk_listen_start); + +/* + * This routine closes sockets which have been at least partially + * opened, but not yet accepted. + */ +void inet_csk_listen_stop(struct sock *sk) +{ + struct inet_connection_sock *icsk = inet_csk(sk); + struct request_sock *acc_req; + struct request_sock *req; + + inet_csk_delete_keepalive_timer(sk); + + /* make all the listen_opt local to us */ + acc_req = reqsk_queue_yank_acceptq(&icsk->icsk_accept_queue); + + /* Following specs, it would be better either to send FIN + * (and enter FIN-WAIT-1, it is normal close) + * or to send active reset (abort). 
+ * Certainly, it is pretty dangerous while synflood, but it is + * bad justification for our negligence 8) + * To be honest, we are not able to make either + * of the variants now. --ANK + */ + reqsk_queue_destroy(&icsk->icsk_accept_queue); + + while ((req = acc_req) != NULL) { + struct sock *child = req->sk; + + acc_req = req->dl_next; + + local_bh_disable(); + bh_lock_sock(child); + BUG_TRAP(!sock_owned_by_user(child)); + sock_hold(child); + + sk->sk_prot->disconnect(child, O_NONBLOCK); + + sock_orphan(child); + + atomic_inc(sk->sk_prot->orphan_count); + + inet_csk_destroy_sock(child); + + bh_unlock_sock(child); + local_bh_enable(); + sock_put(child); + + sk_acceptq_removed(sk); + __reqsk_free(req); + } + BUG_TRAP(!sk->sk_ack_backlog); +} + +EXPORT_SYMBOL_GPL(inet_csk_listen_stop); diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index a4e9eec44895..4bda522d25cf 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -456,96 +456,6 @@ int tcp_ioctl(struct sock *sk, int cmd, unsigned long arg) return put_user(answ, (int __user *)arg); } -int inet_csk_listen_start(struct sock *sk, const int nr_table_entries) -{ - struct inet_sock *inet = inet_sk(sk); - struct inet_connection_sock *icsk = inet_csk(sk); - int rc = reqsk_queue_alloc(&icsk->icsk_accept_queue, nr_table_entries); - - if (rc != 0) - return rc; - - sk->sk_max_ack_backlog = 0; - sk->sk_ack_backlog = 0; - inet_csk_delack_init(sk); - - /* There is race window here: we announce ourselves listening, - * but this transition is still not validated by get_port(). - * It is OK, because this socket enters to hash table only - * after validation is complete. 
- */ - sk->sk_state = TCP_LISTEN; - if (!sk->sk_prot->get_port(sk, inet->num)) { - inet->sport = htons(inet->num); - - sk_dst_reset(sk); - sk->sk_prot->hash(sk); - - return 0; - } - - sk->sk_state = TCP_CLOSE; - __reqsk_queue_destroy(&icsk->icsk_accept_queue); - return -EADDRINUSE; -} - -EXPORT_SYMBOL_GPL(inet_csk_listen_start); - -/* - * This routine closes sockets which have been at least partially - * opened, but not yet accepted. - */ -void inet_csk_listen_stop(struct sock *sk) -{ - struct inet_connection_sock *icsk = inet_csk(sk); - struct request_sock *acc_req; - struct request_sock *req; - - inet_csk_delete_keepalive_timer(sk); - - /* make all the listen_opt local to us */ - acc_req = reqsk_queue_yank_acceptq(&icsk->icsk_accept_queue); - - /* Following specs, it would be better either to send FIN - * (and enter FIN-WAIT-1, it is normal close) - * or to send active reset (abort). - * Certainly, it is pretty dangerous while synflood, but it is - * bad justification for our negligence 8) - * To be honest, we are not able to make either - * of the variants now. 
--ANK - */ - reqsk_queue_destroy(&icsk->icsk_accept_queue); - - while ((req = acc_req) != NULL) { - struct sock *child = req->sk; - - acc_req = req->dl_next; - - local_bh_disable(); - bh_lock_sock(child); - BUG_TRAP(!sock_owned_by_user(child)); - sock_hold(child); - - sk->sk_prot->disconnect(child, O_NONBLOCK); - - sock_orphan(child); - - atomic_inc(sk->sk_prot->orphan_count); - - inet_csk_destroy_sock(child); - - bh_unlock_sock(child); - local_bh_enable(); - sock_put(child); - - sk_acceptq_removed(sk); - __reqsk_free(req); - } - BUG_TRAP(!sk->sk_ack_backlog); -} - -EXPORT_SYMBOL_GPL(inet_csk_listen_stop); - static inline void tcp_mark_push(struct tcp_sock *tp, struct sk_buff *skb) { TCP_SKB_CB(skb)->flags |= TCPCB_FLAG_PSH; @@ -1559,35 +1469,6 @@ void tcp_shutdown(struct sock *sk, int how) } } -/* - * At this point, there should be no process reference to this - * socket, and thus no user references at all. Therefore we - * can assume the socket waitqueue is inactive and nobody will - * try to jump onto it. - */ -void inet_csk_destroy_sock(struct sock *sk) -{ - BUG_TRAP(sk->sk_state == TCP_CLOSE); - BUG_TRAP(sock_flag(sk, SOCK_DEAD)); - - /* It cannot be in hash table! 
*/ - BUG_TRAP(sk_unhashed(sk)); - - /* If it has not 0 inet_sk(sk)->num, it must be bound */ - BUG_TRAP(!inet_sk(sk)->num || inet_csk(sk)->icsk_bind_hash); - - sk->sk_prot->destroy(sk); - - sk_stream_kill_queues(sk); - - xfrm_sk_free_policy(sk); - - sk_refcnt_debug_release(sk); - - atomic_dec(sk->sk_prot->orphan_count); - sock_put(sk); -} - void tcp_close(struct sock *sk, long timeout) { struct sk_buff *skb; @@ -2258,7 +2139,6 @@ void __init tcp_init(void) } EXPORT_SYMBOL(tcp_close); -EXPORT_SYMBOL(inet_csk_destroy_sock); EXPORT_SYMBOL(tcp_disconnect); EXPORT_SYMBOL(tcp_getsockopt); EXPORT_SYMBOL(tcp_ioctl); diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index b614ad4d30c9..72cec6981830 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -424,103 +424,14 @@ out_unlock: sock_put(sk); } -void reqsk_queue_prune(struct request_sock_queue *queue, struct sock *parent, - const unsigned long interval, const unsigned long timeout, - const unsigned long max_rto, int max_retries) -{ - struct inet_connection_sock *icsk = inet_csk(parent); - struct listen_sock *lopt = queue->listen_opt; - int thresh = max_retries; - unsigned long now = jiffies; - struct request_sock **reqp, *req; - int i, budget; - - if (lopt == NULL || lopt->qlen == 0) - return; - - /* Normally all the openreqs are young and become mature - * (i.e. converted to established socket) for first timeout. - * If synack was not acknowledged for 3 seconds, it means - * one of the following things: synack was lost, ack was lost, - * rtt is high or nobody planned to ack (i.e. synflood). - * When server is a bit loaded, queue is populated with old - * open requests, reducing effective size of queue. - * When server is well loaded, queue size reduces to zero - * after several minutes of work. It is not synflood, - * it is normal operation. The solution is pruning - * too old entries overriding normal timeout, when - * situation becomes dangerous. 
- * - * Essentially, we reserve half of room for young - * embrions; and abort old ones without pity, if old - * ones are about to clog our table. - */ - if (lopt->qlen>>(lopt->max_qlen_log-1)) { - int young = (lopt->qlen_young<<1); - - while (thresh > 2) { - if (lopt->qlen < young) - break; - thresh--; - young <<= 1; - } - } - - if (queue->rskq_defer_accept) - max_retries = queue->rskq_defer_accept; - - budget = 2 * (lopt->nr_table_entries / (timeout / interval)); - i = lopt->clock_hand; - - do { - reqp=&lopt->syn_table[i]; - while ((req = *reqp) != NULL) { - if (time_after_eq(now, req->expires)) { - if ((req->retrans < thresh || - (inet_rsk(req)->acked && req->retrans < max_retries)) - && !req->rsk_ops->rtx_syn_ack(parent, req, NULL)) { - unsigned long timeo; - - if (req->retrans++ == 0) - lopt->qlen_young--; - timeo = min((timeout << req->retrans), max_rto); - req->expires = now + timeo; - reqp = &req->dl_next; - continue; - } - - /* Drop this request */ - inet_csk_reqsk_queue_unlink(parent, req, reqp); - reqsk_queue_removed(&icsk->icsk_accept_queue, req); - reqsk_free(req); - continue; - } - reqp = &req->dl_next; - } - - i = (i + 1) & (lopt->nr_table_entries - 1); - - } while (--budget > 0); - - lopt->clock_hand = i; - - if (lopt->qlen) - inet_csk_reset_keepalive_timer(parent, interval); -} - -EXPORT_SYMBOL_GPL(reqsk_queue_prune); - /* * Timer for listening sockets */ static void tcp_synack_timer(struct sock *sk) { - struct inet_connection_sock *icsk = inet_csk(sk); - const int max_retries = icsk->icsk_syn_retries ? 
: sysctl_tcp_synack_retries; - - reqsk_queue_prune(&icsk->icsk_accept_queue, sk, TCP_SYNQ_INTERVAL, - TCP_TIMEOUT_INIT, TCP_RTO_MAX, max_retries); + inet_csk_reqsk_queue_prune(sk, TCP_SYNQ_INTERVAL, + TCP_TIMEOUT_INIT, TCP_RTO_MAX); } void tcp_set_keepalive(struct sock *sk, int val) From 95b81ef794278c835b321f6376b0522cd5df59b7 Mon Sep 17 00:00:00 2001 From: Yoshifumi Nishida Date: Tue, 9 Aug 2005 20:15:35 -0700 Subject: [PATCH 331/584] [DCCP]: Fix checksum routines Signed-off-by: Yoshifumi Nishida Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. Miller --- net/dccp/dccp.h | 3 ++- net/dccp/ipv4.c | 38 ++++++++++++++++++++------------------ net/dccp/output.c | 9 ++++++--- 3 files changed, 28 insertions(+), 22 deletions(-) diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h index fb83454102c1..55b690ab61ae 100644 --- a/net/dccp/dccp.h +++ b/net/dccp/dccp.h @@ -242,7 +242,8 @@ extern int dccp_setsockopt(struct sock *sk, int level, int optname, char *optval, int optlen); extern void dccp_shutdown(struct sock *sk, int how); -extern int dccp_v4_checksum(struct sk_buff *skb); +extern int dccp_v4_checksum(const struct sk_buff *skb, + const u32 saddr, const u32 daddr); extern int dccp_v4_send_reset(struct sock *sk, enum dccp_reset_codes code); extern void dccp_send_close(struct sock *sk); diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index 083bacaecb3b..7b90606ec10e 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -802,9 +802,9 @@ static struct sock *dccp_v4_hnd_req(struct sock *sk, struct sk_buff *skb) return sk; } -int dccp_v4_checksum(struct sk_buff *skb) +int dccp_v4_checksum(const struct sk_buff *skb, const u32 saddr, const u32 daddr) { - struct dccp_hdr* dh = dccp_hdr(skb); + const struct dccp_hdr* dh = dccp_hdr(skb); int checksum_len; u32 tmp; @@ -816,24 +816,24 @@ int dccp_v4_checksum(struct sk_buff *skb) } tmp = csum_partial((unsigned char *)dh, checksum_len, 0); - return csum_fold(tmp); + return csum_tcpudp_magic(saddr, daddr, checksum_len, 
IPPROTO_DCCP, tmp); } -static int dccp_v4_verify_checksum(struct sk_buff *skb) +static int dccp_v4_verify_checksum(struct sk_buff *skb, + const u32 saddr, const u32 daddr) { - struct dccp_hdr *th = dccp_hdr(skb); - const u16 remote_checksum = th->dccph_checksum; - u16 local_checksum; + struct dccp_hdr *dh = dccp_hdr(skb); + int checksum_len; + u32 tmp; - /* FIXME: don't mess with skb payload */ - th->dccph_checksum = 0; /* zero it for computation */ - - local_checksum = dccp_v4_checksum(skb); - - /* FIXME: don't mess with skb payload */ - th->dccph_checksum = remote_checksum; /* put it back */ - - return remote_checksum == local_checksum ? 0 : -1; + if (dh->dccph_cscov == 0) + checksum_len = skb->len; + else { + checksum_len = (dh->dccph_cscov + dh->dccph_x) * sizeof(u32); + checksum_len = checksum_len < skb->len ? checksum_len : skb->len; + } + tmp = csum_partial((unsigned char *)dh, checksum_len, 0); + return csum_tcpudp_magic(saddr, daddr, checksum_len, IPPROTO_DCCP, tmp) == 0 ? 0 : -1; } static struct dst_entry* dccp_v4_route_skb(struct sock *sk, @@ -902,7 +902,8 @@ void dccp_v4_ctl_send_reset(struct sk_buff *rxskb) dccp_hdr_set_seq(dh, DCCP_SKB_CB(rxskb)->dccpd_ack_seq); dccp_hdr_set_ack(dccp_hdr_ack_bits(skb), DCCP_SKB_CB(rxskb)->dccpd_seq); - dh->dccph_checksum = dccp_v4_checksum(skb); + dh->dccph_checksum = dccp_v4_checksum(skb, rxskb->nh.iph->saddr, + rxskb->nh.iph->daddr); bh_lock_sock(dccp_ctl_socket->sk); err = ip_build_and_send_pkt(skb, dccp_ctl_socket->sk, @@ -1024,7 +1025,8 @@ static inline int dccp_invalid_packet(struct sk_buff *skb) } /* If the header checksum is incorrect, drop packet and return */ - if (dccp_v4_verify_checksum(skb) < 0) { + if (dccp_v4_verify_checksum(skb, skb->nh.iph->saddr, + skb->nh.iph->daddr) < 0) { dccp_pr_debug("header checksum is incorrect\n"); return 1; } diff --git a/net/dccp/output.c b/net/dccp/output.c index 22ca2910d4f2..4945eaa9d1a4 100644 --- a/net/dccp/output.c +++ b/net/dccp/output.c @@ -93,7 +93,8 @@ int 
dccp_transmit_skb(struct sock *sk, struct sk_buff *skb) break; } - dh->dccph_checksum = dccp_v4_checksum(skb); + dh->dccph_checksum = dccp_v4_checksum(skb, inet->saddr, + inet->daddr); if (dcb->dccpd_type == DCCP_PKT_ACK || dcb->dccpd_type == DCCP_PKT_DATAACK) @@ -193,7 +194,8 @@ struct sk_buff *dccp_make_response(struct sock *sk, struct dst_entry *dst, dccp_hdr_set_seq(dh, dccp_rsk(req)->dreq_iss); dccp_hdr_set_ack(dccp_hdr_ack_bits(skb), dccp_rsk(req)->dreq_isr); - dh->dccph_checksum = dccp_v4_checksum(skb); + dh->dccph_checksum = dccp_v4_checksum(skb, inet_rsk(req)->loc_addr, + inet_rsk(req)->rmt_addr); DCCP_INC_STATS(DCCP_MIB_OUTSEGS); return skb; @@ -242,7 +244,8 @@ struct sk_buff *dccp_make_reset(struct sock *sk, struct dst_entry *dst, dccp_hdr_reset(skb)->dccph_reset_code = code; - dh->dccph_checksum = dccp_v4_checksum(skb); + dh->dccph_checksum = dccp_v4_checksum(skb, inet_sk(sk)->saddr, + inet_sk(sk)->daddr); DCCP_INC_STATS(DCCP_MIB_OUTSEGS); return skb; From 74459dc7bacda04d14626d239c8f5c4dac22560d Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 9 Aug 2005 20:15:51 -0700 Subject: [PATCH 332/584] [LIST]: Introduce list_for_each_entry_safe_continue Used in the dccp CCID3 code, that is going to be submitted RSN. Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. Miller --- include/linux/list.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/include/linux/list.h b/include/linux/list.h index aab2db21b013..597094e0fdb5 100644 --- a/include/linux/list.h +++ b/include/linux/list.h @@ -418,6 +418,19 @@ static inline void list_splice_init(struct list_head *list, &pos->member != (head); \ pos = n, n = list_entry(n->member.next, typeof(*n), member)) +/** + * list_for_each_entry_safe_continue - iterate over list of given type + * continuing after existing point safe against removal of list entry + * @pos: the type * to use as a loop counter. 
+ * @n: another type * to use as temporary storage + * @head: the head for your list. + * @member: the name of the list_struct within the struct. + */ +#define list_for_each_entry_safe_continue(pos, n, head, member) \ + for (pos = n, n = list_entry(n->member.next, typeof(*n), member); \ + &pos->member != (head); \ + pos = n, n = list_entry(n->member.next, typeof(*n), member)) + /** * list_for_each_rcu - iterate over an rcu-protected list * @pos: the &struct list_head to use as a loop counter. From 757f612e091e7d13707eedc3ff71f1a9b53f5537 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 9 Aug 2005 20:16:04 -0700 Subject: [PATCH 333/584] [CCID3]: Reenable list_for_each_entry_safe_continue usage Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. Miller --- net/dccp/ccids/ccid3.c | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c index 4f45902cb55e..04299c7565f3 100644 --- a/net/dccp/ccids/ccid3.c +++ b/net/dccp/ccids/ccid3.c @@ -1272,13 +1272,10 @@ static void ccid3_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb) ccid3_calc_new_delta(hctx); /* remove all packets older than the one acked from history */ -#if 0 - FIXME! list_for_each_entry_safe_continue(entry, next, &hctx->ccid3hctx_hist, ccid3htx_node) { list_del_init(&entry->ccid3htx_node); ccid3_tx_hist_entry_delete(entry); } -#endif if (hctx->ccid3hctx_x < 10) { ccid3_pr_debug("ccid3_hc_tx_packet_recv hctx->ccid3hctx_x < 10\n"); hctx->ccid3hctx_x = 10; @@ -1820,8 +1817,7 @@ static void ccid3_hc_rx_detect_loss(struct sock *sk) a_next = b_next; num_later = 1; -#if 0 - FIXME MERGE GIT! 
+ list_for_each_entry_safe_continue(entry, a_next, &hcrx->ccid3hcrx_hist, ccid3hrx_node) { if (num_later == 0) { a_loss = entry; @@ -1830,7 +1826,6 @@ static void ccid3_hc_rx_detect_loss(struct sock *sk) entry->ccid3hrx_type == DCCP_PKT_DATAACK) --num_later; } -#endif if (a_loss == NULL) { if (list_empty(&hcrx->ccid3hcrx_loss_interval_hist)) { @@ -1848,8 +1843,6 @@ static void ccid3_hc_rx_detect_loss(struct sock *sk) /* Locate a lost data packet */ entry = packet = b_loss; -#if 0 - FIXME MERGE GIT! list_for_each_entry_safe_continue(entry, b_next, &hcrx->ccid3hcrx_hist, ccid3hrx_node) { u64 delta = dccp_delta_seqno(entry->ccid3hrx_seqno, packet->ccid3hrx_seqno); @@ -1875,7 +1868,6 @@ static void ccid3_hc_rx_detect_loss(struct sock *sk) if (packet == a_loss) break; } -#endif if (seq_loss != DCCP_MAX_SEQNO + 1) win_loss = a_loss->ccid3hrx_win_count; From 2009493065e01b1fe27c1b98ffbcfab98e185f72 Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Tue, 9 Aug 2005 20:16:32 -0700 Subject: [PATCH 334/584] [TG3]: Add basic register access function pointers This patch adds the basic function pointers to do register accesses in the fast path. This was suggested by David Miller. The idea is that various register access methods for different hardware errata can easily be implemented with these function pointers and performance will not be degraded on chips that use normal register access methods. The various register read write macros (e.g. tw32, tr32, tw32_mailbox) are redefined to call the function pointers. Signed-off-by: Michael Chan Signed-off-by: David S. 
Miller --- drivers/net/tg3.c | 34 +++++++++++++++++++++++----------- drivers/net/tg3.h | 8 ++++++++ 2 files changed, 31 insertions(+), 11 deletions(-) diff --git a/drivers/net/tg3.c b/drivers/net/tg3.c index 6d4ab1e333b5..13283c29f802 100644 --- a/drivers/net/tg3.c +++ b/drivers/net/tg3.c @@ -366,7 +366,7 @@ static void _tw32_flush(struct tg3 *tp, u32 off, u32 val) } } -static inline void _tw32_rx_mbox(struct tg3 *tp, u32 off, u32 val) +static void tg3_write32_rx_mbox(struct tg3 *tp, u32 off, u32 val) { void __iomem *mbox = tp->regs + off; writel(val, mbox); @@ -374,7 +374,7 @@ static inline void _tw32_rx_mbox(struct tg3 *tp, u32 off, u32 val) readl(mbox); } -static inline void _tw32_tx_mbox(struct tg3 *tp, u32 off, u32 val) +static void tg3_write32_tx_mbox(struct tg3 *tp, u32 off, u32 val) { void __iomem *mbox = tp->regs + off; writel(val, mbox); @@ -384,17 +384,23 @@ static inline void _tw32_tx_mbox(struct tg3 *tp, u32 off, u32 val) readl(mbox); } -#define tw32_mailbox(reg, val) writel(((val) & 0xffffffff), tp->regs + (reg)) -#define tw32_rx_mbox(reg, val) _tw32_rx_mbox(tp, reg, val) -#define tw32_tx_mbox(reg, val) _tw32_tx_mbox(tp, reg, val) +static void tg3_write32(struct tg3 *tp, u32 off, u32 val) +{ + writel(val, tp->regs + off); +} -#define tw32(reg,val) tg3_write_indirect_reg32(tp,(reg),(val)) +static u32 tg3_read32(struct tg3 *tp, u32 off) +{ + return (readl(tp->regs + off)); +} + +#define tw32_mailbox(reg, val) tp->write32_mbox(tp, reg, val) +#define tw32_rx_mbox(reg, val) tp->write32_rx_mbox(tp, reg, val) +#define tw32_tx_mbox(reg, val) tp->write32_tx_mbox(tp, reg, val) + +#define tw32(reg,val) tp->write32(tp, reg, val) #define tw32_f(reg,val) _tw32_flush(tp,(reg),(val)) -#define tw16(reg,val) writew(((val) & 0xffff), tp->regs + (reg)) -#define tw8(reg,val) writeb(((val) & 0xff), tp->regs + (reg)) -#define tr32(reg) readl(tp->regs + (reg)) -#define tr16(reg) readw(tp->regs + (reg)) -#define tr8(reg) readb(tp->regs + (reg)) +#define tr32(reg) 
tp->read32(tp, reg) static void tg3_write_mem(struct tg3 *tp, u32 off, u32 val) { @@ -9325,6 +9331,12 @@ static int __devinit tg3_get_invariants(struct tg3 *tp) pci_write_config_dword(tp->pdev, TG3PCI_PCISTATE, pci_state_reg); } + tp->read32 = tg3_read32; + tp->write32 = tg3_write_indirect_reg32; + tp->write32_mbox = tg3_write32; + tp->write32_tx_mbox = tg3_write32_tx_mbox; + tp->write32_rx_mbox = tg3_write32_rx_mbox; + /* Get eeprom hw config before calling tg3_set_power_state(). * In particular, the TG3_FLAG_EEPROM_WRITE_PROT flag must be * determined before calling tg3_set_power_state() so that diff --git a/drivers/net/tg3.h b/drivers/net/tg3.h index 5c4433c147fa..394acddd53b3 100644 --- a/drivers/net/tg3.h +++ b/drivers/net/tg3.h @@ -2049,6 +2049,10 @@ struct tg3 { spinlock_t lock; spinlock_t indirect_lock; + u32 (*read32) (struct tg3 *, u32); + void (*write32) (struct tg3 *, u32, u32); + void (*write32_mbox) (struct tg3 *, u32, + u32); void __iomem *regs; struct net_device *dev; struct pci_dev *pdev; @@ -2060,6 +2064,8 @@ struct tg3 { u32 msg_enable; /* begin "tx thread" cacheline section */ + void (*write32_tx_mbox) (struct tg3 *, u32, + u32); u32 tx_prod; u32 tx_cons; u32 tx_pending; @@ -2071,6 +2077,8 @@ struct tg3 { dma_addr_t tx_desc_mapping; /* begin "rx thread" cacheline section */ + void (*write32_rx_mbox) (struct tg3 *, u32, + u32); u32 rx_rcb_ptr; u32 rx_std_ptr; u32 rx_jumbo_ptr; From 1ee582d8e49a1c9dd43b2599f1cd26507182a8d4 Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Tue, 9 Aug 2005 20:16:46 -0700 Subject: [PATCH 335/584] [TG3]: Add various register methods This patch adds various dedicated register read/write methods for the existing workarounds, including PCIX target workaround, write with read flush, etc. The chips that require these workarounds will use these dedicated access functions. Signed-off-by: Michael Chan Signed-off-by: David S. 
Miller --- drivers/net/tg3.c | 66 +++++++++++++++++++++++------------------------ 1 file changed, 33 insertions(+), 33 deletions(-) diff --git a/drivers/net/tg3.c b/drivers/net/tg3.c index 13283c29f802..80fbb183f755 100644 --- a/drivers/net/tg3.c +++ b/drivers/net/tg3.c @@ -340,16 +340,16 @@ static struct { static void tg3_write_indirect_reg32(struct tg3 *tp, u32 off, u32 val) { - if ((tp->tg3_flags & TG3_FLAG_PCIX_TARGET_HWBUG) != 0) { - spin_lock_bh(&tp->indirect_lock); - pci_write_config_dword(tp->pdev, TG3PCI_REG_BASE_ADDR, off); - pci_write_config_dword(tp->pdev, TG3PCI_REG_DATA, val); - spin_unlock_bh(&tp->indirect_lock); - } else { - writel(val, tp->regs + off); - if ((tp->tg3_flags & TG3_FLAG_5701_REG_WRITE_BUG) != 0) - readl(tp->regs + off); - } + spin_lock_bh(&tp->indirect_lock); + pci_write_config_dword(tp->pdev, TG3PCI_REG_BASE_ADDR, off); + pci_write_config_dword(tp->pdev, TG3PCI_REG_DATA, val); + spin_unlock_bh(&tp->indirect_lock); +} + +static void tg3_write_flush_reg32(struct tg3 *tp, u32 off, u32 val) +{ + writel(val, tp->regs + off); + readl(tp->regs + off); } static void _tw32_flush(struct tg3 *tp, u32 off, u32 val) @@ -366,14 +366,6 @@ static void _tw32_flush(struct tg3 *tp, u32 off, u32 val) } } -static void tg3_write32_rx_mbox(struct tg3 *tp, u32 off, u32 val) -{ - void __iomem *mbox = tp->regs + off; - writel(val, mbox); - if (tp->tg3_flags & TG3_FLAG_MBOX_WRITE_REORDER) - readl(mbox); -} - static void tg3_write32_tx_mbox(struct tg3 *tp, u32 off, u32 val) { void __iomem *mbox = tp->regs + off; @@ -4222,7 +4214,7 @@ static void tg3_stop_fw(struct tg3 *); static int tg3_chip_reset(struct tg3 *tp) { u32 val; - u32 flags_save; + void (*write_op)(struct tg3 *, u32, u32); int i; if (!(tp->tg3_flags2 & TG3_FLG2_SUN_570X)) @@ -4234,8 +4226,9 @@ static int tg3_chip_reset(struct tg3 *tp) * fun things. So, temporarily disable the 5701 * hardware workaround, while we do the reset. 
*/ - flags_save = tp->tg3_flags; - tp->tg3_flags &= ~TG3_FLAG_5701_REG_WRITE_BUG; + write_op = tp->write32; + if (write_op == tg3_write_flush_reg32) + tp->write32 = tg3_write32; /* do the reset */ val = GRC_MISC_CFG_CORECLK_RESET; @@ -4254,8 +4247,8 @@ static int tg3_chip_reset(struct tg3 *tp) val |= GRC_MISC_CFG_KEEP_GPHY_POWER; tw32(GRC_MISC_CFG, val); - /* restore 5701 hardware bug workaround flag */ - tp->tg3_flags = flags_save; + /* restore 5701 hardware bug workaround write method */ + tp->write32 = write_op; /* Unfortunately, we have to delay before the PCI read back. * Some 575X chips even will not respond to a PCI cfg access @@ -4641,7 +4634,6 @@ static int tg3_load_firmware_cpu(struct tg3 *tp, u32 cpu_base, u32 cpu_scratch_b int cpu_scratch_size, struct fw_info *info) { int err, i; - u32 orig_tg3_flags = tp->tg3_flags; void (*write_op)(struct tg3 *, u32, u32); if (cpu_base == TX_CPU_BASE && @@ -4657,11 +4649,6 @@ static int tg3_load_firmware_cpu(struct tg3 *tp, u32 cpu_base, u32 cpu_scratch_b else write_op = tg3_write_indirect_reg32; - /* Force use of PCI config space for indirect register - * write calls. - */ - tp->tg3_flags |= TG3_FLAG_PCIX_TARGET_HWBUG; - /* It is possible that bootcode is still loading at this point. * Get the nvram lock first before halting the cpu. 
*/ @@ -4697,7 +4684,6 @@ static int tg3_load_firmware_cpu(struct tg3 *tp, u32 cpu_base, u32 cpu_scratch_b err = 0; out: - tp->tg3_flags = orig_tg3_flags; return err; } @@ -9331,11 +9317,25 @@ static int __devinit tg3_get_invariants(struct tg3 *tp) pci_write_config_dword(tp->pdev, TG3PCI_PCISTATE, pci_state_reg); } + /* Default fast path register access methods */ tp->read32 = tg3_read32; - tp->write32 = tg3_write_indirect_reg32; + tp->write32 = tg3_write32; tp->write32_mbox = tg3_write32; - tp->write32_tx_mbox = tg3_write32_tx_mbox; - tp->write32_rx_mbox = tg3_write32_rx_mbox; + tp->write32_tx_mbox = tg3_write32; + tp->write32_rx_mbox = tg3_write32; + + /* Various workaround register access methods */ + if (tp->tg3_flags & TG3_FLAG_PCIX_TARGET_HWBUG) + tp->write32 = tg3_write_indirect_reg32; + else if (tp->tg3_flags & TG3_FLAG_5701_REG_WRITE_BUG) + tp->write32 = tg3_write_flush_reg32; + + if ((tp->tg3_flags & TG3_FLAG_TXD_MBOX_HWBUG) || + (tp->tg3_flags & TG3_FLAG_MBOX_WRITE_REORDER)) { + tp->write32_tx_mbox = tg3_write32_tx_mbox; + if (tp->tg3_flags & TG3_FLAG_MBOX_WRITE_REORDER) + tp->write32_rx_mbox = tg3_write_flush_reg32; + } /* Get eeprom hw config before calling tg3_set_power_state(). * In particular, the TG3_FLAG_EEPROM_WRITE_PROT flag must be From 09ee929cccfd0b56ea3724b3c6299fbbe813df43 Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Tue, 9 Aug 2005 20:17:00 -0700 Subject: [PATCH 336/584] [TG3]: Add mailbox read method This patch adds the mailbox read method and also adds an inline function tw32_mailbox_f() for mailbox writes that require read flush. Signed-off-by: Michael Chan Signed-off-by: David S. 
Miller --- drivers/net/tg3.c | 41 +++++++++++++++++++++++------------------ drivers/net/tg3.h | 1 + 2 files changed, 24 insertions(+), 18 deletions(-) diff --git a/drivers/net/tg3.c b/drivers/net/tg3.c index 80fbb183f755..8411e0f4cb69 100644 --- a/drivers/net/tg3.c +++ b/drivers/net/tg3.c @@ -366,6 +366,12 @@ static void _tw32_flush(struct tg3 *tp, u32 off, u32 val) } } +static inline void tw32_mailbox_flush(struct tg3 *tp, u32 off, u32 val) +{ + tp->write32_mbox(tp, off, val); + tp->read32_mbox(tp, off); +} + static void tg3_write32_tx_mbox(struct tg3 *tp, u32 off, u32 val) { void __iomem *mbox = tp->regs + off; @@ -387,8 +393,10 @@ static u32 tg3_read32(struct tg3 *tp, u32 off) } #define tw32_mailbox(reg, val) tp->write32_mbox(tp, reg, val) +#define tw32_mailbox_f(reg, val) tw32_mailbox_flush(tp, (reg), (val)) #define tw32_rx_mbox(reg, val) tp->write32_rx_mbox(tp, reg, val) #define tw32_tx_mbox(reg, val) tp->write32_tx_mbox(tp, reg, val) +#define tr32_mailbox(reg) tp->read32_mbox(tp, reg) #define tw32(reg,val) tp->write32(tp, reg, val) #define tw32_f(reg,val) _tw32_flush(tp,(reg),(val)) @@ -420,8 +428,7 @@ static void tg3_disable_ints(struct tg3 *tp) { tw32(TG3PCI_MISC_HOST_CTRL, (tp->misc_host_ctrl | MISC_HOST_CTRL_MASK_PCI_INT)); - tw32_mailbox(MAILBOX_INTERRUPT_0 + TG3_64BIT_REG_LOW, 0x00000001); - tr32(MAILBOX_INTERRUPT_0 + TG3_64BIT_REG_LOW); + tw32_mailbox_f(MAILBOX_INTERRUPT_0 + TG3_64BIT_REG_LOW, 0x00000001); } static inline void tg3_cond_int(struct tg3 *tp) @@ -437,9 +444,8 @@ static void tg3_enable_ints(struct tg3 *tp) tw32(TG3PCI_MISC_HOST_CTRL, (tp->misc_host_ctrl & ~MISC_HOST_CTRL_MASK_PCI_INT)); - tw32_mailbox(MAILBOX_INTERRUPT_0 + TG3_64BIT_REG_LOW, - (tp->last_tag << 24)); - tr32(MAILBOX_INTERRUPT_0 + TG3_64BIT_REG_LOW); + tw32_mailbox_f(MAILBOX_INTERRUPT_0 + TG3_64BIT_REG_LOW, + (tp->last_tag << 24)); tg3_cond_int(tp); } @@ -3276,9 +3282,8 @@ static irqreturn_t tg3_interrupt(int irq, void *dev_id, struct pt_regs *regs) /* No work, shared 
interrupt perhaps? re-enable * interrupts, and flush that PCI write */ - tw32_mailbox(MAILBOX_INTERRUPT_0 + TG3_64BIT_REG_LOW, + tw32_mailbox_f(MAILBOX_INTERRUPT_0 + TG3_64BIT_REG_LOW, 0x00000000); - tr32(MAILBOX_INTERRUPT_0 + TG3_64BIT_REG_LOW); } } else { /* shared interrupt */ handled = 0; @@ -3321,9 +3326,8 @@ static irqreturn_t tg3_interrupt_tagged(int irq, void *dev_id, struct pt_regs *r /* no work, shared interrupt perhaps? re-enable * interrupts, and flush that PCI write */ - tw32_mailbox(MAILBOX_INTERRUPT_0 + TG3_64BIT_REG_LOW, - tp->last_tag << 24); - tr32(MAILBOX_INTERRUPT_0 + TG3_64BIT_REG_LOW); + tw32_mailbox_f(MAILBOX_INTERRUPT_0 + TG3_64BIT_REG_LOW, + tp->last_tag << 24); } } else { /* shared interrupt */ handled = 0; @@ -5800,8 +5804,7 @@ static int tg3_reset_hw(struct tg3 *tp) tw32_f(GRC_LOCAL_CTRL, tp->grc_local_ctrl); udelay(100); - tw32_mailbox(MAILBOX_INTERRUPT_0 + TG3_64BIT_REG_LOW, 0); - tr32(MAILBOX_INTERRUPT_0 + TG3_64BIT_REG_LOW); + tw32_mailbox_f(MAILBOX_INTERRUPT_0 + TG3_64BIT_REG_LOW, 0); tp->last_tag = 0; if (!(tp->tg3_flags2 & TG3_FLG2_5705_PLUS)) { @@ -6190,7 +6193,8 @@ static int tg3_test_interrupt(struct tg3 *tp) HOSTCC_MODE_NOW); for (i = 0; i < 5; i++) { - int_mbox = tr32(MAILBOX_INTERRUPT_0 + TG3_64BIT_REG_LOW); + int_mbox = tr32_mailbox(MAILBOX_INTERRUPT_0 + + TG3_64BIT_REG_LOW); if (int_mbox != 0) break; msleep(10); @@ -6590,10 +6594,10 @@ static int tg3_open(struct net_device *dev) /* Mailboxes */ printk("DEBUG: SNDHOST_PROD[%08x%08x] SNDNIC_PROD[%08x%08x]\n", - tr32(MAILBOX_SNDHOST_PROD_IDX_0 + 0x0), - tr32(MAILBOX_SNDHOST_PROD_IDX_0 + 0x4), - tr32(MAILBOX_SNDNIC_PROD_IDX_0 + 0x0), - tr32(MAILBOX_SNDNIC_PROD_IDX_0 + 0x4)); + tr32_mailbox(MAILBOX_SNDHOST_PROD_IDX_0 + 0x0), + tr32_mailbox(MAILBOX_SNDHOST_PROD_IDX_0 + 0x4), + tr32_mailbox(MAILBOX_SNDNIC_PROD_IDX_0 + 0x0), + tr32_mailbox(MAILBOX_SNDNIC_PROD_IDX_0 + 0x4)); /* NIC side send descriptors. 
*/ for (i = 0; i < 6; i++) { @@ -7893,7 +7897,7 @@ static int tg3_test_loopback(struct tg3 *tp) num_pkts++; tw32_tx_mbox(MAILBOX_SNDHOST_PROD_IDX_0 + TG3_64BIT_REG_LOW, send_idx); - tr32(MAILBOX_SNDHOST_PROD_IDX_0 + TG3_64BIT_REG_LOW); + tr32_mailbox(MAILBOX_SNDHOST_PROD_IDX_0 + TG3_64BIT_REG_LOW); udelay(10); @@ -9320,6 +9324,7 @@ static int __devinit tg3_get_invariants(struct tg3 *tp) /* Default fast path register access methods */ tp->read32 = tg3_read32; tp->write32 = tg3_write32; + tp->read32_mbox = tg3_read32; tp->write32_mbox = tg3_write32; tp->write32_tx_mbox = tg3_write32; tp->write32_rx_mbox = tg3_write32; diff --git a/drivers/net/tg3.h b/drivers/net/tg3.h index 394acddd53b3..c398b8478d62 100644 --- a/drivers/net/tg3.h +++ b/drivers/net/tg3.h @@ -2051,6 +2051,7 @@ struct tg3 { u32 (*read32) (struct tg3 *, u32); void (*write32) (struct tg3 *, u32, u32); + u32 (*read32_mbox) (struct tg3 *, u32); void (*write32_mbox) (struct tg3 *, u32, u32); void __iomem *regs; From 6892914fb7980d844f2bac859f4095df9ebd18da Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Tue, 9 Aug 2005 20:17:14 -0700 Subject: [PATCH 337/584] [TG3]: Add indirect register method for 5703 behind ICH This patch adds the new workaround for 5703 A1/A2 if it is behind certain ICH bridges. The workaround disables memory and uses config. cycles only to access all registers. The 5702/03 chips can mistakenly decode the special cycles from the ICH chipsets as memory write cycles, causing corruption of register and memory space. Only certain ICH bridges will drive special cycles with non-zero data during the address phase which can fall within the 5703's address range. This is not an ICH bug as the PCI spec allows non-zero address during special cycles. However, only these ICH bridges are known to drive non-zero addresses during special cycles. 
The indirect_lock is also changed to spin_lock_irqsave from spin_lock_bh because it is used in irq handler when using the indirect method to disable interrupts. Signed-off-by: Michael Chan Signed-off-by: David S. Miller --- drivers/net/tg3.c | 190 +++++++++++++++++++++++++++++++++++++++------- drivers/net/tg3.h | 1 + 2 files changed, 163 insertions(+), 28 deletions(-) diff --git a/drivers/net/tg3.c b/drivers/net/tg3.c index 8411e0f4cb69..3a7cfb81bf89 100644 --- a/drivers/net/tg3.c +++ b/drivers/net/tg3.c @@ -340,10 +340,12 @@ static struct { static void tg3_write_indirect_reg32(struct tg3 *tp, u32 off, u32 val) { - spin_lock_bh(&tp->indirect_lock); + unsigned long flags; + + spin_lock_irqsave(&tp->indirect_lock, flags); pci_write_config_dword(tp->pdev, TG3PCI_REG_BASE_ADDR, off); pci_write_config_dword(tp->pdev, TG3PCI_REG_DATA, val); - spin_unlock_bh(&tp->indirect_lock); + spin_unlock_irqrestore(&tp->indirect_lock, flags); } static void tg3_write_flush_reg32(struct tg3 *tp, u32 off, u32 val) @@ -352,24 +354,75 @@ static void tg3_write_flush_reg32(struct tg3 *tp, u32 off, u32 val) readl(tp->regs + off); } +static u32 tg3_read_indirect_reg32(struct tg3 *tp, u32 off) +{ + unsigned long flags; + u32 val; + + spin_lock_irqsave(&tp->indirect_lock, flags); + pci_write_config_dword(tp->pdev, TG3PCI_REG_BASE_ADDR, off); + pci_read_config_dword(tp->pdev, TG3PCI_REG_DATA, &val); + spin_unlock_irqrestore(&tp->indirect_lock, flags); + return val; +} + +static void tg3_write_indirect_mbox(struct tg3 *tp, u32 off, u32 val) +{ + unsigned long flags; + + if (off == (MAILBOX_RCVRET_CON_IDX_0 + TG3_64BIT_REG_LOW)) { + pci_write_config_dword(tp->pdev, TG3PCI_RCV_RET_RING_CON_IDX + + TG3_64BIT_REG_LOW, val); + return; + } + if (off == (MAILBOX_RCV_STD_PROD_IDX + TG3_64BIT_REG_LOW)) { + pci_write_config_dword(tp->pdev, TG3PCI_STD_RING_PROD_IDX + + TG3_64BIT_REG_LOW, val); + return; + } + + spin_lock_irqsave(&tp->indirect_lock, flags); + pci_write_config_dword(tp->pdev, 
TG3PCI_REG_BASE_ADDR, off + 0x5600); + pci_write_config_dword(tp->pdev, TG3PCI_REG_DATA, val); + spin_unlock_irqrestore(&tp->indirect_lock, flags); + + /* In indirect mode when disabling interrupts, we also need + * to clear the interrupt bit in the GRC local ctrl register. + */ + if ((off == (MAILBOX_INTERRUPT_0 + TG3_64BIT_REG_LOW)) && + (val == 0x1)) { + pci_write_config_dword(tp->pdev, TG3PCI_MISC_LOCAL_CTRL, + tp->grc_local_ctrl|GRC_LCLCTRL_CLEARINT); + } +} + +static u32 tg3_read_indirect_mbox(struct tg3 *tp, u32 off) +{ + unsigned long flags; + u32 val; + + spin_lock_irqsave(&tp->indirect_lock, flags); + pci_write_config_dword(tp->pdev, TG3PCI_REG_BASE_ADDR, off + 0x5600); + pci_read_config_dword(tp->pdev, TG3PCI_REG_DATA, &val); + spin_unlock_irqrestore(&tp->indirect_lock, flags); + return val; +} + static void _tw32_flush(struct tg3 *tp, u32 off, u32 val) { - if ((tp->tg3_flags & TG3_FLAG_PCIX_TARGET_HWBUG) != 0) { - spin_lock_bh(&tp->indirect_lock); - pci_write_config_dword(tp->pdev, TG3PCI_REG_BASE_ADDR, off); - pci_write_config_dword(tp->pdev, TG3PCI_REG_DATA, val); - spin_unlock_bh(&tp->indirect_lock); - } else { - void __iomem *dest = tp->regs + off; - writel(val, dest); - readl(dest); /* always flush PCI write */ - } + tp->write32(tp, off, val); + if (!(tp->tg3_flags & TG3_FLAG_PCIX_TARGET_HWBUG) && + !(tp->tg3_flags & TG3_FLAG_5701_REG_WRITE_BUG) && + !(tp->tg3_flags2 & TG3_FLG2_ICH_WORKAROUND)) + tp->read32(tp, off); /* flush */ } static inline void tw32_mailbox_flush(struct tg3 *tp, u32 off, u32 val) { tp->write32_mbox(tp, off, val); - tp->read32_mbox(tp, off); + if (!(tp->tg3_flags & TG3_FLAG_MBOX_WRITE_REORDER) && + !(tp->tg3_flags2 & TG3_FLG2_ICH_WORKAROUND)) + tp->read32_mbox(tp, off); } static void tg3_write32_tx_mbox(struct tg3 *tp, u32 off, u32 val) @@ -404,24 +457,28 @@ static u32 tg3_read32(struct tg3 *tp, u32 off) static void tg3_write_mem(struct tg3 *tp, u32 off, u32 val) { - spin_lock_bh(&tp->indirect_lock); + unsigned long flags; + + 
spin_lock_irqsave(&tp->indirect_lock, flags); pci_write_config_dword(tp->pdev, TG3PCI_MEM_WIN_BASE_ADDR, off); pci_write_config_dword(tp->pdev, TG3PCI_MEM_WIN_DATA, val); /* Always leave this as zero. */ pci_write_config_dword(tp->pdev, TG3PCI_MEM_WIN_BASE_ADDR, 0); - spin_unlock_bh(&tp->indirect_lock); + spin_unlock_irqrestore(&tp->indirect_lock, flags); } static void tg3_read_mem(struct tg3 *tp, u32 off, u32 *val) { - spin_lock_bh(&tp->indirect_lock); + unsigned long flags; + + spin_lock_irqsave(&tp->indirect_lock, flags); pci_write_config_dword(tp->pdev, TG3PCI_MEM_WIN_BASE_ADDR, off); pci_read_config_dword(tp->pdev, TG3PCI_MEM_WIN_DATA, val); /* Always leave this as zero. */ pci_write_config_dword(tp->pdev, TG3PCI_MEM_WIN_BASE_ADDR, 0); - spin_unlock_bh(&tp->indirect_lock); + spin_unlock_irqrestore(&tp->indirect_lock, flags); } static void tg3_disable_ints(struct tg3 *tp) @@ -9149,14 +9206,6 @@ static int __devinit tg3_is_sun_570X(struct tg3 *tp) static int __devinit tg3_get_invariants(struct tg3 *tp) { static struct pci_device_id write_reorder_chipsets[] = { - { PCI_DEVICE(PCI_VENDOR_ID_INTEL, - PCI_DEVICE_ID_INTEL_82801AA_8) }, - { PCI_DEVICE(PCI_VENDOR_ID_INTEL, - PCI_DEVICE_ID_INTEL_82801AB_8) }, - { PCI_DEVICE(PCI_VENDOR_ID_INTEL, - PCI_DEVICE_ID_INTEL_82801BA_11) }, - { PCI_DEVICE(PCI_VENDOR_ID_INTEL, - PCI_DEVICE_ID_INTEL_82801BA_6) }, { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_FE_GATE_700C) }, { }, @@ -9173,7 +9222,7 @@ static int __devinit tg3_get_invariants(struct tg3 *tp) tp->tg3_flags2 |= TG3_FLG2_SUN_570X; #endif - /* If we have an AMD 762 or Intel ICH/ICH0/ICH2 chipset, write + /* If we have an AMD 762 chipset, write * reordering to the mailbox registers done by the host * controller can cause major troubles. 
We read back from * every mailbox register write to force the writes to be @@ -9211,6 +9260,69 @@ static int __devinit tg3_get_invariants(struct tg3 *tp) if (tp->pci_chip_rev_id == CHIPREV_ID_5752_A0_HW) tp->pci_chip_rev_id = CHIPREV_ID_5752_A0; + /* If we have 5702/03 A1 or A2 on certain ICH chipsets, + * we need to disable memory and use config. cycles + * only to access all registers. The 5702/03 chips + * can mistakenly decode the special cycles from the + * ICH chipsets as memory write cycles, causing corruption + * of register and memory space. Only certain ICH bridges + * will drive special cycles with non-zero data during the + * address phase which can fall within the 5703's address + * range. This is not an ICH bug as the PCI spec allows + * non-zero address during special cycles. However, only + * these ICH bridges are known to drive non-zero addresses + * during special cycles. + * + * Since special cycles do not cross PCI bridges, we only + * enable this workaround if the 5703 is on the secondary + * bus of these ICH bridges. 
+ */ + if ((tp->pci_chip_rev_id == CHIPREV_ID_5703_A1) || + (tp->pci_chip_rev_id == CHIPREV_ID_5703_A2)) { + static struct tg3_dev_id { + u32 vendor; + u32 device; + u32 rev; + } ich_chipsets[] = { + { PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82801AA_8, + PCI_ANY_ID }, + { PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82801AB_8, + PCI_ANY_ID }, + { PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82801BA_11, + 0xa }, + { PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82801BA_6, + PCI_ANY_ID }, + { }, + }; + struct tg3_dev_id *pci_id = &ich_chipsets[0]; + struct pci_dev *bridge = NULL; + + while (pci_id->vendor != 0) { + bridge = pci_get_device(pci_id->vendor, pci_id->device, + bridge); + if (!bridge) { + pci_id++; + continue; + } + if (pci_id->rev != PCI_ANY_ID) { + u8 rev; + + pci_read_config_byte(bridge, PCI_REVISION_ID, + &rev); + if (rev > pci_id->rev) + continue; + } + if (bridge->subordinate && + (bridge->subordinate->number == + tp->pdev->bus->number)) { + + tp->tg3_flags2 |= TG3_FLG2_ICH_WORKAROUND; + pci_dev_put(bridge); + break; + } + } + } + /* Find msi capability. */ if (GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5780) tp->msi_cap = pci_find_capability(tp->pdev, PCI_CAP_ID_MSI); @@ -9342,6 +9454,22 @@ static int __devinit tg3_get_invariants(struct tg3 *tp) tp->write32_rx_mbox = tg3_write_flush_reg32; } + if (tp->tg3_flags2 & TG3_FLG2_ICH_WORKAROUND) { + tp->read32 = tg3_read_indirect_reg32; + tp->write32 = tg3_write_indirect_reg32; + tp->read32_mbox = tg3_read_indirect_mbox; + tp->write32_mbox = tg3_write_indirect_mbox; + tp->write32_tx_mbox = tg3_write_indirect_mbox; + tp->write32_rx_mbox = tg3_write_indirect_mbox; + + iounmap(tp->regs); + tp->regs = 0; + + pci_read_config_word(tp->pdev, PCI_COMMAND, &pci_cmd); + pci_cmd &= ~PCI_COMMAND_MEMORY; + pci_write_config_word(tp->pdev, PCI_COMMAND, pci_cmd); + } + /* Get eeprom hw config before calling tg3_set_power_state(). 
* In particular, the TG3_FLAG_EEPROM_WRITE_PROT flag must be * determined before calling tg3_set_power_state() so that @@ -10486,7 +10614,10 @@ static int __devinit tg3_init_one(struct pci_dev *pdev, return 0; err_out_iounmap: - iounmap(tp->regs); + if (tp->regs) { + iounmap(tp->regs); + tp->regs = 0; + } err_out_free_dev: free_netdev(dev); @@ -10508,7 +10639,10 @@ static void __devexit tg3_remove_one(struct pci_dev *pdev) struct tg3 *tp = netdev_priv(dev); unregister_netdev(dev); - iounmap(tp->regs); + if (tp->regs) { + iounmap(tp->regs); + tp->regs = 0; + } free_netdev(dev); pci_release_regions(pdev); pci_disable_device(pdev); diff --git a/drivers/net/tg3.h b/drivers/net/tg3.h index c398b8478d62..c184b773e585 100644 --- a/drivers/net/tg3.h +++ b/drivers/net/tg3.h @@ -2174,6 +2174,7 @@ struct tg3 { #define TG3_FLG2_ANY_SERDES (TG3_FLG2_PHY_SERDES | \ TG3_FLG2_MII_SERDES) #define TG3_FLG2_PARALLEL_DETECT 0x01000000 +#define TG3_FLG2_ICH_WORKAROUND 0x02000000 u32 split_mode_max_reqs; #define SPLIT_MODE_5704_MAX_REQ 3 From 15f5a585c6b8dac31ed0a55693aacf51934f0f5d Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Tue, 9 Aug 2005 20:17:28 -0700 Subject: [PATCH 338/584] [TG3]: Eliminate one register write in tg3_restart_ints() The register write to register 0x68 to restart interrupts is unnecessary as the interrupt wasn't masked in that register by the irq handler. This will save one register write in the fast path. Signed-off-by: Michael Chan Signed-off-by: David S. 
Miller --- drivers/net/tg3.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/net/tg3.c b/drivers/net/tg3.c index 3a7cfb81bf89..8bc28b14c70f 100644 --- a/drivers/net/tg3.c +++ b/drivers/net/tg3.c @@ -533,8 +533,6 @@ static inline unsigned int tg3_has_work(struct tg3 *tp) */ static void tg3_restart_ints(struct tg3 *tp) { - tw32(TG3PCI_MISC_HOST_CTRL, - (tp->misc_host_ctrl & ~MISC_HOST_CTRL_MASK_PCI_INT)); tw32_mailbox(MAILBOX_INTERRUPT_0 + TG3_64BIT_REG_LOW, tp->last_tag << 24); mmiowb(); From 087fe256f0aef8d16b19a30c6fb10b899bf1a701 Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Tue, 9 Aug 2005 20:17:41 -0700 Subject: [PATCH 339/584] [TG3]: Fix bug in setting a tg3_flag Found a bug while reviewing the patches the second time. The TG3_FLAG_TXD_MBOX_HWBUG flag is set after the register access methods have been determined. This patch fixes it by moving it up before the various access methods are assigned. Signed-off-by: Michael Chan Signed-off-by: David S. Miller --- drivers/net/tg3.c | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/drivers/net/tg3.c b/drivers/net/tg3.c index 8bc28b14c70f..af8263a1580e 100644 --- a/drivers/net/tg3.c +++ b/drivers/net/tg3.c @@ -9408,6 +9408,12 @@ static int __devinit tg3_get_invariants(struct tg3 *tp) } } + /* 5700 BX chips need to have their TX producer index mailboxes + * written twice to workaround a bug. + */ + if (GET_CHIP_REV(tp->pci_chip_rev_id) == CHIPREV_5700_BX) + tp->tg3_flags |= TG3_FLAG_TXD_MBOX_HWBUG; + /* Back to back register writes can cause problems on this chip, * the workaround is to read back all reg writes except those to * mailbox regs. See tg3_write_indirect_reg32(). @@ -9682,14 +9688,6 @@ static int __devinit tg3_get_invariants(struct tg3 *tp) else tp->tg3_flags &= ~TG3_FLAG_POLL_SERDES; - /* 5700 BX chips need to have their TX producer index mailboxes - * written twice to workaround a bug. 
- */ - if (GET_CHIP_REV(tp->pci_chip_rev_id) == CHIPREV_5700_BX) - tp->tg3_flags |= TG3_FLAG_TXD_MBOX_HWBUG; - else - tp->tg3_flags &= ~TG3_FLAG_TXD_MBOX_HWBUG; - /* It seems all chips can get confused if TX buffers * straddle the 4GB address boundary in some cases. */ From bb97d31f5130d677644d9931ef38613d1164ec94 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 9 Aug 2005 20:19:14 -0700 Subject: [PATCH 340/584] [INET]: Make inet_create try to load protocol modules Syntax is net-pf-PROTOCOL_FAMILY-PROTOCOL-SOCK_TYPE and if this fails net-pf-PROTOCOL_FAMILY-PROTOCOL. Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. Miller --- net/dccp/proto.c | 9 +++++++-- net/ipv4/af_inet.c | 29 +++++++++++++++++++++++++---- net/sctp/protocol.c | 4 ++++ 3 files changed, 36 insertions(+), 6 deletions(-) diff --git a/net/dccp/proto.c b/net/dccp/proto.c index 70284e6afe05..66c43fce17a6 100644 --- a/net/dccp/proto.c +++ b/net/dccp/proto.c @@ -811,8 +811,13 @@ static void __exit dccp_fini(void) module_init(dccp_init); module_exit(dccp_fini); -/* __stringify doesn't likes enums, so use SOCK_DCCP (6) value directly */ -MODULE_ALIAS("net-pf-" __stringify(PF_INET) "-6"); +/* + * __stringify doesn't likes enums, so use SOCK_DCCP (6) and IPPROTO_DCCP (33) + * values directly, Also cover the case where the protocol is not specified, + * i.e. 
net-pf-PF_INET-proto-0-type-SOCK_DCCP + */ +MODULE_ALIAS("net-pf-" __stringify(PF_INET) "-proto-33-type-6"); +MODULE_ALIAS("net-pf-" __stringify(PF_INET) "-proto-0-type-6"); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Arnaldo Carvalho de Melo "); MODULE_DESCRIPTION("DCCP - Datagram Congestion Controlled Protocol"); diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 52f5ecc58c46..20f52b5f5dea 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -228,12 +228,14 @@ static int inet_create(struct socket *sock, int protocol) struct proto *answer_prot; unsigned char answer_flags; char answer_no_check; - int err; + int try_loading_module = 0; + int err = -ESOCKTNOSUPPORT; sock->state = SS_UNCONNECTED; /* Look for the requested type/protocol pair. */ answer = NULL; +lookup_protocol: rcu_read_lock(); list_for_each_rcu(p, &inetsw[sock->type]) { answer = list_entry(p, struct inet_protosw, list); @@ -254,9 +256,28 @@ static int inet_create(struct socket *sock, int protocol) answer = NULL; } - err = -ESOCKTNOSUPPORT; - if (!answer) - goto out_rcu_unlock; + if (unlikely(answer == NULL)) { + if (try_loading_module < 2) { + rcu_read_unlock(); + /* + * Be more specific, e.g. net-pf-2-proto-132-type-1 + * (net-pf-PF_INET-proto-IPPROTO_SCTP-type-SOCK_STREAM) + */ + if (++try_loading_module == 1) + request_module("net-pf-%d-proto-%d-type-%d", + PF_INET, protocol, sock->type); + /* + * Fall back to generic, e.g. 
net-pf-2-proto-132 + * (net-pf-PF_INET-proto-IPPROTO_SCTP) + */ + else + request_module("net-pf-%d-proto-%d", + PF_INET, protocol); + goto lookup_protocol; + } else + goto out_rcu_unlock; + } + err = -EPERM; if (answer->capability > 0 && !capable(answer->capability)) goto out_rcu_unlock; diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index 8d3f8096b873..7d8ec6526347 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -1242,6 +1242,10 @@ SCTP_STATIC __exit void sctp_exit(void) module_init(sctp_init); module_exit(sctp_exit); +/* + * __stringify doesn't likes enums, so use IPPROTO_SCTP value (132) directly. + */ +MODULE_ALIAS("net-pf-" __stringify(PF_INET) "-proto-132"); MODULE_AUTHOR("Linux Kernel SCTP developers "); MODULE_DESCRIPTION("Support for the SCTP protocol (RFC2960)"); MODULE_LICENSE("GPL"); From 2669d63d20683828f673b606915957f3a070602d Mon Sep 17 00:00:00 2001 From: Harald Welte Date: Tue, 9 Aug 2005 20:19:44 -0700 Subject: [PATCH 341/584] [NETFILTER]: move conntrack helper buffers from BSS to kmalloc()ed memory According to DaveM, it is preferable to have large data structures be allocated dynamically from the module init() function rather than putting them as static global variables into BSS. This patch moves the conntrack helper packet buffers into dynamically allocated memory. Signed-off-by: Harald Welte Signed-off-by: David S. 
Miller --- net/ipv4/netfilter/ip_conntrack_amanda.c | 18 ++++++++++++++++-- net/ipv4/netfilter/ip_conntrack_ftp.c | 9 +++++++-- net/ipv4/netfilter/ip_conntrack_irc.c | 7 ++++++- 3 files changed, 29 insertions(+), 5 deletions(-) diff --git a/net/ipv4/netfilter/ip_conntrack_amanda.c b/net/ipv4/netfilter/ip_conntrack_amanda.c index 01e1b58322a9..be4c9eb3243f 100644 --- a/net/ipv4/netfilter/ip_conntrack_amanda.c +++ b/net/ipv4/netfilter/ip_conntrack_amanda.c @@ -40,7 +40,7 @@ MODULE_PARM_DESC(master_timeout, "timeout for the master connection"); static char *conns[] = { "DATA ", "MESG ", "INDEX " }; /* This is slow, but it's simple. --RR */ -static char amanda_buffer[65536]; +static char *amanda_buffer; static DEFINE_SPINLOCK(amanda_buffer_lock); unsigned int (*ip_nat_amanda_hook)(struct sk_buff **pskb, @@ -153,11 +153,25 @@ static struct ip_conntrack_helper amanda_helper = { static void __exit fini(void) { ip_conntrack_helper_unregister(&amanda_helper); + kfree(amanda_buffer); } static int __init init(void) { - return ip_conntrack_helper_register(&amanda_helper); + int ret; + + amanda_buffer = kmalloc(65536, GFP_KERNEL); + if (!amanda_buffer) + return -ENOMEM; + + ret = ip_conntrack_helper_register(&amanda_helper); + if (ret < 0) { + kfree(amanda_buffer); + return ret; + } + return 0; + + } module_init(init); diff --git a/net/ipv4/netfilter/ip_conntrack_ftp.c b/net/ipv4/netfilter/ip_conntrack_ftp.c index 9658896f899a..3a2627db1729 100644 --- a/net/ipv4/netfilter/ip_conntrack_ftp.c +++ b/net/ipv4/netfilter/ip_conntrack_ftp.c @@ -25,8 +25,7 @@ MODULE_AUTHOR("Rusty Russell "); MODULE_DESCRIPTION("ftp connection tracking helper"); /* This is slow, but it's simple. 
--RR */ -static char ftp_buffer[65536]; - +static char *ftp_buffer; static DEFINE_SPINLOCK(ip_ftp_lock); #define MAX_PORTS 8 @@ -461,6 +460,8 @@ static void fini(void) ports[i]); ip_conntrack_helper_unregister(&ftp[i]); } + + kfree(ftp_buffer); } static int __init init(void) @@ -468,6 +469,10 @@ static int __init init(void) int i, ret; char *tmpname; + ftp_buffer = kmalloc(65536, GFP_KERNEL); + if (!ftp_buffer) + return -ENOMEM; + if (ports_c == 0) ports[ports_c++] = FTP_PORT; diff --git a/net/ipv4/netfilter/ip_conntrack_irc.c b/net/ipv4/netfilter/ip_conntrack_irc.c index 4a28f297d502..25438eec21a1 100644 --- a/net/ipv4/netfilter/ip_conntrack_irc.c +++ b/net/ipv4/netfilter/ip_conntrack_irc.c @@ -39,7 +39,7 @@ static int ports_c; static int max_dcc_channels = 8; static unsigned int dcc_timeout = 300; /* This is slow, but it's simple. --RR */ -static char irc_buffer[65536]; +static char *irc_buffer; static DEFINE_SPINLOCK(irc_buffer_lock); unsigned int (*ip_nat_irc_hook)(struct sk_buff **pskb, @@ -257,6 +257,10 @@ static int __init init(void) printk("ip_conntrack_irc: dcc_timeout must be a positive integer\n"); return -EBUSY; } + + irc_buffer = kmalloc(65536, GFP_KERNEL); + if (!irc_buffer) + return -ENOMEM; /* If no port given, default to standard irc port */ if (ports_c == 0) @@ -304,6 +308,7 @@ static void fini(void) ports[i]); ip_conntrack_helper_unregister(&irc_helpers[i]); } + kfree(irc_buffer); } module_init(init); From 91483c4b711549bff5e9069e25c4c1400b135198 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Tue, 9 Aug 2005 20:20:07 -0700 Subject: [PATCH 342/584] [SUNRPC]: svcsock.c needs linux/tcp.h Signed-off-by: Andrew Morton Signed-off-by: David S. 
Miller --- net/sunrpc/svcsock.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index e750cb685cb6..199d3747bd42 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -26,6 +26,7 @@ #include #include #include +#include #include #include #include From f682faefb8c6045468c4cf0fe435128352683c22 Mon Sep 17 00:00:00 2001 From: Harald Welte Date: Tue, 9 Aug 2005 20:20:34 -0700 Subject: [PATCH 343/584] [NETFILTER]: fix autoloading of nfnetlink_log This patch adds the MODULE_ALIAS required for nfnetlink autoloading of nfnetlink_log. Signed-off-by: Harald Welte Signed-off-by: David S. Miller --- net/netfilter/nfnetlink_log.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c index 1750f0d6e4de..11584289c262 100644 --- a/net/netfilter/nfnetlink_log.c +++ b/net/netfilter/nfnetlink_log.c @@ -991,6 +991,7 @@ static void __exit fini(void) MODULE_DESCRIPTION("netfilter userspace logging"); MODULE_AUTHOR("Harald Welte "); MODULE_LICENSE("GPL"); +MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_ULOG); module_init(init); module_exit(fini); From 210a9ebef2d1bd32d9e9d81c84d538e237769cdb Mon Sep 17 00:00:00 2001 From: Harald Welte Date: Tue, 9 Aug 2005 20:20:54 -0700 Subject: [PATCH 344/584] [NETFILTER]: ip{6}_queue: prevent unregistration race with nfnetlink_queue Since nfnetlink_queue can override ip{6}_queue as queue handlers, we can no longer blindly unregister whoever is registered for PF_INET[6], but only unregister ourselves. Signed-off-by: Harald Welte Signed-off-by: David S. 
Miller --- net/ipv4/netfilter/ip_queue.c | 2 +- net/ipv6/netfilter/ip6_queue.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/net/ipv4/netfilter/ip_queue.c b/net/ipv4/netfilter/ip_queue.c index cfc886f382ac..629de649f130 100644 --- a/net/ipv4/netfilter/ip_queue.c +++ b/net/ipv4/netfilter/ip_queue.c @@ -692,7 +692,7 @@ init_or_cleanup(int init) return status; cleanup: - nf_unregister_queue_handler(PF_INET); + nf_unregister_queue_handlers(&ipq_enqueue_packet); synchronize_net(); ipq_flush(NF_DROP); diff --git a/net/ipv6/netfilter/ip6_queue.c b/net/ipv6/netfilter/ip6_queue.c index 5af4cee93d9b..56ffec3568fa 100644 --- a/net/ipv6/netfilter/ip6_queue.c +++ b/net/ipv6/netfilter/ip6_queue.c @@ -687,7 +687,7 @@ init_or_cleanup(int init) return status; cleanup: - nf_unregister_queue_handler(PF_INET6); + nf_unregister_queue_handlers(&ipq_enqueue_packet); synchronize_net(); ipq_flush(NF_DROP); From f6ebe77f955d77a988ce726f0818ec0103b11323 Mon Sep 17 00:00:00 2001 From: Harald Welte Date: Tue, 9 Aug 2005 20:21:49 -0700 Subject: [PATCH 345/584] [NETFILTER]: split net/core/netfilter.c into net/netfilter/*.c This patch doesn't introduce any code changes, but merely splits the core netfilter code into four separate files. It also moves it from its old location in net/core/ to the recently-created net/netfilter/ directory. Signed-off-by: Harald Welte Signed-off-by: David S. 
Miller --- net/core/Makefile | 1 - net/core/netfilter.c | 737 ----------------------------------- net/netfilter/Makefile | 4 + net/netfilter/core.c | 216 ++++++++++ net/netfilter/nf_internals.h | 39 ++ net/netfilter/nf_log.c | 165 ++++++++ net/netfilter/nf_queue.c | 273 +++++++++++++ net/netfilter/nf_sockopt.c | 132 +++++++ 8 files changed, 829 insertions(+), 738 deletions(-) delete mode 100644 net/core/netfilter.c create mode 100644 net/netfilter/core.c create mode 100644 net/netfilter/nf_internals.h create mode 100644 net/netfilter/nf_log.c create mode 100644 net/netfilter/nf_queue.c create mode 100644 net/netfilter/nf_sockopt.c diff --git a/net/core/Makefile b/net/core/Makefile index f5f5e58943e8..630da0f0579e 100644 --- a/net/core/Makefile +++ b/net/core/Makefile @@ -12,7 +12,6 @@ obj-y += dev.o ethtool.o dev_mcast.o dst.o \ obj-$(CONFIG_XFRM) += flow.o obj-$(CONFIG_SYSFS) += net-sysfs.o -obj-$(CONFIG_NETFILTER) += netfilter.o obj-$(CONFIG_NET_DIVERT) += dv.o obj-$(CONFIG_NET_PKTGEN) += pktgen.o obj-$(CONFIG_NET_RADIO) += wireless.o diff --git a/net/core/netfilter.c b/net/core/netfilter.c deleted file mode 100644 index 98cc61e79fea..000000000000 --- a/net/core/netfilter.c +++ /dev/null @@ -1,737 +0,0 @@ -/* netfilter.c: look after the filters for various protocols. - * Heavily influenced by the old firewall.c by David Bonn and Alan Cox. - * - * Thanks to Rob `CmdrTaco' Malda for not influencing this code in any - * way. - * - * Rusty Russell (C)2000 -- This code is GPL. - * - * February 2000: Modified by James Morris to have 1 queue per protocol. - * 15-Mar-2000: Added NF_REPEAT --RR. - * 08-May-2003: Internal logging interface added by Jozsef Kadlecsik. - */ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -/* In this code, we can be waiting indefinitely for userspace to - * service a packet if a hook returns NF_QUEUE. 
We could keep a count - * of skbuffs queued for userspace, and not deregister a hook unless - * this is zero, but that sucks. Now, we simply check when the - * packets come back: if the hook is gone, the packet is discarded. */ -#ifdef CONFIG_NETFILTER_DEBUG -#define NFDEBUG(format, args...) printk(format , ## args) -#else -#define NFDEBUG(format, args...) -#endif - -/* Sockopts only registered and called from user context, so - net locking would be overkill. Also, [gs]etsockopt calls may - sleep. */ -static DECLARE_MUTEX(nf_sockopt_mutex); - -struct list_head nf_hooks[NPROTO][NF_MAX_HOOKS]; -static LIST_HEAD(nf_sockopts); -static DEFINE_SPINLOCK(nf_hook_lock); - -/* - * A queue handler may be registered for each protocol. Each is protected by - * long term mutex. The handler must provide an an outfn() to accept packets - * for queueing and must reinject all packets it receives, no matter what. - */ -static struct nf_queue_handler_t { - nf_queue_outfn_t outfn; - void *data; -} queue_handler[NPROTO]; - -static struct nf_queue_rerouter *queue_rerouter; - -static DEFINE_RWLOCK(queue_handler_lock); - -int nf_register_hook(struct nf_hook_ops *reg) -{ - struct list_head *i; - - spin_lock_bh(&nf_hook_lock); - list_for_each(i, &nf_hooks[reg->pf][reg->hooknum]) { - if (reg->priority < ((struct nf_hook_ops *)i)->priority) - break; - } - list_add_rcu(®->list, i->prev); - spin_unlock_bh(&nf_hook_lock); - - synchronize_net(); - return 0; -} - -void nf_unregister_hook(struct nf_hook_ops *reg) -{ - spin_lock_bh(&nf_hook_lock); - list_del_rcu(®->list); - spin_unlock_bh(&nf_hook_lock); - - synchronize_net(); -} - -/* Do exclusive ranges overlap? */ -static inline int overlap(int min1, int max1, int min2, int max2) -{ - return max1 > min2 && min1 < max2; -} - -/* Functions to register sockopt ranges (exclusive). 
*/ -int nf_register_sockopt(struct nf_sockopt_ops *reg) -{ - struct list_head *i; - int ret = 0; - - if (down_interruptible(&nf_sockopt_mutex) != 0) - return -EINTR; - - list_for_each(i, &nf_sockopts) { - struct nf_sockopt_ops *ops = (struct nf_sockopt_ops *)i; - if (ops->pf == reg->pf - && (overlap(ops->set_optmin, ops->set_optmax, - reg->set_optmin, reg->set_optmax) - || overlap(ops->get_optmin, ops->get_optmax, - reg->get_optmin, reg->get_optmax))) { - NFDEBUG("nf_sock overlap: %u-%u/%u-%u v %u-%u/%u-%u\n", - ops->set_optmin, ops->set_optmax, - ops->get_optmin, ops->get_optmax, - reg->set_optmin, reg->set_optmax, - reg->get_optmin, reg->get_optmax); - ret = -EBUSY; - goto out; - } - } - - list_add(®->list, &nf_sockopts); -out: - up(&nf_sockopt_mutex); - return ret; -} - -void nf_unregister_sockopt(struct nf_sockopt_ops *reg) -{ - /* No point being interruptible: we're probably in cleanup_module() */ - restart: - down(&nf_sockopt_mutex); - if (reg->use != 0) { - /* To be woken by nf_sockopt call... */ - /* FIXME: Stuart Young's name appears gratuitously. 
*/ - set_current_state(TASK_UNINTERRUPTIBLE); - reg->cleanup_task = current; - up(&nf_sockopt_mutex); - schedule(); - goto restart; - } - list_del(®->list); - up(&nf_sockopt_mutex); -} - -/* Call get/setsockopt() */ -static int nf_sockopt(struct sock *sk, int pf, int val, - char __user *opt, int *len, int get) -{ - struct list_head *i; - struct nf_sockopt_ops *ops; - int ret; - - if (down_interruptible(&nf_sockopt_mutex) != 0) - return -EINTR; - - list_for_each(i, &nf_sockopts) { - ops = (struct nf_sockopt_ops *)i; - if (ops->pf == pf) { - if (get) { - if (val >= ops->get_optmin - && val < ops->get_optmax) { - ops->use++; - up(&nf_sockopt_mutex); - ret = ops->get(sk, val, opt, len); - goto out; - } - } else { - if (val >= ops->set_optmin - && val < ops->set_optmax) { - ops->use++; - up(&nf_sockopt_mutex); - ret = ops->set(sk, val, opt, *len); - goto out; - } - } - } - } - up(&nf_sockopt_mutex); - return -ENOPROTOOPT; - - out: - down(&nf_sockopt_mutex); - ops->use--; - if (ops->cleanup_task) - wake_up_process(ops->cleanup_task); - up(&nf_sockopt_mutex); - return ret; -} - -int nf_setsockopt(struct sock *sk, int pf, int val, char __user *opt, - int len) -{ - return nf_sockopt(sk, pf, val, opt, &len, 0); -} - -int nf_getsockopt(struct sock *sk, int pf, int val, char __user *opt, int *len) -{ - return nf_sockopt(sk, pf, val, opt, len, 1); -} - -static unsigned int nf_iterate(struct list_head *head, - struct sk_buff **skb, - int hook, - const struct net_device *indev, - const struct net_device *outdev, - struct list_head **i, - int (*okfn)(struct sk_buff *), - int hook_thresh) -{ - unsigned int verdict; - - /* - * The caller must not block between calls to this - * function because of risk of continuing from deleted element. - */ - list_for_each_continue_rcu(*i, head) { - struct nf_hook_ops *elem = (struct nf_hook_ops *)*i; - - if (hook_thresh > elem->priority) - continue; - - /* Optimization: we don't need to hold module - reference here, since function can't sleep. 
--RR */ - verdict = elem->hook(hook, skb, indev, outdev, okfn); - if (verdict != NF_ACCEPT) { -#ifdef CONFIG_NETFILTER_DEBUG - if (unlikely((verdict & NF_VERDICT_MASK) - > NF_MAX_VERDICT)) { - NFDEBUG("Evil return from %p(%u).\n", - elem->hook, hook); - continue; - } -#endif - if (verdict != NF_REPEAT) - return verdict; - *i = (*i)->prev; - } - } - return NF_ACCEPT; -} - -int nf_register_queue_handler(int pf, nf_queue_outfn_t outfn, void *data) -{ - int ret; - - if (pf >= NPROTO) - return -EINVAL; - - write_lock_bh(&queue_handler_lock); - if (queue_handler[pf].outfn) - ret = -EBUSY; - else { - queue_handler[pf].outfn = outfn; - queue_handler[pf].data = data; - ret = 0; - } - write_unlock_bh(&queue_handler_lock); - - return ret; -} - -/* The caller must flush their queue before this */ -int nf_unregister_queue_handler(int pf) -{ - if (pf >= NPROTO) - return -EINVAL; - - write_lock_bh(&queue_handler_lock); - queue_handler[pf].outfn = NULL; - queue_handler[pf].data = NULL; - write_unlock_bh(&queue_handler_lock); - - return 0; -} - -int nf_register_queue_rerouter(int pf, struct nf_queue_rerouter *rer) -{ - if (pf >= NPROTO) - return -EINVAL; - - write_lock_bh(&queue_handler_lock); - memcpy(&queue_rerouter[pf], rer, sizeof(queue_rerouter[pf])); - write_unlock_bh(&queue_handler_lock); - - return 0; -} - -int nf_unregister_queue_rerouter(int pf) -{ - if (pf >= NPROTO) - return -EINVAL; - - write_lock_bh(&queue_handler_lock); - memset(&queue_rerouter[pf], 0, sizeof(queue_rerouter[pf])); - write_unlock_bh(&queue_handler_lock); - return 0; -} - -void nf_unregister_queue_handlers(nf_queue_outfn_t outfn) -{ - int pf; - - write_lock_bh(&queue_handler_lock); - for (pf = 0; pf < NPROTO; pf++) { - if (queue_handler[pf].outfn == outfn) { - queue_handler[pf].outfn = NULL; - queue_handler[pf].data = NULL; - } - } - write_unlock_bh(&queue_handler_lock); -} - -/* - * Any packet that leaves via this function must come back - * through nf_reinject(). 
- */ -static int nf_queue(struct sk_buff **skb, - struct list_head *elem, - int pf, unsigned int hook, - struct net_device *indev, - struct net_device *outdev, - int (*okfn)(struct sk_buff *), - unsigned int queuenum) -{ - int status; - struct nf_info *info; -#ifdef CONFIG_BRIDGE_NETFILTER - struct net_device *physindev = NULL; - struct net_device *physoutdev = NULL; -#endif - - /* QUEUE == DROP if noone is waiting, to be safe. */ - read_lock(&queue_handler_lock); - if (!queue_handler[pf].outfn) { - read_unlock(&queue_handler_lock); - kfree_skb(*skb); - return 1; - } - - info = kmalloc(sizeof(*info)+queue_rerouter[pf].rer_size, GFP_ATOMIC); - if (!info) { - if (net_ratelimit()) - printk(KERN_ERR "OOM queueing packet %p\n", - *skb); - read_unlock(&queue_handler_lock); - kfree_skb(*skb); - return 1; - } - - *info = (struct nf_info) { - (struct nf_hook_ops *)elem, pf, hook, indev, outdev, okfn }; - - /* If it's going away, ignore hook. */ - if (!try_module_get(info->elem->owner)) { - read_unlock(&queue_handler_lock); - kfree(info); - return 0; - } - - /* Bump dev refs so they don't vanish while packet is out */ - if (indev) dev_hold(indev); - if (outdev) dev_hold(outdev); - -#ifdef CONFIG_BRIDGE_NETFILTER - if ((*skb)->nf_bridge) { - physindev = (*skb)->nf_bridge->physindev; - if (physindev) dev_hold(physindev); - physoutdev = (*skb)->nf_bridge->physoutdev; - if (physoutdev) dev_hold(physoutdev); - } -#endif - if (queue_rerouter[pf].save) - queue_rerouter[pf].save(*skb, info); - - status = queue_handler[pf].outfn(*skb, info, queuenum, - queue_handler[pf].data); - - if (status >= 0 && queue_rerouter[pf].reroute) - status = queue_rerouter[pf].reroute(skb, info); - - read_unlock(&queue_handler_lock); - - if (status < 0) { - /* James M doesn't say fuck enough. 
*/ - if (indev) dev_put(indev); - if (outdev) dev_put(outdev); -#ifdef CONFIG_BRIDGE_NETFILTER - if (physindev) dev_put(physindev); - if (physoutdev) dev_put(physoutdev); -#endif - module_put(info->elem->owner); - kfree(info); - kfree_skb(*skb); - - return 1; - } - - return 1; -} - -/* Returns 1 if okfn() needs to be executed by the caller, - * -EPERM for NF_DROP, 0 otherwise. */ -int nf_hook_slow(int pf, unsigned int hook, struct sk_buff **pskb, - struct net_device *indev, - struct net_device *outdev, - int (*okfn)(struct sk_buff *), - int hook_thresh) -{ - struct list_head *elem; - unsigned int verdict; - int ret = 0; - - /* We may already have this, but read-locks nest anyway */ - rcu_read_lock(); - - elem = &nf_hooks[pf][hook]; -next_hook: - verdict = nf_iterate(&nf_hooks[pf][hook], pskb, hook, indev, - outdev, &elem, okfn, hook_thresh); - if (verdict == NF_ACCEPT || verdict == NF_STOP) { - ret = 1; - goto unlock; - } else if (verdict == NF_DROP) { - kfree_skb(*pskb); - ret = -EPERM; - } else if ((verdict & NF_VERDICT_MASK) == NF_QUEUE) { - NFDEBUG("nf_hook: Verdict = QUEUE.\n"); - if (!nf_queue(pskb, elem, pf, hook, indev, outdev, okfn, - verdict >> NF_VERDICT_BITS)) - goto next_hook; - } -unlock: - rcu_read_unlock(); - return ret; -} - -void nf_reinject(struct sk_buff *skb, struct nf_info *info, - unsigned int verdict) -{ - struct list_head *elem = &info->elem->list; - struct list_head *i; - - rcu_read_lock(); - - /* Release those devices we held, or Alexey will kill me. */ - if (info->indev) dev_put(info->indev); - if (info->outdev) dev_put(info->outdev); -#ifdef CONFIG_BRIDGE_NETFILTER - if (skb->nf_bridge) { - if (skb->nf_bridge->physindev) - dev_put(skb->nf_bridge->physindev); - if (skb->nf_bridge->physoutdev) - dev_put(skb->nf_bridge->physoutdev); - } -#endif - - /* Drop reference to owner of hook which queued us. 
*/ - module_put(info->elem->owner); - - list_for_each_rcu(i, &nf_hooks[info->pf][info->hook]) { - if (i == elem) - break; - } - - if (elem == &nf_hooks[info->pf][info->hook]) { - /* The module which sent it to userspace is gone. */ - NFDEBUG("%s: module disappeared, dropping packet.\n", - __FUNCTION__); - verdict = NF_DROP; - } - - /* Continue traversal iff userspace said ok... */ - if (verdict == NF_REPEAT) { - elem = elem->prev; - verdict = NF_ACCEPT; - } - - if (verdict == NF_ACCEPT) { - next_hook: - verdict = nf_iterate(&nf_hooks[info->pf][info->hook], - &skb, info->hook, - info->indev, info->outdev, &elem, - info->okfn, INT_MIN); - } - - switch (verdict & NF_VERDICT_MASK) { - case NF_ACCEPT: - info->okfn(skb); - break; - - case NF_QUEUE: - if (!nf_queue(&skb, elem, info->pf, info->hook, - info->indev, info->outdev, info->okfn, - verdict >> NF_VERDICT_BITS)) - goto next_hook; - break; - } - rcu_read_unlock(); - - if (verdict == NF_DROP) - kfree_skb(skb); - - kfree(info); - return; -} - -int skb_make_writable(struct sk_buff **pskb, unsigned int writable_len) -{ - struct sk_buff *nskb; - - if (writable_len > (*pskb)->len) - return 0; - - /* Not exclusive use of packet? Must copy. 
*/ - if (skb_shared(*pskb) || skb_cloned(*pskb)) - goto copy_skb; - - return pskb_may_pull(*pskb, writable_len); - -copy_skb: - nskb = skb_copy(*pskb, GFP_ATOMIC); - if (!nskb) - return 0; - BUG_ON(skb_is_nonlinear(nskb)); - - /* Rest of kernel will get very unhappy if we pass it a - suddenly-orphaned skbuff */ - if ((*pskb)->sk) - skb_set_owner_w(nskb, (*pskb)->sk); - kfree_skb(*pskb); - *pskb = nskb; - return 1; -} -EXPORT_SYMBOL(skb_make_writable); - -/* Internal logging interface, which relies on the real - LOG target modules */ - -#define NF_LOG_PREFIXLEN 128 - -static struct nf_logger *nf_logging[NPROTO]; /* = NULL */ -static DEFINE_SPINLOCK(nf_log_lock); - -int nf_log_register(int pf, struct nf_logger *logger) -{ - int ret = -EBUSY; - - /* Any setup of logging members must be done before - * substituting pointer. */ - spin_lock(&nf_log_lock); - if (!nf_logging[pf]) { - rcu_assign_pointer(nf_logging[pf], logger); - ret = 0; - } - spin_unlock(&nf_log_lock); - return ret; -} - -void nf_log_unregister_pf(int pf) -{ - spin_lock(&nf_log_lock); - nf_logging[pf] = NULL; - spin_unlock(&nf_log_lock); - - /* Give time to concurrent readers. */ - synchronize_net(); -} - -void nf_log_unregister_logger(struct nf_logger *logger) -{ - int i; - - spin_lock(&nf_log_lock); - for (i = 0; i < NPROTO; i++) { - if (nf_logging[i] == logger) - nf_logging[i] = NULL; - } - spin_unlock(&nf_log_lock); - - synchronize_net(); -} - -void nf_log_packet(int pf, - unsigned int hooknum, - const struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - struct nf_loginfo *loginfo, - const char *fmt, ...) 
-{ - va_list args; - char prefix[NF_LOG_PREFIXLEN]; - struct nf_logger *logger; - - rcu_read_lock(); - logger = rcu_dereference(nf_logging[pf]); - if (logger) { - va_start(args, fmt); - vsnprintf(prefix, sizeof(prefix), fmt, args); - va_end(args); - /* We must read logging before nf_logfn[pf] */ - logger->logfn(pf, hooknum, skb, in, out, loginfo, prefix); - } else if (net_ratelimit()) { - printk(KERN_WARNING "nf_log_packet: can\'t log since " - "no backend logging module loaded in! Please either " - "load one, or disable logging explicitly\n"); - } - rcu_read_unlock(); -} -EXPORT_SYMBOL(nf_log_register); -EXPORT_SYMBOL(nf_log_unregister_pf); -EXPORT_SYMBOL(nf_log_unregister_logger); -EXPORT_SYMBOL(nf_log_packet); - -#ifdef CONFIG_PROC_FS -struct proc_dir_entry *proc_net_netfilter; -EXPORT_SYMBOL(proc_net_netfilter); - -static void *seq_start(struct seq_file *seq, loff_t *pos) -{ - rcu_read_lock(); - - if (*pos >= NPROTO) - return NULL; - - return pos; -} - -static void *seq_next(struct seq_file *s, void *v, loff_t *pos) -{ - (*pos)++; - - if (*pos >= NPROTO) - return NULL; - - return pos; -} - -static void seq_stop(struct seq_file *s, void *v) -{ - rcu_read_unlock(); -} - -static int seq_show(struct seq_file *s, void *v) -{ - loff_t *pos = v; - const struct nf_logger *logger; - - logger = rcu_dereference(nf_logging[*pos]); - - if (!logger) - return seq_printf(s, "%2lld NONE\n", *pos); - - return seq_printf(s, "%2lld %s\n", *pos, logger->name); -} - -static struct seq_operations nflog_seq_ops = { - .start = seq_start, - .next = seq_next, - .stop = seq_stop, - .show = seq_show, -}; - -static int nflog_open(struct inode *inode, struct file *file) -{ - return seq_open(file, &nflog_seq_ops); -} - -static struct file_operations nflog_file_ops = { - .owner = THIS_MODULE, - .open = nflog_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release, -}; - -#endif /* PROC_FS */ - - -/* This does not belong here, but locally generated errors need it if connection 
- tracking in use: without this, connection may not be in hash table, and hence - manufactured ICMP or RST packets will not be associated with it. */ -void (*ip_ct_attach)(struct sk_buff *, struct sk_buff *); - -void nf_ct_attach(struct sk_buff *new, struct sk_buff *skb) -{ - void (*attach)(struct sk_buff *, struct sk_buff *); - - if (skb->nfct && (attach = ip_ct_attach) != NULL) { - mb(); /* Just to be sure: must be read before executing this */ - attach(new, skb); - } -} - -void __init netfilter_init(void) -{ - int i, h; -#ifdef CONFIG_PROC_FS - struct proc_dir_entry *pde; -#endif - - queue_rerouter = kmalloc(NPROTO * sizeof(struct nf_queue_rerouter), - GFP_KERNEL); - if (!queue_rerouter) - panic("netfilter: cannot allocate queue rerouter array\n"); - memset(queue_rerouter, 0, NPROTO * sizeof(struct nf_queue_rerouter)); - - for (i = 0; i < NPROTO; i++) { - for (h = 0; h < NF_MAX_HOOKS; h++) - INIT_LIST_HEAD(&nf_hooks[i][h]); - } - -#ifdef CONFIG_PROC_FS - proc_net_netfilter = proc_mkdir("netfilter", proc_net); - if (!proc_net_netfilter) - panic("cannot create netfilter proc entry"); - pde = create_proc_entry("nf_log", S_IRUGO, proc_net_netfilter); - if (!pde) - panic("cannot create /proc/net/netfilter/nf_log"); - pde->proc_fops = &nflog_file_ops; -#endif -} - -EXPORT_SYMBOL(ip_ct_attach); -EXPORT_SYMBOL(nf_ct_attach); -EXPORT_SYMBOL(nf_getsockopt); -EXPORT_SYMBOL(nf_hook_slow); -EXPORT_SYMBOL(nf_hooks); -EXPORT_SYMBOL(nf_register_hook); -EXPORT_SYMBOL(nf_register_queue_handler); -EXPORT_SYMBOL(nf_register_sockopt); -EXPORT_SYMBOL(nf_reinject); -EXPORT_SYMBOL(nf_setsockopt); -EXPORT_SYMBOL(nf_unregister_hook); -EXPORT_SYMBOL(nf_unregister_queue_handler); -EXPORT_SYMBOL_GPL(nf_unregister_queue_handlers); -EXPORT_SYMBOL_GPL(nf_register_queue_rerouter); -EXPORT_SYMBOL_GPL(nf_unregister_queue_rerouter); -EXPORT_SYMBOL(nf_unregister_sockopt); diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile index c41caebc4a7c..b3b44f8b415a 100644 --- 
a/net/netfilter/Makefile +++ b/net/netfilter/Makefile @@ -1,3 +1,7 @@ +netfilter-objs := core.o nf_log.o nf_queue.o nf_sockopt.o + +obj-$(CONFIG_NETFILTER) = netfilter.o + obj-$(CONFIG_NETFILTER_NETLINK) += nfnetlink.o obj-$(CONFIG_NETFILTER_NETLINK_QUEUE) += nfnetlink_queue.o obj-$(CONFIG_NETFILTER_NETLINK_LOG) += nfnetlink_log.o diff --git a/net/netfilter/core.c b/net/netfilter/core.c new file mode 100644 index 000000000000..1ceb1a6c254b --- /dev/null +++ b/net/netfilter/core.c @@ -0,0 +1,216 @@ +/* netfilter.c: look after the filters for various protocols. + * Heavily influenced by the old firewall.c by David Bonn and Alan Cox. + * + * Thanks to Rob `CmdrTaco' Malda for not influencing this code in any + * way. + * + * Rusty Russell (C)2000 -- This code is GPL. + * + * February 2000: Modified by James Morris to have 1 queue per protocol. + * 15-Mar-2000: Added NF_REPEAT --RR. + * 08-May-2003: Internal logging interface added by Jozsef Kadlecsik. + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "nf_internals.h" + +/* In this code, we can be waiting indefinitely for userspace to + * service a packet if a hook returns NF_QUEUE. We could keep a count + * of skbuffs queued for userspace, and not deregister a hook unless + * this is zero, but that sucks. Now, we simply check when the + * packets come back: if the hook is gone, the packet is discarded. 
*/ +struct list_head nf_hooks[NPROTO][NF_MAX_HOOKS]; +EXPORT_SYMBOL(nf_hooks); +static DEFINE_SPINLOCK(nf_hook_lock); + +int nf_register_hook(struct nf_hook_ops *reg) +{ + struct list_head *i; + + spin_lock_bh(&nf_hook_lock); + list_for_each(i, &nf_hooks[reg->pf][reg->hooknum]) { + if (reg->priority < ((struct nf_hook_ops *)i)->priority) + break; + } + list_add_rcu(®->list, i->prev); + spin_unlock_bh(&nf_hook_lock); + + synchronize_net(); + return 0; +} +EXPORT_SYMBOL(nf_register_hook); + +void nf_unregister_hook(struct nf_hook_ops *reg) +{ + spin_lock_bh(&nf_hook_lock); + list_del_rcu(®->list); + spin_unlock_bh(&nf_hook_lock); + + synchronize_net(); +} +EXPORT_SYMBOL(nf_unregister_hook); + +unsigned int nf_iterate(struct list_head *head, + struct sk_buff **skb, + int hook, + const struct net_device *indev, + const struct net_device *outdev, + struct list_head **i, + int (*okfn)(struct sk_buff *), + int hook_thresh) +{ + unsigned int verdict; + + /* + * The caller must not block between calls to this + * function because of risk of continuing from deleted element. + */ + list_for_each_continue_rcu(*i, head) { + struct nf_hook_ops *elem = (struct nf_hook_ops *)*i; + + if (hook_thresh > elem->priority) + continue; + + /* Optimization: we don't need to hold module + reference here, since function can't sleep. --RR */ + verdict = elem->hook(hook, skb, indev, outdev, okfn); + if (verdict != NF_ACCEPT) { +#ifdef CONFIG_NETFILTER_DEBUG + if (unlikely((verdict & NF_VERDICT_MASK) + > NF_MAX_VERDICT)) { + NFDEBUG("Evil return from %p(%u).\n", + elem->hook, hook); + continue; + } +#endif + if (verdict != NF_REPEAT) + return verdict; + *i = (*i)->prev; + } + } + return NF_ACCEPT; +} + + +/* Returns 1 if okfn() needs to be executed by the caller, + * -EPERM for NF_DROP, 0 otherwise. 
*/ +int nf_hook_slow(int pf, unsigned int hook, struct sk_buff **pskb, + struct net_device *indev, + struct net_device *outdev, + int (*okfn)(struct sk_buff *), + int hook_thresh) +{ + struct list_head *elem; + unsigned int verdict; + int ret = 0; + + /* We may already have this, but read-locks nest anyway */ + rcu_read_lock(); + + elem = &nf_hooks[pf][hook]; +next_hook: + verdict = nf_iterate(&nf_hooks[pf][hook], pskb, hook, indev, + outdev, &elem, okfn, hook_thresh); + if (verdict == NF_ACCEPT || verdict == NF_STOP) { + ret = 1; + goto unlock; + } else if (verdict == NF_DROP) { + kfree_skb(*pskb); + ret = -EPERM; + } else if ((verdict & NF_VERDICT_MASK) == NF_QUEUE) { + NFDEBUG("nf_hook: Verdict = QUEUE.\n"); + if (!nf_queue(pskb, elem, pf, hook, indev, outdev, okfn, + verdict >> NF_VERDICT_BITS)) + goto next_hook; + } +unlock: + rcu_read_unlock(); + return ret; +} +EXPORT_SYMBOL(nf_hook_slow); + + +int skb_make_writable(struct sk_buff **pskb, unsigned int writable_len) +{ + struct sk_buff *nskb; + + if (writable_len > (*pskb)->len) + return 0; + + /* Not exclusive use of packet? Must copy. */ + if (skb_shared(*pskb) || skb_cloned(*pskb)) + goto copy_skb; + + return pskb_may_pull(*pskb, writable_len); + +copy_skb: + nskb = skb_copy(*pskb, GFP_ATOMIC); + if (!nskb) + return 0; + BUG_ON(skb_is_nonlinear(nskb)); + + /* Rest of kernel will get very unhappy if we pass it a + suddenly-orphaned skbuff */ + if ((*pskb)->sk) + skb_set_owner_w(nskb, (*pskb)->sk); + kfree_skb(*pskb); + *pskb = nskb; + return 1; +} +EXPORT_SYMBOL(skb_make_writable); + + +/* This does not belong here, but locally generated errors need it if connection + tracking in use: without this, connection may not be in hash table, and hence + manufactured ICMP or RST packets will not be associated with it. 
*/ +void (*ip_ct_attach)(struct sk_buff *, struct sk_buff *); +EXPORT_SYMBOL(ip_ct_attach); + +void nf_ct_attach(struct sk_buff *new, struct sk_buff *skb) +{ + void (*attach)(struct sk_buff *, struct sk_buff *); + + if (skb->nfct && (attach = ip_ct_attach) != NULL) { + mb(); /* Just to be sure: must be read before executing this */ + attach(new, skb); + } +} +EXPORT_SYMBOL(nf_ct_attach); + +#ifdef CONFIG_PROC_FS +struct proc_dir_entry *proc_net_netfilter; +EXPORT_SYMBOL(proc_net_netfilter); +#endif + +void __init netfilter_init(void) +{ + int i, h; + for (i = 0; i < NPROTO; i++) { + for (h = 0; h < NF_MAX_HOOKS; h++) + INIT_LIST_HEAD(&nf_hooks[i][h]); + } + +#ifdef CONFIG_PROC_FS + proc_net_netfilter = proc_mkdir("netfilter", proc_net); + if (!proc_net_netfilter) + panic("cannot create netfilter proc entry"); +#endif + + if (netfilter_queue_init() < 0) + panic("cannot initialize nf_queue"); + if (netfilter_log_init() < 0) + panic("cannot initialize nf_log"); +} diff --git a/net/netfilter/nf_internals.h b/net/netfilter/nf_internals.h new file mode 100644 index 000000000000..6bdee2910617 --- /dev/null +++ b/net/netfilter/nf_internals.h @@ -0,0 +1,39 @@ +#ifndef _NF_INTERNALS_H +#define _NF_INTERNALS_H + +#include +#include +#include +#include + +#ifdef CONFIG_NETFILTER_DEBUG +#define NFDEBUG(format, args...) printk(format , ## args) +#else +#define NFDEBUG(format, args...) 
+#endif + + +/* core.c */ +extern unsigned int nf_iterate(struct list_head *head, + struct sk_buff **skb, + int hook, + const struct net_device *indev, + const struct net_device *outdev, + struct list_head **i, + int (*okfn)(struct sk_buff *), + int hook_thresh); + +/* nf_queue.c */ +extern int nf_queue(struct sk_buff **skb, + struct list_head *elem, + int pf, unsigned int hook, + struct net_device *indev, + struct net_device *outdev, + int (*okfn)(struct sk_buff *), + unsigned int queuenum); +extern int __init netfilter_queue_init(void); + +/* nf_log.c */ +extern int __init netfilter_log_init(void); + +#endif diff --git a/net/netfilter/nf_log.c b/net/netfilter/nf_log.c new file mode 100644 index 000000000000..ec58c4d2c667 --- /dev/null +++ b/net/netfilter/nf_log.c @@ -0,0 +1,165 @@ +#include +#include +#include +#include +#include +#include +#include +#include + +#include "nf_internals.h" + +/* Internal logging interface, which relies on the real + LOG target modules */ + +#define NF_LOG_PREFIXLEN 128 + +static struct nf_logger *nf_logging[NPROTO]; /* = NULL */ +static DEFINE_SPINLOCK(nf_log_lock); + +int nf_log_register(int pf, struct nf_logger *logger) +{ + int ret = -EBUSY; + + /* Any setup of logging members must be done before + * substituting pointer. */ + spin_lock(&nf_log_lock); + if (!nf_logging[pf]) { + rcu_assign_pointer(nf_logging[pf], logger); + ret = 0; + } + spin_unlock(&nf_log_lock); + return ret; +} +EXPORT_SYMBOL(nf_log_register); + +void nf_log_unregister_pf(int pf) +{ + spin_lock(&nf_log_lock); + nf_logging[pf] = NULL; + spin_unlock(&nf_log_lock); + + /* Give time to concurrent readers. 
*/ + synchronize_net(); +} +EXPORT_SYMBOL(nf_log_unregister_pf); + +void nf_log_unregister_logger(struct nf_logger *logger) +{ + int i; + + spin_lock(&nf_log_lock); + for (i = 0; i < NPROTO; i++) { + if (nf_logging[i] == logger) + nf_logging[i] = NULL; + } + spin_unlock(&nf_log_lock); + + synchronize_net(); +} +EXPORT_SYMBOL(nf_log_unregister_logger); + +void nf_log_packet(int pf, + unsigned int hooknum, + const struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + struct nf_loginfo *loginfo, + const char *fmt, ...) +{ + va_list args; + char prefix[NF_LOG_PREFIXLEN]; + struct nf_logger *logger; + + rcu_read_lock(); + logger = rcu_dereference(nf_logging[pf]); + if (logger) { + va_start(args, fmt); + vsnprintf(prefix, sizeof(prefix), fmt, args); + va_end(args); + /* We must read logging before nf_logfn[pf] */ + logger->logfn(pf, hooknum, skb, in, out, loginfo, prefix); + } else if (net_ratelimit()) { + printk(KERN_WARNING "nf_log_packet: can\'t log since " + "no backend logging module loaded in! 
Please either " + "load one, or disable logging explicitly\n"); + } + rcu_read_unlock(); +} +EXPORT_SYMBOL(nf_log_packet); + +#ifdef CONFIG_PROC_FS +static void *seq_start(struct seq_file *seq, loff_t *pos) +{ + rcu_read_lock(); + + if (*pos >= NPROTO) + return NULL; + + return pos; +} + +static void *seq_next(struct seq_file *s, void *v, loff_t *pos) +{ + (*pos)++; + + if (*pos >= NPROTO) + return NULL; + + return pos; +} + +static void seq_stop(struct seq_file *s, void *v) +{ + rcu_read_unlock(); +} + +static int seq_show(struct seq_file *s, void *v) +{ + loff_t *pos = v; + const struct nf_logger *logger; + + logger = rcu_dereference(nf_logging[*pos]); + + if (!logger) + return seq_printf(s, "%2lld NONE\n", *pos); + + return seq_printf(s, "%2lld %s\n", *pos, logger->name); +} + +static struct seq_operations nflog_seq_ops = { + .start = seq_start, + .next = seq_next, + .stop = seq_stop, + .show = seq_show, +}; + +static int nflog_open(struct inode *inode, struct file *file) +{ + return seq_open(file, &nflog_seq_ops); +} + +static struct file_operations nflog_file_ops = { + .owner = THIS_MODULE, + .open = nflog_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release, +}; + +#endif /* PROC_FS */ + + +int __init netfilter_log_init(void) +{ +#ifdef CONFIG_PROC_FS + struct proc_dir_entry *pde; + pde = create_proc_entry("nf_log", S_IRUGO, proc_net_netfilter); + if (!pde) + return -1; + + pde->proc_fops = &nflog_file_ops; +#endif + + return 0; +} diff --git a/net/netfilter/nf_queue.c b/net/netfilter/nf_queue.c new file mode 100644 index 000000000000..5586f843ed45 --- /dev/null +++ b/net/netfilter/nf_queue.c @@ -0,0 +1,273 @@ +#include +#include +#include +#include +#include +#include +#include +#include + +#include "nf_internals.h" + +/* + * A queue handler may be registered for each protocol. Each is protected by + * long term mutex. 
The handler must provide an an outfn() to accept packets + * for queueing and must reinject all packets it receives, no matter what. + */ +static struct nf_queue_handler_t { + nf_queue_outfn_t outfn; + void *data; +} queue_handler[NPROTO]; + +static struct nf_queue_rerouter *queue_rerouter; + +static DEFINE_RWLOCK(queue_handler_lock); + + +int nf_register_queue_handler(int pf, nf_queue_outfn_t outfn, void *data) +{ + int ret; + + if (pf >= NPROTO) + return -EINVAL; + + write_lock_bh(&queue_handler_lock); + if (queue_handler[pf].outfn) + ret = -EBUSY; + else { + queue_handler[pf].outfn = outfn; + queue_handler[pf].data = data; + ret = 0; + } + write_unlock_bh(&queue_handler_lock); + + return ret; +} +EXPORT_SYMBOL(nf_register_queue_handler); + +/* The caller must flush their queue before this */ +int nf_unregister_queue_handler(int pf) +{ + if (pf >= NPROTO) + return -EINVAL; + + write_lock_bh(&queue_handler_lock); + queue_handler[pf].outfn = NULL; + queue_handler[pf].data = NULL; + write_unlock_bh(&queue_handler_lock); + + return 0; +} +EXPORT_SYMBOL(nf_unregister_queue_handler); + +int nf_register_queue_rerouter(int pf, struct nf_queue_rerouter *rer) +{ + if (pf >= NPROTO) + return -EINVAL; + + write_lock_bh(&queue_handler_lock); + memcpy(&queue_rerouter[pf], rer, sizeof(queue_rerouter[pf])); + write_unlock_bh(&queue_handler_lock); + + return 0; +} +EXPORT_SYMBOL_GPL(nf_register_queue_rerouter); + +int nf_unregister_queue_rerouter(int pf) +{ + if (pf >= NPROTO) + return -EINVAL; + + write_lock_bh(&queue_handler_lock); + memset(&queue_rerouter[pf], 0, sizeof(queue_rerouter[pf])); + write_unlock_bh(&queue_handler_lock); + return 0; +} +EXPORT_SYMBOL_GPL(nf_unregister_queue_rerouter); + +void nf_unregister_queue_handlers(nf_queue_outfn_t outfn) +{ + int pf; + + write_lock_bh(&queue_handler_lock); + for (pf = 0; pf < NPROTO; pf++) { + if (queue_handler[pf].outfn == outfn) { + queue_handler[pf].outfn = NULL; + queue_handler[pf].data = NULL; + } + } + 
write_unlock_bh(&queue_handler_lock); +} +EXPORT_SYMBOL_GPL(nf_unregister_queue_handlers); + +/* + * Any packet that leaves via this function must come back + * through nf_reinject(). + */ +int nf_queue(struct sk_buff **skb, + struct list_head *elem, + int pf, unsigned int hook, + struct net_device *indev, + struct net_device *outdev, + int (*okfn)(struct sk_buff *), + unsigned int queuenum) +{ + int status; + struct nf_info *info; +#ifdef CONFIG_BRIDGE_NETFILTER + struct net_device *physindev = NULL; + struct net_device *physoutdev = NULL; +#endif + + /* QUEUE == DROP if noone is waiting, to be safe. */ + read_lock(&queue_handler_lock); + if (!queue_handler[pf].outfn) { + read_unlock(&queue_handler_lock); + kfree_skb(*skb); + return 1; + } + + info = kmalloc(sizeof(*info)+queue_rerouter[pf].rer_size, GFP_ATOMIC); + if (!info) { + if (net_ratelimit()) + printk(KERN_ERR "OOM queueing packet %p\n", + *skb); + read_unlock(&queue_handler_lock); + kfree_skb(*skb); + return 1; + } + + *info = (struct nf_info) { + (struct nf_hook_ops *)elem, pf, hook, indev, outdev, okfn }; + + /* If it's going away, ignore hook. */ + if (!try_module_get(info->elem->owner)) { + read_unlock(&queue_handler_lock); + kfree(info); + return 0; + } + + /* Bump dev refs so they don't vanish while packet is out */ + if (indev) dev_hold(indev); + if (outdev) dev_hold(outdev); + +#ifdef CONFIG_BRIDGE_NETFILTER + if ((*skb)->nf_bridge) { + physindev = (*skb)->nf_bridge->physindev; + if (physindev) dev_hold(physindev); + physoutdev = (*skb)->nf_bridge->physoutdev; + if (physoutdev) dev_hold(physoutdev); + } +#endif + if (queue_rerouter[pf].save) + queue_rerouter[pf].save(*skb, info); + + status = queue_handler[pf].outfn(*skb, info, queuenum, + queue_handler[pf].data); + + if (status >= 0 && queue_rerouter[pf].reroute) + status = queue_rerouter[pf].reroute(skb, info); + + read_unlock(&queue_handler_lock); + + if (status < 0) { + /* James M doesn't say fuck enough. 
*/ + if (indev) dev_put(indev); + if (outdev) dev_put(outdev); +#ifdef CONFIG_BRIDGE_NETFILTER + if (physindev) dev_put(physindev); + if (physoutdev) dev_put(physoutdev); +#endif + module_put(info->elem->owner); + kfree(info); + kfree_skb(*skb); + + return 1; + } + + return 1; +} + +void nf_reinject(struct sk_buff *skb, struct nf_info *info, + unsigned int verdict) +{ + struct list_head *elem = &info->elem->list; + struct list_head *i; + + rcu_read_lock(); + + /* Release those devices we held, or Alexey will kill me. */ + if (info->indev) dev_put(info->indev); + if (info->outdev) dev_put(info->outdev); +#ifdef CONFIG_BRIDGE_NETFILTER + if (skb->nf_bridge) { + if (skb->nf_bridge->physindev) + dev_put(skb->nf_bridge->physindev); + if (skb->nf_bridge->physoutdev) + dev_put(skb->nf_bridge->physoutdev); + } +#endif + + /* Drop reference to owner of hook which queued us. */ + module_put(info->elem->owner); + + list_for_each_rcu(i, &nf_hooks[info->pf][info->hook]) { + if (i == elem) + break; + } + + if (i == &nf_hooks[info->pf][info->hook]) { + /* The module which sent it to userspace is gone. */ + NFDEBUG("%s: module disappeared, dropping packet.\n", + __FUNCTION__); + verdict = NF_DROP; + } + + /* Continue traversal iff userspace said ok... 
*/ + if (verdict == NF_REPEAT) { + elem = elem->prev; + verdict = NF_ACCEPT; + } + + if (verdict == NF_ACCEPT) { + next_hook: + verdict = nf_iterate(&nf_hooks[info->pf][info->hook], + &skb, info->hook, + info->indev, info->outdev, &elem, + info->okfn, INT_MIN); + } + + switch (verdict & NF_VERDICT_MASK) { + case NF_ACCEPT: + info->okfn(skb); + break; + + case NF_QUEUE: + if (!nf_queue(&skb, elem, info->pf, info->hook, + info->indev, info->outdev, info->okfn, + verdict >> NF_VERDICT_BITS)) + goto next_hook; + break; + } + rcu_read_unlock(); + + if (verdict == NF_DROP) + kfree_skb(skb); + + kfree(info); + return; +} +EXPORT_SYMBOL(nf_reinject); + +int __init netfilter_queue_init(void) +{ + queue_rerouter = kmalloc(NPROTO * sizeof(struct nf_queue_rerouter), + GFP_KERNEL); + if (!queue_rerouter) + return -ENOMEM; + + memset(queue_rerouter, 0, NPROTO * sizeof(struct nf_queue_rerouter)); + + return 0; +} + diff --git a/net/netfilter/nf_sockopt.c b/net/netfilter/nf_sockopt.c new file mode 100644 index 000000000000..61a833a9caa6 --- /dev/null +++ b/net/netfilter/nf_sockopt.c @@ -0,0 +1,132 @@ +#include +#include +#include +#include +#include +#include +#include + +#include "nf_internals.h" + +/* Sockopts only registered and called from user context, so + net locking would be overkill. Also, [gs]etsockopt calls may + sleep. */ +static DECLARE_MUTEX(nf_sockopt_mutex); +static LIST_HEAD(nf_sockopts); + +/* Do exclusive ranges overlap? */ +static inline int overlap(int min1, int max1, int min2, int max2) +{ + return max1 > min2 && min1 < max2; +} + +/* Functions to register sockopt ranges (exclusive). 
*/ +int nf_register_sockopt(struct nf_sockopt_ops *reg) +{ + struct list_head *i; + int ret = 0; + + if (down_interruptible(&nf_sockopt_mutex) != 0) + return -EINTR; + + list_for_each(i, &nf_sockopts) { + struct nf_sockopt_ops *ops = (struct nf_sockopt_ops *)i; + if (ops->pf == reg->pf + && (overlap(ops->set_optmin, ops->set_optmax, + reg->set_optmin, reg->set_optmax) + || overlap(ops->get_optmin, ops->get_optmax, + reg->get_optmin, reg->get_optmax))) { + NFDEBUG("nf_sock overlap: %u-%u/%u-%u v %u-%u/%u-%u\n", + ops->set_optmin, ops->set_optmax, + ops->get_optmin, ops->get_optmax, + reg->set_optmin, reg->set_optmax, + reg->get_optmin, reg->get_optmax); + ret = -EBUSY; + goto out; + } + } + + list_add(®->list, &nf_sockopts); +out: + up(&nf_sockopt_mutex); + return ret; +} +EXPORT_SYMBOL(nf_register_sockopt); + +void nf_unregister_sockopt(struct nf_sockopt_ops *reg) +{ + /* No point being interruptible: we're probably in cleanup_module() */ + restart: + down(&nf_sockopt_mutex); + if (reg->use != 0) { + /* To be woken by nf_sockopt call... */ + /* FIXME: Stuart Young's name appears gratuitously. 
*/ + set_current_state(TASK_UNINTERRUPTIBLE); + reg->cleanup_task = current; + up(&nf_sockopt_mutex); + schedule(); + goto restart; + } + list_del(®->list); + up(&nf_sockopt_mutex); +} +EXPORT_SYMBOL(nf_unregister_sockopt); + +/* Call get/setsockopt() */ +static int nf_sockopt(struct sock *sk, int pf, int val, + char __user *opt, int *len, int get) +{ + struct list_head *i; + struct nf_sockopt_ops *ops; + int ret; + + if (down_interruptible(&nf_sockopt_mutex) != 0) + return -EINTR; + + list_for_each(i, &nf_sockopts) { + ops = (struct nf_sockopt_ops *)i; + if (ops->pf == pf) { + if (get) { + if (val >= ops->get_optmin + && val < ops->get_optmax) { + ops->use++; + up(&nf_sockopt_mutex); + ret = ops->get(sk, val, opt, len); + goto out; + } + } else { + if (val >= ops->set_optmin + && val < ops->set_optmax) { + ops->use++; + up(&nf_sockopt_mutex); + ret = ops->set(sk, val, opt, *len); + goto out; + } + } + } + } + up(&nf_sockopt_mutex); + return -ENOPROTOOPT; + + out: + down(&nf_sockopt_mutex); + ops->use--; + if (ops->cleanup_task) + wake_up_process(ops->cleanup_task); + up(&nf_sockopt_mutex); + return ret; +} + +int nf_setsockopt(struct sock *sk, int pf, int val, char __user *opt, + int len) +{ + return nf_sockopt(sk, pf, val, opt, &len, 0); +} +EXPORT_SYMBOL(nf_setsockopt); + +int nf_getsockopt(struct sock *sk, int pf, int val, char __user *opt, int *len) +{ + return nf_sockopt(sk, pf, val, opt, len, 1); +} +EXPORT_SYMBOL(nf_getsockopt); + From fbcd923c3e0c8ec9e4ed64f5a4e5766807b32729 Mon Sep 17 00:00:00 2001 From: Harald Welte Date: Tue, 9 Aug 2005 20:22:10 -0700 Subject: [PATCH 346/584] [NETFILTER]: add correct bridging support to nfnetlink_{queue,log} This patch adds support for passing the real 'physical' device ifindex down to userspace via nfnetlink_log and nfnetlink_queue. This feature basically obsoletes net/bridge/netfilter/ebt_ulog.c, and it is likely ebt_ulog.c will die with one of the next couple of patches. 
Signed-off-by: Harald Welte Signed-off-by: David S. Miller --- include/linux/netfilter/nfnetlink_log.h | 2 + include/linux/netfilter/nfnetlink_queue.h | 2 + net/netfilter/nfnetlink_log.c | 58 +++++++++++++++++++++++ net/netfilter/nfnetlink_queue.c | 58 +++++++++++++++++++++++ 4 files changed, 120 insertions(+) diff --git a/include/linux/netfilter/nfnetlink_log.h b/include/linux/netfilter/nfnetlink_log.h index 420ff4625cbf..a61836a083e7 100644 --- a/include/linux/netfilter/nfnetlink_log.h +++ b/include/linux/netfilter/nfnetlink_log.h @@ -40,6 +40,8 @@ enum nfulnl_attr_type { NFULA_TIMESTAMP, /* nfulnl_msg_packet_timestamp */ NFULA_IFINDEX_INDEV, /* u_int32_t ifindex */ NFULA_IFINDEX_OUTDEV, /* u_int32_t ifindex */ + NFULA_IFINDEX_PHYSINDEV, /* u_int32_t ifindex */ + NFULA_IFINDEX_PHYSOUTDEV, /* u_int32_t ifindex */ NFULA_HWADDR, /* nfulnl_msg_packet_hw */ NFULA_PAYLOAD, /* opaque data payload */ NFULA_PREFIX, /* string prefix */ diff --git a/include/linux/netfilter/nfnetlink_queue.h b/include/linux/netfilter/nfnetlink_queue.h index e142b0ff7c08..2d8d2b2cfcaa 100644 --- a/include/linux/netfilter/nfnetlink_queue.h +++ b/include/linux/netfilter/nfnetlink_queue.h @@ -36,6 +36,8 @@ enum nfqnl_attr_type { NFQA_TIMESTAMP, /* nfqnl_msg_packet_timestamp */ NFQA_IFINDEX_INDEV, /* u_int32_t ifindex */ NFQA_IFINDEX_OUTDEV, /* u_int32_t ifindex */ + NFQA_IFINDEX_PHYSINDEV, /* u_int32_t ifindex */ + NFQA_IFINDEX_PHYSOUTDEV, /* u_int32_t ifindex */ NFQA_HWADDR, /* nfqnl_msg_packet_hw */ NFQA_PAYLOAD, /* opaque data payload */ diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c index 11584289c262..464c9fa2934b 100644 --- a/net/netfilter/nfnetlink_log.c +++ b/net/netfilter/nfnetlink_log.c @@ -33,6 +33,10 @@ #include +#ifdef CONFIG_BRIDGE_NETFILTER +#include "../bridge/br_private.h" +#endif + #define NFULNL_NLBUFSIZ_DEFAULT 4096 #define NFULNL_TIMEOUT_DEFAULT 100 /* every second */ #define NFULNL_QTHRESH_DEFAULT 100 /* 100 packets */ @@ -412,14 +416,64 @@ 
__build_packet_message(struct nfulnl_instance *inst, if (indev) { tmp_uint = htonl(indev->ifindex); +#ifndef CONFIG_BRIDGE_NETFILTER NFA_PUT(inst->skb, NFULA_IFINDEX_INDEV, sizeof(tmp_uint), &tmp_uint); +#else + if (pf == PF_BRIDGE) { + /* Case 1: indev is physical input device, we need to + * look for bridge group (when called from + * netfilter_bridge) */ + NFA_PUT(inst->skb, NFULA_IFINDEX_PHYSINDEV, + sizeof(tmp_uint), &tmp_uint); + /* this is the bridge group "brX" */ + tmp_uint = htonl(indev->br_port->br->dev->ifindex); + NFA_PUT(inst->skb, NFULA_IFINDEX_INDEV, + sizeof(tmp_uint), &tmp_uint); + } else { + /* Case 2: indev is bridge group, we need to look for + * physical device (when called from ipv4) */ + NFA_PUT(inst->skb, NFULA_IFINDEX_INDEV, + sizeof(tmp_uint), &tmp_uint); + if (skb->nf_bridge && skb->nf_bridge->physindev) { + tmp_uint = + htonl(skb->nf_bridge->physindev->ifindex); + NFA_PUT(inst->skb, NFULA_IFINDEX_PHYSINDEV, + sizeof(tmp_uint), &tmp_uint); + } + } +#endif } if (outdev) { tmp_uint = htonl(outdev->ifindex); +#ifndef CONFIG_BRIDGE_NETFILTER NFA_PUT(inst->skb, NFULA_IFINDEX_OUTDEV, sizeof(tmp_uint), &tmp_uint); +#else + if (pf == PF_BRIDGE) { + /* Case 1: outdev is physical output device, we need to + * look for bridge group (when called from + * netfilter_bridge) */ + NFA_PUT(inst->skb, NFULA_IFINDEX_PHYSOUTDEV, + sizeof(tmp_uint), &tmp_uint); + /* this is the bridge group "brX" */ + tmp_uint = htonl(outdev->br_port->br->dev->ifindex); + NFA_PUT(inst->skb, NFULA_IFINDEX_OUTDEV, + sizeof(tmp_uint), &tmp_uint); + } else { + /* Case 2: outdev is a bridge group, we need to look + * for physical output device (when called from ipv4) */ + NFA_PUT(inst->skb, NFULA_IFINDEX_OUTDEV, + sizeof(tmp_uint), &tmp_uint); + if (skb->nf_bridge && skb->nf_bridge->physoutdev) { + tmp_uint = + htonl(skb->nf_bridge->physoutdev->ifindex); + NFA_PUT(inst->skb, NFULA_IFINDEX_PHYSOUTDEV, + sizeof(tmp_uint), &tmp_uint); + } + } +#endif } if (skb->nfmark) { @@ -536,6 +590,10 @@ nfulnl_log_packet(unsigned 
int pf, + NFA_SPACE(sizeof(struct nfulnl_msg_packet_hdr)) + NFA_SPACE(sizeof(u_int32_t)) /* ifindex */ + NFA_SPACE(sizeof(u_int32_t)) /* ifindex */ +#ifdef CONFIG_BRIDGE_NETFILTER + + NFA_SPACE(sizeof(u_int32_t)) /* ifindex */ + + NFA_SPACE(sizeof(u_int32_t)) /* ifindex */ +#endif + NFA_SPACE(sizeof(u_int32_t)) /* mark */ + NFA_SPACE(sizeof(u_int32_t)) /* uid */ + NFA_SPACE(NFULNL_PREFIXLEN) /* prefix */ diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c index 04323ee1eb8d..bf9223084b4a 100644 --- a/net/netfilter/nfnetlink_queue.c +++ b/net/netfilter/nfnetlink_queue.c @@ -30,6 +30,10 @@ #include +#ifdef CONFIG_BRIDGE_NETFILTER +#include "../bridge/br_private.h" +#endif + #define NFQNL_QMAX_DEFAULT 1024 #if 0 @@ -361,6 +365,10 @@ nfqnl_build_packet_message(struct nfqnl_instance *queue, size = NLMSG_SPACE(sizeof(struct nfqnl_msg_packet_hdr)) + NLMSG_SPACE(sizeof(u_int32_t)) /* ifindex */ + NLMSG_SPACE(sizeof(u_int32_t)) /* ifindex */ +#ifdef CONFIG_BRIDGE_NETFILTER + + NLMSG_SPACE(sizeof(u_int32_t)) /* ifindex */ + + NLMSG_SPACE(sizeof(u_int32_t)) /* ifindex */ +#endif + NLMSG_SPACE(sizeof(u_int32_t)) /* mark */ + NLMSG_SPACE(sizeof(struct nfqnl_msg_packet_hw)) + NLMSG_SPACE(sizeof(struct nfqnl_msg_packet_timestamp)); @@ -412,12 +420,62 @@ nfqnl_build_packet_message(struct nfqnl_instance *queue, if (entry->info->indev) { tmp_uint = htonl(entry->info->indev->ifindex); +#ifndef CONFIG_BRIDGE_NETFILTER NFA_PUT(skb, NFQA_IFINDEX_INDEV, sizeof(tmp_uint), &tmp_uint); +#else + if (entry->info->pf == PF_BRIDGE) { + /* Case 1: indev is physical input device, we need to + * look for bridge group (when called from + * netfilter_bridge) */ + NFA_PUT(skb, NFQA_IFINDEX_PHYSINDEV, sizeof(tmp_uint), + &tmp_uint); + /* this is the bridge group "brX" */ + tmp_uint = htonl(entry->info->indev->br_port->br->dev->ifindex); + NFA_PUT(skb, NFQA_IFINDEX_INDEV, sizeof(tmp_uint), + &tmp_uint); + } else { + /* Case 2: indev is bridge group, we need to look for + * 
physical device (when called from ipv4) */ + NFA_PUT(skb, NFQA_IFINDEX_INDEV, sizeof(tmp_uint), + &tmp_uint); + if (entry->skb->nf_bridge + && entry->skb->nf_bridge->physindev) { + tmp_uint = htonl(entry->skb->nf_bridge->physindev->ifindex); + NFA_PUT(skb, NFQA_IFINDEX_PHYSINDEV, + sizeof(tmp_uint), &tmp_uint); + } + } +#endif } if (entry->info->outdev) { tmp_uint = htonl(entry->info->outdev->ifindex); +#ifndef CONFIG_BRIDGE_NETFILTER NFA_PUT(skb, NFQA_IFINDEX_OUTDEV, sizeof(tmp_uint), &tmp_uint); +#else + if (entry->info->pf == PF_BRIDGE) { + /* Case 1: outdev is physical output device, we need to + * look for bridge group (when called from + * netfilter_bridge) */ + NFA_PUT(skb, NFQA_IFINDEX_PHYSOUTDEV, sizeof(tmp_uint), + &tmp_uint); + /* this is the bridge group "brX" */ + tmp_uint = htonl(entry->info->outdev->br_port->br->dev->ifindex); + NFA_PUT(skb, NFQA_IFINDEX_OUTDEV, sizeof(tmp_uint), + &tmp_uint); + } else { + /* Case 2: outdev is bridge group, we need to look for + * physical output device (when called from ipv4) */ + NFA_PUT(skb, NFQA_IFINDEX_OUTDEV, sizeof(tmp_uint), + &tmp_uint); + if (entry->skb->nf_bridge + && entry->skb->nf_bridge->physoutdev) { + tmp_uint = htonl(entry->skb->nf_bridge->physoutdev->ifindex); + NFA_PUT(skb, NFQA_IFINDEX_PHYSOUTDEV, + sizeof(tmp_uint), &tmp_uint); + } + } +#endif } if (entry->skb->nfmark) { From bbd86b9fc469b7e91dc7444e6abb8930811d79cb Mon Sep 17 00:00:00 2001 From: Harald Welte Date: Tue, 9 Aug 2005 20:23:11 -0700 Subject: [PATCH 347/584] [NETFILTER]: add /proc/net/netfilter interface to nf_queue This patch adds a /proc/net/netfilter/nf_queue file, similar to the recently-added /proc/net/netfilter/nf_log. It indicates which queue handler is registered to which protocol family. This is useful since there are now multiple queue handlers in the tree (ip[6]_queue, nfnetlink_queue). Signed-off-by: Harald Welte Signed-off-by: David S. 
Miller --- include/linux/netfilter.h | 13 ++-- net/ipv4/netfilter/ip_queue.c | 9 ++- net/ipv6/netfilter/ip6_queue.c | 9 ++- net/netfilter/nf_log.c | 1 + net/netfilter/nf_queue.c | 106 ++++++++++++++++++++++++++------ net/netfilter/nfnetlink_queue.c | 12 ++-- 6 files changed, 116 insertions(+), 34 deletions(-) diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h index 815583af06c2..bf430fcbe364 100644 --- a/include/linux/netfilter.h +++ b/include/linux/netfilter.h @@ -225,13 +225,16 @@ int nf_getsockopt(struct sock *sk, int pf, int optval, char __user *opt, int *len); /* Packet queuing */ -typedef int (*nf_queue_outfn_t)(struct sk_buff *skb, - struct nf_info *info, - unsigned int queuenum, void *data); +struct nf_queue_handler { + int (*outfn)(struct sk_buff *skb, struct nf_info *info, + unsigned int queuenum, void *data); + void *data; + char *name; +}; extern int nf_register_queue_handler(int pf, - nf_queue_outfn_t outfn, void *data); + struct nf_queue_handler *qh); extern int nf_unregister_queue_handler(int pf); -extern void nf_unregister_queue_handlers(nf_queue_outfn_t outfn); +extern void nf_unregister_queue_handlers(struct nf_queue_handler *qh); extern void nf_reinject(struct sk_buff *skb, struct nf_info *info, unsigned int verdict); diff --git a/net/ipv4/netfilter/ip_queue.c b/net/ipv4/netfilter/ip_queue.c index 629de649f130..1c49833e00a9 100644 --- a/net/ipv4/netfilter/ip_queue.c +++ b/net/ipv4/netfilter/ip_queue.c @@ -656,6 +656,11 @@ ipq_get_info(char *buffer, char **start, off_t offset, int length) } #endif /* CONFIG_PROC_FS */ +static struct nf_queue_handler nfqh = { + .name = "ip_queue", + .outfn = &ipq_enqueue_packet, +}; + static int init_or_cleanup(int init) { @@ -684,7 +689,7 @@ init_or_cleanup(int init) register_netdevice_notifier(&ipq_dev_notifier); ipq_sysctl_header = register_sysctl_table(ipq_root_table, 0); - status = nf_register_queue_handler(PF_INET, ipq_enqueue_packet, NULL); + status = nf_register_queue_handler(PF_INET, 
&nfqh); if (status < 0) { printk(KERN_ERR "ip_queue: failed to register queue handler\n"); goto cleanup_sysctl; @@ -692,7 +697,7 @@ init_or_cleanup(int init) return status; cleanup: - nf_unregister_queue_handlers(&ipq_enqueue_packet); + nf_unregister_queue_handlers(&nfqh); synchronize_net(); ipq_flush(NF_DROP); diff --git a/net/ipv6/netfilter/ip6_queue.c b/net/ipv6/netfilter/ip6_queue.c index 56ffec3568fa..7ecb91e24a34 100644 --- a/net/ipv6/netfilter/ip6_queue.c +++ b/net/ipv6/netfilter/ip6_queue.c @@ -652,6 +652,11 @@ ipq_get_info(char *buffer, char **start, off_t offset, int length) return len; } +static struct nf_queue_handler nfqh = { + .name = "ip6_queue", + .outfn = &ipq_enqueue_packet, +}; + static int init_or_cleanup(int init) { @@ -679,7 +684,7 @@ init_or_cleanup(int init) register_netdevice_notifier(&ipq_dev_notifier); ipq_sysctl_header = register_sysctl_table(ipq_root_table, 0); - status = nf_register_queue_handler(PF_INET6, ipq_enqueue_packet, NULL); + status = nf_register_queue_handler(PF_INET6, &nfqh); if (status < 0) { printk(KERN_ERR "ip6_queue: failed to register queue handler\n"); goto cleanup_sysctl; @@ -687,7 +692,7 @@ init_or_cleanup(int init) return status; cleanup: - nf_unregister_queue_handlers(&ipq_enqueue_packet); + nf_unregister_queue_handlers(&nfqh); synchronize_net(); ipq_flush(NF_DROP); diff --git a/net/netfilter/nf_log.c b/net/netfilter/nf_log.c index ec58c4d2c667..31a9d63921d6 100644 --- a/net/netfilter/nf_log.c +++ b/net/netfilter/nf_log.c @@ -5,6 +5,7 @@ #include #include #include +#include #include #include "nf_internals.h" diff --git a/net/netfilter/nf_queue.c b/net/netfilter/nf_queue.c index 5586f843ed45..8a67bde8b640 100644 --- a/net/netfilter/nf_queue.c +++ b/net/netfilter/nf_queue.c @@ -5,6 +5,7 @@ #include #include #include +#include #include #include "nf_internals.h" @@ -14,17 +15,12 @@ * long term mutex. 
The handler must provide an an outfn() to accept packets * for queueing and must reinject all packets it receives, no matter what. */ -static struct nf_queue_handler_t { - nf_queue_outfn_t outfn; - void *data; -} queue_handler[NPROTO]; - +static struct nf_queue_handler *queue_handler[NPROTO]; static struct nf_queue_rerouter *queue_rerouter; static DEFINE_RWLOCK(queue_handler_lock); - -int nf_register_queue_handler(int pf, nf_queue_outfn_t outfn, void *data) +int nf_register_queue_handler(int pf, struct nf_queue_handler *qh) { int ret; @@ -32,11 +28,10 @@ int nf_register_queue_handler(int pf, nf_queue_outfn_t outfn, void *data) return -EINVAL; write_lock_bh(&queue_handler_lock); - if (queue_handler[pf].outfn) + if (queue_handler[pf]) ret = -EBUSY; else { - queue_handler[pf].outfn = outfn; - queue_handler[pf].data = data; + queue_handler[pf] = qh; ret = 0; } write_unlock_bh(&queue_handler_lock); @@ -52,8 +47,7 @@ int nf_unregister_queue_handler(int pf) return -EINVAL; write_lock_bh(&queue_handler_lock); - queue_handler[pf].outfn = NULL; - queue_handler[pf].data = NULL; + queue_handler[pf] = NULL; write_unlock_bh(&queue_handler_lock); return 0; @@ -85,16 +79,14 @@ int nf_unregister_queue_rerouter(int pf) } EXPORT_SYMBOL_GPL(nf_unregister_queue_rerouter); -void nf_unregister_queue_handlers(nf_queue_outfn_t outfn) +void nf_unregister_queue_handlers(struct nf_queue_handler *qh) { int pf; write_lock_bh(&queue_handler_lock); for (pf = 0; pf < NPROTO; pf++) { - if (queue_handler[pf].outfn == outfn) { - queue_handler[pf].outfn = NULL; - queue_handler[pf].data = NULL; - } + if (queue_handler[pf] == qh) + queue_handler[pf] = NULL; } write_unlock_bh(&queue_handler_lock); } @@ -121,7 +113,7 @@ int nf_queue(struct sk_buff **skb, /* QUEUE == DROP if noone is waiting, to be safe. 
*/ read_lock(&queue_handler_lock); - if (!queue_handler[pf].outfn) { + if (!queue_handler[pf]->outfn) { read_unlock(&queue_handler_lock); kfree_skb(*skb); return 1; @@ -162,8 +154,8 @@ int nf_queue(struct sk_buff **skb, if (queue_rerouter[pf].save) queue_rerouter[pf].save(*skb, info); - status = queue_handler[pf].outfn(*skb, info, queuenum, - queue_handler[pf].data); + status = queue_handler[pf]->outfn(*skb, info, queuenum, + queue_handler[pf]->data); if (status >= 0 && queue_rerouter[pf].reroute) status = queue_rerouter[pf].reroute(skb, info); @@ -259,13 +251,87 @@ void nf_reinject(struct sk_buff *skb, struct nf_info *info, } EXPORT_SYMBOL(nf_reinject); +#ifdef CONFIG_PROC_FS +static void *seq_start(struct seq_file *seq, loff_t *pos) +{ + if (*pos >= NPROTO) + return NULL; + + return pos; +} + +static void *seq_next(struct seq_file *s, void *v, loff_t *pos) +{ + (*pos)++; + + if (*pos >= NPROTO) + return NULL; + + return pos; +} + +static void seq_stop(struct seq_file *s, void *v) +{ + +} + +static int seq_show(struct seq_file *s, void *v) +{ + int ret; + loff_t *pos = v; + struct nf_queue_handler *qh; + + read_lock_bh(&queue_handler_lock); + qh = queue_handler[*pos]; + if (!qh) + ret = seq_printf(s, "%2lld NONE\n", *pos); + else + ret = seq_printf(s, "%2lld %s\n", *pos, qh->name); + read_unlock_bh(&queue_handler_lock); + + return ret; +} + +static struct seq_operations nfqueue_seq_ops = { + .start = seq_start, + .next = seq_next, + .stop = seq_stop, + .show = seq_show, +}; + +static int nfqueue_open(struct inode *inode, struct file *file) +{ + return seq_open(file, &nfqueue_seq_ops); +} + +static struct file_operations nfqueue_file_ops = { + .owner = THIS_MODULE, + .open = nfqueue_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release, +}; +#endif /* PROC_FS */ + + int __init netfilter_queue_init(void) { +#ifdef CONFIG_PROC_FS + struct proc_dir_entry *pde; +#endif queue_rerouter = kmalloc(NPROTO * sizeof(struct nf_queue_rerouter), GFP_KERNEL); 
if (!queue_rerouter) return -ENOMEM; +#ifdef CONFIG_PROC_FS + pde = create_proc_entry("nf_queue", S_IRUGO, proc_net_netfilter); + if (!pde) { + kfree(queue_rerouter); + return -1; + } + pde->proc_fops = &nfqueue_file_ops; +#endif memset(queue_rerouter, 0, NPROTO * sizeof(struct nf_queue_rerouter)); return 0; diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c index bf9223084b4a..741686ff71d8 100644 --- a/net/netfilter/nfnetlink_queue.c +++ b/net/netfilter/nfnetlink_queue.c @@ -845,6 +845,11 @@ static const int nfqa_cfg_min[NFQA_CFG_MAX] = { [NFQA_CFG_PARAMS-1] = sizeof(struct nfqnl_msg_config_params), }; +static struct nf_queue_handler nfqh = { + .name = "nf_queue", + .outfn = &nfqnl_enqueue_packet, +}; + static int nfqnl_recv_config(struct sock *ctnl, struct sk_buff *skb, struct nlmsghdr *nlh, struct nfattr *nfqa[], int *errp) @@ -890,10 +895,7 @@ nfqnl_recv_config(struct sock *ctnl, struct sk_buff *skb, case NFQNL_CFG_CMD_PF_BIND: QDEBUG("registering queue handler for pf=%u\n", ntohs(cmd->pf)); - ret = nf_register_queue_handler(ntohs(cmd->pf), - nfqnl_enqueue_packet, - NULL); - + ret = nf_register_queue_handler(ntohs(cmd->pf), &nfqh); break; case NFQNL_CFG_CMD_PF_UNBIND: QDEBUG("unregistering queue handler for pf=%u\n", @@ -1098,7 +1100,7 @@ init_or_cleanup(int init) return status; cleanup: - nf_unregister_queue_handlers(nfqnl_enqueue_packet); + nf_unregister_queue_handlers(&nfqh); unregister_netdevice_notifier(&nfqnl_dev_notifier); #ifdef CONFIG_PROC_FS remove_proc_entry("nfnetlink_queue", proc_net_netfilter); From d72367b6f36e557f122beefaa8c6b80eb1c7f245 Mon Sep 17 00:00:00 2001 From: Harald Welte Date: Tue, 9 Aug 2005 20:23:36 -0700 Subject: [PATCH 348/584] [NETFILTER]: more verbose return codes from nf_{log,queue} This adds EEXIST to distinguish between the following return values: 0: nobody was registered, registration successful EEXIST: the exact same handler was already registered, no registration required EBUSY: somebody else is 
registered, registration unsuccessful. Signed-off-by: Harald Welte Signed-off-by: David S. Miller --- net/netfilter/nf_log.c | 6 +++++- net/netfilter/nf_queue.c | 6 +++++- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/net/netfilter/nf_log.c b/net/netfilter/nf_log.c index 31a9d63921d6..e104760f7a67 100644 --- a/net/netfilter/nf_log.c +++ b/net/netfilter/nf_log.c @@ -18,6 +18,8 @@ static struct nf_logger *nf_logging[NPROTO]; /* = NULL */ static DEFINE_SPINLOCK(nf_log_lock); +/* return EBUSY if somebody else is registered, EEXIST if the same logger + * is registred, 0 on success. */ int nf_log_register(int pf, struct nf_logger *logger) { int ret = -EBUSY; @@ -28,7 +30,9 @@ int nf_log_register(int pf, struct nf_logger *logger) if (!nf_logging[pf]) { rcu_assign_pointer(nf_logging[pf], logger); ret = 0; - } + } else if (nf_logging[pf] == logger) + ret = -EEXIST; + spin_unlock(&nf_log_lock); return ret; } diff --git a/net/netfilter/nf_queue.c b/net/netfilter/nf_queue.c index 8a67bde8b640..d10d552d9c40 100644 --- a/net/netfilter/nf_queue.c +++ b/net/netfilter/nf_queue.c @@ -20,6 +20,8 @@ static struct nf_queue_rerouter *queue_rerouter; static DEFINE_RWLOCK(queue_handler_lock); +/* return EBUSY when somebody else is registered, return EEXIST if the + * same handler is registered, return 0 in case of success. */ int nf_register_queue_handler(int pf, struct nf_queue_handler *qh) { int ret; @@ -28,7 +30,9 @@ int nf_register_queue_handler(int pf, struct nf_queue_handler *qh) return -EINVAL; write_lock_bh(&queue_handler_lock); - if (queue_handler[pf]) + if (queue_handler[pf] == qh) + ret = -EEXIST; + else if (queue_handler[pf]) ret = -EBUSY; else { queue_handler[pf] = qh; From 8a61fadb3908454ccfa538aaa75eb1d22def5700 Mon Sep 17 00:00:00 2001 From: Harald Welte Date: Tue, 9 Aug 2005 20:23:53 -0700 Subject: [PATCH 349/584] [NETFILTER]: check nf_log function call arguments Check whether pf is too large in order to prevent array overflow. 
Signed-off-by: Harald Welte Signed-off-by: David S. Miller --- include/linux/netfilter.h | 2 +- net/netfilter/nf_log.c | 10 +++++++++- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h index bf430fcbe364..ac3c61411d4b 100644 --- a/include/linux/netfilter.h +++ b/include/linux/netfilter.h @@ -157,7 +157,7 @@ struct nf_logger { /* Function to register/unregister log function. */ int nf_log_register(int pf, struct nf_logger *logger); -void nf_log_unregister_pf(int pf); +int nf_log_unregister_pf(int pf); void nf_log_unregister_logger(struct nf_logger *logger); /* Calls the registered backend logging function */ diff --git a/net/netfilter/nf_log.c b/net/netfilter/nf_log.c index e104760f7a67..573e76a770d9 100644 --- a/net/netfilter/nf_log.c +++ b/net/netfilter/nf_log.c @@ -24,6 +24,9 @@ int nf_log_register(int pf, struct nf_logger *logger) { int ret = -EBUSY; + if (pf >= NPROTO) + return -EINVAL; + /* Any setup of logging members must be done before * substituting pointer. */ spin_lock(&nf_log_lock); @@ -38,14 +41,19 @@ int nf_log_register(int pf, struct nf_logger *logger) } EXPORT_SYMBOL(nf_log_register); -void nf_log_unregister_pf(int pf) +int nf_log_unregister_pf(int pf) { + if (pf >= NPROTO) + return -EINVAL; + spin_lock(&nf_log_lock); nf_logging[pf] = NULL; spin_unlock(&nf_log_lock); /* Give time to concurrent readers. */ synchronize_net(); + + return 0; } EXPORT_SYMBOL(nf_log_unregister_pf); From 7663f18807805f02608457af8e2f59eee5d910fd Mon Sep 17 00:00:00 2001 From: Yasuyuki Kozakai Date: Tue, 9 Aug 2005 20:24:15 -0700 Subject: [PATCH 350/584] [NETFILTER]: return ENOMEM when ip_conntrack_alloc() fails. This patch fixes the bug which doesn't return ERR_PTR(-ENOMEM) if it failed to allocate memory space from slab cache. This bug leads to erroneously not dropped packets under stress, and wrong statistic counters ('invalid' is incremented instead of 'drop'). 
It was introduced during the ctnetlink merge in the net-2.6.14 tree, so no stable or mainline releases affected. Signed-off-by: Yasuyuki Kozakai Signed-off-by: Harald Welte Signed-off-by: David S. Miller --- net/ipv4/netfilter/ip_conntrack_core.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/net/ipv4/netfilter/ip_conntrack_core.c b/net/ipv4/netfilter/ip_conntrack_core.c index 9261388d5ac2..285743bfbed3 100644 --- a/net/ipv4/netfilter/ip_conntrack_core.c +++ b/net/ipv4/netfilter/ip_conntrack_core.c @@ -655,7 +655,7 @@ struct ip_conntrack *ip_conntrack_alloc(struct ip_conntrack_tuple *orig, conntrack = kmem_cache_alloc(ip_conntrack_cachep, GFP_ATOMIC); if (!conntrack) { DEBUGP("Can't allocate conntrack.\n"); - return NULL; + return ERR_PTR(-ENOMEM); } memset(conntrack, 0, sizeof(*conntrack)); @@ -696,8 +696,9 @@ init_conntrack(struct ip_conntrack_tuple *tuple, return NULL; } - if (!(conntrack = ip_conntrack_alloc(tuple, &repl_tuple))) - return NULL; + conntrack = ip_conntrack_alloc(tuple, &repl_tuple); + if (conntrack == NULL || IS_ERR(conntrack)) + return (struct ip_conntrack_tuple_hash *)conntrack; if (!protocol->new(conntrack, skb)) { ip_conntrack_free(conntrack); From 91b9a277fc4d207249e459a455abf804ebb5499d Mon Sep 17 00:00:00 2001 From: Olof Johansson Date: Tue, 9 Aug 2005 20:24:39 -0700 Subject: [PATCH 351/584] [IPV4]: FIB Trie cleanups. Below is a patch that cleans up some of this, supposedly without changing any behaviour: * Whitespace cleanups * Introduce DBG() * BUG_ON() instead of if () { BUG(); } * Remove some of the deep nesting to make the code flow more comprehensible * Some mask operations were simplified Signed-off-by: Olof Johansson Signed-off-by: Robert Olsson Signed-off-by: David S. 
Miller --- net/ipv4/fib_trie.c | 1247 +++++++++++++++++++++---------------------- 1 file changed, 597 insertions(+), 650 deletions(-) diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index 45efd5f4741b..6f818cc7efd0 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -89,27 +89,27 @@ typedef unsigned int t_key; #define T_TNODE 0 #define T_LEAF 1 #define NODE_TYPE_MASK 0x1UL -#define NODE_PARENT(_node) \ - ((struct tnode *)((_node)->_parent & ~NODE_TYPE_MASK)) -#define NODE_SET_PARENT(_node, _ptr) \ - ((_node)->_parent = (((unsigned long)(_ptr)) | \ - ((_node)->_parent & NODE_TYPE_MASK))) -#define NODE_INIT_PARENT(_node, _type) \ - ((_node)->_parent = (_type)) -#define NODE_TYPE(_node) \ - ((_node)->_parent & NODE_TYPE_MASK) +#define NODE_PARENT(node) \ + ((struct tnode *)((node)->parent & ~NODE_TYPE_MASK)) +#define NODE_SET_PARENT(node, ptr) \ + ((node)->parent = (((unsigned long)(ptr)) | \ + ((node)->parent & NODE_TYPE_MASK))) +#define NODE_INIT_PARENT(node, type) \ + ((node)->parent = (type)) +#define NODE_TYPE(node) \ + ((node)->parent & NODE_TYPE_MASK) -#define IS_TNODE(n) (!(n->_parent & T_LEAF)) -#define IS_LEAF(n) (n->_parent & T_LEAF) +#define IS_TNODE(n) (!(n->parent & T_LEAF)) +#define IS_LEAF(n) (n->parent & T_LEAF) struct node { - t_key key; - unsigned long _parent; + t_key key; + unsigned long parent; }; struct leaf { - t_key key; - unsigned long _parent; + t_key key; + unsigned long parent; struct hlist_head list; }; @@ -120,13 +120,13 @@ struct leaf_info { }; struct tnode { - t_key key; - unsigned long _parent; - unsigned short pos:5; /* 2log(KEYLENGTH) bits needed */ - unsigned short bits:5; /* 2log(KEYLENGTH) bits needed */ - unsigned short full_children; /* KEYLENGTH bits needed */ - unsigned short empty_children; /* KEYLENGTH bits needed */ - struct node *child[0]; + t_key key; + unsigned long parent; + unsigned short pos:5; /* 2log(KEYLENGTH) bits needed */ + unsigned short bits:5; /* 2log(KEYLENGTH) bits needed */ + unsigned 
short full_children; /* KEYLENGTH bits needed */ + unsigned short empty_children; /* KEYLENGTH bits needed */ + struct node *child[0]; }; #ifdef CONFIG_IP_FIB_TRIE_STATS @@ -150,16 +150,18 @@ struct trie_stat { }; struct trie { - struct node *trie; + struct node *trie; #ifdef CONFIG_IP_FIB_TRIE_STATS struct trie_use_stats stats; #endif - int size; + int size; unsigned int revision; }; static int trie_debug = 0; +#define DBG(x...) do { if (trie_debug) printk(x); } while (0) + static int tnode_full(struct tnode *tn, struct node *n); static void put_child(struct trie *t, struct tnode *tn, int i, struct node *n); static void tnode_put_child_reorg(struct tnode *tn, int i, struct node *n, int wasfull); @@ -171,56 +173,31 @@ static void tnode_free(struct tnode *tn); static void trie_dump_seq(struct seq_file *seq, struct trie *t); extern struct fib_alias *fib_find_alias(struct list_head *fah, u8 tos, u32 prio); extern int fib_detect_death(struct fib_info *fi, int order, - struct fib_info **last_resort, int *last_idx, int *dflt); + struct fib_info **last_resort, int *last_idx, int *dflt); extern void rtmsg_fib(int event, u32 key, struct fib_alias *fa, int z, int tb_id, - struct nlmsghdr *n, struct netlink_skb_parms *req); + struct nlmsghdr *n, struct netlink_skb_parms *req); static kmem_cache_t *fn_alias_kmem; static struct trie *trie_local = NULL, *trie_main = NULL; -static void trie_bug(char *err) -{ - printk("Trie Bug: %s\n", err); - BUG(); -} - static inline struct node *tnode_get_child(struct tnode *tn, int i) { - if (i >= 1<bits) - trie_bug("tnode_get_child"); + BUG_ON(i >= 1 << tn->bits); - return tn->child[i]; + return tn->child[i]; } static inline int tnode_child_length(struct tnode *tn) { - return 1<bits; + return 1 << tn->bits; } -/* - _________________________________________________________________ - | i | i | i | i | i | i | i | N | N | N | S | S | S | S | S | C | - ---------------------------------------------------------------- - 0 1 2 3 4 5 6 7 8 9 10 11 12 
13 14 15 - - _________________________________________________________________ - | C | C | C | u | u | u | u | u | u | u | u | u | u | u | u | u | - ----------------------------------------------------------------- - 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 - - tp->pos = 7 - tp->bits = 3 - n->pos = 15 - n->bits=4 - KEYLENGTH=32 -*/ - static inline t_key tkey_extract_bits(t_key a, int offset, int bits) { - if (offset < KEYLENGTH) + if (offset < KEYLENGTH) return ((t_key)(a << offset)) >> (KEYLENGTH - bits); - else + else return 0; } @@ -233,8 +210,8 @@ static inline int tkey_sub_equals(t_key a, int offset, int bits, t_key b) { if (bits == 0 || offset >= KEYLENGTH) return 1; - bits = bits > KEYLENGTH ? KEYLENGTH : bits; - return ((a ^ b) << offset) >> (KEYLENGTH - bits) == 0; + bits = bits > KEYLENGTH ? KEYLENGTH : bits; + return ((a ^ b) << offset) >> (KEYLENGTH - bits) == 0; } static inline int tkey_mismatch(t_key a, int offset, t_key b) @@ -249,7 +226,7 @@ static inline int tkey_mismatch(t_key a, int offset, t_key b) return i; } -/* Candiate for fib_semantics */ +/* Candidate for fib_semantics */ static void fn_free_alias(struct fib_alias *fa) { @@ -295,7 +272,7 @@ static void fn_free_alias(struct fib_alias *fa) tp->pos = 7 tp->bits = 3 n->pos = 15 - n->bits=4 + n->bits = 4 First, let's just ignore the bits that come before the parent tp, that is the bits from 0 to (tp->pos-1). 
They are *known* but at this point we do @@ -343,10 +320,13 @@ static struct leaf *leaf_new(void) static struct leaf_info *leaf_info_new(int plen) { struct leaf_info *li = kmalloc(sizeof(struct leaf_info), GFP_KERNEL); - if (li) { - li->plen = plen; - INIT_LIST_HEAD(&li->falh); - } + + if (!li) + return NULL; + + li->plen = plen; + INIT_LIST_HEAD(&li->falh); + return li; } @@ -373,7 +353,7 @@ static struct tnode *tnode_alloc(unsigned int size) static void __tnode_free(struct tnode *tn) { unsigned int size = sizeof(struct tnode) + - (1<bits) * sizeof(struct node *); + (1 << tn->bits) * sizeof(struct node *); if (size <= PAGE_SIZE) kfree(tn); @@ -387,7 +367,7 @@ static struct tnode* tnode_new(t_key key, int pos, int bits) int sz = sizeof(struct tnode) + nchildren * sizeof(struct node *); struct tnode *tn = tnode_alloc(sz); - if (tn) { + if (tn) { memset(tn, 0, sz); NODE_INIT_PARENT(tn, T_TNODE); tn->pos = pos; @@ -397,29 +377,21 @@ static struct tnode* tnode_new(t_key key, int pos, int bits) tn->empty_children = 1< 0) - printk("AT %p s=%u %u\n", tn, (unsigned int) sizeof(struct tnode), - (unsigned int) (sizeof(struct node) * 1< 0 ) - printk("FL %p \n", tn); - } - else if (IS_TNODE(tn)) { + DBG("FL %p \n", tn); + } else { __tnode_free(tn); - if (trie_debug > 0 ) - printk("FT %p \n", tn); - } - else { - trie_bug("tnode_free\n"); + DBG("FT %p \n", tn); } } @@ -453,7 +425,7 @@ static void tnode_put_child_reorg(struct tnode *tn, int i, struct node *n, int w if (i >= 1<bits) { printk("bits=%d, i=%d\n", tn->bits, i); - trie_bug("tnode_put_child_reorg bits"); + BUG(); } write_lock_bh(&fib_lock); chi = tn->child[i]; @@ -465,15 +437,15 @@ static void tnode_put_child_reorg(struct tnode *tn, int i, struct node *n, int w tn->empty_children--; /* update fullChildren */ - if (wasfull == -1) + if (wasfull == -1) wasfull = tnode_full(tn, chi); isfull = tnode_full(tn, n); if (wasfull && !isfull) tn->full_children--; - else if (!wasfull && isfull) tn->full_children++; + if (n) 
NODE_SET_PARENT(n, tn); @@ -489,9 +461,8 @@ static struct node *resize(struct trie *t, struct tnode *tn) if (!tn) return NULL; - if (trie_debug) - printk("In tnode_resize %p inflate_threshold=%d threshold=%d\n", - tn, inflate_threshold, halve_threshold); + DBG("In tnode_resize %p inflate_threshold=%d threshold=%d\n", + tn, inflate_threshold, halve_threshold); /* No children */ if (tn->empty_children == tnode_child_length(tn)) { @@ -501,20 +472,21 @@ static struct node *resize(struct trie *t, struct tnode *tn) /* One child */ if (tn->empty_children == tnode_child_length(tn) - 1) for (i = 0; i < tnode_child_length(tn); i++) { + struct node *n; write_lock_bh(&fib_lock); - if (tn->child[i] != NULL) { - - /* compress one level */ - struct node *n = tn->child[i]; - if (n) - NODE_INIT_PARENT(n, NODE_TYPE(n)); - + n = tn->child[i]; + if (!n) { write_unlock_bh(&fib_lock); - tnode_free(tn); - return n; + continue; } + + /* compress one level */ + NODE_INIT_PARENT(n, NODE_TYPE(n)); + write_unlock_bh(&fib_lock); + tnode_free(tn); + return n; } /* * Double as long as the resulting node has a number of @@ -566,16 +538,16 @@ static struct node *resize(struct trie *t, struct tnode *tn) * * expand not_to_be_doubled and to_be_doubled, and shorten: * 100 * (tnode_child_length(tn) - tn->empty_children + - * tn->full_children ) >= inflate_threshold * new_child_length + * tn->full_children) >= inflate_threshold * new_child_length * * expand new_child_length: * 100 * (tnode_child_length(tn) - tn->empty_children + - * tn->full_children ) >= + * tn->full_children) >= * inflate_threshold * tnode_child_length(tn) * 2 * * shorten again: * 50 * (tn->full_children + tnode_child_length(tn) - - * tn->empty_children ) >= inflate_threshold * + * tn->empty_children) >= inflate_threshold * * tnode_child_length(tn) * */ @@ -624,20 +596,23 @@ static struct node *resize(struct trie *t, struct tnode *tn) if (tn->empty_children == tnode_child_length(tn) - 1) for (i = 0; i < tnode_child_length(tn); i++) { 
- + struct node *n; + write_lock_bh(&fib_lock); - if (tn->child[i] != NULL) { - /* compress one level */ - struct node *n = tn->child[i]; - - if (n) - NODE_INIT_PARENT(n, NODE_TYPE(n)); + n = tn->child[i]; + if (!n) { write_unlock_bh(&fib_lock); - tnode_free(tn); - return n; + continue; } + + /* compress one level */ + + NODE_INIT_PARENT(n, NODE_TYPE(n)); + write_unlock_bh(&fib_lock); + tnode_free(tn); + return n; } return (struct node *) tn; @@ -650,8 +625,7 @@ static struct tnode *inflate(struct trie *t, struct tnode *tn, int *err) int olen = tnode_child_length(tn); int i; - if (trie_debug) - printk("In inflate\n"); + DBG("In inflate\n"); tn = tnode_new(oldtnode->key, oldtnode->pos, oldtnode->bits + 1); @@ -666,8 +640,8 @@ static struct tnode *inflate(struct trie *t, struct tnode *tn, int *err) * fails. In case of failure we return the oldnode and inflate * of tnode is ignored. */ - - for(i = 0; i < olen; i++) { + + for (i = 0; i < olen; i++) { struct tnode *inode = (struct tnode *) tnode_get_child(oldtnode, i); if (inode && @@ -675,7 +649,6 @@ static struct tnode *inflate(struct trie *t, struct tnode *tn, int *err) inode->pos == oldtnode->pos + oldtnode->bits && inode->bits > 1) { struct tnode *left, *right; - t_key m = TKEY_GET_MASK(inode->pos, 1); left = tnode_new(inode->key&(~m), inode->pos + 1, @@ -685,7 +658,7 @@ static struct tnode *inflate(struct trie *t, struct tnode *tn, int *err) *err = -ENOMEM; break; } - + right = tnode_new(inode->key|m, inode->pos + 1, inode->bits - 1); @@ -703,18 +676,20 @@ static struct tnode *inflate(struct trie *t, struct tnode *tn, int *err) int size = tnode_child_length(tn); int j; - for(j = 0; j < size; j++) + for (j = 0; j < size; j++) if (tn->child[j]) tnode_free((struct tnode *)tn->child[j]); tnode_free(tn); - + *err = -ENOMEM; return oldtnode; } - for(i = 0; i < olen; i++) { + for (i = 0; i < olen; i++) { struct node *node = tnode_get_child(oldtnode, i); + struct tnode *left, *right; + int size, j; /* An empty child */ if 
(node == NULL) @@ -740,56 +715,51 @@ static struct tnode *inflate(struct trie *t, struct tnode *tn, int *err) put_child(t, tn, 2*i+1, inode->child[1]); tnode_free(inode); + continue; } - /* An internal node with more than two children */ - else { - struct tnode *left, *right; - int size, j; + /* An internal node with more than two children */ - /* We will replace this node 'inode' with two new - * ones, 'left' and 'right', each with half of the - * original children. The two new nodes will have - * a position one bit further down the key and this - * means that the "significant" part of their keys - * (see the discussion near the top of this file) - * will differ by one bit, which will be "0" in - * left's key and "1" in right's key. Since we are - * moving the key position by one step, the bit that - * we are moving away from - the bit at position - * (inode->pos) - is the one that will differ between - * left and right. So... we synthesize that bit in the - * two new keys. - * The mask 'm' below will be a single "one" bit at - * the position (inode->pos) - */ + /* We will replace this node 'inode' with two new + * ones, 'left' and 'right', each with half of the + * original children. The two new nodes will have + * a position one bit further down the key and this + * means that the "significant" part of their keys + * (see the discussion near the top of this file) + * will differ by one bit, which will be "0" in + * left's key and "1" in right's key. Since we are + * moving the key position by one step, the bit that + * we are moving away from - the bit at position + * (inode->pos) - is the one that will differ between + * left and right. So... we synthesize that bit in the + * two new keys. + * The mask 'm' below will be a single "one" bit at + * the position (inode->pos) + */ - /* Use the old key, but set the new significant - * bit to zero. - */ + /* Use the old key, but set the new significant + * bit to zero. 
+ */ - left = (struct tnode *) tnode_get_child(tn, 2*i); - put_child(t, tn, 2*i, NULL); + left = (struct tnode *) tnode_get_child(tn, 2*i); + put_child(t, tn, 2*i, NULL); - if (!left) - BUG(); + BUG_ON(!left); - right = (struct tnode *) tnode_get_child(tn, 2*i+1); - put_child(t, tn, 2*i+1, NULL); + right = (struct tnode *) tnode_get_child(tn, 2*i+1); + put_child(t, tn, 2*i+1, NULL); - if (!right) - BUG(); + BUG_ON(!right); - size = tnode_child_length(left); - for(j = 0; j < size; j++) { - put_child(t, left, j, inode->child[j]); - put_child(t, right, j, inode->child[j + size]); - } - put_child(t, tn, 2*i, resize(t, left)); - put_child(t, tn, 2*i+1, resize(t, right)); - - tnode_free(inode); + size = tnode_child_length(left); + for (j = 0; j < size; j++) { + put_child(t, left, j, inode->child[j]); + put_child(t, right, j, inode->child[j + size]); } + put_child(t, tn, 2*i, resize(t, left)); + put_child(t, tn, 2*i+1, resize(t, right)); + + tnode_free(inode); } tnode_free(oldtnode); return tn; @@ -802,7 +772,7 @@ static struct tnode *halve(struct trie *t, struct tnode *tn, int *err) int i; int olen = tnode_child_length(tn); - if (trie_debug) printk("In halve\n"); + DBG("In halve\n"); tn = tnode_new(oldtnode->key, oldtnode->pos, oldtnode->bits - 1); @@ -818,7 +788,7 @@ static struct tnode *halve(struct trie *t, struct tnode *tn, int *err) * of tnode is ignored. 
*/ - for(i = 0; i < olen; i += 2) { + for (i = 0; i < olen; i += 2) { left = tnode_get_child(oldtnode, i); right = tnode_get_child(oldtnode, i+1); @@ -839,17 +809,19 @@ static struct tnode *halve(struct trie *t, struct tnode *tn, int *err) int size = tnode_child_length(tn); int j; - for(j = 0; j < size; j++) + for (j = 0; j < size; j++) if (tn->child[j]) tnode_free((struct tnode *)tn->child[j]); tnode_free(tn); - + *err = -ENOMEM; return oldtnode; } - for(i = 0; i < olen; i += 2) { + for (i = 0; i < olen; i += 2) { + struct tnode *newBinNode; + left = tnode_get_child(oldtnode, i); right = tnode_get_child(oldtnode, i+1); @@ -858,38 +830,39 @@ static struct tnode *halve(struct trie *t, struct tnode *tn, int *err) if (right == NULL) /* Both are empty */ continue; put_child(t, tn, i/2, right); - } else if (right == NULL) + continue; + } + + if (right == NULL) { put_child(t, tn, i/2, left); + continue; + } /* Two nonempty children */ - else { - struct tnode *newBinNode = - (struct tnode *) tnode_get_child(tn, i/2); - put_child(t, tn, i/2, NULL); + newBinNode = (struct tnode *) tnode_get_child(tn, i/2); + put_child(t, tn, i/2, NULL); - if (!newBinNode) - BUG(); + BUG_ON(!newBinNode); - put_child(t, newBinNode, 0, left); - put_child(t, newBinNode, 1, right); - put_child(t, tn, i/2, resize(t, newBinNode)); - } + put_child(t, newBinNode, 0, left); + put_child(t, newBinNode, 1, right); + put_child(t, tn, i/2, resize(t, newBinNode)); } tnode_free(oldtnode); return tn; } -static void *trie_init(struct trie *t) +static void trie_init(struct trie *t) { - if (t) { - t->size = 0; - t->trie = NULL; - t->revision = 0; + if (!t) + return; + + t->size = 0; + t->trie = NULL; + t->revision = 0; #ifdef CONFIG_IP_FIB_TRIE_STATS - memset(&t->stats, 0, sizeof(struct trie_use_stats)); + memset(&t->stats, 0, sizeof(struct trie_use_stats)); #endif - } - return t; } static struct leaf_info *find_leaf_info(struct hlist_head *head, int plen) @@ -897,39 +870,37 @@ static struct leaf_info 
*find_leaf_info(struct hlist_head *head, int plen) struct hlist_node *node; struct leaf_info *li; - hlist_for_each_entry(li, node, head, hlist) { + hlist_for_each_entry(li, node, head, hlist) if (li->plen == plen) return li; - } + return NULL; } static inline struct list_head * get_fa_head(struct leaf *l, int plen) { - struct list_head *fa_head = NULL; struct leaf_info *li = find_leaf_info(&l->list, plen); - if (li) - fa_head = &li->falh; + if (!li) + return NULL; - return fa_head; + return &li->falh; } static void insert_leaf_info(struct hlist_head *head, struct leaf_info *new) { struct leaf_info *li = NULL, *last = NULL; - struct hlist_node *node, *tmp; + struct hlist_node *node; write_lock_bh(&fib_lock); - if (hlist_empty(head)) + if (hlist_empty(head)) { hlist_add_head(&new->hlist, head); - else { - hlist_for_each_entry_safe(li, node, tmp, head, hlist) { - + } else { + hlist_for_each_entry(li, node, head, hlist) { if (new->plen > li->plen) break; - + last = li; } if (last) @@ -952,49 +923,47 @@ fib_find_node(struct trie *t, u32 key) while (n != NULL && NODE_TYPE(n) == T_TNODE) { tn = (struct tnode *) n; - + check_tnode(tn); - + if (tkey_sub_equals(tn->key, pos, tn->pos-pos, key)) { - pos=tn->pos + tn->bits; + pos = tn->pos + tn->bits; n = tnode_get_child(tn, tkey_extract_bits(key, tn->pos, tn->bits)); - } - else + } else break; } /* Case we have found a leaf. 
Compare prefixes */ - if (n != NULL && IS_LEAF(n) && tkey_equals(key, n->key)) { - struct leaf *l = (struct leaf *) n; - return l; - } + if (n != NULL && IS_LEAF(n) && tkey_equals(key, n->key)) + return (struct leaf *)n; + return NULL; } static struct node *trie_rebalance(struct trie *t, struct tnode *tn) { - int i = 0; + int i; int wasfull; t_key cindex, key; struct tnode *tp = NULL; - if (!tn) - BUG(); + BUG_ON(!tn); key = tn->key; i = 0; while (tn != NULL && NODE_PARENT(tn) != NULL) { - if (i > 10) { printk("Rebalance tn=%p \n", tn); - if (tn) printk("tn->parent=%p \n", NODE_PARENT(tn)); - + if (tn) + printk("tn->parent=%p \n", NODE_PARENT(tn)); + printk("Rebalance tp=%p \n", tp); - if (tp) printk("tp->parent=%p \n", NODE_PARENT(tp)); + if (tp) + printk("tp->parent=%p \n", NODE_PARENT(tp)); } - if (i > 12) BUG(); + BUG_ON(i > 12); /* Why is this a bug? -ojn */ i++; tp = NODE_PARENT(tn); @@ -1002,7 +971,7 @@ static struct node *trie_rebalance(struct trie *t, struct tnode *tn) wasfull = tnode_full(tp, tnode_get_child(tp, cindex)); tn = (struct tnode *) resize (t, (struct tnode *)tn); tnode_put_child_reorg((struct tnode *)tp, cindex,(struct node*)tn, wasfull); - + if (!NODE_PARENT(tn)) break; @@ -1050,20 +1019,19 @@ fib_insert_node(struct trie *t, int *err, u32 key, int plen) while (n != NULL && NODE_TYPE(n) == T_TNODE) { tn = (struct tnode *) n; - + check_tnode(tn); - + if (tkey_sub_equals(tn->key, pos, tn->pos-pos, key)) { tp = tn; - pos=tn->pos + tn->bits; + pos = tn->pos + tn->bits; n = tnode_get_child(tn, tkey_extract_bits(key, tn->pos, tn->bits)); if (n && NODE_PARENT(n) != tn) { printk("BUG tn=%p, n->parent=%p\n", tn, NODE_PARENT(n)); BUG(); } - } - else + } else break; } @@ -1073,17 +1041,15 @@ fib_insert_node(struct trie *t, int *err, u32 key, int plen) * tp is n's (parent) ----> NULL or TNODE */ - if (tp && IS_LEAF(tp)) - BUG(); - + BUG_ON(tp && IS_LEAF(tp)); /* Case 1: n is a leaf. 
Compare prefixes */ if (n != NULL && IS_LEAF(n) && tkey_equals(key, n->key)) { - struct leaf *l = ( struct leaf *) n; - + struct leaf *l = (struct leaf *) n; + li = leaf_info_new(plen); - + if (!li) { *err = -ENOMEM; goto err; @@ -1113,35 +1079,31 @@ fib_insert_node(struct trie *t, int *err, u32 key, int plen) fa_head = &li->falh; insert_leaf_info(&l->list, li); - /* Case 2: n is NULL, and will just insert a new leaf */ if (t->trie && n == NULL) { + /* Case 2: n is NULL, and will just insert a new leaf */ NODE_SET_PARENT(l, tp); - - if (!tp) - BUG(); - else { - cindex = tkey_extract_bits(key, tp->pos, tp->bits); - put_child(t, (struct tnode *)tp, cindex, (struct node *)l); - } - } - /* Case 3: n is a LEAF or a TNODE and the key doesn't match. */ - else { + BUG_ON(!tp); + + cindex = tkey_extract_bits(key, tp->pos, tp->bits); + put_child(t, (struct tnode *)tp, cindex, (struct node *)l); + } else { + /* Case 3: n is a LEAF or a TNODE and the key doesn't match. */ /* * Add a new tnode here * first tnode need some special handling */ if (tp) - pos=tp->pos+tp->bits; + pos = tp->pos+tp->bits; else - pos=0; + pos = 0; + if (n) { newpos = tkey_mismatch(key, pos, n->key); tn = tnode_new(n->key, newpos, 1); - } - else { + } else { newpos = 0; tn = tnode_new(key, newpos, 1); /* First tnode */ } @@ -1151,32 +1113,32 @@ fib_insert_node(struct trie *t, int *err, u32 key, int plen) tnode_free((struct tnode *) l); *err = -ENOMEM; goto err; - } - + } + NODE_SET_PARENT(tn, tp); - missbit=tkey_extract_bits(key, newpos, 1); + missbit = tkey_extract_bits(key, newpos, 1); put_child(t, tn, missbit, (struct node *)l); put_child(t, tn, 1-missbit, n); if (tp) { cindex = tkey_extract_bits(key, tp->pos, tp->bits); put_child(t, (struct tnode *)tp, cindex, (struct node *)tn); - } - else { + } else { t->trie = (struct node*) tn; /* First tnode */ tp = tn; } } - if (tp && tp->pos+tp->bits > 32) { + + if (tp && tp->pos + tp->bits > 32) printk("ERROR tp=%p pos=%d, bits=%d, key=%0x plen=%d\n", tp, 
tp->pos, tp->bits, key, plen); - } + /* Rebalance the trie */ t->trie = trie_rebalance(t, tp); done: t->revision++; -err:; +err: return fa_head; } @@ -1204,17 +1166,18 @@ fn_trie_insert(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta, key = ntohl(key); - if (trie_debug) - printk("Insert table=%d %08x/%d\n", tb->tb_id, key, plen); + DBG("Insert table=%d %08x/%d\n", tb->tb_id, key, plen); - mask = ntohl( inet_make_mask(plen) ); + mask = ntohl(inet_make_mask(plen)); if (key & ~mask) return -EINVAL; key = key & mask; - if ((fi = fib_create_info(r, rta, nlhdr, &err)) == NULL) + fi = fib_create_info(r, rta, nlhdr, &err); + + if (!fi) goto err; l = fib_find_node(t, key); @@ -1236,8 +1199,7 @@ fn_trie_insert(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta, * and we need to allocate a new one of those as well. */ - if (fa && - fa->fa_info->fib_priority == fi->fib_priority) { + if (fa && fa->fa_info->fib_priority == fi->fib_priority) { struct fib_alias *fa_orig; err = -EEXIST; @@ -1261,9 +1223,9 @@ fn_trie_insert(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta, fib_release_info(fi_drop); if (state & FA_S_ACCESSED) - rt_cache_flush(-1); + rt_cache_flush(-1); - goto succeeded; + goto succeeded; } /* Error if we find a perfect match which * uses the same scope, type, and nexthop @@ -1285,7 +1247,7 @@ fn_trie_insert(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta, fa = fa_orig; } err = -ENOENT; - if (!(nlhdr->nlmsg_flags&NLM_F_CREATE)) + if (!(nlhdr->nlmsg_flags & NLM_F_CREATE)) goto out; err = -ENOBUFS; @@ -1298,9 +1260,6 @@ fn_trie_insert(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta, new_fa->fa_type = type; new_fa->fa_scope = r->rtm_scope; new_fa->fa_state = 0; -#if 0 - new_fa->dst = NULL; -#endif /* * Insert new entry to the list. */ @@ -1314,8 +1273,7 @@ fn_trie_insert(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta, write_lock_bh(&fib_lock); - list_add_tail(&new_fa->fa_list, - (fa ? 
&fa->fa_list : fa_head)); + list_add_tail(&new_fa->fa_list, (fa ? &fa->fa_list : fa_head)); write_unlock_bh(&fib_lock); @@ -1328,7 +1286,7 @@ out_free_new_fa: kmem_cache_free(fn_alias_kmem, new_fa); out: fib_release_info(fi); -err:; +err: return err; } @@ -1342,7 +1300,6 @@ static inline int check_leaf(struct trie *t, struct leaf *l, t_key key, int *pl struct hlist_node *node; hlist_for_each_entry(li, node, hhead, hlist) { - i = li->plen; mask = ntohl(inet_make_mask(i)); if (l->key != (key & mask)) @@ -1370,13 +1327,18 @@ fn_trie_lookup(struct fib_table *tb, const struct flowi *flp, struct fib_result struct node *n; struct tnode *pn; int pos, bits; - t_key key=ntohl(flp->fl4_dst); + t_key key = ntohl(flp->fl4_dst); int chopped_off; t_key cindex = 0; int current_prefix_length = KEYLENGTH; + struct tnode *cn; + t_key node_prefix, key_prefix, pref_mismatch; + int mp; + n = t->trie; read_lock(&fib_lock); + if (!n) goto failed; @@ -1393,8 +1355,7 @@ fn_trie_lookup(struct fib_table *tb, const struct flowi *flp, struct fib_result pn = (struct tnode *) n; chopped_off = 0; - while (pn) { - + while (pn) { pos = pn->pos; bits = pn->bits; @@ -1410,130 +1371,129 @@ fn_trie_lookup(struct fib_table *tb, const struct flowi *flp, struct fib_result goto backtrace; } - if (IS_TNODE(n)) { -#define HL_OPTIMIZE -#ifdef HL_OPTIMIZE - struct tnode *cn = (struct tnode *)n; - t_key node_prefix, key_prefix, pref_mismatch; - int mp; - - /* - * It's a tnode, and we can do some extra checks here if we - * like, to avoid descending into a dead-end branch. - * This tnode is in the parent's child array at index - * key[p_pos..p_pos+p_bits] but potentially with some bits - * chopped off, so in reality the index may be just a - * subprefix, padded with zero at the end. 
- * We can also take a look at any skipped bits in this - * tnode - everything up to p_pos is supposed to be ok, - * and the non-chopped bits of the index (se previous - * paragraph) are also guaranteed ok, but the rest is - * considered unknown. - * - * The skipped bits are key[pos+bits..cn->pos]. - */ - - /* If current_prefix_length < pos+bits, we are already doing - * actual prefix matching, which means everything from - * pos+(bits-chopped_off) onward must be zero along some - * branch of this subtree - otherwise there is *no* valid - * prefix present. Here we can only check the skipped - * bits. Remember, since we have already indexed into the - * parent's child array, we know that the bits we chopped of - * *are* zero. - */ - - /* NOTA BENE: CHECKING ONLY SKIPPED BITS FOR THE NEW NODE HERE */ - - if (current_prefix_length < pos+bits) { - if (tkey_extract_bits(cn->key, current_prefix_length, - cn->pos - current_prefix_length) != 0 || - !(cn->child[0])) - goto backtrace; - } - - /* - * If chopped_off=0, the index is fully validated and we - * only need to look at the skipped bits for this, the new, - * tnode. What we actually want to do is to find out if - * these skipped bits match our key perfectly, or if we will - * have to count on finding a matching prefix further down, - * because if we do, we would like to have some way of - * verifying the existence of such a prefix at this point. - */ - - /* The only thing we can do at this point is to verify that - * any such matching prefix can indeed be a prefix to our - * key, and if the bits in the node we are inspecting that - * do not match our key are not ZERO, this cannot be true. - * Thus, find out where there is a mismatch (before cn->pos) - * and verify that all the mismatching bits are zero in the - * new tnode's key. - */ - - /* Note: We aren't very concerned about the piece of the key - * that precede pn->pos+pn->bits, since these have already been - * checked. 
The bits after cn->pos aren't checked since these are - * by definition "unknown" at this point. Thus, what we want to - * see is if we are about to enter the "prefix matching" state, - * and in that case verify that the skipped bits that will prevail - * throughout this subtree are zero, as they have to be if we are - * to find a matching prefix. - */ - - node_prefix = MASK_PFX(cn->key, cn->pos); - key_prefix = MASK_PFX(key, cn->pos); - pref_mismatch = key_prefix^node_prefix; - mp = 0; - - /* In short: If skipped bits in this node do not match the search - * key, enter the "prefix matching" state.directly. - */ - if (pref_mismatch) { - while (!(pref_mismatch & (1<<(KEYLENGTH-1)))) { - mp++; - pref_mismatch = pref_mismatch <<1; - } - key_prefix = tkey_extract_bits(cn->key, mp, cn->pos-mp); - - if (key_prefix != 0) - goto backtrace; - - if (current_prefix_length >= cn->pos) - current_prefix_length=mp; - } -#endif - pn = (struct tnode *)n; /* Descend */ - chopped_off = 0; - continue; - } if (IS_LEAF(n)) { if ((ret = check_leaf(t, (struct leaf *)n, key, &plen, flp, res)) <= 0) goto found; - } + else + goto backtrace; + } + +#define HL_OPTIMIZE +#ifdef HL_OPTIMIZE + cn = (struct tnode *)n; + + /* + * It's a tnode, and we can do some extra checks here if we + * like, to avoid descending into a dead-end branch. + * This tnode is in the parent's child array at index + * key[p_pos..p_pos+p_bits] but potentially with some bits + * chopped off, so in reality the index may be just a + * subprefix, padded with zero at the end. + * We can also take a look at any skipped bits in this + * tnode - everything up to p_pos is supposed to be ok, + * and the non-chopped bits of the index (se previous + * paragraph) are also guaranteed ok, but the rest is + * considered unknown. + * + * The skipped bits are key[pos+bits..cn->pos]. 
+ */ + + /* If current_prefix_length < pos+bits, we are already doing + * actual prefix matching, which means everything from + * pos+(bits-chopped_off) onward must be zero along some + * branch of this subtree - otherwise there is *no* valid + * prefix present. Here we can only check the skipped + * bits. Remember, since we have already indexed into the + * parent's child array, we know that the bits we chopped of + * *are* zero. + */ + + /* NOTA BENE: CHECKING ONLY SKIPPED BITS FOR THE NEW NODE HERE */ + + if (current_prefix_length < pos+bits) { + if (tkey_extract_bits(cn->key, current_prefix_length, + cn->pos - current_prefix_length) != 0 || + !(cn->child[0])) + goto backtrace; + } + + /* + * If chopped_off=0, the index is fully validated and we + * only need to look at the skipped bits for this, the new, + * tnode. What we actually want to do is to find out if + * these skipped bits match our key perfectly, or if we will + * have to count on finding a matching prefix further down, + * because if we do, we would like to have some way of + * verifying the existence of such a prefix at this point. + */ + + /* The only thing we can do at this point is to verify that + * any such matching prefix can indeed be a prefix to our + * key, and if the bits in the node we are inspecting that + * do not match our key are not ZERO, this cannot be true. + * Thus, find out where there is a mismatch (before cn->pos) + * and verify that all the mismatching bits are zero in the + * new tnode's key. + */ + + /* Note: We aren't very concerned about the piece of the key + * that precede pn->pos+pn->bits, since these have already been + * checked. The bits after cn->pos aren't checked since these are + * by definition "unknown" at this point. 
Thus, what we want to + * see is if we are about to enter the "prefix matching" state, + * and in that case verify that the skipped bits that will prevail + * throughout this subtree are zero, as they have to be if we are + * to find a matching prefix. + */ + + node_prefix = MASK_PFX(cn->key, cn->pos); + key_prefix = MASK_PFX(key, cn->pos); + pref_mismatch = key_prefix^node_prefix; + mp = 0; + + /* In short: If skipped bits in this node do not match the search + * key, enter the "prefix matching" state.directly. + */ + if (pref_mismatch) { + while (!(pref_mismatch & (1<<(KEYLENGTH-1)))) { + mp++; + pref_mismatch = pref_mismatch <<1; + } + key_prefix = tkey_extract_bits(cn->key, mp, cn->pos-mp); + + if (key_prefix != 0) + goto backtrace; + + if (current_prefix_length >= cn->pos) + current_prefix_length = mp; + } +#endif + pn = (struct tnode *)n; /* Descend */ + chopped_off = 0; + continue; + backtrace: chopped_off++; /* As zero don't change the child key (cindex) */ - while ((chopped_off <= pn->bits) && !(cindex & (1<<(chopped_off-1)))) { + while ((chopped_off <= pn->bits) && !(cindex & (1<<(chopped_off-1)))) chopped_off++; - } /* Decrease current_... with bits chopped off */ if (current_prefix_length > pn->pos + pn->bits - chopped_off) current_prefix_length = pn->pos + pn->bits - chopped_off; - + /* * Either we do the actual chop off according or if we have * chopped off all bits in this tnode walk up to our parent. 
*/ - if (chopped_off <= pn->bits) + if (chopped_off <= pn->bits) { cindex &= ~(1 << (chopped_off-1)); - else { + } else { if (NODE_PARENT(pn) == NULL) goto failed; - + /* Get Child's index */ cindex = tkey_extract_bits(pn->key, NODE_PARENT(pn)->pos, NODE_PARENT(pn)->bits); pn = NODE_PARENT(pn); @@ -1559,24 +1519,23 @@ static int trie_leaf_remove(struct trie *t, t_key key) struct node *n = t->trie; struct leaf *l; - if (trie_debug) - printk("entering trie_leaf_remove(%p)\n", n); + DBG("entering trie_leaf_remove(%p)\n", n); /* Note that in the case skipped bits, those bits are *not* checked! * When we finish this, we will have NULL or a T_LEAF, and the * T_LEAF may or may not match our key. */ - while (n != NULL && IS_TNODE(n)) { + while (n != NULL && IS_TNODE(n)) { struct tnode *tn = (struct tnode *) n; check_tnode(tn); n = tnode_get_child(tn ,tkey_extract_bits(key, tn->pos, tn->bits)); - if (n && NODE_PARENT(n) != tn) { - printk("BUG tn=%p, n->parent=%p\n", tn, NODE_PARENT(n)); - BUG(); - } - } + if (n && NODE_PARENT(n) != tn) { + printk("BUG tn=%p, n->parent=%p\n", tn, NODE_PARENT(n)); + BUG(); + } + } l = (struct leaf *) n; if (!n || !tkey_equals(l->key, key)) @@ -1597,8 +1556,7 @@ static int trie_leaf_remove(struct trie *t, t_key key) cindex = tkey_extract_bits(key, tp->pos, tp->bits); put_child(t, (struct tnode *)tp, cindex, NULL); t->trie = trie_rebalance(t, tp); - } - else + } else t->trie = NULL; return 1; @@ -1606,7 +1564,7 @@ static int trie_leaf_remove(struct trie *t, t_key key) static int fn_trie_delete(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta, - struct nlmsghdr *nlhdr, struct netlink_skb_parms *req) + struct nlmsghdr *nlhdr, struct netlink_skb_parms *req) { struct trie *t = (struct trie *) tb->tb_data; u32 key, mask; @@ -1615,6 +1573,9 @@ fn_trie_delete(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta, struct fib_alias *fa, *fa_to_delete; struct list_head *fa_head; struct leaf *l; + int kill_li = 0; + struct leaf_info *li; 
+ if (plen > 32) return -EINVAL; @@ -1624,7 +1585,7 @@ fn_trie_delete(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta, memcpy(&key, rta->rta_dst, 4); key = ntohl(key); - mask = ntohl( inet_make_mask(plen) ); + mask = ntohl(inet_make_mask(plen)); if (key & ~mask) return -EINVAL; @@ -1641,8 +1602,7 @@ fn_trie_delete(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta, if (!fa) return -ESRCH; - if (trie_debug) - printk("Deleting %08x/%d tos=%d t=%p\n", key, plen, tos, t); + DBG("Deleting %08x/%d tos=%d t=%p\n", key, plen, tos, t); fa_to_delete = NULL; fa_head = fa->fa_list.prev; @@ -1664,39 +1624,36 @@ fn_trie_delete(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta, } } - if (fa_to_delete) { - int kill_li = 0; - struct leaf_info *li; + if (!fa_to_delete) + return -ESRCH; - fa = fa_to_delete; - rtmsg_fib(RTM_DELROUTE, htonl(key), fa, plen, tb->tb_id, nlhdr, req); + fa = fa_to_delete; + rtmsg_fib(RTM_DELROUTE, htonl(key), fa, plen, tb->tb_id, nlhdr, req); - l = fib_find_node(t, key); - li = find_leaf_info(&l->list, plen); + l = fib_find_node(t, key); + li = find_leaf_info(&l->list, plen); - write_lock_bh(&fib_lock); + write_lock_bh(&fib_lock); - list_del(&fa->fa_list); + list_del(&fa->fa_list); - if (list_empty(fa_head)) { - hlist_del(&li->hlist); - kill_li = 1; - } - write_unlock_bh(&fib_lock); - - if (kill_li) - free_leaf_info(li); - - if (hlist_empty(&l->list)) - trie_leaf_remove(t, key); - - if (fa->fa_state & FA_S_ACCESSED) - rt_cache_flush(-1); - - fn_free_alias(fa); - return 0; + if (list_empty(fa_head)) { + hlist_del(&li->hlist); + kill_li = 1; } - return -ESRCH; + write_unlock_bh(&fib_lock); + + if (kill_li) + free_leaf_info(li); + + if (hlist_empty(&l->list)) + trie_leaf_remove(t, key); + + if (fa->fa_state & FA_S_ACCESSED) + rt_cache_flush(-1); + + fn_free_alias(fa); + return 0; } static int trie_flush_list(struct trie *t, struct list_head *head) @@ -1706,9 +1663,8 @@ static int trie_flush_list(struct trie *t, struct list_head 
*head) list_for_each_entry_safe(fa, fa_node, head, fa_list) { struct fib_info *fi = fa->fa_info; - - if (fi && (fi->fib_flags&RTNH_F_DEAD)) { + if (fi && (fi->fib_flags&RTNH_F_DEAD)) { write_lock_bh(&fib_lock); list_del(&fa->fa_list); write_unlock_bh(&fib_lock); @@ -1728,11 +1684,9 @@ static int trie_flush_leaf(struct trie *t, struct leaf *l) struct leaf_info *li = NULL; hlist_for_each_entry_safe(li, node, tmp, lih, hlist) { - found += trie_flush_list(t, &li->falh); if (list_empty(&li->falh)) { - write_lock_bh(&fib_lock); hlist_del(&li->hlist); write_unlock_bh(&fib_lock); @@ -1757,8 +1711,7 @@ static struct leaf *nextleaf(struct trie *t, struct leaf *thisleaf) return (struct leaf *) t->trie; p = (struct tnode*) t->trie; /* Start */ - } - else + } else p = (struct tnode *) NODE_PARENT(c); while (p) { @@ -1771,29 +1724,28 @@ static struct leaf *nextleaf(struct trie *t, struct leaf *thisleaf) pos = 0; last = 1 << p->bits; - for(idx = pos; idx < last ; idx++) { - if (p->child[idx]) { + for (idx = pos; idx < last ; idx++) { + if (!p->child[idx]) + continue; - /* Decend if tnode */ + /* Decend if tnode */ + while (IS_TNODE(p->child[idx])) { + p = (struct tnode*) p->child[idx]; + idx = 0; - while (IS_TNODE(p->child[idx])) { - p = (struct tnode*) p->child[idx]; - idx = 0; - - /* Rightmost non-NULL branch */ - if (p && IS_TNODE(p)) - while (p->child[idx] == NULL && idx < (1 << p->bits)) idx++; + /* Rightmost non-NULL branch */ + if (p && IS_TNODE(p)) + while (p->child[idx] == NULL && idx < (1 << p->bits)) idx++; - /* Done with this tnode? */ - if (idx >= (1 << p->bits) || p->child[idx] == NULL ) - goto up; - } - return (struct leaf*) p->child[idx]; + /* Done with this tnode? */ + if (idx >= (1 << p->bits) || p->child[idx] == NULL) + goto up; } + return (struct leaf*) p->child[idx]; } up: /* No more children go up one step */ - c = (struct node*) p; + c = (struct node *) p; p = (struct tnode *) NODE_PARENT(p); } return NULL; /* Ready. 
Root of trie */ @@ -1807,7 +1759,7 @@ static int fn_trie_flush(struct fib_table *tb) t->revision++; - for (h=0; (l = nextleaf(t, l)) != NULL; h++) { + for (h = 0; (l = nextleaf(t, l)) != NULL; h++) { found += trie_flush_leaf(t, l); if (ll && hlist_empty(&ll->list)) @@ -1818,12 +1770,11 @@ static int fn_trie_flush(struct fib_table *tb) if (ll && hlist_empty(&ll->list)) trie_leaf_remove(t, ll->key); - if (trie_debug) - printk("trie_flush found=%d\n", found); + DBG("trie_flush found=%d\n", found); return found; } -static int trie_last_dflt=-1; +static int trie_last_dflt = -1; static void fn_trie_select_default(struct fib_table *tb, const struct flowi *flp, struct fib_result *res) @@ -1855,18 +1806,18 @@ fn_trie_select_default(struct fib_table *tb, const struct flowi *flp, struct fib list_for_each_entry(fa, fa_head, fa_list) { struct fib_info *next_fi = fa->fa_info; - + if (fa->fa_scope != res->scope || fa->fa_type != RTN_UNICAST) continue; - + if (next_fi->fib_priority > res->fi->fib_priority) break; if (!next_fi->fib_nh[0].nh_gw || next_fi->fib_nh[0].nh_scope != RT_SCOPE_LINK) continue; fa->fa_state |= FA_S_ACCESSED; - + if (fi == NULL) { if (next_fi != res->fi) break; @@ -1913,9 +1864,9 @@ static int fn_trie_dump_fa(t_key key, int plen, struct list_head *fah, struct fi int i, s_i; struct fib_alias *fa; - u32 xkey=htonl(key); + u32 xkey = htonl(key); - s_i=cb->args[3]; + s_i = cb->args[3]; i = 0; list_for_each_entry(fa, fah, fa_list) { @@ -1946,10 +1897,10 @@ static int fn_trie_dump_fa(t_key key, int plen, struct list_head *fah, struct fi fa->fa_info, 0) < 0) { cb->args[3] = i; return -1; - } + } i++; } - cb->args[3]=i; + cb->args[3] = i; return skb->len; } @@ -1959,10 +1910,10 @@ static int fn_trie_dump_plen(struct trie *t, int plen, struct fib_table *tb, str int h, s_h; struct list_head *fa_head; struct leaf *l = NULL; - s_h=cb->args[2]; - for (h=0; (l = nextleaf(t, l)) != NULL; h++) { + s_h = cb->args[2]; + for (h = 0; (l = nextleaf(t, l)) != NULL; h++) { if (h < 
s_h) continue; if (h > s_h) @@ -1970,7 +1921,7 @@ static int fn_trie_dump_plen(struct trie *t, int plen, struct fib_table *tb, str sizeof(cb->args) - 3*sizeof(cb->args[0])); fa_head = get_fa_head(l, plen); - + if (!fa_head) continue; @@ -1978,11 +1929,11 @@ static int fn_trie_dump_plen(struct trie *t, int plen, struct fib_table *tb, str continue; if (fn_trie_dump_fa(l->key, plen, fa_head, tb, skb, cb)<0) { - cb->args[2]=h; + cb->args[2] = h; return -1; } } - cb->args[2]=h; + cb->args[2] = h; return skb->len; } @@ -1994,13 +1945,12 @@ static int fn_trie_dump(struct fib_table *tb, struct sk_buff *skb, struct netlin s_m = cb->args[1]; read_lock(&fib_lock); - for (m=0; m<=32; m++) { - + for (m = 0; m <= 32; m++) { if (m < s_m) continue; if (m > s_m) memset(&cb->args[2], 0, - sizeof(cb->args) - 2*sizeof(cb->args[0])); + sizeof(cb->args) - 2*sizeof(cb->args[0])); if (fn_trie_dump_plen(t, 32-m, tb, skb, cb)<0) { cb->args[1] = m; @@ -2010,7 +1960,7 @@ static int fn_trie_dump(struct fib_table *tb, struct sk_buff *skb, struct netlin read_unlock(&fib_lock); cb->args[1] = m; return skb->len; - out: +out: read_unlock(&fib_lock); return -1; } @@ -2051,9 +2001,9 @@ struct fib_table * __init fib_hash_init(int id) trie_init(t); if (id == RT_TABLE_LOCAL) - trie_local = t; + trie_local = t; else if (id == RT_TABLE_MAIN) - trie_main = t; + trie_main = t; if (id == RT_TABLE_LOCAL) printk("IPv4 FIB: Using LC-trie version %s\n", VERSION); @@ -2065,7 +2015,8 @@ struct fib_table * __init fib_hash_init(int id) static void putspace_seq(struct seq_file *seq, int n) { - while (n--) seq_printf(seq, " "); + while (n--) + seq_printf(seq, " "); } static void printbin_seq(struct seq_file *seq, unsigned int v, int bits) @@ -2086,29 +2037,22 @@ static void printnode_seq(struct seq_file *seq, int indent, struct node *n, seq_printf(seq, "%d/", cindex); printbin_seq(seq, cindex, bits); seq_printf(seq, ": "); - } - else + } else seq_printf(seq, ": "); seq_printf(seq, "%s:%p ", IS_LEAF(n)?"Leaf":"Internal 
node", n); - if (IS_LEAF(n)) - seq_printf(seq, "key=%d.%d.%d.%d\n", - n->key >> 24, (n->key >> 16) % 256, (n->key >> 8) % 256, n->key % 256); - else { - int plen = ((struct tnode *)n)->pos; - t_key prf=MASK_PFX(n->key, plen); - seq_printf(seq, "key=%d.%d.%d.%d/%d\n", - prf >> 24, (prf >> 16) % 256, (prf >> 8) % 256, prf % 256, plen); - } if (IS_LEAF(n)) { - struct leaf *l=(struct leaf *)n; + struct leaf *l = (struct leaf *)n; struct fib_alias *fa; int i; - for (i=32; i>=0; i--) - if (find_leaf_info(&l->list, i)) { - + + seq_printf(seq, "key=%d.%d.%d.%d\n", + n->key >> 24, (n->key >> 16) % 256, (n->key >> 8) % 256, n->key % 256); + + for (i = 32; i >= 0; i--) + if (find_leaf_info(&l->list, i)) { struct list_head *fa_head = get_fa_head(l, i); - + if (!fa_head) continue; @@ -2118,17 +2062,16 @@ static void printnode_seq(struct seq_file *seq, int indent, struct node *n, putspace_seq(seq, indent+2); seq_printf(seq, "{/%d...dumping}\n", i); - list_for_each_entry(fa, fa_head, fa_list) { putspace_seq(seq, indent+2); - if (fa->fa_info->fib_nh == NULL) { - seq_printf(seq, "Error _fib_nh=NULL\n"); - continue; - } if (fa->fa_info == NULL) { seq_printf(seq, "Error fa_info=NULL\n"); continue; } + if (fa->fa_info->fib_nh == NULL) { + seq_printf(seq, "Error _fib_nh=NULL\n"); + continue; + } seq_printf(seq, "{type=%d scope=%d TOS=%d}\n", fa->fa_type, @@ -2136,11 +2079,16 @@ static void printnode_seq(struct seq_file *seq, int indent, struct node *n, fa->fa_tos); } } - } - else if (IS_TNODE(n)) { + } else { struct tnode *tn = (struct tnode *)n; + int plen = ((struct tnode *)n)->pos; + t_key prf = MASK_PFX(n->key, plen); + + seq_printf(seq, "key=%d.%d.%d.%d/%d\n", + prf >> 24, (prf >> 16) % 256, (prf >> 8) % 256, prf % 256, plen); + putspace_seq(seq, indent); seq_printf(seq, "| "); - seq_printf(seq, "{key prefix=%08x/", tn->key&TKEY_GET_MASK(0, tn->pos)); + seq_printf(seq, "{key prefix=%08x/", tn->key & TKEY_GET_MASK(0, tn->pos)); printbin_seq(seq, tkey_extract_bits(tn->key, 0, 
tn->pos), tn->pos); seq_printf(seq, "}\n"); putspace_seq(seq, indent); seq_printf(seq, "| "); @@ -2155,100 +2103,103 @@ static void printnode_seq(struct seq_file *seq, int indent, struct node *n, static void trie_dump_seq(struct seq_file *seq, struct trie *t) { struct node *n = t->trie; - int cindex=0; - int indent=1; - int pend=0; + int cindex = 0; + int indent = 1; + int pend = 0; int depth = 0; + struct tnode *tn; read_lock(&fib_lock); seq_printf(seq, "------ trie_dump of t=%p ------\n", t); - if (n) { - printnode_seq(seq, indent, n, pend, cindex, 0); - if (IS_TNODE(n)) { - struct tnode *tn = (struct tnode *)n; - pend = tn->pos+tn->bits; - putspace_seq(seq, indent); seq_printf(seq, "\\--\n"); - indent += 3; - depth++; - while (tn && cindex < (1 << tn->bits)) { - if (tn->child[cindex]) { - - /* Got a child */ - - printnode_seq(seq, indent, tn->child[cindex], pend, cindex, tn->bits); - if (IS_LEAF(tn->child[cindex])) { - cindex++; - - } - else { - /* - * New tnode. Decend one level - */ - - depth++; - n = tn->child[cindex]; - tn = (struct tnode *)n; - pend = tn->pos+tn->bits; - putspace_seq(seq, indent); seq_printf(seq, "\\--\n"); - indent+=3; - cindex=0; - } - } - else - cindex++; + if (!n) { + seq_printf(seq, "------ trie is empty\n"); - /* - * Test if we are done - */ - - while (cindex >= (1 << tn->bits)) { - - /* - * Move upwards and test for root - * pop off all traversed nodes - */ - - if (NODE_PARENT(tn) == NULL) { - tn = NULL; - n = NULL; - break; - } - else { - cindex = tkey_extract_bits(tn->key, NODE_PARENT(tn)->pos, NODE_PARENT(tn)->bits); - tn = NODE_PARENT(tn); - cindex++; - n = (struct node *)tn; - pend = tn->pos+tn->bits; - indent-=3; - depth--; - } - } - } - } - else n = NULL; + read_unlock(&fib_lock); + return; + } + + printnode_seq(seq, indent, n, pend, cindex, 0); + + if (!IS_TNODE(n)) { + read_unlock(&fib_lock); + return; + } + + tn = (struct tnode *)n; + pend = tn->pos+tn->bits; + putspace_seq(seq, indent); seq_printf(seq, "\\--\n"); + indent 
+= 3; + depth++; + + while (tn && cindex < (1 << tn->bits)) { + if (tn->child[cindex]) { + /* Got a child */ + + printnode_seq(seq, indent, tn->child[cindex], pend, cindex, tn->bits); + if (IS_LEAF(tn->child[cindex])) { + cindex++; + } else { + /* + * New tnode. Decend one level + */ + + depth++; + tn = (struct tnode *)tn->child[cindex]; + pend = tn->pos + tn->bits; + putspace_seq(seq, indent); seq_printf(seq, "\\--\n"); + indent += 3; + cindex = 0; + } + } else + cindex++; + + /* + * Test if we are done + */ + + while (cindex >= (1 << tn->bits)) { + /* + * Move upwards and test for root + * pop off all traversed nodes + */ + + if (NODE_PARENT(tn) == NULL) { + tn = NULL; + break; + } + + cindex = tkey_extract_bits(tn->key, NODE_PARENT(tn)->pos, NODE_PARENT(tn)->bits); + cindex++; + tn = NODE_PARENT(tn); + pend = tn->pos + tn->bits; + indent -= 3; + depth--; + } } - else seq_printf(seq, "------ trie is empty\n"); read_unlock(&fib_lock); } static struct trie_stat *trie_stat_new(void) { - struct trie_stat *s = kmalloc(sizeof(struct trie_stat), GFP_KERNEL); + struct trie_stat *s; int i; - if (s) { - s->totdepth = 0; - s->maxdepth = 0; - s->tnodes = 0; - s->leaves = 0; - s->nullpointers = 0; - - for(i=0; i< MAX_CHILDS; i++) - s->nodesizes[i] = 0; - } + s = kmalloc(sizeof(struct trie_stat), GFP_KERNEL); + if (!s) + return NULL; + + s->totdepth = 0; + s->maxdepth = 0; + s->tnodes = 0; + s->leaves = 0; + s->nullpointers = 0; + + for (i = 0; i < MAX_CHILDS; i++) + s->nodesizes[i] = 0; + return s; } @@ -2257,91 +2208,81 @@ static struct trie_stat *trie_collect_stats(struct trie *t) struct node *n = t->trie; struct trie_stat *s = trie_stat_new(); int cindex = 0; - int indent = 1; int pend = 0; int depth = 0; - read_lock(&fib_lock); + if (!s) + return NULL; + if (!n) + return s; - if (s) { - if (n) { - if (IS_TNODE(n)) { - struct tnode *tn = (struct tnode *)n; - pend = tn->pos+tn->bits; - indent += 3; - s->nodesizes[tn->bits]++; - depth++; + read_lock(&fib_lock); - while (tn 
&& cindex < (1 << tn->bits)) { - if (tn->child[cindex]) { - /* Got a child */ - - if (IS_LEAF(tn->child[cindex])) { - cindex++; - - /* stats */ - if (depth > s->maxdepth) - s->maxdepth = depth; - s->totdepth += depth; - s->leaves++; - } - - else { - /* - * New tnode. Decend one level - */ - - s->tnodes++; - s->nodesizes[tn->bits]++; - depth++; - - n = tn->child[cindex]; - tn = (struct tnode *)n; - pend = tn->pos+tn->bits; + if (IS_TNODE(n)) { + struct tnode *tn = (struct tnode *)n; + pend = tn->pos+tn->bits; + s->nodesizes[tn->bits]++; + depth++; - indent += 3; - cindex = 0; - } - } - else { - cindex++; - s->nullpointers++; - } + while (tn && cindex < (1 << tn->bits)) { + if (tn->child[cindex]) { + /* Got a child */ + if (IS_LEAF(tn->child[cindex])) { + cindex++; + + /* stats */ + if (depth > s->maxdepth) + s->maxdepth = depth; + s->totdepth += depth; + s->leaves++; + } else { /* - * Test if we are done + * New tnode. Decend one level */ - - while (cindex >= (1 << tn->bits)) { - /* - * Move upwards and test for root - * pop off all traversed nodes - */ + s->tnodes++; + s->nodesizes[tn->bits]++; + depth++; - - if (NODE_PARENT(tn) == NULL) { - tn = NULL; - n = NULL; - break; - } - else { - cindex = tkey_extract_bits(tn->key, NODE_PARENT(tn)->pos, NODE_PARENT(tn)->bits); - tn = NODE_PARENT(tn); - cindex++; - n = (struct node *)tn; - pend = tn->pos+tn->bits; - indent -= 3; - depth--; - } - } + n = tn->child[cindex]; + tn = (struct tnode *)n; + pend = tn->pos+tn->bits; + + cindex = 0; } + } else { + cindex++; + s->nullpointers++; } - else n = NULL; + + /* + * Test if we are done + */ + + while (cindex >= (1 << tn->bits)) { + /* + * Move upwards and test for root + * pop off all traversed nodes + */ + + if (NODE_PARENT(tn) == NULL) { + tn = NULL; + n = NULL; + break; + } + + cindex = tkey_extract_bits(tn->key, NODE_PARENT(tn)->pos, NODE_PARENT(tn)->bits); + tn = NODE_PARENT(tn); + cindex++; + n = (struct node *)tn; + pend = tn->pos+tn->bits; + depth--; + } } } - 
read_unlock(&fib_lock); + read_unlock(&fib_lock); return s; } @@ -2359,17 +2300,22 @@ static struct fib_alias *fib_triestat_get_next(struct seq_file *seq) static void *fib_triestat_seq_start(struct seq_file *seq, loff_t *pos) { - void *v = NULL; + if (!ip_fib_main_table) + return NULL; - if (ip_fib_main_table) - v = *pos ? fib_triestat_get_next(seq) : SEQ_START_TOKEN; - return v; + if (*pos) + return fib_triestat_get_next(seq); + else + return SEQ_START_TOKEN; } static void *fib_triestat_seq_next(struct seq_file *seq, void *v, loff_t *pos) { ++*pos; - return v == SEQ_START_TOKEN ? fib_triestat_get_first(seq) : fib_triestat_get_next(seq); + if (v == SEQ_START_TOKEN) + return fib_triestat_get_first(seq); + else + return fib_triestat_get_next(seq); } static void fib_triestat_seq_stop(struct seq_file *seq, void *v) @@ -2388,22 +2334,22 @@ static void collect_and_show(struct trie *t, struct seq_file *seq) { int bytes = 0; /* How many bytes are used, a ref is 4 bytes */ int i, max, pointers; - struct trie_stat *stat; + struct trie_stat *stat; int avdepth; stat = trie_collect_stats(t); - bytes=0; + bytes = 0; seq_printf(seq, "trie=%p\n", t); if (stat) { if (stat->leaves) - avdepth=stat->totdepth*100 / stat->leaves; + avdepth = stat->totdepth*100 / stat->leaves; else - avdepth=0; - seq_printf(seq, "Aver depth: %d.%02d\n", avdepth / 100, avdepth % 100 ); + avdepth = 0; + seq_printf(seq, "Aver depth: %d.%02d\n", avdepth / 100, avdepth % 100); seq_printf(seq, "Max depth: %4d\n", stat->maxdepth); - + seq_printf(seq, "Leaves: %d\n", stat->leaves); bytes += sizeof(struct leaf) * stat->leaves; seq_printf(seq, "Internal nodes: %d\n", stat->tnodes); @@ -2455,11 +2401,9 @@ static int fib_triestat_seq_show(struct seq_file *seq, void *v) if (trie_main) collect_and_show(trie_main, seq); - } - else { - snprintf(bf, sizeof(bf), - "*\t%08X\t%08X", 200, 400); - + } else { + snprintf(bf, sizeof(bf), "*\t%08X\t%08X", 200, 400); + seq_printf(seq, "%-127s\n", bf); } return 0; @@ -2520,22 
+2464,27 @@ static struct fib_alias *fib_trie_get_next(struct seq_file *seq) static void *fib_trie_seq_start(struct seq_file *seq, loff_t *pos) { - void *v = NULL; + if (!ip_fib_main_table) + return NULL; - if (ip_fib_main_table) - v = *pos ? fib_trie_get_next(seq) : SEQ_START_TOKEN; - return v; + if (*pos) + return fib_trie_get_next(seq); + else + return SEQ_START_TOKEN; } static void *fib_trie_seq_next(struct seq_file *seq, void *v, loff_t *pos) { ++*pos; - return v == SEQ_START_TOKEN ? fib_trie_get_first(seq) : fib_trie_get_next(seq); + if (v == SEQ_START_TOKEN) + return fib_trie_get_first(seq); + else + return fib_trie_get_next(seq); + } static void fib_trie_seq_stop(struct seq_file *seq, void *v) { - } /* @@ -2555,9 +2504,7 @@ static int fib_trie_seq_show(struct seq_file *seq, void *v) if (trie_main) trie_dump_seq(seq, trie_main); - } - - else { + } else { snprintf(bf, sizeof(bf), "*\t%08X\t%08X", 200, 400); seq_printf(seq, "%-127s\n", bf); From 2f80b3c8262d0d646812f776db024d88d569a0c1 Mon Sep 17 00:00:00 2001 From: Robert Olsson Date: Tue, 9 Aug 2005 20:25:06 -0700 Subject: [PATCH 352/584] [IPV4]: fib_trie: Use ERR_PTR to handle errno return Signed-off-by: Robert Olsson Signed-off-by: Stephen Hemminger Signed-off-by: David S. 
Miller --- net/ipv4/fib_trie.c | 119 +++++++++++++++++++++----------------------- 1 file changed, 57 insertions(+), 62 deletions(-) diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index 6f818cc7efd0..914a4c2aae42 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -167,8 +167,8 @@ static void put_child(struct trie *t, struct tnode *tn, int i, struct node *n); static void tnode_put_child_reorg(struct tnode *tn, int i, struct node *n, int wasfull); static int tnode_child_length(struct tnode *tn); static struct node *resize(struct trie *t, struct tnode *tn); -static struct tnode *inflate(struct trie *t, struct tnode *tn, int *err); -static struct tnode *halve(struct trie *t, struct tnode *tn, int *err); +static struct tnode *inflate(struct trie *t, struct tnode *tn); +static struct tnode *halve(struct trie *t, struct tnode *tn); static void tnode_free(struct tnode *tn); static void trie_dump_seq(struct seq_file *seq, struct trie *t); extern struct fib_alias *fib_find_alias(struct list_head *fah, u8 tos, u32 prio); @@ -457,6 +457,7 @@ static struct node *resize(struct trie *t, struct tnode *tn) { int i; int err = 0; + struct tnode *old_tn; if (!tn) return NULL; @@ -559,9 +560,10 @@ static struct node *resize(struct trie *t, struct tnode *tn) 50 * (tn->full_children + tnode_child_length(tn) - tn->empty_children) >= inflate_threshold * tnode_child_length(tn))) { - tn = inflate(t, tn, &err); - - if (err) { + old_tn = tn; + tn = inflate(t, tn); + if (IS_ERR(tn)) { + tn = old_tn; #ifdef CONFIG_IP_FIB_TRIE_STATS t->stats.resize_node_skipped++; #endif @@ -581,9 +583,10 @@ static struct node *resize(struct trie *t, struct tnode *tn) 100 * (tnode_child_length(tn) - tn->empty_children) < halve_threshold * tnode_child_length(tn)) { - tn = halve(t, tn, &err); - - if (err) { + old_tn = tn; + tn = halve(t, tn); + if (IS_ERR(tn)) { + tn = old_tn; #ifdef CONFIG_IP_FIB_TRIE_STATS t->stats.resize_node_skipped++; #endif @@ -618,7 +621,7 @@ static struct node 
*resize(struct trie *t, struct tnode *tn) return (struct node *) tn; } -static struct tnode *inflate(struct trie *t, struct tnode *tn, int *err) +static struct tnode *inflate(struct trie *t, struct tnode *tn) { struct tnode *inode; struct tnode *oldtnode = tn; @@ -629,10 +632,8 @@ static struct tnode *inflate(struct trie *t, struct tnode *tn, int *err) tn = tnode_new(oldtnode->key, oldtnode->pos, oldtnode->bits + 1); - if (!tn) { - *err = -ENOMEM; - return oldtnode; - } + if (!tn) + return ERR_PTR(-ENOMEM); /* * Preallocate and store tnodes before the actual work so we @@ -653,39 +654,22 @@ static struct tnode *inflate(struct trie *t, struct tnode *tn, int *err) left = tnode_new(inode->key&(~m), inode->pos + 1, inode->bits - 1); - - if (!left) { - *err = -ENOMEM; - break; - } + if (!left) + goto nomem; right = tnode_new(inode->key|m, inode->pos + 1, inode->bits - 1); - if (!right) { - *err = -ENOMEM; - break; - } + if (!right) { + tnode_free(left); + goto nomem; + } put_child(t, tn, 2*i, (struct node *) left); put_child(t, tn, 2*i+1, (struct node *) right); } } - if (*err) { - int size = tnode_child_length(tn); - int j; - - for (j = 0; j < size; j++) - if (tn->child[j]) - tnode_free((struct tnode *)tn->child[j]); - - tnode_free(tn); - - *err = -ENOMEM; - return oldtnode; - } - for (i = 0; i < olen; i++) { struct node *node = tnode_get_child(oldtnode, i); struct tnode *left, *right; @@ -763,9 +747,22 @@ static struct tnode *inflate(struct trie *t, struct tnode *tn, int *err) } tnode_free(oldtnode); return tn; +nomem: + { + int size = tnode_child_length(tn); + int j; + + for(j = 0; j < size; j++) + if (tn->child[j]) + tnode_free((struct tnode *)tn->child[j]); + + tnode_free(tn); + + return ERR_PTR(-ENOMEM); + } } -static struct tnode *halve(struct trie *t, struct tnode *tn, int *err) +static struct tnode *halve(struct trie *t, struct tnode *tn) { struct tnode *oldtnode = tn; struct node *left, *right; @@ -776,10 +773,8 @@ static struct tnode *halve(struct trie *t, 
struct tnode *tn, int *err) tn = tnode_new(oldtnode->key, oldtnode->pos, oldtnode->bits - 1); - if (!tn) { - *err = -ENOMEM; - return oldtnode; - } + if (!tn) + return ERR_PTR(-ENOMEM); /* * Preallocate and store tnodes before the actual work so we @@ -794,29 +789,16 @@ static struct tnode *halve(struct trie *t, struct tnode *tn, int *err) /* Two nonempty children */ if (left && right) { - struct tnode *newBinNode = - tnode_new(left->key, tn->pos + tn->bits, 1); - - if (!newBinNode) { - *err = -ENOMEM; - break; - } - put_child(t, tn, i/2, (struct node *)newBinNode); + struct tnode *newn; + + newn = tnode_new(left->key, tn->pos + tn->bits, 1); + + if (!newn) + goto nomem; + + put_child(t, tn, i/2, (struct node *)newn); } - } - if (*err) { - int size = tnode_child_length(tn); - int j; - - for (j = 0; j < size; j++) - if (tn->child[j]) - tnode_free((struct tnode *)tn->child[j]); - - tnode_free(tn); - - *err = -ENOMEM; - return oldtnode; } for (i = 0; i < olen; i += 2) { @@ -850,6 +832,19 @@ static struct tnode *halve(struct trie *t, struct tnode *tn, int *err) } tnode_free(oldtnode); return tn; +nomem: + { + int size = tnode_child_length(tn); + int j; + + for(j = 0; j < size; j++) + if (tn->child[j]) + tnode_free((struct tnode *)tn->child[j]); + + tnode_free(tn); + + return ERR_PTR(-ENOMEM); + } } static void trie_init(struct trie *t) From bb435b8d816582064ee0ddb1e2a6fbca67f34108 Mon Sep 17 00:00:00 2001 From: Stephen Hemmigner Date: Tue, 9 Aug 2005 20:25:39 -0700 Subject: [PATCH 353/584] [IPV4]: fib_trie: Use const Use const where possible and get rid of EXTRACT() macro that was never used. Signed-off-by: Stephen Hemmigner Signed-off-by: Robert Olsson Signed-off-by: David S. 
Miller --- net/ipv4/fib_trie.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index 914a4c2aae42..395f64df6f9a 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -77,7 +77,6 @@ #undef CONFIG_IP_FIB_TRIE_STATS #define MAX_CHILDS 16384 -#define EXTRACT(p, n, str) ((str)<<(p)>>(32-(n))) #define KEYLENGTH (8*sizeof(t_key)) #define MASK_PFX(k, l) (((l)==0)?0:(k >> (KEYLENGTH-l)) << (KEYLENGTH-l)) #define TKEY_GET_MASK(offset, bits) (((bits)==0)?0:((t_key)(-1) << (KEYLENGTH - bits) >> offset)) @@ -162,10 +161,8 @@ static int trie_debug = 0; #define DBG(x...) do { if (trie_debug) printk(x); } while (0) -static int tnode_full(struct tnode *tn, struct node *n); static void put_child(struct trie *t, struct tnode *tn, int i, struct node *n); static void tnode_put_child_reorg(struct tnode *tn, int i, struct node *n, int wasfull); -static int tnode_child_length(struct tnode *tn); static struct node *resize(struct trie *t, struct tnode *tn); static struct tnode *inflate(struct trie *t, struct tnode *tn); static struct tnode *halve(struct trie *t, struct tnode *tn); @@ -188,7 +185,7 @@ static inline struct node *tnode_get_child(struct tnode *tn, int i) return tn->child[i]; } -static inline int tnode_child_length(struct tnode *tn) +static inline int tnode_child_length(const struct tnode *tn) { return 1 << tn->bits; } @@ -400,7 +397,7 @@ static void tnode_free(struct tnode *tn) * and no bits are skipped. See discussion in dyntree paper p. 6 */ -static inline int tnode_full(struct tnode *tn, struct node *n) +static inline int tnode_full(const struct tnode *tn, const struct node *n) { if (n == NULL || IS_LEAF(n)) return 0; From 5a47a470e602eecb168ddd3b78841b84ceddd319 Mon Sep 17 00:00:00 2001 From: Harald Welte Date: Tue, 9 Aug 2005 20:26:03 -0700 Subject: [PATCH 354/584] [DCCP]: make <linux/dccp.h> include-able from userspace The protocol header files in <linux/> are usually structured in a way to be included by userspace code.
The top section consists of general protocol structure definitions, typedefs, enums - followed by an #ifdef __KERNEL__ section. Currently <linux/dccp.h> doesn't follow that convention and can therefore not be used from userspace. However, for example iptables' libipt_dccp.c actually needs various definitions from there. Signed-off-by: Harald Welte Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. Miller --- include/linux/dccp.h | 238 ++++++++++++++++++++++--------------------- 1 file changed, 121 insertions(+), 117 deletions(-) diff --git a/include/linux/dccp.h b/include/linux/dccp.h index e3b4bf7346bb..add4908b8e57 100644 --- a/include/linux/dccp.h +++ b/include/linux/dccp.h @@ -1,16 +1,8 @@ #ifndef _LINUX_DCCP_H #define _LINUX_DCCP_H -#include -#include #include -#include -#include - -#include -#include -#include -#include +#include /* FIXME: this is utterly wrong */ struct sockaddr_dccp { @@ -18,40 +10,6 @@ struct sockaddr_dccp { unsigned int service; }; -enum dccp_state { - DCCP_OPEN = TCP_ESTABLISHED, - DCCP_REQUESTING = TCP_SYN_SENT, - DCCP_PARTOPEN = TCP_FIN_WAIT1, /* FIXME: - This mapping is horrible, but TCP has - no matching state for DCCP_PARTOPEN, - as TCP_SYN_RECV is already used by - DCCP_RESPOND, why don't stop using TCP - mapping of states?
OK, now we don't use - sk_stream_sendmsg anymore, so doesn't - seem to exist any reason for us to - do the TCP mapping here */ - DCCP_LISTEN = TCP_LISTEN, - DCCP_RESPOND = TCP_SYN_RECV, - DCCP_CLOSING = TCP_CLOSING, - DCCP_TIME_WAIT = TCP_TIME_WAIT, - DCCP_CLOSED = TCP_CLOSE, - DCCP_MAX_STATES = TCP_MAX_STATES, -}; - -#define DCCP_STATE_MASK 0xf -#define DCCP_ACTION_FIN (1<<7) - -enum { - DCCPF_OPEN = TCPF_ESTABLISHED, - DCCPF_REQUESTING = TCPF_SYN_SENT, - DCCPF_PARTOPEN = TCPF_FIN_WAIT1, - DCCPF_LISTEN = TCPF_LISTEN, - DCCPF_RESPOND = TCPF_SYN_RECV, - DCCPF_CLOSING = TCPF_CLOSING, - DCCPF_TIME_WAIT = TCPF_TIME_WAIT, - DCCPF_CLOSED = TCPF_CLOSE, -}; - /** * struct dccp_hdr - generic part of DCCP packet header * @@ -94,11 +52,6 @@ struct dccp_hdr { #endif }; -static inline struct dccp_hdr *dccp_hdr(const struct sk_buff *skb) -{ - return (struct dccp_hdr *)skb->h.raw; -} - /** * struct dccp_hdr_ext - the low bits of a 48 bit seq packet * @@ -108,34 +61,6 @@ struct dccp_hdr_ext { __u32 dccph_seq_low; }; -static inline struct dccp_hdr_ext *dccp_hdrx(const struct sk_buff *skb) -{ - return (struct dccp_hdr_ext *)(skb->h.raw + sizeof(struct dccp_hdr)); -} - -static inline unsigned int dccp_basic_hdr_len(const struct sk_buff *skb) -{ - const struct dccp_hdr *dh = dccp_hdr(skb); - return sizeof(*dh) + (dh->dccph_x ? 
sizeof(struct dccp_hdr_ext) : 0); -} - -static inline __u64 dccp_hdr_seq(const struct sk_buff *skb) -{ - const struct dccp_hdr *dh = dccp_hdr(skb); -#if defined(__LITTLE_ENDIAN_BITFIELD) - __u64 seq_nr = ntohl(dh->dccph_seq << 8); -#elif defined(__BIG_ENDIAN_BITFIELD) - __u64 seq_nr = ntohl(dh->dccph_seq); -#else -#error "Adjust your defines" -#endif - - if (dh->dccph_x != 0) - seq_nr = (seq_nr << 32) + ntohl(dccp_hdrx(skb)->dccph_seq_low); - - return seq_nr; -} - /** * struct dccp_hdr_request - Conection initiation request header * @@ -145,12 +70,6 @@ static inline __u64 dccp_hdr_seq(const struct sk_buff *skb) struct dccp_hdr_request { __u32 dccph_req_service; }; - -static inline struct dccp_hdr_request *dccp_hdr_request(struct sk_buff *skb) -{ - return (struct dccp_hdr_request *)(skb->h.raw + dccp_basic_hdr_len(skb)); -} - /** * struct dccp_hdr_ack_bits - acknowledgment bits common to most packets * @@ -162,24 +81,6 @@ struct dccp_hdr_ack_bits { dccph_ack_nr_high:24; __u32 dccph_ack_nr_low; }; - -static inline struct dccp_hdr_ack_bits *dccp_hdr_ack_bits(const struct sk_buff *skb) -{ - return (struct dccp_hdr_ack_bits *)(skb->h.raw + dccp_basic_hdr_len(skb)); -} - -static inline u64 dccp_hdr_ack_seq(const struct sk_buff *skb) -{ - const struct dccp_hdr_ack_bits *dhack = dccp_hdr_ack_bits(skb); -#if defined(__LITTLE_ENDIAN_BITFIELD) - return (((u64)ntohl(dhack->dccph_ack_nr_high << 8)) << 32) + ntohl(dhack->dccph_ack_nr_low); -#elif defined(__BIG_ENDIAN_BITFIELD) - return (((u64)ntohl(dhack->dccph_ack_nr_high)) << 32) + ntohl(dhack->dccph_ack_nr_low); -#else -#error "Adjust your defines" -#endif -} - /** * struct dccp_hdr_response - Conection initiation response header * @@ -193,11 +94,6 @@ struct dccp_hdr_response { __u32 dccph_resp_service; }; -static inline struct dccp_hdr_response *dccp_hdr_response(struct sk_buff *skb) -{ - return (struct dccp_hdr_response *)(skb->h.raw + dccp_basic_hdr_len(skb)); -} - /** * struct dccp_hdr_reset - Unconditionally shut down a 
connection * @@ -210,11 +106,6 @@ struct dccp_hdr_reset { dccph_reset_data[3]; }; -static inline struct dccp_hdr_reset *dccp_hdr_reset(struct sk_buff *skb) -{ - return (struct dccp_hdr_reset *)(skb->h.raw + dccp_basic_hdr_len(skb)); -} - enum dccp_pkt_type { DCCP_PKT_REQUEST = 0, DCCP_PKT_RESPONSE, @@ -248,13 +139,6 @@ static inline unsigned int dccp_packet_hdr_len(const __u8 type) return sizeof(struct dccp_hdr_response); return sizeof(struct dccp_hdr_reset); } - -static inline unsigned int dccp_hdr_len(const struct sk_buff *skb) -{ - return dccp_basic_hdr_len(skb) + - dccp_packet_hdr_len(dccp_hdr(skb)->dccph_type); -} - enum dccp_reset_codes { DCCP_RESET_CODE_UNSPECIFIED = 0, DCCP_RESET_CODE_CLOSED, @@ -298,6 +182,124 @@ enum { DCCPF_MAX_CCID_SPECIFIC = 255, }; +#ifdef __KERNEL__ + +#include +#include +#include +#include + +#include +#include +#include +#include + +enum dccp_state { + DCCP_OPEN = TCP_ESTABLISHED, + DCCP_REQUESTING = TCP_SYN_SENT, + DCCP_PARTOPEN = TCP_FIN_WAIT1, /* FIXME: + This mapping is horrible, but TCP has + no matching state for DCCP_PARTOPEN, + as TCP_SYN_RECV is already used by + DCCP_RESPOND, why don't stop using TCP + mapping of states? 
OK, now we don't use + sk_stream_sendmsg anymore, so doesn't + seem to exist any reason for us to + do the TCP mapping here */ + DCCP_LISTEN = TCP_LISTEN, + DCCP_RESPOND = TCP_SYN_RECV, + DCCP_CLOSING = TCP_CLOSING, + DCCP_TIME_WAIT = TCP_TIME_WAIT, + DCCP_CLOSED = TCP_CLOSE, + DCCP_MAX_STATES = TCP_MAX_STATES, +}; + +#define DCCP_STATE_MASK 0xf +#define DCCP_ACTION_FIN (1<<7) + +enum { + DCCPF_OPEN = TCPF_ESTABLISHED, + DCCPF_REQUESTING = TCPF_SYN_SENT, + DCCPF_PARTOPEN = TCPF_FIN_WAIT1, + DCCPF_LISTEN = TCPF_LISTEN, + DCCPF_RESPOND = TCPF_SYN_RECV, + DCCPF_CLOSING = TCPF_CLOSING, + DCCPF_TIME_WAIT = TCPF_TIME_WAIT, + DCCPF_CLOSED = TCPF_CLOSE, +}; + +static inline struct dccp_hdr *dccp_hdr(const struct sk_buff *skb) +{ + return (struct dccp_hdr *)skb->h.raw; +} + +static inline struct dccp_hdr_ext *dccp_hdrx(const struct sk_buff *skb) +{ + return (struct dccp_hdr_ext *)(skb->h.raw + sizeof(struct dccp_hdr)); +} + +static inline unsigned int dccp_basic_hdr_len(const struct sk_buff *skb) +{ + const struct dccp_hdr *dh = dccp_hdr(skb); + return sizeof(*dh) + (dh->dccph_x ? 
sizeof(struct dccp_hdr_ext) : 0); +} + +static inline __u64 dccp_hdr_seq(const struct sk_buff *skb) +{ + const struct dccp_hdr *dh = dccp_hdr(skb); +#if defined(__LITTLE_ENDIAN_BITFIELD) + __u64 seq_nr = ntohl(dh->dccph_seq << 8); +#elif defined(__BIG_ENDIAN_BITFIELD) + __u64 seq_nr = ntohl(dh->dccph_seq); +#else +#error "Adjust your defines" +#endif + + if (dh->dccph_x != 0) + seq_nr = (seq_nr << 32) + ntohl(dccp_hdrx(skb)->dccph_seq_low); + + return seq_nr; +} + +static inline struct dccp_hdr_request *dccp_hdr_request(struct sk_buff *skb) +{ + return (struct dccp_hdr_request *)(skb->h.raw + dccp_basic_hdr_len(skb)); +} + +static inline struct dccp_hdr_ack_bits *dccp_hdr_ack_bits(const struct sk_buff *skb) +{ + return (struct dccp_hdr_ack_bits *)(skb->h.raw + dccp_basic_hdr_len(skb)); +} + +static inline u64 dccp_hdr_ack_seq(const struct sk_buff *skb) +{ + const struct dccp_hdr_ack_bits *dhack = dccp_hdr_ack_bits(skb); +#if defined(__LITTLE_ENDIAN_BITFIELD) + return (((u64)ntohl(dhack->dccph_ack_nr_high << 8)) << 32) + ntohl(dhack->dccph_ack_nr_low); +#elif defined(__BIG_ENDIAN_BITFIELD) + return (((u64)ntohl(dhack->dccph_ack_nr_high)) << 32) + ntohl(dhack->dccph_ack_nr_low); +#else +#error "Adjust your defines" +#endif +} + +static inline struct dccp_hdr_response *dccp_hdr_response(struct sk_buff *skb) +{ + return (struct dccp_hdr_response *)(skb->h.raw + dccp_basic_hdr_len(skb)); +} + +static inline struct dccp_hdr_reset *dccp_hdr_reset(struct sk_buff *skb) +{ + return (struct dccp_hdr_reset *)(skb->h.raw + dccp_basic_hdr_len(skb)); +} + +static inline unsigned int dccp_hdr_len(const struct sk_buff *skb) +{ + return dccp_basic_hdr_len(skb) + + dccp_packet_hdr_len(dccp_hdr(skb)->dccph_type); +} + + /* initial values for each feature */ #define DCCPF_INITIAL_SEQUENCE_WINDOW 100 /* FIXME: for now we're using CCID 3 (TFRC) */ @@ -429,4 +431,6 @@ static inline const char *dccp_role(const struct sock *sk) return NULL; } +#endif /* __KERNEL__ */ + #endif /* 
_LINUX_DCCP_H */ From e2e268665f6c01686b477a6b0cc5a70bab689d54 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 9 Aug 2005 20:26:28 -0700 Subject: [PATCH 355/584] [DCCP]: Fix struct sockaddr_dccp definition Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. Miller --- include/linux/dccp.h | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/include/linux/dccp.h b/include/linux/dccp.h index add4908b8e57..fd1412ddb3ff 100644 --- a/include/linux/dccp.h +++ b/include/linux/dccp.h @@ -4,10 +4,14 @@ #include #include -/* FIXME: this is utterly wrong */ +/* Structure describing an Internet (DCCP) socket address. */ struct sockaddr_dccp { - struct sockaddr_in in; - unsigned int service; + __u16 sdccp_family; /* Address family */ + __u16 sdccp_port; /* Port number */ + __u32 sdccp_addr; /* Internet address */ + __u32 sdccp_service; /* Service */ + /* Pad to size of `struct sockaddr': 16 bytes . */ + __u32 sdccp_pad; }; /** From 1d3de414eb20d937d82c5219fd13ee4cedc499cb Mon Sep 17 00:00:00 2001 From: Harald Welte Date: Tue, 9 Aug 2005 20:26:55 -0700 Subject: [PATCH 356/584] [NETFILTER]: New iptables DCCP protocol header match Using this new iptables DCCP protocol header match, it is possible to create simplistic stateless packet filtering rules for DCCP. It permits matching of port numbers, packet type and options. Signed-off-by: Harald Welte Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. 
Miller --- include/linux/dccp.h | 16 ++- include/linux/netfilter_ipv4/ipt_dccp.h | 23 ++++ net/ipv4/netfilter/Kconfig | 11 ++ net/ipv4/netfilter/Makefile | 1 + net/ipv4/netfilter/ipt_dccp.c | 176 ++++++++++++++++++++++++ 5 files changed, 224 insertions(+), 3 deletions(-) create mode 100644 include/linux/netfilter_ipv4/ipt_dccp.h create mode 100644 net/ipv4/netfilter/ipt_dccp.c diff --git a/include/linux/dccp.h b/include/linux/dccp.h index fd1412ddb3ff..431d58923ba9 100644 --- a/include/linux/dccp.h +++ b/include/linux/dccp.h @@ -242,10 +242,15 @@ static inline struct dccp_hdr_ext *dccp_hdrx(const struct sk_buff *skb) return (struct dccp_hdr_ext *)(skb->h.raw + sizeof(struct dccp_hdr)); } +static inline unsigned int __dccp_basic_hdr_len(const struct dccp_hdr *dh) +{ + return sizeof(*dh) + (dh->dccph_x ? sizeof(struct dccp_hdr_ext) : 0); +} + static inline unsigned int dccp_basic_hdr_len(const struct sk_buff *skb) { const struct dccp_hdr *dh = dccp_hdr(skb); - return sizeof(*dh) + (dh->dccph_x ? 
sizeof(struct dccp_hdr_ext) : 0); + return __dccp_basic_hdr_len(dh); } static inline __u64 dccp_hdr_seq(const struct sk_buff *skb) @@ -297,10 +302,15 @@ static inline struct dccp_hdr_reset *dccp_hdr_reset(struct sk_buff *skb) return (struct dccp_hdr_reset *)(skb->h.raw + dccp_basic_hdr_len(skb)); } +static inline unsigned int __dccp_hdr_len(const struct dccp_hdr *dh) +{ + return __dccp_basic_hdr_len(dh) + + dccp_packet_hdr_len(dh->dccph_type); +} + static inline unsigned int dccp_hdr_len(const struct sk_buff *skb) { - return dccp_basic_hdr_len(skb) + - dccp_packet_hdr_len(dccp_hdr(skb)->dccph_type); + return __dccp_hdr_len(dccp_hdr(skb)); } diff --git a/include/linux/netfilter_ipv4/ipt_dccp.h b/include/linux/netfilter_ipv4/ipt_dccp.h new file mode 100644 index 000000000000..3cb3a522e62b --- /dev/null +++ b/include/linux/netfilter_ipv4/ipt_dccp.h @@ -0,0 +1,23 @@ +#ifndef _IPT_DCCP_H_ +#define _IPT_DCCP_H_ + +#define IPT_DCCP_SRC_PORTS 0x01 +#define IPT_DCCP_DEST_PORTS 0x02 +#define IPT_DCCP_TYPE 0x04 +#define IPT_DCCP_OPTION 0x08 + +#define IPT_DCCP_VALID_FLAGS 0x0f + +struct ipt_dccp_info { + u_int16_t dpts[2]; /* Min, Max */ + u_int16_t spts[2]; /* Min, Max */ + + u_int16_t flags; + u_int16_t invflags; + + u_int16_t typemask; + u_int8_t option; +}; + +#endif /* _IPT_DCCP_H_ */ + diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig index 2fa26a41fa47..9f5e1d769b5f 100644 --- a/net/ipv4/netfilter/Kconfig +++ b/net/ipv4/netfilter/Kconfig @@ -354,6 +354,17 @@ config IP_NF_MATCH_SCTP If you want to compile it as a module, say M here and read . If unsure, say `N'. +config IP_NF_MATCH_DCCP + tristate 'DCCP protocol match support' + depends on IP_NF_IPTABLES + help + With this option enabled, you will be able to use the iptables + `dccp' match in order to match on DCCP source/destination ports + and DCCP flags. + + If you want to compile it as a module, say M here and read + . If unsure, say `N'. 
+ config IP_NF_MATCH_COMMENT tristate 'comment match support' depends on IP_NF_IPTABLES diff --git a/net/ipv4/netfilter/Makefile b/net/ipv4/netfilter/Makefile index c2ae663b723f..58aa7c616e1f 100644 --- a/net/ipv4/netfilter/Makefile +++ b/net/ipv4/netfilter/Makefile @@ -42,6 +42,7 @@ obj-$(CONFIG_IP_NF_MATCH_HELPER) += ipt_helper.o obj-$(CONFIG_IP_NF_MATCH_LIMIT) += ipt_limit.o obj-$(CONFIG_IP_NF_MATCH_HASHLIMIT) += ipt_hashlimit.o obj-$(CONFIG_IP_NF_MATCH_SCTP) += ipt_sctp.o +obj-$(CONFIG_IP_NF_MATCH_DCCP) += ipt_dccp.o obj-$(CONFIG_IP_NF_MATCH_MARK) += ipt_mark.o obj-$(CONFIG_IP_NF_MATCH_MAC) += ipt_mac.o obj-$(CONFIG_IP_NF_MATCH_IPRANGE) += ipt_iprange.o diff --git a/net/ipv4/netfilter/ipt_dccp.c b/net/ipv4/netfilter/ipt_dccp.c new file mode 100644 index 000000000000..ad3278bba6c1 --- /dev/null +++ b/net/ipv4/netfilter/ipt_dccp.c @@ -0,0 +1,176 @@ +/* + * iptables module for DCCP protocol header matching + * + * (C) 2005 by Harald Welte + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include +#include +#include +#include +#include + +#include +#include + +#define DCCHECK(cond, option, flag, invflag) (!((flag) & (option)) \ + || (!!((invflag) & (option)) ^ (cond))) + +static unsigned char *dccp_optbuf; +static DEFINE_SPINLOCK(dccp_buflock); + +static inline int +dccp_find_option(u_int8_t option, + const struct sk_buff *skb, + const struct dccp_hdr *dh, + int *hotdrop) +{ + /* tcp.doff is only 4 bits, ie. 
max 15 * 4 bytes */ + unsigned char *op; + unsigned int optoff = __dccp_hdr_len(dh); + unsigned int optlen = dh->dccph_doff*4 - __dccp_hdr_len(dh); + unsigned int i; + + if (dh->dccph_doff * 4 < __dccp_hdr_len(dh)) { + *hotdrop = 1; + return 0; + } + + if (!optlen) + return 0; + + spin_lock_bh(&dccp_buflock); + op = skb_header_pointer(skb, + skb->nh.iph->ihl*4 + optoff, + optlen, dccp_optbuf); + if (op == NULL) { + /* If we don't have the whole header, drop packet. */ + spin_unlock_bh(&dccp_buflock); + *hotdrop = 1; + return 0; + } + + for (i = 0; i < optlen; ) { + if (op[i] == option) { + spin_unlock_bh(&dccp_buflock); + return 1; + } + + if (op[i] < 2) + i++; + else + i += op[i+1]?:1; + } + + spin_unlock_bh(&dccp_buflock); + return 0; +} + + +static inline int +match_types(const struct dccp_hdr *dh, u_int16_t typemask) +{ + return (typemask & (1 << dh->dccph_type)); +} + +static inline int +match_option(u_int8_t option, const struct sk_buff *skb, + const struct dccp_hdr *dh, int *hotdrop) +{ + return dccp_find_option(option, skb, dh, hotdrop); +} + +static int +match(const struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + const void *matchinfo, + int offset, + int *hotdrop) +{ + const struct ipt_dccp_info *info = + (const struct ipt_dccp_info *)matchinfo; + struct dccp_hdr _dh, *dh; + + if (offset) + return 0; + + dh = skb_header_pointer(skb, skb->nh.iph->ihl*4, sizeof(_dh), &_dh); + if (dh == NULL) { + *hotdrop = 1; + return 0; + } + + return DCCHECK(((ntohs(dh->dccph_sport) >= info->spts[0]) + && (ntohs(dh->dccph_sport) <= info->spts[1])), + IPT_DCCP_SRC_PORTS, info->flags, info->invflags) + && DCCHECK(((ntohs(dh->dccph_dport) >= info->dpts[0]) + && (ntohs(dh->dccph_dport) <= info->dpts[1])), + IPT_DCCP_DEST_PORTS, info->flags, info->invflags) + && DCCHECK(match_types(dh, info->typemask), + IPT_DCCP_TYPE, info->flags, info->invflags) + && DCCHECK(match_option(info->option, skb, dh, hotdrop), + IPT_DCCP_OPTION, info->flags, 
info->invflags); +} + +static int +checkentry(const char *tablename, + const struct ipt_ip *ip, + void *matchinfo, + unsigned int matchsize, + unsigned int hook_mask) +{ + const struct ipt_dccp_info *info; + + info = (const struct ipt_dccp_info *)matchinfo; + + return ip->proto == IPPROTO_DCCP + && !(ip->invflags & IPT_INV_PROTO) + && matchsize == IPT_ALIGN(sizeof(struct ipt_dccp_info)) + && !(info->flags & ~IPT_DCCP_VALID_FLAGS) + && !(info->invflags & ~IPT_DCCP_VALID_FLAGS) + && !(info->invflags & ~info->flags); +} + +static struct ipt_match dccp_match = +{ + .name = "dccp", + .match = &match, + .checkentry = &checkentry, + .me = THIS_MODULE, +}; + +static int __init init(void) +{ + int ret; + + /* doff is 8 bits, so the maximum option size is (4*256). Don't put + * this in BSS since DaveM is worried about locked TLB's for kernel + * BSS. */ + dccp_optbuf = kmalloc(256 * 4, GFP_KERNEL); + if (!dccp_optbuf) + return -ENOMEM; + ret = ipt_register_match(&dccp_match); + if (ret) + kfree(dccp_optbuf); + + return ret; +} + +static void __exit fini(void) +{ + ipt_unregister_match(&dccp_match); + kfree(dccp_optbuf); +} + +module_init(init); +module_exit(fini); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Harald Welte "); +MODULE_DESCRIPTION("Match for DCCP protocol packets"); + From f6ccf55419c4f0021e7382f000f2fd14a29f3d3c Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 9 Aug 2005 20:27:14 -0700 Subject: [PATCH 357/584] [DCCP]: Fix u64 printf format warnings. Signed-off-by: David S. 
Miller --- net/dccp/input.c | 4 +++- net/dccp/ipv4.c | 6 ++++-- net/dccp/minisocks.c | 5 ++++- net/dccp/options.c | 35 +++++++++++++++++++++++++---------- 4 files changed, 36 insertions(+), 14 deletions(-) diff --git a/net/dccp/input.c b/net/dccp/input.c index 622e976a51fe..76c3401e93a5 100644 --- a/net/dccp/input.c +++ b/net/dccp/input.c @@ -274,7 +274,9 @@ static int dccp_rcv_request_sent_state_process(struct sock *sk, if (!between48(DCCP_SKB_CB(skb)->dccpd_ack_seq, dp->dccps_awl, dp->dccps_awh)) { dccp_pr_debug("invalid ackno: S.AWL=%llu, P.ackno=%llu, S.AWH=%llu \n", - dp->dccps_awl, DCCP_SKB_CB(skb)->dccpd_ack_seq, dp->dccps_awh); + (unsigned long long) dp->dccps_awl, + (unsigned long long) DCCP_SKB_CB(skb)->dccpd_ack_seq, + (unsigned long long) dp->dccps_awh); goto out_invalid_packet; } diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index 7b90606ec10e..4fa56dbcbea4 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -1092,14 +1092,16 @@ int dccp_v4_rcv(struct sk_buff *skb) dccp_packet_name(dh->dccph_type), NIPQUAD(skb->nh.iph->saddr), ntohs(dh->dccph_sport), NIPQUAD(skb->nh.iph->daddr), ntohs(dh->dccph_dport), - DCCP_SKB_CB(skb)->dccpd_seq); + (unsigned long long) DCCP_SKB_CB(skb)->dccpd_seq); if (dccp_packet_without_ack(skb)) { DCCP_SKB_CB(skb)->dccpd_ack_seq = DCCP_PKT_WITHOUT_ACK_SEQ; dccp_pr_debug_cat("\n"); } else { DCCP_SKB_CB(skb)->dccpd_ack_seq = dccp_hdr_ack_seq(skb); - dccp_pr_debug_cat(", ack=%llu\n", DCCP_SKB_CB(skb)->dccpd_ack_seq); + dccp_pr_debug_cat(", ack=%llu\n", + (unsigned long long) + DCCP_SKB_CB(skb)->dccpd_ack_seq); } /* Step 2: diff --git a/net/dccp/minisocks.c b/net/dccp/minisocks.c index 810f0c293b85..e498e389fccc 100644 --- a/net/dccp/minisocks.c +++ b/net/dccp/minisocks.c @@ -142,7 +142,10 @@ struct sock *dccp_check_req(struct sock *sk, struct sk_buff *skb, /* Invalid ACK */ if (DCCP_SKB_CB(skb)->dccpd_ack_seq != dccp_rsk(req)->dreq_iss) { dccp_pr_debug("Invalid ACK number: ack_seq=%llu, dreq_iss=%llu\n", - 
DCCP_SKB_CB(skb)->dccpd_ack_seq, dccp_rsk(req)->dreq_iss); + (unsigned long long) + DCCP_SKB_CB(skb)->dccpd_ack_seq, + (unsigned long long) + dccp_rsk(req)->dreq_iss); goto drop; } diff --git a/net/dccp/options.c b/net/dccp/options.c index e1867767946c..9ca32cba83af 100644 --- a/net/dccp/options.c +++ b/net/dccp/options.c @@ -119,7 +119,9 @@ int dccp_parse_options(struct sock *sk, struct sk_buff *skb) opt_recv->dccpor_ack_vector_idx = value - options; dccp_pr_debug("%sACK vector 0, len=%d, ack_ackno=%llu\n", - debug_prefix, len, DCCP_SKB_CB(skb)->dccpd_ack_seq); + debug_prefix, len, + (unsigned long long) + DCCP_SKB_CB(skb)->dccpd_ack_seq); dccp_ackvector_print(DCCP_SKB_CB(skb)->dccpd_ack_seq, value, len); dccp_ackpkts_check_rcv_ackvector(dp->dccps_hc_rx_ackpkts, sk, @@ -137,6 +139,7 @@ int dccp_parse_options(struct sock *sk, struct sk_buff *skb) dccp_pr_debug("%sTIMESTAMP=%u, ackno=%llu\n", debug_prefix, opt_recv->dccpor_timestamp, + (unsigned long long) DCCP_SKB_CB(skb)->dccpd_ack_seq); break; case DCCPO_TIMESTAMP_ECHO: @@ -147,7 +150,9 @@ int dccp_parse_options(struct sock *sk, struct sk_buff *skb) dccp_pr_debug("%sTIMESTAMP_ECHO=%u, len=%d, ackno=%llu, diff=%u\n", debug_prefix, opt_recv->dccpor_timestamp_echo, - len + 2, DCCP_SKB_CB(skb)->dccpd_ack_seq, + len + 2, + (unsigned long long) + DCCP_SKB_CB(skb)->dccpd_ack_seq, tcp_time_stamp - opt_recv->dccpor_timestamp_echo); opt_recv->dccpor_elapsed_time = dccp_decode_value_var(value + 4, len - 4); @@ -308,7 +313,8 @@ void dccp_insert_option_elapsed_time(struct sock *sk, dccp_pr_debug("%sELAPSED_TIME=%u, len=%d, seqno=%llu\n", debug_prefix, elapsed_time, - len, DCCP_SKB_CB(skb)->dccpd_seq); + len, + (unsigned long long) DCCP_SKB_CB(skb)->dccpd_seq); } EXPORT_SYMBOL(dccp_insert_option_elapsed_time); @@ -382,7 +388,8 @@ static void dccp_insert_option_ack_vector(struct sock *sk, struct sk_buff *skb) dccp_pr_debug("%sACK Vector 0, len=%d, ack_seqno=%llu, ack_ackno=%llu\n", debug_prefix, ap->dccpap_ack_vector_len, - 
ap->dccpap_ack_seqno, ap->dccpap_ack_ackno); + (unsigned long long) ap->dccpap_ack_seqno, + (unsigned long long) ap->dccpap_ack_ackno); } static inline void dccp_insert_option_timestamp(struct sock *sk, struct sk_buff *skb) @@ -422,7 +429,8 @@ static void dccp_insert_option_timestamp_echo(struct sock *sk, struct sk_buff *s dccp_pr_debug("%sTIMESTAMP_ECHO=%u, len=%d, seqno=%llu\n", debug_prefix, dp->dccps_timestamp_echo, - len, DCCP_SKB_CB(skb)->dccpd_seq); + len, + (unsigned long long) DCCP_SKB_CB(skb)->dccpd_seq); dp->dccps_timestamp_echo = 0; dp->dccps_timestamp_time = 0; @@ -607,7 +615,8 @@ int dccp_ackpkts_add(struct dccp_ackpkts *ap, u64 ackno, u8 state) */ if (state == DCCP_ACKPKTS_STATE_NOT_RECEIVED && len == 0 && delta == 0) { /* Found our reserved seat! */ - dccp_pr_debug("Found %llu reserved seat!\n", ackno); + dccp_pr_debug("Found %llu reserved seat!\n", + (unsigned long long) ackno); ap->dccpap_buf[index] = state; goto out; } @@ -630,7 +639,8 @@ out: out_duplicate: /* Duplicate packet */ - dccp_pr_debug("Received a dup or already considered lost packet: %llu\n", ackno); + dccp_pr_debug("Received a dup or already considered lost packet: %llu\n", + (unsigned long long) ackno); return -EILSEQ; } @@ -640,7 +650,8 @@ void dccp_ackvector_print(const u64 ackno, const unsigned char *vector, int len) if (!dccp_debug) return; - printk("ACK vector len=%d, ackno=%llu |", len, ackno); + printk("ACK vector len=%d, ackno=%llu |", len, + (unsigned long long) ackno); while (len--) { const u8 state = (*vector & DCCP_ACKPKTS_STATE_MASK) >> 6; @@ -693,7 +704,8 @@ void dccp_ackpkts_check_rcv_ackno(struct dccp_ackpkts *ap, struct sock *sk, #endif dccp_pr_debug("%sACK packet 0, len=%d, ack_seqno=%llu, ack_ackno=%llu, ACKED!\n", debug_prefix, 1, - ap->dccpap_ack_seqno, ap->dccpap_ack_ackno); + (unsigned long long) ap->dccpap_ack_seqno, + (unsigned long long) ap->dccpap_ack_ackno); dccp_ackpkts_trow_away_ack_record(ap); ap->dccpap_ack_seqno = DCCP_MAX_SEQNO + 1; } @@ -745,7 
+757,10 @@ static void dccp_ackpkts_check_rcv_ackvector(struct dccp_ackpkts *ap, #endif dccp_pr_debug("%sACK vector 0, len=%d, ack_seqno=%llu, ack_ackno=%llu, ACKED!\n", debug_prefix, len, - ap->dccpap_ack_seqno, ap->dccpap_ack_ackno); + (unsigned long long) + ap->dccpap_ack_seqno, + (unsigned long long) + ap->dccpap_ack_ackno); dccp_ackpkts_trow_away_ack_record(ap); } /* From 4aa769b99724953a6f322c648c0cfbe8c6616382 Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Tue, 9 Aug 2005 20:27:37 -0700 Subject: [PATCH 358/584] [Bluetooth]: Update and cleanup of the virtual HCI driver This patch cleans up the virtual HCI driver. It also adds support for the dynamic minor device number allocation. Signed-off-by: Marcel Holtmann Signed-off-by: David S. Miller --- drivers/bluetooth/hci_vhci.c | 374 ++++++++++++++++++----------------- drivers/bluetooth/hci_vhci.h | 50 ----- 2 files changed, 197 insertions(+), 227 deletions(-) delete mode 100644 drivers/bluetooth/hci_vhci.h diff --git a/drivers/bluetooth/hci_vhci.c b/drivers/bluetooth/hci_vhci.c index f9b956fb2b8b..4aa5dfff12be 100644 --- a/drivers/bluetooth/hci_vhci.c +++ b/drivers/bluetooth/hci_vhci.c @@ -1,229 +1,220 @@ -/* - BlueZ - Bluetooth protocol stack for Linux - Copyright (C) 2000-2001 Qualcomm Incorporated - - Written 2000,2001 by Maxim Krasnyansky - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License version 2 as - published by the Free Software Foundation; - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT OF THIRD PARTY RIGHTS. 
- IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) AND AUTHOR(S) BE LIABLE FOR ANY - CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES - WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN - ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF - OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. - - ALL LIABILITY, INCLUDING LIABILITY FOR INFRINGEMENT OF ANY PATENTS, - COPYRIGHTS, TRADEMARKS OR OTHER RIGHTS, RELATING TO USE OF THIS - SOFTWARE IS DISCLAIMED. -*/ - /* - * Bluetooth HCI virtual device driver. * - * $Id: hci_vhci.c,v 1.3 2002/04/17 17:37:20 maxk Exp $ + * Bluetooth virtual HCI driver + * + * Copyright (C) 2000-2001 Qualcomm Incorporated + * Copyright (C) 2002-2003 Maxim Krasnyansky + * Copyright (C) 2004-2005 Marcel Holtmann + * + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * */ -#define VERSION "1.1" #include #include -#include #include -#include -#include -#include -#include -#include #include -#include +#include +#include +#include +#include +#include #include #include -#include -#include - #include #include -#include "hci_vhci.h" -/* HCI device part */ +#ifndef CONFIG_BT_HCIVHCI_DEBUG +#undef BT_DBG +#define BT_DBG(D...) 
+#endif -static int hci_vhci_open(struct hci_dev *hdev) +#define VERSION "1.2" + +static int minor = MISC_DYNAMIC_MINOR; + +struct vhci_data { + struct hci_dev *hdev; + + unsigned long flags; + + wait_queue_head_t read_wait; + struct sk_buff_head readq; + + struct fasync_struct *fasync; +}; + +#define VHCI_FASYNC 0x0010 + +static struct miscdevice vhci_miscdev; + +static int vhci_open_dev(struct hci_dev *hdev) { set_bit(HCI_RUNNING, &hdev->flags); + return 0; } -static int hci_vhci_flush(struct hci_dev *hdev) +static int vhci_close_dev(struct hci_dev *hdev) { - struct hci_vhci_struct *hci_vhci = (struct hci_vhci_struct *) hdev->driver_data; - skb_queue_purge(&hci_vhci->readq); - return 0; -} + struct vhci_data *vhci = hdev->driver_data; -static int hci_vhci_close(struct hci_dev *hdev) -{ if (!test_and_clear_bit(HCI_RUNNING, &hdev->flags)) return 0; - hci_vhci_flush(hdev); + skb_queue_purge(&vhci->readq); + return 0; } -static void hci_vhci_destruct(struct hci_dev *hdev) +static int vhci_flush(struct hci_dev *hdev) { - struct hci_vhci_struct *vhci; + struct vhci_data *vhci = hdev->driver_data; - if (!hdev) return; + skb_queue_purge(&vhci->readq); - vhci = (struct hci_vhci_struct *) hdev->driver_data; - kfree(vhci); + return 0; } -static int hci_vhci_send_frame(struct sk_buff *skb) +static int vhci_send_frame(struct sk_buff *skb) { struct hci_dev* hdev = (struct hci_dev *) skb->dev; - struct hci_vhci_struct *hci_vhci; + struct vhci_data *vhci; if (!hdev) { - BT_ERR("Frame for uknown device (hdev=NULL)"); + BT_ERR("Frame for unknown HCI device (hdev=NULL)"); return -ENODEV; } if (!test_bit(HCI_RUNNING, &hdev->flags)) return -EBUSY; - hci_vhci = (struct hci_vhci_struct *) hdev->driver_data; + vhci = hdev->driver_data; memcpy(skb_push(skb, 1), &skb->pkt_type, 1); - skb_queue_tail(&hci_vhci->readq, skb); + skb_queue_tail(&vhci->readq, skb); - if (hci_vhci->flags & VHCI_FASYNC) - kill_fasync(&hci_vhci->fasync, SIGIO, POLL_IN); - 
wake_up_interruptible(&hci_vhci->read_wait); + if (vhci->flags & VHCI_FASYNC) + kill_fasync(&vhci->fasync, SIGIO, POLL_IN); + + wake_up_interruptible(&vhci->read_wait); return 0; } -/* Character device part */ - -/* Poll */ -static unsigned int hci_vhci_chr_poll(struct file *file, poll_table * wait) -{ - struct hci_vhci_struct *hci_vhci = (struct hci_vhci_struct *) file->private_data; - - poll_wait(file, &hci_vhci->read_wait, wait); - - if (!skb_queue_empty(&hci_vhci->readq)) - return POLLIN | POLLRDNORM; - - return POLLOUT | POLLWRNORM; +static void vhci_destruct(struct hci_dev *hdev) +{ + kfree(hdev->driver_data); } -/* Get packet from user space buffer(already verified) */ -static inline ssize_t hci_vhci_get_user(struct hci_vhci_struct *hci_vhci, const char __user *buf, size_t count) +static inline ssize_t vhci_get_user(struct vhci_data *vhci, + const char __user *buf, size_t count) { struct sk_buff *skb; if (count > HCI_MAX_FRAME_SIZE) return -EINVAL; - if (!(skb = bt_skb_alloc(count, GFP_KERNEL))) + skb = bt_skb_alloc(count, GFP_KERNEL); + if (!skb) return -ENOMEM; - + if (copy_from_user(skb_put(skb, count), buf, count)) { kfree_skb(skb); return -EFAULT; } - skb->dev = (void *) hci_vhci->hdev; + skb->dev = (void *) vhci->hdev; skb->pkt_type = *((__u8 *) skb->data); skb_pull(skb, 1); hci_recv_frame(skb); return count; -} - -/* Write */ -static ssize_t hci_vhci_chr_write(struct file * file, const char __user * buf, - size_t count, loff_t *pos) -{ - struct hci_vhci_struct *hci_vhci = (struct hci_vhci_struct *) file->private_data; - - if (!access_ok(VERIFY_READ, buf, count)) - return -EFAULT; - - return hci_vhci_get_user(hci_vhci, buf, count); } -/* Put packet to user space buffer(already verified) */ -static inline ssize_t hci_vhci_put_user(struct hci_vhci_struct *hci_vhci, - struct sk_buff *skb, char __user *buf, - int count) +static inline ssize_t vhci_put_user(struct vhci_data *vhci, + struct sk_buff *skb, char __user *buf, int count) { - int len = count, 
total = 0; char __user *ptr = buf; + int len, total = 0; + + len = min_t(unsigned int, skb->len, count); - len = min_t(unsigned int, skb->len, len); if (copy_to_user(ptr, skb->data, len)) return -EFAULT; + total += len; - hci_vhci->hdev->stat.byte_tx += len; + vhci->hdev->stat.byte_tx += len; + switch (skb->pkt_type) { case HCI_COMMAND_PKT: - hci_vhci->hdev->stat.cmd_tx++; + vhci->hdev->stat.cmd_tx++; break; case HCI_ACLDATA_PKT: - hci_vhci->hdev->stat.acl_tx++; + vhci->hdev->stat.acl_tx++; break; case HCI_SCODATA_PKT: - hci_vhci->hdev->stat.cmd_tx++; + vhci->hdev->stat.cmd_tx++; break; }; return total; } -/* Read */ -static ssize_t hci_vhci_chr_read(struct file * file, char __user * buf, size_t count, loff_t *pos) +static loff_t vhci_llseek(struct file * file, loff_t offset, int origin) +{ + return -ESPIPE; +} + +static ssize_t vhci_read(struct file * file, char __user * buf, size_t count, loff_t *pos) { - struct hci_vhci_struct *hci_vhci = (struct hci_vhci_struct *) file->private_data; DECLARE_WAITQUEUE(wait, current); + struct vhci_data *vhci = file->private_data; struct sk_buff *skb; ssize_t ret = 0; - add_wait_queue(&hci_vhci->read_wait, &wait); + add_wait_queue(&vhci->read_wait, &wait); while (count) { set_current_state(TASK_INTERRUPTIBLE); - /* Read frames from device queue */ - if (!(skb = skb_dequeue(&hci_vhci->readq))) { + skb = skb_dequeue(&vhci->readq); + if (!skb) { if (file->f_flags & O_NONBLOCK) { ret = -EAGAIN; break; } + if (signal_pending(current)) { ret = -ERESTARTSYS; break; } - /* Nothing to read, let's sleep */ schedule(); continue; } if (access_ok(VERIFY_WRITE, buf, count)) - ret = hci_vhci_put_user(hci_vhci, skb, buf, count); + ret = vhci_put_user(vhci, skb, buf, count); else ret = -EFAULT; @@ -231,84 +222,90 @@ static ssize_t hci_vhci_chr_read(struct file * file, char __user * buf, size_t c break; } set_current_state(TASK_RUNNING); - remove_wait_queue(&hci_vhci->read_wait, &wait); + remove_wait_queue(&vhci->read_wait, &wait); return ret; } 
-static loff_t hci_vhci_chr_lseek(struct file * file, loff_t offset, int origin) +static ssize_t vhci_write(struct file *file, + const char __user *buf, size_t count, loff_t *pos) { - return -ESPIPE; + struct vhci_data *vhci = file->private_data; + + if (!access_ok(VERIFY_READ, buf, count)) + return -EFAULT; + + return vhci_get_user(vhci, buf, count); } -static int hci_vhci_chr_ioctl(struct inode *inode, struct file *file, unsigned int cmd, unsigned long arg) +static unsigned int vhci_poll(struct file *file, poll_table *wait) +{ + struct vhci_data *vhci = file->private_data; + + poll_wait(file, &vhci->read_wait, wait); + + if (!skb_queue_empty(&vhci->readq)) + return POLLIN | POLLRDNORM; + + return POLLOUT | POLLWRNORM; +} + +static int vhci_ioctl(struct inode *inode, struct file *file, + unsigned int cmd, unsigned long arg) { return -EINVAL; } -static int hci_vhci_chr_fasync(int fd, struct file *file, int on) +static int vhci_open(struct inode *inode, struct file *file) { - struct hci_vhci_struct *hci_vhci = (struct hci_vhci_struct *) file->private_data; - int ret; - - if ((ret = fasync_helper(fd, file, on, &hci_vhci->fasync)) < 0) - return ret; - - if (on) - hci_vhci->flags |= VHCI_FASYNC; - else - hci_vhci->flags &= ~VHCI_FASYNC; - - return 0; -} - -static int hci_vhci_chr_open(struct inode *inode, struct file * file) -{ - struct hci_vhci_struct *hci_vhci = NULL; + struct vhci_data *vhci; struct hci_dev *hdev; - if (!(hci_vhci = kmalloc(sizeof(struct hci_vhci_struct), GFP_KERNEL))) + vhci = kmalloc(sizeof(struct vhci_data), GFP_KERNEL); + if (!vhci) return -ENOMEM; - memset(hci_vhci, 0, sizeof(struct hci_vhci_struct)); + memset(vhci, 0, sizeof(struct vhci_data)); - skb_queue_head_init(&hci_vhci->readq); - init_waitqueue_head(&hci_vhci->read_wait); + skb_queue_head_init(&vhci->readq); + init_waitqueue_head(&vhci->read_wait); - /* Initialize and register HCI device */ hdev = hci_alloc_dev(); if (!hdev) { - kfree(hci_vhci); + kfree(vhci); return -ENOMEM; } - 
hci_vhci->hdev = hdev; + vhci->hdev = hdev; hdev->type = HCI_VHCI; - hdev->driver_data = hci_vhci; + hdev->driver_data = vhci; + SET_HCIDEV_DEV(hdev, vhci_miscdev.dev); - hdev->open = hci_vhci_open; - hdev->close = hci_vhci_close; - hdev->flush = hci_vhci_flush; - hdev->send = hci_vhci_send_frame; - hdev->destruct = hci_vhci_destruct; + hdev->open = vhci_open_dev; + hdev->close = vhci_close_dev; + hdev->flush = vhci_flush; + hdev->send = vhci_send_frame; + hdev->destruct = vhci_destruct; hdev->owner = THIS_MODULE; - + if (hci_register_dev(hdev) < 0) { - kfree(hci_vhci); + BT_ERR("Can't register HCI device"); + kfree(vhci); hci_free_dev(hdev); return -EBUSY; } - file->private_data = hci_vhci; - return nonseekable_open(inode, file); + file->private_data = vhci; + + return nonseekable_open(inode, file); } -static int hci_vhci_chr_close(struct inode *inode, struct file *file) +static int vhci_release(struct inode *inode, struct file *file) { - struct hci_vhci_struct *hci_vhci = (struct hci_vhci_struct *) file->private_data; - struct hci_dev *hdev = hci_vhci->hdev; + struct vhci_data *vhci = file->private_data; + struct hci_dev *hdev = vhci->hdev; if (hci_unregister_dev(hdev) < 0) { BT_ERR("Can't unregister HCI device %s", hdev->name); @@ -317,48 +314,71 @@ static int hci_vhci_chr_close(struct inode *inode, struct file *file) hci_free_dev(hdev); file->private_data = NULL; + return 0; } -static struct file_operations hci_vhci_fops = { - .owner = THIS_MODULE, - .llseek = hci_vhci_chr_lseek, - .read = hci_vhci_chr_read, - .write = hci_vhci_chr_write, - .poll = hci_vhci_chr_poll, - .ioctl = hci_vhci_chr_ioctl, - .open = hci_vhci_chr_open, - .release = hci_vhci_chr_close, - .fasync = hci_vhci_chr_fasync +static int vhci_fasync(int fd, struct file *file, int on) +{ + struct vhci_data *vhci = file->private_data; + int err; + + err = fasync_helper(fd, file, on, &vhci->fasync); + if (err < 0) + return err; + + if (on) + vhci->flags |= VHCI_FASYNC; + else + vhci->flags &= 
~VHCI_FASYNC; + + return 0; +} + +static struct file_operations vhci_fops = { + .owner = THIS_MODULE, + .llseek = vhci_llseek, + .read = vhci_read, + .write = vhci_write, + .poll = vhci_poll, + .ioctl = vhci_ioctl, + .open = vhci_open, + .release = vhci_release, + .fasync = vhci_fasync, }; -static struct miscdevice hci_vhci_miscdev= -{ - VHCI_MINOR, - "hci_vhci", - &hci_vhci_fops +static struct miscdevice vhci_miscdev= { + .name = "vhci", + .fops = &vhci_fops, }; -static int __init hci_vhci_init(void) +static int __init vhci_init(void) { - BT_INFO("VHCI driver ver %s", VERSION); + BT_INFO("Virtual HCI driver ver %s", VERSION); - if (misc_register(&hci_vhci_miscdev)) { - BT_ERR("Can't register misc device %d\n", VHCI_MINOR); + vhci_miscdev.minor = minor; + + if (misc_register(&vhci_miscdev) < 0) { + BT_ERR("Can't register misc device with minor %d", minor); return -EIO; } return 0; } -static void hci_vhci_cleanup(void) +static void __exit vhci_exit(void) { - misc_deregister(&hci_vhci_miscdev); + if (misc_deregister(&vhci_miscdev) < 0) + BT_ERR("Can't unregister misc device with minor %d", minor); } -module_init(hci_vhci_init); -module_exit(hci_vhci_cleanup); +module_init(vhci_init); +module_exit(vhci_exit); -MODULE_AUTHOR("Maxim Krasnyansky "); -MODULE_DESCRIPTION("Bluetooth VHCI driver ver " VERSION); -MODULE_LICENSE("GPL"); +module_param(minor, int, 0444); +MODULE_PARM_DESC(minor, "Miscellaneous minor device number"); + +MODULE_AUTHOR("Maxim Krasnyansky , Marcel Holtmann "); +MODULE_DESCRIPTION("Bluetooth virtual HCI driver ver " VERSION); +MODULE_VERSION(VERSION); +MODULE_LICENSE("GPL"); diff --git a/drivers/bluetooth/hci_vhci.h b/drivers/bluetooth/hci_vhci.h deleted file mode 100644 index 53b11f9ef76d..000000000000 --- a/drivers/bluetooth/hci_vhci.h +++ /dev/null @@ -1,50 +0,0 @@ -/* - BlueZ - Bluetooth protocol stack for Linux - Copyright (C) 2000-2001 Qualcomm Incorporated - - Written 2000,2001 by Maxim Krasnyansky - - This program is free software; you can 
redistribute it and/or modify - it under the terms of the GNU General Public License version 2 as - published by the Free Software Foundation; - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT OF THIRD PARTY RIGHTS. - IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) AND AUTHOR(S) BE LIABLE FOR ANY - CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES - WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN - ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF - OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. - - ALL LIABILITY, INCLUDING LIABILITY FOR INFRINGEMENT OF ANY PATENTS, - COPYRIGHTS, TRADEMARKS OR OTHER RIGHTS, RELATING TO USE OF THIS - SOFTWARE IS DISCLAIMED. -*/ - -/* - * $Id: hci_vhci.h,v 1.1.1.1 2002/03/08 21:03:15 maxk Exp $ - */ - -#ifndef __HCI_VHCI_H -#define __HCI_VHCI_H - -#ifdef __KERNEL__ - -struct hci_vhci_struct { - struct hci_dev *hdev; - __u32 flags; - wait_queue_head_t read_wait; - struct sk_buff_head readq; - struct fasync_struct *fasync; -}; - -/* VHCI device flags */ -#define VHCI_FASYNC 0x0010 - -#endif /* __KERNEL__ */ - -#define VHCI_DEV "/dev/vhci" -#define VHCI_MINOR 250 - -#endif /* __HCI_VHCI_H */ From 45bb4bf08b9c16122af84d3f26a018c8022b24e5 Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Tue, 9 Aug 2005 20:27:49 -0700 Subject: [PATCH 359/584] [Bluetooth]: Workaround for inquiry results with RSSI and page scan mode This patch implements a workaround for buggy Bluetooth 1.2 devices from Silicon Wave. Their inquiry results with RSSI contain the page scan mode field. This field was removed in the final Bluetooth 1.2 specification. Signed-off-by: Marcel Holtmann Signed-off-by: David S. 
Miller --- include/net/bluetooth/hci.h | 9 ++++++ net/bluetooth/hci_event.c | 55 ++++++++++++++++++++++++++++--------- 2 files changed, 51 insertions(+), 13 deletions(-) diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h index 6f0706f4af68..cd075f197983 100644 --- a/include/net/bluetooth/hci.h +++ b/include/net/bluetooth/hci.h @@ -453,6 +453,15 @@ struct inquiry_info_with_rssi { __u16 clock_offset; __s8 rssi; } __attribute__ ((packed)); +struct inquiry_info_with_rssi_and_pscan_mode { + bdaddr_t bdaddr; + __u8 pscan_rep_mode; + __u8 pscan_period_mode; + __u8 pscan_mode; + __u8 dev_class[3]; + __u16 clock_offset; + __s8 rssi; +} __attribute__ ((packed)); #define HCI_EV_CONN_COMPLETE 0x03 struct hci_ev_conn_complete { diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index 46367bd129c3..632f7a9c2bcb 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -484,14 +484,18 @@ static inline void hci_inquiry_complete_evt(struct hci_dev *hdev, struct sk_buff /* Inquiry Result */ static inline void hci_inquiry_result_evt(struct hci_dev *hdev, struct sk_buff *skb) { + struct inquiry_data data; struct inquiry_info *info = (struct inquiry_info *) (skb->data + 1); int num_rsp = *((__u8 *) skb->data); BT_DBG("%s num_rsp %d", hdev->name, num_rsp); + if (!num_rsp) + return; + hci_dev_lock(hdev); + for (; num_rsp; num_rsp--) { - struct inquiry_data data; bacpy(&data.bdaddr, &info->bdaddr); data.pscan_rep_mode = info->pscan_rep_mode; data.pscan_period_mode = info->pscan_period_mode; @@ -502,30 +506,55 @@ static inline void hci_inquiry_result_evt(struct hci_dev *hdev, struct sk_buff * info++; hci_inquiry_cache_update(hdev, &data); } + hci_dev_unlock(hdev); } /* Inquiry Result With RSSI */ static inline void hci_inquiry_result_with_rssi_evt(struct hci_dev *hdev, struct sk_buff *skb) { - struct inquiry_info_with_rssi *info = (struct inquiry_info_with_rssi *) (skb->data + 1); + struct inquiry_data data; int num_rsp = *((__u8 *) 
skb->data); BT_DBG("%s num_rsp %d", hdev->name, num_rsp); + if (!num_rsp) + return; + hci_dev_lock(hdev); - for (; num_rsp; num_rsp--) { - struct inquiry_data data; - bacpy(&data.bdaddr, &info->bdaddr); - data.pscan_rep_mode = info->pscan_rep_mode; - data.pscan_period_mode = info->pscan_period_mode; - data.pscan_mode = 0x00; - memcpy(data.dev_class, info->dev_class, 3); - data.clock_offset = info->clock_offset; - data.rssi = info->rssi; - info++; - hci_inquiry_cache_update(hdev, &data); + + if ((skb->len - 1) / num_rsp != sizeof(struct inquiry_info_with_rssi)) { + struct inquiry_info_with_rssi_and_pscan_mode *info = + (struct inquiry_info_with_rssi_and_pscan_mode *) (skb->data + 1); + + for (; num_rsp; num_rsp--) { + bacpy(&data.bdaddr, &info->bdaddr); + data.pscan_rep_mode = info->pscan_rep_mode; + data.pscan_period_mode = info->pscan_period_mode; + data.pscan_mode = info->pscan_mode; + memcpy(data.dev_class, info->dev_class, 3); + data.clock_offset = info->clock_offset; + data.rssi = info->rssi; + info++; + hci_inquiry_cache_update(hdev, &data); + } + } else { + struct inquiry_info_with_rssi *info = + (struct inquiry_info_with_rssi *) (skb->data + 1); + + for (; num_rsp; num_rsp--) { + bacpy(&data.bdaddr, &info->bdaddr); + data.pscan_rep_mode = info->pscan_rep_mode; + data.pscan_period_mode = info->pscan_period_mode; + data.pscan_mode = 0x00; + memcpy(data.dev_class, info->dev_class, 3); + data.clock_offset = info->clock_offset; + data.rssi = info->rssi; + info++; + hci_inquiry_cache_update(hdev, &data); + } } + hci_dev_unlock(hdev); } From 85a1e930bf628700e8e9c166b1f5c1c26d3651cc Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Tue, 9 Aug 2005 20:28:02 -0700 Subject: [PATCH 360/584] [Bluetooth]: Track page scan repetition mode changes The HCI page scan repetition mode change event contains the actual page scan repetition mode for the remote device. 
It is the same value that is received from an inquiry response and it can be used to make further reconnections faster. Signed-off-by: Marcel Holtmann Signed-off-by: David S. Miller --- include/net/bluetooth/hci.h | 6 ++++++ net/bluetooth/hci_event.c | 22 ++++++++++++++++++++++ 2 files changed, 28 insertions(+) diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h index cd075f197983..371e7d3f2e6f 100644 --- a/include/net/bluetooth/hci.h +++ b/include/net/bluetooth/hci.h @@ -593,6 +593,12 @@ struct hci_ev_clock_offset { __u16 clock_offset; } __attribute__ ((packed)); +#define HCI_EV_PSCAN_REP_MODE 0x20 +struct hci_ev_pscan_rep_mode { + bdaddr_t bdaddr; + __u8 pscan_rep_mode; +} __attribute__ ((packed)); + /* Internal events generated by Bluetooth stack */ #define HCI_EV_STACK_INTERNAL 0xFD struct hci_ev_stack_internal { diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index 632f7a9c2bcb..a004284c4d98 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -894,6 +894,24 @@ static inline void hci_clock_offset_evt(struct hci_dev *hdev, struct sk_buff *sk hci_dev_unlock(hdev); } +/* Page Scan Repetition Mode */ +static inline void hci_pscan_rep_mode_evt(struct hci_dev *hdev, struct sk_buff *skb) +{ + struct hci_ev_pscan_rep_mode *ev = (struct hci_ev_pscan_rep_mode *) skb->data; + struct inquiry_entry *ie; + + BT_DBG("%s", hdev->name); + + hci_dev_lock(hdev); + + if ((ie = hci_inquiry_cache_lookup(hdev, &ev->bdaddr))) { + ie->data.pscan_rep_mode = ev->pscan_rep_mode; + ie->timestamp = jiffies; + } + + hci_dev_unlock(hdev); +} + void hci_event_packet(struct hci_dev *hdev, struct sk_buff *skb) { struct hci_event_hdr *hdr = (struct hci_event_hdr *) skb->data; @@ -966,6 +984,10 @@ void hci_event_packet(struct hci_dev *hdev, struct sk_buff *skb) hci_clock_offset_evt(hdev, skb); break; + case HCI_EV_PSCAN_REP_MODE: + hci_pscan_rep_mode_evt(hdev, skb); + break; + case HCI_EV_CMD_STATUS: cs = (struct hci_ev_cmd_status *) 
skb->data; skb_pull(skb, sizeof(cs)); From 7b9eb9e2099f6f4acd6a36bcd7820d27c3cf5ee3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Timo=20Ter=E4s?= Date: Tue, 9 Aug 2005 20:28:21 -0700 Subject: [PATCH 361/584] [Bluetooth]: Call tty_hangup() when DCD is de-asserted MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The RFCOMM layer does not handle properly the de-assertation of CD signal. It should call tty_hangup() to work properly. Signed-off-by: Timo Teräs Signed-off-by: Marcel Holtmann Signed-off-by: David S. Miller --- net/bluetooth/rfcomm/tty.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/net/bluetooth/rfcomm/tty.c b/net/bluetooth/rfcomm/tty.c index 6304590fd36a..67d9dd6b0fac 100644 --- a/net/bluetooth/rfcomm/tty.c +++ b/net/bluetooth/rfcomm/tty.c @@ -528,9 +528,14 @@ static void rfcomm_dev_modem_status(struct rfcomm_dlc *dlc, u8 v24_sig) struct rfcomm_dev *dev = dlc->owner; if (!dev) return; - + BT_DBG("dlc %p dev %p v24_sig 0x%02x", dlc, dev, v24_sig); + if ((dev->modem_status & TIOCM_CD) && !(v24_sig & RFCOMM_V24_DV)) { + if (dev->tty && !C_CLOCAL(dev->tty)) + tty_hangup(dev->tty); + } + dev->modem_status = ((v24_sig & RFCOMM_V24_RTC) ? (TIOCM_DSR | TIOCM_DTR) : 0) | ((v24_sig & RFCOMM_V24_RTR) ? (TIOCM_RTS | TIOCM_CTS) : 0) | From 3a5e903c09aed19ca4a1bb26d87b8d6461a93818 Mon Sep 17 00:00:00 2001 From: "J. Suter" Date: Tue, 9 Aug 2005 20:28:46 -0700 Subject: [PATCH 362/584] [Bluetooth]: Implement RFCOMM remote port negotiation This patch implements the remote port negotiation (RPN) of the RFCOMM protocol for Bluetooth. Signed-off-by: J. Suter Signed-off-by: Marcel Holtmann Signed-off-by: David S. 
Miller --- include/net/bluetooth/rfcomm.h | 12 ++- net/bluetooth/rfcomm/core.c | 71 +++++++------ net/bluetooth/rfcomm/tty.c | 189 +++++++++++++++++++++++++++------ 3 files changed, 201 insertions(+), 71 deletions(-) diff --git a/include/net/bluetooth/rfcomm.h b/include/net/bluetooth/rfcomm.h index 13669bad00b3..3768823d5fd3 100644 --- a/include/net/bluetooth/rfcomm.h +++ b/include/net/bluetooth/rfcomm.h @@ -80,9 +80,9 @@ #define RFCOMM_RPN_STOP_15 1 #define RFCOMM_RPN_PARITY_NONE 0x0 -#define RFCOMM_RPN_PARITY_ODD 0x4 -#define RFCOMM_RPN_PARITY_EVEN 0x5 -#define RFCOMM_RPN_PARITY_MARK 0x6 +#define RFCOMM_RPN_PARITY_ODD 0x1 +#define RFCOMM_RPN_PARITY_EVEN 0x3 +#define RFCOMM_RPN_PARITY_MARK 0x5 #define RFCOMM_RPN_PARITY_SPACE 0x7 #define RFCOMM_RPN_FLOW_NONE 0x00 @@ -223,6 +223,12 @@ struct rfcomm_dlc { #define RFCOMM_CFC_DISABLED 0 #define RFCOMM_CFC_ENABLED RFCOMM_MAX_CREDITS +/* ---- RFCOMM SEND RPN ---- */ +int rfcomm_send_rpn(struct rfcomm_session *s, int cr, u8 dlci, + u8 bit_rate, u8 data_bits, u8 stop_bits, + u8 parity, u8 flow_ctrl_settings, + u8 xon_char, u8 xoff_char, u16 param_mask); + /* ---- RFCOMM DLCs (channels) ---- */ struct rfcomm_dlc *rfcomm_dlc_alloc(int prio); void rfcomm_dlc_free(struct rfcomm_dlc *d); diff --git a/net/bluetooth/rfcomm/core.c b/net/bluetooth/rfcomm/core.c index 27bf5047cd33..52022cc8a2da 100644 --- a/net/bluetooth/rfcomm/core.c +++ b/net/bluetooth/rfcomm/core.c @@ -21,10 +21,6 @@ SOFTWARE IS DISCLAIMED. */ -/* - RPN support - Dirk Husemann -*/ - /* * Bluetooth RFCOMM core. 
* @@ -115,10 +111,10 @@ static void rfcomm_session_del(struct rfcomm_session *s); #define __get_mcc_len(b) ((b & 0xfe) >> 1) /* RPN macros */ -#define __rpn_line_settings(data, stop, parity) ((data & 0x3) | ((stop & 0x1) << 2) | ((parity & 0x3) << 3)) +#define __rpn_line_settings(data, stop, parity) ((data & 0x3) | ((stop & 0x1) << 2) | ((parity & 0x7) << 3)) #define __get_rpn_data_bits(line) ((line) & 0x3) #define __get_rpn_stop_bits(line) (((line) >> 2) & 0x1) -#define __get_rpn_parity(line) (((line) >> 3) & 0x3) +#define __get_rpn_parity(line) (((line) >> 3) & 0x7) static inline void rfcomm_schedule(uint event) { @@ -780,10 +776,10 @@ static int rfcomm_send_pn(struct rfcomm_session *s, int cr, struct rfcomm_dlc *d return rfcomm_send_frame(s, buf, ptr - buf); } -static int rfcomm_send_rpn(struct rfcomm_session *s, int cr, u8 dlci, - u8 bit_rate, u8 data_bits, u8 stop_bits, - u8 parity, u8 flow_ctrl_settings, - u8 xon_char, u8 xoff_char, u16 param_mask) +int rfcomm_send_rpn(struct rfcomm_session *s, int cr, u8 dlci, + u8 bit_rate, u8 data_bits, u8 stop_bits, + u8 parity, u8 flow_ctrl_settings, + u8 xon_char, u8 xoff_char, u16 param_mask) { struct rfcomm_hdr *hdr; struct rfcomm_mcc *mcc; @@ -791,9 +787,9 @@ static int rfcomm_send_rpn(struct rfcomm_session *s, int cr, u8 dlci, u8 buf[16], *ptr = buf; BT_DBG("%p cr %d dlci %d bit_r 0x%x data_b 0x%x stop_b 0x%x parity 0x%x" - "flwc_s 0x%x xon_c 0x%x xoff_c 0x%x p_mask 0x%x", - s, cr, dlci, bit_rate, data_bits, stop_bits, parity, - flow_ctrl_settings, xon_char, xoff_char, param_mask); + " flwc_s 0x%x xon_c 0x%x xoff_c 0x%x p_mask 0x%x", + s, cr, dlci, bit_rate, data_bits, stop_bits, parity, + flow_ctrl_settings, xon_char, xoff_char, param_mask); hdr = (void *) ptr; ptr += sizeof(*hdr); hdr->addr = __addr(s->initiator, 0); @@ -1265,16 +1261,16 @@ static int rfcomm_recv_rpn(struct rfcomm_session *s, int cr, int len, struct sk_ u8 xon_char = 0; u8 xoff_char = 0; u16 rpn_mask = RFCOMM_RPN_PM_ALL; - - BT_DBG("dlci %d cr %d 
len 0x%x bitr 0x%x line 0x%x flow 0x%x xonc 0x%x xoffc 0x%x pm 0x%x", - dlci, cr, len, rpn->bit_rate, rpn->line_settings, rpn->flow_ctrl, - rpn->xon_char, rpn->xoff_char, rpn->param_mask); - - if (!cr) + + BT_DBG("dlci %d cr %d len 0x%x bitr 0x%x line 0x%x flow 0x%x xonc 0x%x xoffc 0x%x pm 0x%x", + dlci, cr, len, rpn->bit_rate, rpn->line_settings, rpn->flow_ctrl, + rpn->xon_char, rpn->xoff_char, rpn->param_mask); + + if (!cr) return 0; - + if (len == 1) { - /* request: return default setting */ + /* This is a request, return default settings */ bit_rate = RFCOMM_RPN_BR_115200; data_bits = RFCOMM_RPN_DATA_8; stop_bits = RFCOMM_RPN_STOP_1; @@ -1282,11 +1278,12 @@ static int rfcomm_recv_rpn(struct rfcomm_session *s, int cr, int len, struct sk_ flow_ctrl = RFCOMM_RPN_FLOW_NONE; xon_char = RFCOMM_RPN_XON_CHAR; xoff_char = RFCOMM_RPN_XOFF_CHAR; - goto rpn_out; } - /* check for sane values: ignore/accept bit_rate, 8 bits, 1 stop bit, no parity, - no flow control lines, normal XON/XOFF chars */ + + /* Check for sane values, ignore/accept bit_rate, 8 bits, 1 stop bit, + * no parity, no flow control lines, normal XON/XOFF chars */ + if (rpn->param_mask & RFCOMM_RPN_PM_BITRATE) { bit_rate = rpn->bit_rate; if (bit_rate != RFCOMM_RPN_BR_115200) { @@ -1295,6 +1292,7 @@ static int rfcomm_recv_rpn(struct rfcomm_session *s, int cr, int len, struct sk_ rpn_mask ^= RFCOMM_RPN_PM_BITRATE; } } + if (rpn->param_mask & RFCOMM_RPN_PM_DATA) { data_bits = __get_rpn_data_bits(rpn->line_settings); if (data_bits != RFCOMM_RPN_DATA_8) { @@ -1303,6 +1301,7 @@ static int rfcomm_recv_rpn(struct rfcomm_session *s, int cr, int len, struct sk_ rpn_mask ^= RFCOMM_RPN_PM_DATA; } } + if (rpn->param_mask & RFCOMM_RPN_PM_STOP) { stop_bits = __get_rpn_stop_bits(rpn->line_settings); if (stop_bits != RFCOMM_RPN_STOP_1) { @@ -1311,6 +1310,7 @@ static int rfcomm_recv_rpn(struct rfcomm_session *s, int cr, int len, struct sk_ rpn_mask ^= RFCOMM_RPN_PM_STOP; } } + if (rpn->param_mask & RFCOMM_RPN_PM_PARITY) { 
parity = __get_rpn_parity(rpn->line_settings); if (parity != RFCOMM_RPN_PARITY_NONE) { @@ -1319,6 +1319,7 @@ static int rfcomm_recv_rpn(struct rfcomm_session *s, int cr, int len, struct sk_ rpn_mask ^= RFCOMM_RPN_PM_PARITY; } } + if (rpn->param_mask & RFCOMM_RPN_PM_FLOW) { flow_ctrl = rpn->flow_ctrl; if (flow_ctrl != RFCOMM_RPN_FLOW_NONE) { @@ -1327,6 +1328,7 @@ static int rfcomm_recv_rpn(struct rfcomm_session *s, int cr, int len, struct sk_ rpn_mask ^= RFCOMM_RPN_PM_FLOW; } } + if (rpn->param_mask & RFCOMM_RPN_PM_XON) { xon_char = rpn->xon_char; if (xon_char != RFCOMM_RPN_XON_CHAR) { @@ -1335,6 +1337,7 @@ static int rfcomm_recv_rpn(struct rfcomm_session *s, int cr, int len, struct sk_ rpn_mask ^= RFCOMM_RPN_PM_XON; } } + if (rpn->param_mask & RFCOMM_RPN_PM_XOFF) { xoff_char = rpn->xoff_char; if (xoff_char != RFCOMM_RPN_XOFF_CHAR) { @@ -1345,9 +1348,8 @@ static int rfcomm_recv_rpn(struct rfcomm_session *s, int cr, int len, struct sk_ } rpn_out: - rfcomm_send_rpn(s, 0, dlci, - bit_rate, data_bits, stop_bits, parity, flow_ctrl, - xon_char, xoff_char, rpn_mask); + rfcomm_send_rpn(s, 0, dlci, bit_rate, data_bits, stop_bits, + parity, flow_ctrl, xon_char, xoff_char, rpn_mask); return 0; } @@ -1358,14 +1360,13 @@ static int rfcomm_recv_rls(struct rfcomm_session *s, int cr, struct sk_buff *skb u8 dlci = __get_dlci(rls->dlci); BT_DBG("dlci %d cr %d status 0x%x", dlci, cr, rls->status); - + if (!cr) return 0; - /* FIXME: We should probably do something with this - information here. But for now it's sufficient just - to reply -- Bluetooth 1.1 says it's mandatory to - recognise and respond to RLS */ + /* We should probably do something with this information here. 
But + * for now it's sufficient just to reply -- Bluetooth 1.1 says it's + * mandatory to recognise and respond to RLS */ rfcomm_send_rls(s, 0, dlci, rls->status); @@ -1381,7 +1382,7 @@ static int rfcomm_recv_msc(struct rfcomm_session *s, int cr, struct sk_buff *skb BT_DBG("dlci %d cr %d v24 0x%x", dlci, cr, msc->v24_sig); d = rfcomm_dlc_get(s, dlci); - if (!d) + if (!d) return 0; if (cr) { @@ -1389,7 +1390,7 @@ static int rfcomm_recv_msc(struct rfcomm_session *s, int cr, struct sk_buff *skb set_bit(RFCOMM_TX_THROTTLED, &d->flags); else clear_bit(RFCOMM_TX_THROTTLED, &d->flags); - + rfcomm_dlc_lock(d); if (d->modem_status) d->modem_status(d, msc->v24_sig); @@ -1398,7 +1399,7 @@ static int rfcomm_recv_msc(struct rfcomm_session *s, int cr, struct sk_buff *skb rfcomm_send_msc(s, 0, dlci, msc->v24_sig); d->mscex |= RFCOMM_MSCEX_RX; - } else + } else d->mscex |= RFCOMM_MSCEX_TX; return 0; diff --git a/net/bluetooth/rfcomm/tty.c b/net/bluetooth/rfcomm/tty.c index 67d9dd6b0fac..bbc3a44a86f0 100644 --- a/net/bluetooth/rfcomm/tty.c +++ b/net/bluetooth/rfcomm/tty.c @@ -745,20 +745,143 @@ static int rfcomm_tty_ioctl(struct tty_struct *tty, struct file *filp, unsigned return -ENOIOCTLCMD; } -#define RELEVANT_IFLAG(iflag) (iflag & (IGNBRK|BRKINT|IGNPAR|PARMRK|INPCK)) - static void rfcomm_tty_set_termios(struct tty_struct *tty, struct termios *old) { - BT_DBG("tty %p", tty); + struct termios *new = (struct termios *) tty->termios; + int old_baud_rate = tty_termios_baud_rate(old); + int new_baud_rate = tty_termios_baud_rate(new); - if ((tty->termios->c_cflag == old->c_cflag) && - (RELEVANT_IFLAG(tty->termios->c_iflag) == RELEVANT_IFLAG(old->c_iflag))) - return; + u8 baud, data_bits, stop_bits, parity, x_on, x_off; + u16 changes = 0; - /* handle turning off CRTSCTS */ - if ((old->c_cflag & CRTSCTS) && !(tty->termios->c_cflag & CRTSCTS)) { - BT_DBG("turning off CRTSCTS"); + struct rfcomm_dev *dev = (struct rfcomm_dev *) tty->driver_data; + + BT_DBG("tty %p termios %p", tty, old); + 
+ /* Handle turning off CRTSCTS */ + if ((old->c_cflag & CRTSCTS) && !(new->c_cflag & CRTSCTS)) + BT_DBG("Turning off CRTSCTS unsupported"); + + /* Parity on/off and when on, odd/even */ + if (((old->c_cflag & PARENB) != (new->c_cflag & PARENB)) || + ((old->c_cflag & PARODD) != (new->c_cflag & PARODD)) ) { + changes |= RFCOMM_RPN_PM_PARITY; + BT_DBG("Parity change detected."); } + + /* Mark and space parity are not supported! */ + if (new->c_cflag & PARENB) { + if (new->c_cflag & PARODD) { + BT_DBG("Parity is ODD"); + parity = RFCOMM_RPN_PARITY_ODD; + } else { + BT_DBG("Parity is EVEN"); + parity = RFCOMM_RPN_PARITY_EVEN; + } + } else { + BT_DBG("Parity is OFF"); + parity = RFCOMM_RPN_PARITY_NONE; + } + + /* Setting the x_on / x_off characters */ + if (old->c_cc[VSTOP] != new->c_cc[VSTOP]) { + BT_DBG("XOFF custom"); + x_on = new->c_cc[VSTOP]; + changes |= RFCOMM_RPN_PM_XON; + } else { + BT_DBG("XOFF default"); + x_on = RFCOMM_RPN_XON_CHAR; + } + + if (old->c_cc[VSTART] != new->c_cc[VSTART]) { + BT_DBG("XON custom"); + x_off = new->c_cc[VSTART]; + changes |= RFCOMM_RPN_PM_XOFF; + } else { + BT_DBG("XON default"); + x_off = RFCOMM_RPN_XOFF_CHAR; + } + + /* Handle setting of stop bits */ + if ((old->c_cflag & CSTOPB) != (new->c_cflag & CSTOPB)) + changes |= RFCOMM_RPN_PM_STOP; + + /* POSIX does not support 1.5 stop bits and RFCOMM does not + * support 2 stop bits. 
So a request for 2 stop bits gets + * translated to 1.5 stop bits */ + if (new->c_cflag & CSTOPB) { + stop_bits = RFCOMM_RPN_STOP_15; + } else { + stop_bits = RFCOMM_RPN_STOP_1; + } + + /* Handle number of data bits [5-8] */ + if ((old->c_cflag & CSIZE) != (new->c_cflag & CSIZE)) + changes |= RFCOMM_RPN_PM_DATA; + + switch (new->c_cflag & CSIZE) { + case CS5: + data_bits = RFCOMM_RPN_DATA_5; + break; + case CS6: + data_bits = RFCOMM_RPN_DATA_6; + break; + case CS7: + data_bits = RFCOMM_RPN_DATA_7; + break; + case CS8: + data_bits = RFCOMM_RPN_DATA_8; + break; + default: + data_bits = RFCOMM_RPN_DATA_8; + break; + } + + /* Handle baudrate settings */ + if (old_baud_rate != new_baud_rate) + changes |= RFCOMM_RPN_PM_BITRATE; + + switch (new_baud_rate) { + case 2400: + baud = RFCOMM_RPN_BR_2400; + break; + case 4800: + baud = RFCOMM_RPN_BR_4800; + break; + case 7200: + baud = RFCOMM_RPN_BR_7200; + break; + case 9600: + baud = RFCOMM_RPN_BR_9600; + break; + case 19200: + baud = RFCOMM_RPN_BR_19200; + break; + case 38400: + baud = RFCOMM_RPN_BR_38400; + break; + case 57600: + baud = RFCOMM_RPN_BR_57600; + break; + case 115200: + baud = RFCOMM_RPN_BR_115200; + break; + case 230400: + baud = RFCOMM_RPN_BR_230400; + break; + default: + /* 9600 is standard accordinag to the RFCOMM specification */ + baud = RFCOMM_RPN_BR_9600; + break; + + } + + if (changes) + rfcomm_send_rpn(dev->dlc->session, 1, dev->dlc->dlci, baud, + data_bits, stop_bits, parity, + RFCOMM_RPN_FLOW_NONE, x_on, x_off, changes); + + return; } static void rfcomm_tty_throttle(struct tty_struct *tty) @@ -766,7 +889,7 @@ static void rfcomm_tty_throttle(struct tty_struct *tty) struct rfcomm_dev *dev = (struct rfcomm_dev *) tty->driver_data; BT_DBG("tty %p dev %p", tty, dev); - + rfcomm_dlc_throttle(dev->dlc); } @@ -775,7 +898,7 @@ static void rfcomm_tty_unthrottle(struct tty_struct *tty) struct rfcomm_dev *dev = (struct rfcomm_dev *) tty->driver_data; BT_DBG("tty %p dev %p", tty, dev); - + 
rfcomm_dlc_unthrottle(dev->dlc); } @@ -846,35 +969,35 @@ static int rfcomm_tty_tiocmget(struct tty_struct *tty, struct file *filp) static int rfcomm_tty_tiocmset(struct tty_struct *tty, struct file *filp, unsigned int set, unsigned int clear) { - struct rfcomm_dev *dev = (struct rfcomm_dev *) tty->driver_data; - struct rfcomm_dlc *dlc = dev->dlc; - u8 v24_sig; + struct rfcomm_dev *dev = (struct rfcomm_dev *) tty->driver_data; + struct rfcomm_dlc *dlc = dev->dlc; + u8 v24_sig; BT_DBG("tty %p dev %p set 0x%02x clear 0x%02x", tty, dev, set, clear); - rfcomm_dlc_get_modem_status(dlc, &v24_sig); + rfcomm_dlc_get_modem_status(dlc, &v24_sig); - if (set & TIOCM_DSR || set & TIOCM_DTR) - v24_sig |= RFCOMM_V24_RTC; - if (set & TIOCM_RTS || set & TIOCM_CTS) - v24_sig |= RFCOMM_V24_RTR; - if (set & TIOCM_RI) - v24_sig |= RFCOMM_V24_IC; - if (set & TIOCM_CD) - v24_sig |= RFCOMM_V24_DV; + if (set & TIOCM_DSR || set & TIOCM_DTR) + v24_sig |= RFCOMM_V24_RTC; + if (set & TIOCM_RTS || set & TIOCM_CTS) + v24_sig |= RFCOMM_V24_RTR; + if (set & TIOCM_RI) + v24_sig |= RFCOMM_V24_IC; + if (set & TIOCM_CD) + v24_sig |= RFCOMM_V24_DV; - if (clear & TIOCM_DSR || clear & TIOCM_DTR) - v24_sig &= ~RFCOMM_V24_RTC; - if (clear & TIOCM_RTS || clear & TIOCM_CTS) - v24_sig &= ~RFCOMM_V24_RTR; - if (clear & TIOCM_RI) - v24_sig &= ~RFCOMM_V24_IC; - if (clear & TIOCM_CD) - v24_sig &= ~RFCOMM_V24_DV; + if (clear & TIOCM_DSR || clear & TIOCM_DTR) + v24_sig &= ~RFCOMM_V24_RTC; + if (clear & TIOCM_RTS || clear & TIOCM_CTS) + v24_sig &= ~RFCOMM_V24_RTR; + if (clear & TIOCM_RI) + v24_sig &= ~RFCOMM_V24_IC; + if (clear & TIOCM_CD) + v24_sig &= ~RFCOMM_V24_DV; - rfcomm_dlc_set_modem_status(dlc, v24_sig); + rfcomm_dlc_set_modem_status(dlc, v24_sig); - return 0; + return 0; } /* ---- TTY structure ---- */ From 2eb25a6c34504254760e67172f7518d6bfdd7676 Mon Sep 17 00:00:00 2001 From: Victor Fusco Date: Tue, 9 Aug 2005 20:29:11 -0700 Subject: [PATCH 363/584] [Bluetooth]: Fix sparse warnings (__nocast type) This 
patch fixes the sparse warnings "implicit cast to nocast type" for the priority or gfp_mask parameters of the memory allocations. Signed-off-by: Victor Fusco Signed-off-by: Domen Puncer Signed-off-by: Marcel Holtmann Signed-off-by: David S. Miller --- drivers/bluetooth/bpa10x.c | 3 ++- drivers/bluetooth/hci_usb.c | 2 +- include/net/bluetooth/bluetooth.h | 2 +- include/net/bluetooth/rfcomm.h | 2 +- net/bluetooth/l2cap.c | 2 +- net/bluetooth/rfcomm/core.c | 2 +- net/bluetooth/rfcomm/sock.c | 2 +- net/bluetooth/rfcomm/tty.c | 2 +- net/bluetooth/sco.c | 2 +- 9 files changed, 10 insertions(+), 9 deletions(-) diff --git a/drivers/bluetooth/bpa10x.c b/drivers/bluetooth/bpa10x.c index f696da6f417b..d6b81c1535dd 100644 --- a/drivers/bluetooth/bpa10x.c +++ b/drivers/bluetooth/bpa10x.c @@ -307,7 +307,8 @@ unlock: read_unlock(&data->lock); } -static inline struct urb *bpa10x_alloc_urb(struct usb_device *udev, unsigned int pipe, size_t size, int flags, void *data) +static inline struct urb *bpa10x_alloc_urb(struct usb_device *udev, unsigned int pipe, + size_t size, unsigned int __nocast flags, void *data) { struct urb *urb; struct usb_ctrlrequest *cr; diff --git a/drivers/bluetooth/hci_usb.c b/drivers/bluetooth/hci_usb.c index 657719b8254f..65740346de36 100644 --- a/drivers/bluetooth/hci_usb.c +++ b/drivers/bluetooth/hci_usb.c @@ -127,7 +127,7 @@ static struct usb_device_id blacklist_ids[] = { { } /* Terminating entry */ }; -static struct _urb *_urb_alloc(int isoc, int gfp) +static struct _urb *_urb_alloc(int isoc, unsigned int __nocast gfp) { struct _urb *_urb = kmalloc(sizeof(struct _urb) + sizeof(struct usb_iso_packet_descriptor) * isoc, gfp); diff --git a/include/net/bluetooth/bluetooth.h b/include/net/bluetooth/bluetooth.h index 06b24f637026..5309b6fa6690 100644 --- a/include/net/bluetooth/bluetooth.h +++ b/include/net/bluetooth/bluetooth.h @@ -135,7 +135,7 @@ struct bt_skb_cb { }; #define bt_cb(skb) ((struct bt_skb_cb *)(skb->cb)) -static inline struct sk_buff 
*bt_skb_alloc(unsigned int len, int how) +static inline struct sk_buff *bt_skb_alloc(unsigned int len, unsigned int __nocast how) { struct sk_buff *skb; diff --git a/include/net/bluetooth/rfcomm.h b/include/net/bluetooth/rfcomm.h index 3768823d5fd3..ffea9d54071f 100644 --- a/include/net/bluetooth/rfcomm.h +++ b/include/net/bluetooth/rfcomm.h @@ -230,7 +230,7 @@ int rfcomm_send_rpn(struct rfcomm_session *s, int cr, u8 dlci, u8 xon_char, u8 xoff_char, u16 param_mask); /* ---- RFCOMM DLCs (channels) ---- */ -struct rfcomm_dlc *rfcomm_dlc_alloc(int prio); +struct rfcomm_dlc *rfcomm_dlc_alloc(unsigned int __nocast prio); void rfcomm_dlc_free(struct rfcomm_dlc *d); int rfcomm_dlc_open(struct rfcomm_dlc *d, bdaddr_t *src, bdaddr_t *dst, u8 channel); int rfcomm_dlc_close(struct rfcomm_dlc *d, int reason); diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c index 32fccfb5bfa5..d3d6bc547212 100644 --- a/net/bluetooth/l2cap.c +++ b/net/bluetooth/l2cap.c @@ -372,7 +372,7 @@ static struct proto l2cap_proto = { .obj_size = sizeof(struct l2cap_pinfo) }; -static struct sock *l2cap_sock_alloc(struct socket *sock, int proto, int prio) +static struct sock *l2cap_sock_alloc(struct socket *sock, int proto, unsigned int __nocast prio) { struct sock *sk; diff --git a/net/bluetooth/rfcomm/core.c b/net/bluetooth/rfcomm/core.c index 52022cc8a2da..173f46e8cdae 100644 --- a/net/bluetooth/rfcomm/core.c +++ b/net/bluetooth/rfcomm/core.c @@ -229,7 +229,7 @@ static void rfcomm_dlc_clear_state(struct rfcomm_dlc *d) d->rx_credits = RFCOMM_DEFAULT_CREDITS; } -struct rfcomm_dlc *rfcomm_dlc_alloc(int prio) +struct rfcomm_dlc *rfcomm_dlc_alloc(unsigned int __nocast prio) { struct rfcomm_dlc *d = kmalloc(sizeof(*d), prio); if (!d) diff --git a/net/bluetooth/rfcomm/sock.c b/net/bluetooth/rfcomm/sock.c index 63a123c5c41b..90e19eb6d3cc 100644 --- a/net/bluetooth/rfcomm/sock.c +++ b/net/bluetooth/rfcomm/sock.c @@ -284,7 +284,7 @@ static struct proto rfcomm_proto = { .obj_size = sizeof(struct 
rfcomm_pinfo) }; -static struct sock *rfcomm_sock_alloc(struct socket *sock, int proto, int prio) +static struct sock *rfcomm_sock_alloc(struct socket *sock, int proto, unsigned int __nocast prio) { struct rfcomm_dlc *d; struct sock *sk; diff --git a/net/bluetooth/rfcomm/tty.c b/net/bluetooth/rfcomm/tty.c index bbc3a44a86f0..1bca860a6109 100644 --- a/net/bluetooth/rfcomm/tty.c +++ b/net/bluetooth/rfcomm/tty.c @@ -286,7 +286,7 @@ static inline void rfcomm_set_owner_w(struct sk_buff *skb, struct rfcomm_dev *de skb->destructor = rfcomm_wfree; } -static struct sk_buff *rfcomm_wmalloc(struct rfcomm_dev *dev, unsigned long size, int priority) +static struct sk_buff *rfcomm_wmalloc(struct rfcomm_dev *dev, unsigned long size, unsigned int __nocast priority) { if (atomic_read(&dev->wmem_alloc) < rfcomm_room(dev->dlc)) { struct sk_buff *skb = alloc_skb(size, priority); diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c index 746c11fc017e..ce7ab7dfa0b2 100644 --- a/net/bluetooth/sco.c +++ b/net/bluetooth/sco.c @@ -418,7 +418,7 @@ static struct proto sco_proto = { .obj_size = sizeof(struct sco_pinfo) }; -static struct sock *sco_sock_alloc(struct socket *sock, int proto, int prio) +static struct sock *sco_sock_alloc(struct socket *sock, int proto, unsigned int __nocast prio) { struct sock *sk; From 0d48d93947dd9ea21c5cdc76a8581b06a4a39281 Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Tue, 9 Aug 2005 20:30:28 -0700 Subject: [PATCH 364/584] [Bluetooth]: Move packet type into the SKB control buffer This patch moves the usage of packet type into the SKB control buffer. After this patch it is now possible to shrink the sk_buff structure and redefine its pkt_type. Signed-off-by: Marcel Holtmann Signed-off-by: David S. 
Miller --- drivers/bluetooth/bfusb.c | 8 ++++---- drivers/bluetooth/bluecard_cs.c | 24 ++++++++++++------------ drivers/bluetooth/bpa10x.c | 14 +++++++------- drivers/bluetooth/bt3c_cs.c | 12 ++++++------ drivers/bluetooth/btuart_cs.c | 10 +++++----- drivers/bluetooth/dtl1_cs.c | 10 +++++----- drivers/bluetooth/hci_bcsp.c | 18 +++++++++--------- drivers/bluetooth/hci_h4.c | 4 ++-- drivers/bluetooth/hci_ldisc.c | 4 ++-- drivers/bluetooth/hci_usb.c | 21 +++++++++++---------- drivers/bluetooth/hci_vhci.c | 24 ++++++++++++------------ include/net/bluetooth/bluetooth.h | 3 ++- net/bluetooth/hci_core.c | 16 ++++++++-------- net/bluetooth/hci_event.c | 2 +- net/bluetooth/hci_sock.c | 18 ++++++++++-------- 15 files changed, 96 insertions(+), 92 deletions(-) diff --git a/drivers/bluetooth/bfusb.c b/drivers/bluetooth/bfusb.c index e8d2a340356d..1e9db0156ea7 100644 --- a/drivers/bluetooth/bfusb.c +++ b/drivers/bluetooth/bfusb.c @@ -330,7 +330,7 @@ static inline int bfusb_recv_block(struct bfusb *bfusb, int hdr, unsigned char * } skb->dev = (void *) bfusb->hdev; - skb->pkt_type = pkt_type; + bt_cb(skb)->pkt_type = pkt_type; bfusb->reassembly = skb; } else { @@ -485,7 +485,7 @@ static int bfusb_send_frame(struct sk_buff *skb) unsigned char buf[3]; int sent = 0, size, count; - BT_DBG("hdev %p skb %p type %d len %d", hdev, skb, skb->pkt_type, skb->len); + BT_DBG("hdev %p skb %p type %d len %d", hdev, skb, bt_cb(skb)->pkt_type, skb->len); if (!hdev) { BT_ERR("Frame for unknown HCI device (hdev=NULL)"); @@ -497,7 +497,7 @@ static int bfusb_send_frame(struct sk_buff *skb) bfusb = (struct bfusb *) hdev->driver_data; - switch (skb->pkt_type) { + switch (bt_cb(skb)->pkt_type) { case HCI_COMMAND_PKT: hdev->stat.cmd_tx++; break; @@ -510,7 +510,7 @@ static int bfusb_send_frame(struct sk_buff *skb) }; /* Prepend skb with frame type */ - memcpy(skb_push(skb, 1), &(skb->pkt_type), 1); + memcpy(skb_push(skb, 1), &bt_cb(skb)->pkt_type, 1); count = skb->len; diff --git 
a/drivers/bluetooth/bluecard_cs.c b/drivers/bluetooth/bluecard_cs.c index bd2ec7e284cc..26fe9c0e1d20 100644 --- a/drivers/bluetooth/bluecard_cs.c +++ b/drivers/bluetooth/bluecard_cs.c @@ -270,7 +270,7 @@ static void bluecard_write_wakeup(bluecard_info_t *info) if (!(skb = skb_dequeue(&(info->txq)))) break; - if (skb->pkt_type & 0x80) { + if (bt_cb(skb)->pkt_type & 0x80) { /* Disable RTS */ info->ctrl_reg |= REG_CONTROL_RTS; outb(info->ctrl_reg, iobase + REG_CONTROL); @@ -288,13 +288,13 @@ static void bluecard_write_wakeup(bluecard_info_t *info) /* Mark the buffer as dirty */ clear_bit(ready_bit, &(info->tx_state)); - if (skb->pkt_type & 0x80) { + if (bt_cb(skb)->pkt_type & 0x80) { DECLARE_WAIT_QUEUE_HEAD(wq); DEFINE_WAIT(wait); unsigned char baud_reg; - switch (skb->pkt_type) { + switch (bt_cb(skb)->pkt_type) { case PKT_BAUD_RATE_460800: baud_reg = REG_CONTROL_BAUD_RATE_460800; break; @@ -410,9 +410,9 @@ static void bluecard_receive(bluecard_info_t *info, unsigned int offset) if (info->rx_state == RECV_WAIT_PACKET_TYPE) { info->rx_skb->dev = (void *) info->hdev; - info->rx_skb->pkt_type = buf[i]; + bt_cb(info->rx_skb)->pkt_type = buf[i]; - switch (info->rx_skb->pkt_type) { + switch (bt_cb(info->rx_skb)->pkt_type) { case 0x00: /* init packet */ @@ -444,7 +444,7 @@ static void bluecard_receive(bluecard_info_t *info, unsigned int offset) default: /* unknown packet */ - BT_ERR("Unknown HCI packet with type 0x%02x received", info->rx_skb->pkt_type); + BT_ERR("Unknown HCI packet with type 0x%02x received", bt_cb(info->rx_skb)->pkt_type); info->hdev->stat.err_rx++; kfree_skb(info->rx_skb); @@ -586,21 +586,21 @@ static int bluecard_hci_set_baud_rate(struct hci_dev *hdev, int baud) switch (baud) { case 460800: cmd[4] = 0x00; - skb->pkt_type = PKT_BAUD_RATE_460800; + bt_cb(skb)->pkt_type = PKT_BAUD_RATE_460800; break; case 230400: cmd[4] = 0x01; - skb->pkt_type = PKT_BAUD_RATE_230400; + bt_cb(skb)->pkt_type = PKT_BAUD_RATE_230400; break; case 115200: cmd[4] = 0x02; - 
skb->pkt_type = PKT_BAUD_RATE_115200; + bt_cb(skb)->pkt_type = PKT_BAUD_RATE_115200; break; case 57600: /* Fall through... */ default: cmd[4] = 0x03; - skb->pkt_type = PKT_BAUD_RATE_57600; + bt_cb(skb)->pkt_type = PKT_BAUD_RATE_57600; break; } @@ -680,7 +680,7 @@ static int bluecard_hci_send_frame(struct sk_buff *skb) info = (bluecard_info_t *)(hdev->driver_data); - switch (skb->pkt_type) { + switch (bt_cb(skb)->pkt_type) { case HCI_COMMAND_PKT: hdev->stat.cmd_tx++; break; @@ -693,7 +693,7 @@ static int bluecard_hci_send_frame(struct sk_buff *skb) }; /* Prepend skb with frame type */ - memcpy(skb_push(skb, 1), &(skb->pkt_type), 1); + memcpy(skb_push(skb, 1), &bt_cb(skb)->pkt_type, 1); skb_queue_tail(&(info->txq), skb); bluecard_write_wakeup(info); diff --git a/drivers/bluetooth/bpa10x.c b/drivers/bluetooth/bpa10x.c index d6b81c1535dd..a1bf8f066c88 100644 --- a/drivers/bluetooth/bpa10x.c +++ b/drivers/bluetooth/bpa10x.c @@ -105,7 +105,7 @@ static void bpa10x_recv_bulk(struct bpa10x_data *data, unsigned char *buf, int c if (skb) { memcpy(skb_put(skb, len), buf, len); skb->dev = (void *) data->hdev; - skb->pkt_type = HCI_ACLDATA_PKT; + bt_cb(skb)->pkt_type = HCI_ACLDATA_PKT; hci_recv_frame(skb); } break; @@ -117,7 +117,7 @@ static void bpa10x_recv_bulk(struct bpa10x_data *data, unsigned char *buf, int c if (skb) { memcpy(skb_put(skb, len), buf, len); skb->dev = (void *) data->hdev; - skb->pkt_type = HCI_SCODATA_PKT; + bt_cb(skb)->pkt_type = HCI_SCODATA_PKT; hci_recv_frame(skb); } break; @@ -129,7 +129,7 @@ static void bpa10x_recv_bulk(struct bpa10x_data *data, unsigned char *buf, int c if (skb) { memcpy(skb_put(skb, len), buf, len); skb->dev = (void *) data->hdev; - skb->pkt_type = HCI_VENDOR_PKT; + bt_cb(skb)->pkt_type = HCI_VENDOR_PKT; hci_recv_frame(skb); } break; @@ -190,7 +190,7 @@ static int bpa10x_recv_event(struct bpa10x_data *data, unsigned char *buf, int s } skb->dev = (void *) data->hdev; - skb->pkt_type = pkt_type; + bt_cb(skb)->pkt_type = pkt_type; 
memcpy(skb_put(skb, size), buf, size); @@ -488,7 +488,7 @@ static int bpa10x_send_frame(struct sk_buff *skb) struct hci_dev *hdev = (struct hci_dev *) skb->dev; struct bpa10x_data *data; - BT_DBG("hdev %p skb %p type %d len %d", hdev, skb, skb->pkt_type, skb->len); + BT_DBG("hdev %p skb %p type %d len %d", hdev, skb, bt_cb(skb)->pkt_type, skb->len); if (!hdev) { BT_ERR("Frame for unknown HCI device"); @@ -501,9 +501,9 @@ static int bpa10x_send_frame(struct sk_buff *skb) data = hdev->driver_data; /* Prepend skb with frame type */ - memcpy(skb_push(skb, 1), &(skb->pkt_type), 1); + memcpy(skb_push(skb, 1), &bt_cb(skb)->pkt_type, 1); - switch (skb->pkt_type) { + switch (bt_cb(skb)->pkt_type) { case HCI_COMMAND_PKT: hdev->stat.cmd_tx++; skb_queue_tail(&data->cmd_queue, skb); diff --git a/drivers/bluetooth/bt3c_cs.c b/drivers/bluetooth/bt3c_cs.c index adf1750ea58d..2e0338d80f32 100644 --- a/drivers/bluetooth/bt3c_cs.c +++ b/drivers/bluetooth/bt3c_cs.c @@ -259,11 +259,11 @@ static void bt3c_receive(bt3c_info_t *info) if (info->rx_state == RECV_WAIT_PACKET_TYPE) { info->rx_skb->dev = (void *) info->hdev; - info->rx_skb->pkt_type = inb(iobase + DATA_L); + bt_cb(info->rx_skb)->pkt_type = inb(iobase + DATA_L); inb(iobase + DATA_H); - //printk("bt3c: PACKET_TYPE=%02x\n", info->rx_skb->pkt_type); + //printk("bt3c: PACKET_TYPE=%02x\n", bt_cb(info->rx_skb)->pkt_type); - switch (info->rx_skb->pkt_type) { + switch (bt_cb(info->rx_skb)->pkt_type) { case HCI_EVENT_PKT: info->rx_state = RECV_WAIT_EVENT_HEADER; @@ -282,7 +282,7 @@ static void bt3c_receive(bt3c_info_t *info) default: /* Unknown packet */ - BT_ERR("Unknown HCI packet with type 0x%02x received", info->rx_skb->pkt_type); + BT_ERR("Unknown HCI packet with type 0x%02x received", bt_cb(info->rx_skb)->pkt_type); info->hdev->stat.err_rx++; clear_bit(HCI_RUNNING, &(info->hdev->flags)); @@ -439,7 +439,7 @@ static int bt3c_hci_send_frame(struct sk_buff *skb) info = (bt3c_info_t *) (hdev->driver_data); - switch (skb->pkt_type) { + 
switch (bt_cb(skb)->pkt_type) { case HCI_COMMAND_PKT: hdev->stat.cmd_tx++; break; @@ -452,7 +452,7 @@ static int bt3c_hci_send_frame(struct sk_buff *skb) }; /* Prepend skb with frame type */ - memcpy(skb_push(skb, 1), &(skb->pkt_type), 1); + memcpy(skb_push(skb, 1), &bt_cb(skb)->pkt_type, 1); skb_queue_tail(&(info->txq), skb); spin_lock_irqsave(&(info->lock), flags); diff --git a/drivers/bluetooth/btuart_cs.c b/drivers/bluetooth/btuart_cs.c index e4c59fdc0e12..89486ea7a021 100644 --- a/drivers/bluetooth/btuart_cs.c +++ b/drivers/bluetooth/btuart_cs.c @@ -211,9 +211,9 @@ static void btuart_receive(btuart_info_t *info) if (info->rx_state == RECV_WAIT_PACKET_TYPE) { info->rx_skb->dev = (void *) info->hdev; - info->rx_skb->pkt_type = inb(iobase + UART_RX); + bt_cb(info->rx_skb)->pkt_type = inb(iobase + UART_RX); - switch (info->rx_skb->pkt_type) { + switch (bt_cb(info->rx_skb)->pkt_type) { case HCI_EVENT_PKT: info->rx_state = RECV_WAIT_EVENT_HEADER; @@ -232,7 +232,7 @@ static void btuart_receive(btuart_info_t *info) default: /* Unknown packet */ - BT_ERR("Unknown HCI packet with type 0x%02x received", info->rx_skb->pkt_type); + BT_ERR("Unknown HCI packet with type 0x%02x received", bt_cb(info->rx_skb)->pkt_type); info->hdev->stat.err_rx++; clear_bit(HCI_RUNNING, &(info->hdev->flags)); @@ -447,7 +447,7 @@ static int btuart_hci_send_frame(struct sk_buff *skb) info = (btuart_info_t *)(hdev->driver_data); - switch (skb->pkt_type) { + switch (bt_cb(skb)->pkt_type) { case HCI_COMMAND_PKT: hdev->stat.cmd_tx++; break; @@ -460,7 +460,7 @@ static int btuart_hci_send_frame(struct sk_buff *skb) }; /* Prepend skb with frame type */ - memcpy(skb_push(skb, 1), &(skb->pkt_type), 1); + memcpy(skb_push(skb, 1), &bt_cb(skb)->pkt_type, 1); skb_queue_tail(&(info->txq), skb); btuart_write_wakeup(info); diff --git a/drivers/bluetooth/dtl1_cs.c b/drivers/bluetooth/dtl1_cs.c index e39868c3da48..84c1f8839422 100644 --- a/drivers/bluetooth/dtl1_cs.c +++ b/drivers/bluetooth/dtl1_cs.c @@ -251,7 
+251,7 @@ static void dtl1_receive(dtl1_info_t *info) info->rx_count = nsh->len + (nsh->len & 0x0001); break; case RECV_WAIT_DATA: - info->rx_skb->pkt_type = nsh->type; + bt_cb(info->rx_skb)->pkt_type = nsh->type; /* remove PAD byte if it exists */ if (nsh->len & 0x0001) { @@ -262,7 +262,7 @@ static void dtl1_receive(dtl1_info_t *info) /* remove NSH */ skb_pull(info->rx_skb, NSHL); - switch (info->rx_skb->pkt_type) { + switch (bt_cb(info->rx_skb)->pkt_type) { case 0x80: /* control data for the Nokia Card */ dtl1_control(info, info->rx_skb); @@ -272,12 +272,12 @@ static void dtl1_receive(dtl1_info_t *info) case 0x84: /* send frame to the HCI layer */ info->rx_skb->dev = (void *) info->hdev; - info->rx_skb->pkt_type &= 0x0f; + bt_cb(info->rx_skb)->pkt_type &= 0x0f; hci_recv_frame(info->rx_skb); break; default: /* unknown packet */ - BT_ERR("Unknown HCI packet with type 0x%02x received", info->rx_skb->pkt_type); + BT_ERR("Unknown HCI packet with type 0x%02x received", bt_cb(info->rx_skb)->pkt_type); kfree_skb(info->rx_skb); break; } @@ -410,7 +410,7 @@ static int dtl1_hci_send_frame(struct sk_buff *skb) info = (dtl1_info_t *)(hdev->driver_data); - switch (skb->pkt_type) { + switch (bt_cb(skb)->pkt_type) { case HCI_COMMAND_PKT: hdev->stat.cmd_tx++; nsh.type = 0x81; diff --git a/drivers/bluetooth/hci_bcsp.c b/drivers/bluetooth/hci_bcsp.c index 858fddb046de..0ee324e1265d 100644 --- a/drivers/bluetooth/hci_bcsp.c +++ b/drivers/bluetooth/hci_bcsp.c @@ -149,7 +149,7 @@ static int bcsp_enqueue(struct hci_uart *hu, struct sk_buff *skb) return 0; } - switch (skb->pkt_type) { + switch (bt_cb(skb)->pkt_type) { case HCI_ACLDATA_PKT: case HCI_COMMAND_PKT: skb_queue_tail(&bcsp->rel, skb); @@ -227,7 +227,7 @@ static struct sk_buff *bcsp_prepare_pkt(struct bcsp_struct *bcsp, u8 *data, if (!nskb) return NULL; - nskb->pkt_type = pkt_type; + bt_cb(nskb)->pkt_type = pkt_type; bcsp_slip_msgdelim(nskb); @@ -286,7 +286,7 @@ static struct sk_buff *bcsp_dequeue(struct hci_uart *hu) since they 
have priority */ if ((skb = skb_dequeue(&bcsp->unrel)) != NULL) { - struct sk_buff *nskb = bcsp_prepare_pkt(bcsp, skb->data, skb->len, skb->pkt_type); + struct sk_buff *nskb = bcsp_prepare_pkt(bcsp, skb->data, skb->len, bt_cb(skb)->pkt_type); if (nskb) { kfree_skb(skb); return nskb; @@ -303,7 +303,7 @@ static struct sk_buff *bcsp_dequeue(struct hci_uart *hu) spin_lock_irqsave(&bcsp->unack.lock, flags); if (bcsp->unack.qlen < BCSP_TXWINSIZE && (skb = skb_dequeue(&bcsp->rel)) != NULL) { - struct sk_buff *nskb = bcsp_prepare_pkt(bcsp, skb->data, skb->len, skb->pkt_type); + struct sk_buff *nskb = bcsp_prepare_pkt(bcsp, skb->data, skb->len, bt_cb(skb)->pkt_type); if (nskb) { __skb_queue_tail(&bcsp->unack, skb); mod_timer(&bcsp->tbcsp, jiffies + HZ / 4); @@ -401,7 +401,7 @@ static void bcsp_handle_le_pkt(struct hci_uart *hu) if (!nskb) return; memcpy(skb_put(nskb, 4), conf_rsp_pkt, 4); - nskb->pkt_type = BCSP_LE_PKT; + bt_cb(nskb)->pkt_type = BCSP_LE_PKT; skb_queue_head(&bcsp->unrel, nskb); hci_uart_tx_wakeup(hu); @@ -483,14 +483,14 @@ static inline void bcsp_complete_rx_pkt(struct hci_uart *hu) bcsp_pkt_cull(bcsp); if ((bcsp->rx_skb->data[1] & 0x0f) == 6 && bcsp->rx_skb->data[0] & 0x80) { - bcsp->rx_skb->pkt_type = HCI_ACLDATA_PKT; + bt_cb(bcsp->rx_skb)->pkt_type = HCI_ACLDATA_PKT; pass_up = 1; } else if ((bcsp->rx_skb->data[1] & 0x0f) == 5 && bcsp->rx_skb->data[0] & 0x80) { - bcsp->rx_skb->pkt_type = HCI_EVENT_PKT; + bt_cb(bcsp->rx_skb)->pkt_type = HCI_EVENT_PKT; pass_up = 1; } else if ((bcsp->rx_skb->data[1] & 0x0f) == 7) { - bcsp->rx_skb->pkt_type = HCI_SCODATA_PKT; + bt_cb(bcsp->rx_skb)->pkt_type = HCI_SCODATA_PKT; pass_up = 1; } else if ((bcsp->rx_skb->data[1] & 0x0f) == 1 && !(bcsp->rx_skb->data[0] & 0x80)) { @@ -512,7 +512,7 @@ static inline void bcsp_complete_rx_pkt(struct hci_uart *hu) hdr.evt = 0xff; hdr.plen = bcsp->rx_skb->len; memcpy(skb_push(bcsp->rx_skb, HCI_EVENT_HDR_SIZE), &hdr, HCI_EVENT_HDR_SIZE); - bcsp->rx_skb->pkt_type = HCI_EVENT_PKT; + 
bt_cb(bcsp->rx_skb)->pkt_type = HCI_EVENT_PKT; hci_recv_frame(bcsp->rx_skb); } else { diff --git a/drivers/bluetooth/hci_h4.c b/drivers/bluetooth/hci_h4.c index 533323b60e63..cf8a22d58d96 100644 --- a/drivers/bluetooth/hci_h4.c +++ b/drivers/bluetooth/hci_h4.c @@ -112,7 +112,7 @@ static int h4_enqueue(struct hci_uart *hu, struct sk_buff *skb) BT_DBG("hu %p skb %p", hu, skb); /* Prepend skb with frame type */ - memcpy(skb_push(skb, 1), &skb->pkt_type, 1); + memcpy(skb_push(skb, 1), &bt_cb(skb)->pkt_type, 1); skb_queue_tail(&h4->txq, skb); return 0; } @@ -239,7 +239,7 @@ static int h4_recv(struct hci_uart *hu, void *data, int count) return 0; } h4->rx_skb->dev = (void *) hu->hdev; - h4->rx_skb->pkt_type = type; + bt_cb(h4->rx_skb)->pkt_type = type; } return count; } diff --git a/drivers/bluetooth/hci_ldisc.c b/drivers/bluetooth/hci_ldisc.c index 90be2eae52e0..aed80cc22890 100644 --- a/drivers/bluetooth/hci_ldisc.c +++ b/drivers/bluetooth/hci_ldisc.c @@ -153,7 +153,7 @@ restart: break; } - hci_uart_tx_complete(hu, skb->pkt_type); + hci_uart_tx_complete(hu, bt_cb(skb)->pkt_type); kfree_skb(skb); } @@ -229,7 +229,7 @@ static int hci_uart_send_frame(struct sk_buff *skb) hu = (struct hci_uart *) hdev->driver_data; tty = hu->tty; - BT_DBG("%s: type %d len %d", hdev->name, skb->pkt_type, skb->len); + BT_DBG("%s: type %d len %d", hdev->name, bt_cb(skb)->pkt_type, skb->len); hu->proto->enqueue(hu, skb); diff --git a/drivers/bluetooth/hci_usb.c b/drivers/bluetooth/hci_usb.c index 65740346de36..67d96b5cbb96 100644 --- a/drivers/bluetooth/hci_usb.c +++ b/drivers/bluetooth/hci_usb.c @@ -443,7 +443,7 @@ static int __tx_submit(struct hci_usb *husb, struct _urb *_urb) static inline int hci_usb_send_ctrl(struct hci_usb *husb, struct sk_buff *skb) { - struct _urb *_urb = __get_completed(husb, skb->pkt_type); + struct _urb *_urb = __get_completed(husb, bt_cb(skb)->pkt_type); struct usb_ctrlrequest *dr; struct urb *urb; @@ -451,7 +451,7 @@ static inline int hci_usb_send_ctrl(struct 
hci_usb *husb, struct sk_buff *skb) _urb = _urb_alloc(0, GFP_ATOMIC); if (!_urb) return -ENOMEM; - _urb->type = skb->pkt_type; + _urb->type = bt_cb(skb)->pkt_type; dr = kmalloc(sizeof(*dr), GFP_ATOMIC); if (!dr) { @@ -479,7 +479,7 @@ static inline int hci_usb_send_ctrl(struct hci_usb *husb, struct sk_buff *skb) static inline int hci_usb_send_bulk(struct hci_usb *husb, struct sk_buff *skb) { - struct _urb *_urb = __get_completed(husb, skb->pkt_type); + struct _urb *_urb = __get_completed(husb, bt_cb(skb)->pkt_type); struct urb *urb; int pipe; @@ -487,7 +487,7 @@ static inline int hci_usb_send_bulk(struct hci_usb *husb, struct sk_buff *skb) _urb = _urb_alloc(0, GFP_ATOMIC); if (!_urb) return -ENOMEM; - _urb->type = skb->pkt_type; + _urb->type = bt_cb(skb)->pkt_type; } urb = &_urb->urb; @@ -505,14 +505,14 @@ static inline int hci_usb_send_bulk(struct hci_usb *husb, struct sk_buff *skb) #ifdef CONFIG_BT_HCIUSB_SCO static inline int hci_usb_send_isoc(struct hci_usb *husb, struct sk_buff *skb) { - struct _urb *_urb = __get_completed(husb, skb->pkt_type); + struct _urb *_urb = __get_completed(husb, bt_cb(skb)->pkt_type); struct urb *urb; if (!_urb) { _urb = _urb_alloc(HCI_MAX_ISOC_FRAMES, GFP_ATOMIC); if (!_urb) return -ENOMEM; - _urb->type = skb->pkt_type; + _urb->type = bt_cb(skb)->pkt_type; } BT_DBG("%s skb %p len %d", husb->hdev->name, skb, skb->len); @@ -601,11 +601,11 @@ static int hci_usb_send_frame(struct sk_buff *skb) if (!test_bit(HCI_RUNNING, &hdev->flags)) return -EBUSY; - BT_DBG("%s type %d len %d", hdev->name, skb->pkt_type, skb->len); + BT_DBG("%s type %d len %d", hdev->name, bt_cb(skb)->pkt_type, skb->len); husb = (struct hci_usb *) hdev->driver_data; - switch (skb->pkt_type) { + switch (bt_cb(skb)->pkt_type) { case HCI_COMMAND_PKT: hdev->stat.cmd_tx++; break; @@ -627,7 +627,7 @@ static int hci_usb_send_frame(struct sk_buff *skb) read_lock(&husb->completion_lock); - skb_queue_tail(__transmit_q(husb, skb->pkt_type), skb); + skb_queue_tail(__transmit_q(husb, 
bt_cb(skb)->pkt_type), skb); hci_usb_tx_wakeup(husb); read_unlock(&husb->completion_lock); @@ -682,7 +682,7 @@ static inline int __recv_frame(struct hci_usb *husb, int type, void *data, int c return -ENOMEM; } skb->dev = (void *) husb->hdev; - skb->pkt_type = type; + bt_cb(skb)->pkt_type = type; __reassembly(husb, type) = skb; @@ -702,6 +702,7 @@ static inline int __recv_frame(struct hci_usb *husb, int type, void *data, int c if (!scb->expect) { /* Complete frame */ __reassembly(husb, type) = NULL; + bt_cb(skb)->pkt_type = type; hci_recv_frame(skb); } diff --git a/drivers/bluetooth/hci_vhci.c b/drivers/bluetooth/hci_vhci.c index 4aa5dfff12be..52cbd45c308f 100644 --- a/drivers/bluetooth/hci_vhci.c +++ b/drivers/bluetooth/hci_vhci.c @@ -107,7 +107,7 @@ static int vhci_send_frame(struct sk_buff *skb) vhci = hdev->driver_data; - memcpy(skb_push(skb, 1), &skb->pkt_type, 1); + memcpy(skb_push(skb, 1), &bt_cb(skb)->pkt_type, 1); skb_queue_tail(&vhci->readq, skb); if (vhci->flags & VHCI_FASYNC) @@ -141,7 +141,7 @@ static inline ssize_t vhci_get_user(struct vhci_data *vhci, } skb->dev = (void *) vhci->hdev; - skb->pkt_type = *((__u8 *) skb->data); + bt_cb(skb)->pkt_type = *((__u8 *) skb->data); skb_pull(skb, 1); hci_recv_frame(skb); @@ -164,18 +164,18 @@ static inline ssize_t vhci_put_user(struct vhci_data *vhci, vhci->hdev->stat.byte_tx += len; - switch (skb->pkt_type) { - case HCI_COMMAND_PKT: - vhci->hdev->stat.cmd_tx++; - break; + switch (bt_cb(skb)->pkt_type) { + case HCI_COMMAND_PKT: + vhci->hdev->stat.cmd_tx++; + break; - case HCI_ACLDATA_PKT: - vhci->hdev->stat.acl_tx++; - break; + case HCI_ACLDATA_PKT: + vhci->hdev->stat.acl_tx++; + break; - case HCI_SCODATA_PKT: - vhci->hdev->stat.cmd_tx++; - break; + case HCI_SCODATA_PKT: + vhci->hdev->stat.cmd_tx++; + break; }; return total; diff --git a/include/net/bluetooth/bluetooth.h b/include/net/bluetooth/bluetooth.h index 5309b6fa6690..6dfa4a61ffd0 100644 --- a/include/net/bluetooth/bluetooth.h +++ 
b/include/net/bluetooth/bluetooth.h @@ -131,7 +131,8 @@ struct sock *bt_accept_dequeue(struct sock *parent, struct socket *newsock); /* Skb helpers */ struct bt_skb_cb { - int incoming; + __u8 pkt_type; + __u8 incoming; }; #define bt_cb(skb) ((struct bt_skb_cb *)(skb->cb)) diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index ffa26c10bfe8..4f9e11b533a3 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -191,7 +191,7 @@ static void hci_init_req(struct hci_dev *hdev, unsigned long opt) /* Special commands */ while ((skb = skb_dequeue(&hdev->driver_init))) { - skb->pkt_type = HCI_COMMAND_PKT; + bt_cb(skb)->pkt_type = HCI_COMMAND_PKT; skb->dev = (void *) hdev; skb_queue_tail(&hdev->cmd_q, skb); hci_sched_cmd(hdev); @@ -995,7 +995,7 @@ static int hci_send_frame(struct sk_buff *skb) return -ENODEV; } - BT_DBG("%s type %d len %d", hdev->name, skb->pkt_type, skb->len); + BT_DBG("%s type %d len %d", hdev->name, bt_cb(skb)->pkt_type, skb->len); if (atomic_read(&hdev->promisc)) { /* Time stamp */ @@ -1034,7 +1034,7 @@ int hci_send_cmd(struct hci_dev *hdev, __u16 ogf, __u16 ocf, __u32 plen, void *p BT_DBG("skb len %d", skb->len); - skb->pkt_type = HCI_COMMAND_PKT; + bt_cb(skb)->pkt_type = HCI_COMMAND_PKT; skb->dev = (void *) hdev; skb_queue_tail(&hdev->cmd_q, skb); hci_sched_cmd(hdev); @@ -1081,7 +1081,7 @@ int hci_send_acl(struct hci_conn *conn, struct sk_buff *skb, __u16 flags) BT_DBG("%s conn %p flags 0x%x", hdev->name, conn, flags); skb->dev = (void *) hdev; - skb->pkt_type = HCI_ACLDATA_PKT; + bt_cb(skb)->pkt_type = HCI_ACLDATA_PKT; hci_add_acl_hdr(skb, conn->handle, flags | ACL_START); if (!(list = skb_shinfo(skb)->frag_list)) { @@ -1103,7 +1103,7 @@ int hci_send_acl(struct hci_conn *conn, struct sk_buff *skb, __u16 flags) skb = list; list = list->next; skb->dev = (void *) hdev; - skb->pkt_type = HCI_ACLDATA_PKT; + bt_cb(skb)->pkt_type = HCI_ACLDATA_PKT; hci_add_acl_hdr(skb, conn->handle, flags | ACL_CONT); BT_DBG("%s frag %p len 
%d", hdev->name, skb, skb->len); @@ -1139,7 +1139,7 @@ int hci_send_sco(struct hci_conn *conn, struct sk_buff *skb) memcpy(skb->h.raw, &hdr, HCI_SCO_HDR_SIZE); skb->dev = (void *) hdev; - skb->pkt_type = HCI_SCODATA_PKT; + bt_cb(skb)->pkt_type = HCI_SCODATA_PKT; skb_queue_tail(&conn->data_q, skb); hci_sched_tx(hdev); return 0; @@ -1369,7 +1369,7 @@ void hci_rx_task(unsigned long arg) if (test_bit(HCI_INIT, &hdev->flags)) { /* Don't process data packets in this states. */ - switch (skb->pkt_type) { + switch (bt_cb(skb)->pkt_type) { case HCI_ACLDATA_PKT: case HCI_SCODATA_PKT: kfree_skb(skb); @@ -1378,7 +1378,7 @@ void hci_rx_task(unsigned long arg) } /* Process frame */ - switch (skb->pkt_type) { + switch (bt_cb(skb)->pkt_type) { case HCI_EVENT_PKT: hci_event_packet(hdev, skb); break; diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index a004284c4d98..40b219560bb1 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -1089,7 +1089,7 @@ void hci_si_event(struct hci_dev *hdev, int type, int dlen, void *data) bt_cb(skb)->incoming = 1; do_gettimeofday(&skb->stamp); - skb->pkt_type = HCI_EVENT_PKT; + bt_cb(skb)->pkt_type = HCI_EVENT_PKT; skb->dev = (void *) hdev; hci_send_to_sock(hdev, skb); kfree_skb(skb); diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c index ebdcce5e7ca0..eed9090d77f1 100644 --- a/net/bluetooth/hci_sock.c +++ b/net/bluetooth/hci_sock.c @@ -110,11 +110,11 @@ void hci_send_to_sock(struct hci_dev *hdev, struct sk_buff *skb) /* Apply filter */ flt = &hci_pi(sk)->filter; - if (!test_bit((skb->pkt_type == HCI_VENDOR_PKT) ? - 0 : (skb->pkt_type & HCI_FLT_TYPE_BITS), &flt->type_mask)) + if (!test_bit((bt_cb(skb)->pkt_type == HCI_VENDOR_PKT) ? 
+ 0 : (bt_cb(skb)->pkt_type & HCI_FLT_TYPE_BITS), &flt->type_mask)) continue; - if (skb->pkt_type == HCI_EVENT_PKT) { + if (bt_cb(skb)->pkt_type == HCI_EVENT_PKT) { register int evt = (*(__u8 *)skb->data & HCI_FLT_EVENT_BITS); if (!hci_test_bit(evt, &flt->event_mask)) @@ -131,7 +131,7 @@ void hci_send_to_sock(struct hci_dev *hdev, struct sk_buff *skb) continue; /* Put type byte before the data */ - memcpy(skb_push(nskb, 1), &nskb->pkt_type, 1); + memcpy(skb_push(nskb, 1), &bt_cb(nskb)->pkt_type, 1); if (sock_queue_rcv_skb(sk, nskb)) kfree_skb(nskb); @@ -327,8 +327,10 @@ static inline void hci_sock_cmsg(struct sock *sk, struct msghdr *msg, struct sk_ { __u32 mask = hci_pi(sk)->cmsg_mask; - if (mask & HCI_CMSG_DIR) - put_cmsg(msg, SOL_HCI, HCI_CMSG_DIR, sizeof(int), &bt_cb(skb)->incoming); + if (mask & HCI_CMSG_DIR) { + int incoming = bt_cb(skb)->incoming; + put_cmsg(msg, SOL_HCI, HCI_CMSG_DIR, sizeof(incoming), &incoming); + } if (mask & HCI_CMSG_TSTAMP) put_cmsg(msg, SOL_HCI, HCI_CMSG_TSTAMP, sizeof(skb->stamp), &skb->stamp); @@ -405,11 +407,11 @@ static int hci_sock_sendmsg(struct kiocb *iocb, struct socket *sock, goto drop; } - skb->pkt_type = *((unsigned char *) skb->data); + bt_cb(skb)->pkt_type = *((unsigned char *) skb->data); skb_pull(skb, 1); skb->dev = (void *) hdev; - if (skb->pkt_type == HCI_COMMAND_PKT) { + if (bt_cb(skb)->pkt_type == HCI_COMMAND_PKT) { u16 opcode = __le16_to_cpu(get_unaligned((u16 *)skb->data)); u16 ogf = hci_opcode_ogf(opcode); u16 ocf = hci_opcode_ocf(opcode); From 27258ee54f8cd4a43d09319aa5448145afc2cb8d Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 9 Aug 2005 20:30:56 -0700 Subject: [PATCH 365/584] [DCCP]: Introduce dccp_write_xmit from code in dccp_sendmsg This way it gets closer to the TCP flow, where congestion window checks are done, it seems we can map ccid_hc_tx_send_packet in dccp_write_xmit to tcp_snd_wnd_test in tcp_write_xmit, a CCID2 decision should just fit in here as well... 
Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. Miller --- net/dccp/ccid.h | 8 ++---- net/dccp/ccids/ccid3.c | 13 +++++---- net/dccp/dccp.h | 5 ++-- net/dccp/output.c | 38 +++++++++++++++++++++++- net/dccp/proto.c | 65 +++++------------------------------------- 5 files changed, 57 insertions(+), 72 deletions(-) diff --git a/net/dccp/ccid.h b/net/dccp/ccid.h index 06105b2a613c..469f9a14b463 100644 --- a/net/dccp/ccid.h +++ b/net/dccp/ccid.h @@ -43,8 +43,7 @@ struct ccid { unsigned char len, u16 idx, unsigned char* value); int (*ccid_hc_tx_send_packet)(struct sock *sk, - struct sk_buff *skb, int len, - long *delay); + struct sk_buff *skb, int len); void (*ccid_hc_tx_packet_sent)(struct sock *sk, int more, int len); }; @@ -60,12 +59,11 @@ static inline void __ccid_get(struct ccid *ccid) } static inline int ccid_hc_tx_send_packet(struct ccid *ccid, struct sock *sk, - struct sk_buff *skb, int len, - long *delay) + struct sk_buff *skb, int len) { int rc = 0; if (ccid->ccid_hc_tx_send_packet != NULL) - rc = ccid->ccid_hc_tx_send_packet(sk, skb, len, delay); + rc = ccid->ccid_hc_tx_send_packet(sk, skb, len); return rc; } diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c index 04299c7565f3..df4adfeaafac 100644 --- a/net/dccp/ccids/ccid3.c +++ b/net/dccp/ccids/ccid3.c @@ -977,13 +977,14 @@ out: sock_put(sk); } -static int ccid3_hc_tx_send_packet(struct sock *sk, struct sk_buff *skb, - int len, long *delay) +static int ccid3_hc_tx_send_packet(struct sock *sk, + struct sk_buff *skb, int len) { struct dccp_sock *dp = dccp_sk(sk); struct ccid3_hc_tx_sock *hctx = dp->dccps_hc_tx_ccid_private; struct ccid3_tx_hist_entry *new_packet = NULL; struct timeval now; + long delay; int rc = -ENOTCONN; // ccid3_pr_debug("%s, sk=%p, skb=%p, len=%d\n", dccp_role(sk), sk, skb, len); @@ -1037,11 +1038,11 @@ static int ccid3_hc_tx_send_packet(struct sock *sk, struct sk_buff *skb, break; case TFRC_SSTATE_NO_FBACK: case TFRC_SSTATE_FBACK: - *delay = 
(now_delta(hctx->ccid3hctx_t_nom) - hctx->ccid3hctx_delta); - ccid3_pr_debug("send_packet delay=%ld\n",*delay); - *delay /= -1000; + delay = (now_delta(hctx->ccid3hctx_t_nom) - hctx->ccid3hctx_delta); + ccid3_pr_debug("send_packet delay=%ld\n", delay); + delay /= -1000; /* divide by -1000 is to convert to ms and get sign right */ - rc = *delay > 0 ? -EAGAIN : 0; + rc = delay > 0 ? -EAGAIN : 0; break; default: printk(KERN_CRIT "%s: %s, sk=%p, Illegal state (%d)!\n", diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h index 55b690ab61ae..8a0d7af649e4 100644 --- a/net/dccp/dccp.h +++ b/net/dccp/dccp.h @@ -122,6 +122,9 @@ extern void dccp_send_ack(struct sock *sk); extern void dccp_send_delayed_ack(struct sock *sk); extern void dccp_send_sync(struct sock *sk, u64 seq); +extern int dccp_write_xmit(struct sock *sk, struct sk_buff *skb, + const int len); + extern void dccp_init_xmit_timers(struct sock *sk); static inline void dccp_clear_xmit_timers(struct sock *sk) { @@ -194,8 +197,6 @@ static inline void dccp_openreq_init(struct request_sock *req, req->rcv_wnd = 0; } -extern void dccp_v4_send_check(struct sock *sk, struct dccp_hdr *dh, int len, - struct sk_buff *skb); extern int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb); extern struct sock *dccp_create_openreq_child(struct sock *sk, diff --git a/net/dccp/output.c b/net/dccp/output.c index 4945eaa9d1a4..50292c0605fb 100644 --- a/net/dccp/output.c +++ b/net/dccp/output.c @@ -148,6 +148,41 @@ unsigned int dccp_sync_mss(struct sock *sk, u32 pmtu) return mss_now; } +int dccp_write_xmit(struct sock *sk, struct sk_buff *skb, const int len) +{ + const struct dccp_sock *dp = dccp_sk(sk); + int err = ccid_hc_tx_send_packet(dp->dccps_hc_tx_ccid, sk, skb, len); + + if (err == 0) { + const struct dccp_ackpkts *ap = dp->dccps_hc_rx_ackpkts; + struct dccp_skb_cb *dcb = DCCP_SKB_CB(skb); + + if (sk->sk_state == DCCP_PARTOPEN) { + /* See 8.1.5. 
Handshake Completion */ + inet_csk_schedule_ack(sk); + inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK, + inet_csk(sk)->icsk_rto, + DCCP_RTO_MAX); + dcb->dccpd_type = DCCP_PKT_DATAACK; + /* + * FIXME: we really should have a + * dccps_ack_pending or use icsk. + */ + } else if (inet_csk_ack_scheduled(sk) || + (dp->dccps_options.dccpo_send_ack_vector && + ap->dccpap_buf_ackno != DCCP_MAX_SEQNO + 1 && + ap->dccpap_ack_seqno == DCCP_MAX_SEQNO + 1)) + dcb->dccpd_type = DCCP_PKT_DATAACK; + else + dcb->dccpd_type = DCCP_PKT_DATA; + + err = dccp_transmit_skb(sk, skb); + ccid_hc_tx_packet_sent(dp->dccps_hc_tx_ccid, sk, 0, len); + } + + return err; +} + int dccp_retransmit_skb(struct sock *sk, struct sk_buff *skb) { if (inet_sk_rebuild_header(sk) != 0) @@ -299,7 +334,8 @@ int dccp_connect(struct sock *sk) DCCP_INC_STATS(DCCP_MIB_ACTIVEOPENS); /* Timer for repeating the REQUEST until an answer. */ - inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, icsk->icsk_rto, TCP_RTO_MAX); + inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, + icsk->icsk_rto, DCCP_RTO_MAX); return 0; } diff --git a/net/dccp/proto.c b/net/dccp/proto.c index 66c43fce17a6..877c1e0e3c48 100644 --- a/net/dccp/proto.c +++ b/net/dccp/proto.c @@ -182,8 +182,7 @@ int dccp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, return -EMSGSIZE; lock_sock(sk); - - timeo = sock_sndtimeo(sk, flags & MSG_DONTWAIT); + timeo = sock_sndtimeo(sk, noblock); /* * We have to use sk_stream_wait_connect here to set sk_write_pending, @@ -192,77 +191,27 @@ int dccp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, /* Wait for a connection to finish. 
*/ if ((1 << sk->sk_state) & ~(DCCPF_OPEN | DCCPF_PARTOPEN | DCCPF_CLOSING)) if ((rc = sk_stream_wait_connect(sk, &timeo)) != 0) - goto out_err; + goto out_release; size = sk->sk_prot->max_header + len; release_sock(sk); skb = sock_alloc_send_skb(sk, size, noblock, &rc); lock_sock(sk); - if (skb == NULL) goto out_release; skb_reserve(skb, sk->sk_prot->max_header); rc = memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len); - if (rc == 0) { - struct dccp_skb_cb *dcb = DCCP_SKB_CB(skb); - const struct dccp_ackpkts *ap = dp->dccps_hc_rx_ackpkts; - long delay; + if (rc != 0) + goto out_discard; - /* - * XXX: This is just to match the Waikato tree CA interaction - * points, after the CCID3 code is stable and I have a better - * understanding of behaviour I'll change this to look more like - * TCP. - */ - while (1) { - rc = ccid_hc_tx_send_packet(dp->dccps_hc_tx_ccid, sk, - skb, len, &delay); - if (rc == 0) - break; - if (rc != -EAGAIN) - goto out_discard; - if (delay > timeo) - goto out_discard; - release_sock(sk); - delay = schedule_timeout(delay); - lock_sock(sk); - timeo -= delay; - if (signal_pending(current)) - goto out_interrupted; - rc = -EPIPE; - if (!(sk->sk_state == DCCP_PARTOPEN || sk->sk_state == DCCP_OPEN)) - goto out_discard; - } - - if (sk->sk_state == DCCP_PARTOPEN) { - /* See 8.1.5. 
Handshake Completion */ - inet_csk_schedule_ack(sk); - inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK, inet_csk(sk)->icsk_rto, TCP_RTO_MAX); - dcb->dccpd_type = DCCP_PKT_DATAACK; - /* FIXME: we really should have a dccps_ack_pending or use icsk */ - } else if (inet_csk_ack_scheduled(sk) || - (dp->dccps_options.dccpo_send_ack_vector && - ap->dccpap_buf_ackno != DCCP_MAX_SEQNO + 1 && - ap->dccpap_ack_seqno == DCCP_MAX_SEQNO + 1)) - dcb->dccpd_type = DCCP_PKT_DATAACK; - else - dcb->dccpd_type = DCCP_PKT_DATA; - dccp_transmit_skb(sk, skb); - ccid_hc_tx_packet_sent(dp->dccps_hc_tx_ccid, sk, 0, len); - } else { -out_discard: - kfree_skb(skb); - } + rc = dccp_write_xmit(sk, skb, len); out_release: release_sock(sk); return rc ? : len; -out_err: - rc = sk_stream_error(sk, flags, rc); +out_discard: + kfree_skb(skb); goto out_release; -out_interrupted: - rc = sock_intr_errno(timeo); - goto out_discard; } EXPORT_SYMBOL(dccp_sendmsg); From 0b4e03bf0bc43ad6250a1e2fa25fc3eb2b028977 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 9 Aug 2005 20:31:11 -0700 Subject: [PATCH 366/584] [DCCP]: Initialize icsk_rto in dccp_v4_init_sock Fixes nasty bug related to the retransmit timer (yeah, DCCP does retransmits) firing too early. Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. 
Miller --- net/dccp/ipv4.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index 4fa56dbcbea4..6bccf4dd1e70 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -1219,6 +1219,7 @@ static int dccp_v4_init_sock(struct sock *sk) dccp_ctl_socket_init = 0; dccp_init_xmit_timers(sk); + inet_csk(sk)->icsk_rto = DCCP_TIMEOUT_INIT; sk->sk_state = DCCP_CLOSED; dp->dccps_mss_cache = 536; dp->dccps_role = DCCP_ROLE_UNDEFINED; From 295ff7edb8f72b77d524759266f7524deae379b3 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 9 Aug 2005 20:44:40 -0700 Subject: [PATCH 367/584] [TIMEWAIT]: Introduce inet_timewait_death_row That groups all of the tables and variables associated with the TCP timewait scheduling/recycling/killing code, that now can be isolated from the TCP specific code and used by other transport protocols, such as DCCP. Next changeset will move this code to net/ipv4/inet_timewait_sock.c Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. Miller --- include/net/inet_timewait_sock.h | 57 ++++++- include/net/tcp.h | 36 +---- net/ipv4/proc.c | 2 +- net/ipv4/sysctl_net_ipv4.c | 4 +- net/ipv4/tcp.c | 4 +- net/ipv4/tcp_ipv4.c | 11 +- net/ipv4/tcp_minisocks.c | 254 ++++++++++++++++--------------- net/ipv6/tcp_ipv6.c | 9 +- 8 files changed, 206 insertions(+), 171 deletions(-) diff --git a/include/net/inet_timewait_sock.h b/include/net/inet_timewait_sock.h index e00861b16696..a7e8052e2fbf 100644 --- a/include/net/inet_timewait_sock.h +++ b/include/net/inet_timewait_sock.h @@ -19,13 +19,69 @@ #include #include +#include #include +#include #include #include #include +struct inet_hashinfo; + +#define INET_TWDR_RECYCLE_SLOTS_LOG 5 +#define INET_TWDR_RECYCLE_SLOTS (1 << INET_TWDR_RECYCLE_SLOTS_LOG) + +/* + * If time > 4sec, it is "slow" path, no recycling is required, + * so that we select tick to get range about 4 seconds.
+ */ +#if HZ <= 16 || HZ > 4096 +# error Unsupported: HZ <= 16 or HZ > 4096 +#elif HZ <= 32 +# define INET_TWDR_RECYCLE_TICK (5 + 2 - INET_TWDR_RECYCLE_SLOTS_LOG) +#elif HZ <= 64 +# define INET_TWDR_RECYCLE_TICK (6 + 2 - INET_TWDR_RECYCLE_SLOTS_LOG) +#elif HZ <= 128 +# define INET_TWDR_RECYCLE_TICK (7 + 2 - INET_TWDR_RECYCLE_SLOTS_LOG) +#elif HZ <= 256 +# define INET_TWDR_RECYCLE_TICK (8 + 2 - INET_TWDR_RECYCLE_SLOTS_LOG) +#elif HZ <= 512 +# define INET_TWDR_RECYCLE_TICK (9 + 2 - INET_TWDR_RECYCLE_SLOTS_LOG) +#elif HZ <= 1024 +# define INET_TWDR_RECYCLE_TICK (10 + 2 - INET_TWDR_RECYCLE_SLOTS_LOG) +#elif HZ <= 2048 +# define INET_TWDR_RECYCLE_TICK (11 + 2 - INET_TWDR_RECYCLE_SLOTS_LOG) +#else +# define INET_TWDR_RECYCLE_TICK (12 + 2 - INET_TWDR_RECYCLE_SLOTS_LOG) +#endif + +/* TIME_WAIT reaping mechanism. */ +#define INET_TWDR_TWKILL_SLOTS 8 /* Please keep this a power of 2. */ + +#define INET_TWDR_TWKILL_QUOTA 100 + +struct inet_timewait_death_row { + /* Short-time timewait calendar */ + int twcal_hand; + int twcal_jiffie; + struct timer_list twcal_timer; + struct hlist_head twcal_row[INET_TWDR_RECYCLE_SLOTS]; + + spinlock_t death_lock; + int tw_count; + int period; + u32 thread_slots; + struct work_struct twkill_work; + struct timer_list tw_timer; + int slot; + struct hlist_head cells[INET_TWDR_TWKILL_SLOTS]; + struct inet_hashinfo *hashinfo; + int sysctl_tw_recycle; + int sysctl_max_tw_buckets; +}; + #if (BITS_PER_LONG == 64) #define INET_TIMEWAIT_ADDRCMP_ALIGN_BYTES 8 #else @@ -33,7 +89,6 @@ #endif struct inet_bind_bucket; -struct inet_hashinfo; /* * This is a TIME_WAIT sock. 
It works around the memory consumption diff --git a/include/net/tcp.h b/include/net/tcp.h index 077db859ae0d..4c4cd4fb1ed8 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -29,6 +29,7 @@ #include #include +#include #include #include #include @@ -42,9 +43,9 @@ extern struct inet_hashinfo tcp_hashinfo; extern atomic_t tcp_orphan_count; -extern int tcp_tw_count; extern void tcp_time_wait(struct sock *sk, int state, int timeo); -extern void tcp_tw_deschedule(struct inet_timewait_sock *tw); +extern void inet_twsk_deschedule(struct inet_timewait_sock *tw, + struct inet_timewait_death_row *twdr); #define MAX_TCP_HEADER (128 + MAX_HEADER) @@ -148,33 +149,6 @@ extern void tcp_tw_deschedule(struct inet_timewait_sock *tw); * timestamps. It must be less than * minimal timewait lifetime. */ - -#define TCP_TW_RECYCLE_SLOTS_LOG 5 -#define TCP_TW_RECYCLE_SLOTS (1< 4sec, it is "slow" path, no recycling is required, - so that we select tick to get range about 4 seconds. - */ - -#if HZ <= 16 || HZ > 4096 -# error Unsupported: HZ <= 16 or HZ > 4096 -#elif HZ <= 32 -# define TCP_TW_RECYCLE_TICK (5+2-TCP_TW_RECYCLE_SLOTS_LOG) -#elif HZ <= 64 -# define TCP_TW_RECYCLE_TICK (6+2-TCP_TW_RECYCLE_SLOTS_LOG) -#elif HZ <= 128 -# define TCP_TW_RECYCLE_TICK (7+2-TCP_TW_RECYCLE_SLOTS_LOG) -#elif HZ <= 256 -# define TCP_TW_RECYCLE_TICK (8+2-TCP_TW_RECYCLE_SLOTS_LOG) -#elif HZ <= 512 -# define TCP_TW_RECYCLE_TICK (9+2-TCP_TW_RECYCLE_SLOTS_LOG) -#elif HZ <= 1024 -# define TCP_TW_RECYCLE_TICK (10+2-TCP_TW_RECYCLE_SLOTS_LOG) -#elif HZ <= 2048 -# define TCP_TW_RECYCLE_TICK (11+2-TCP_TW_RECYCLE_SLOTS_LOG) -#else -# define TCP_TW_RECYCLE_TICK (12+2-TCP_TW_RECYCLE_SLOTS_LOG) -#endif /* * TCP option */ @@ -209,12 +183,13 @@ extern void tcp_tw_deschedule(struct inet_timewait_sock *tw); #define TCP_NAGLE_CORK 2 /* Socket is corked */ #define TCP_NAGLE_PUSH 4 /* Cork is overriden for already queued data */ +extern struct inet_timewait_death_row tcp_death_row; + /* sysctl variables for tcp */ extern 
int sysctl_tcp_timestamps; extern int sysctl_tcp_window_scaling; extern int sysctl_tcp_sack; extern int sysctl_tcp_fin_timeout; -extern int sysctl_tcp_tw_recycle; extern int sysctl_tcp_keepalive_time; extern int sysctl_tcp_keepalive_probes; extern int sysctl_tcp_keepalive_intvl; @@ -229,7 +204,6 @@ extern int sysctl_tcp_stdurg; extern int sysctl_tcp_rfc1337; extern int sysctl_tcp_abort_on_overflow; extern int sysctl_tcp_max_orphans; -extern int sysctl_tcp_max_tw_buckets; extern int sysctl_tcp_fack; extern int sysctl_tcp_reordering; extern int sysctl_tcp_ecn; diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c index 912bbcc7f415..3eadbb271871 100644 --- a/net/ipv4/proc.c +++ b/net/ipv4/proc.c @@ -65,7 +65,7 @@ static int sockstat_seq_show(struct seq_file *seq, void *v) socket_seq_show(seq); seq_printf(seq, "TCP: inuse %d orphan %d tw %d alloc %d mem %d\n", fold_prot_inuse(&tcp_prot), atomic_read(&tcp_orphan_count), - tcp_tw_count, atomic_read(&tcp_sockets_allocated), + tcp_death_row.tw_count, atomic_read(&tcp_sockets_allocated), atomic_read(&tcp_memory_allocated)); seq_printf(seq, "UDP: inuse %d\n", fold_prot_inuse(&udp_prot)); seq_printf(seq, "RAW: inuse %d\n", fold_prot_inuse(&raw_prot)); diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index e32894532416..ce47a345ecc5 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -259,7 +259,7 @@ ctl_table ipv4_table[] = { { .ctl_name = NET_TCP_MAX_TW_BUCKETS, .procname = "tcp_max_tw_buckets", - .data = &sysctl_tcp_max_tw_buckets, + .data = &tcp_death_row.sysctl_max_tw_buckets, .maxlen = sizeof(int), .mode = 0644, .proc_handler = &proc_dointvec @@ -363,7 +363,7 @@ ctl_table ipv4_table[] = { { .ctl_name = NET_TCP_TW_RECYCLE, .procname = "tcp_tw_recycle", - .data = &sysctl_tcp_tw_recycle, + .data = &tcp_death_row.sysctl_tw_recycle, .maxlen = sizeof(int), .mode = 0644, .proc_handler = &proc_dointvec diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 4bda522d25cf..0eed64a1991d 100644 --- 
a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -2109,12 +2109,12 @@ void __init tcp_init(void) if (order >= 4) { sysctl_local_port_range[0] = 32768; sysctl_local_port_range[1] = 61000; - sysctl_tcp_max_tw_buckets = 180000; + tcp_death_row.sysctl_max_tw_buckets = 180000; sysctl_tcp_max_orphans = 4096 << (order - 4); sysctl_max_syn_backlog = 1024; } else if (order < 3) { sysctl_local_port_range[0] = 1024 * (3 - order); - sysctl_tcp_max_tw_buckets >>= (3 - order); + tcp_death_row.sysctl_max_tw_buckets >>= (3 - order); sysctl_tcp_max_orphans >>= (3 - order); sysctl_max_syn_backlog = 128; } diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index b966102b9f39..83f72346274a 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -199,7 +199,7 @@ unique: NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED); } else if (tw) { /* Silly. Should hash-dance instead... */ - tcp_tw_deschedule(tw); + inet_twsk_deschedule(tw, &tcp_death_row); NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED); inet_twsk_put(tw); @@ -291,7 +291,7 @@ ok: spin_unlock(&head->lock); if (tw) { - tcp_tw_deschedule(tw); + inet_twsk_deschedule(tw, &tcp_death_row);; inet_twsk_put(tw); } @@ -366,7 +366,7 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) tp->write_seq = 0; } - if (sysctl_tcp_tw_recycle && + if (tcp_death_row.sysctl_tw_recycle && !tp->rx_opt.ts_recent_stamp && rt->rt_dst == daddr) { struct inet_peer *peer = rt_get_peer(rt); @@ -965,7 +965,7 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) * are made in the function processing timewait state. 
*/ if (tmp_opt.saw_tstamp && - sysctl_tcp_tw_recycle && + tcp_death_row.sysctl_tw_recycle && (dst = inet_csk_route_req(sk, req)) != NULL && (peer = rt_get_peer((struct rtable *)dst)) != NULL && peer->v4daddr == saddr) { @@ -1305,7 +1305,8 @@ do_time_wait: ntohs(th->dest), inet_iif(skb)); if (sk2) { - tcp_tw_deschedule((struct inet_timewait_sock *)sk); + inet_twsk_deschedule((struct inet_timewait_sock *)sk, + &tcp_death_row); inet_twsk_put((struct inet_timewait_sock *)sk); sk = sk2; goto process; diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index 2d95afe5b393..81b9a52c50c6 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -35,13 +35,37 @@ #define SYNC_INIT 1 #endif -int sysctl_tcp_tw_recycle; -int sysctl_tcp_max_tw_buckets = NR_FILE*2; +/* New-style handling of TIME_WAIT sockets. */ + +static void inet_twdr_hangman(unsigned long data); +static void inet_twdr_twkill_work(void *data); +static void inet_twdr_twcal_tick(unsigned long data); int sysctl_tcp_syncookies = SYNC_INIT; int sysctl_tcp_abort_on_overflow; -static void tcp_tw_schedule(struct inet_timewait_sock *tw, int timeo); +struct inet_timewait_death_row tcp_death_row = { + .sysctl_max_tw_buckets = NR_FILE * 2, + .period = TCP_TIMEWAIT_LEN / INET_TWDR_TWKILL_SLOTS, + .death_lock = SPIN_LOCK_UNLOCKED, + .hashinfo = &tcp_hashinfo, + .tw_timer = TIMER_INITIALIZER(inet_twdr_hangman, 0, + (unsigned long)&tcp_death_row), + .twkill_work = __WORK_INITIALIZER(tcp_death_row.twkill_work, + inet_twdr_twkill_work, + &tcp_death_row), +/* Short-time timewait calendar */ + + .twcal_hand = -1, + .twcal_timer = TIMER_INITIALIZER(inet_twdr_twcal_tick, 0, + (unsigned long)&tcp_death_row), +}; + +EXPORT_SYMBOL_GPL(tcp_death_row); + +static void inet_twsk_schedule(struct inet_timewait_sock *tw, + struct inet_timewait_death_row *twdr, + const int timeo); static __inline__ int tcp_in_window(u32 seq, u32 end_seq, u32 s_win, u32 e_win) { @@ -52,10 +76,6 @@ static __inline__ int 
tcp_in_window(u32 seq, u32 end_seq, u32 s_win, u32 e_win) return (seq == e_win && seq == end_seq); } -/* New-style handling of TIME_WAIT sockets. */ - -int tcp_tw_count; - /* * * Main purpose of TIME-WAIT state is to close connection gracefully, * when one of ends sits in LAST-ACK or CLOSING retransmitting FIN @@ -132,7 +152,7 @@ tcp_timewait_state_process(struct inet_timewait_sock *tw, struct sk_buff *skb, if (!th->fin || TCP_SKB_CB(skb)->end_seq != tcptw->tw_rcv_nxt + 1) { kill_with_rst: - tcp_tw_deschedule(tw); + inet_twsk_deschedule(tw, &tcp_death_row); inet_twsk_put(tw); return TCP_TW_RST; } @@ -151,11 +171,11 @@ kill_with_rst: * do not undertsnad recycling in any case, it not * a big problem in practice. --ANK */ if (tw->tw_family == AF_INET && - sysctl_tcp_tw_recycle && tcptw->tw_ts_recent_stamp && + tcp_death_row.sysctl_tw_recycle && tcptw->tw_ts_recent_stamp && tcp_v4_tw_remember_stamp(tw)) - tcp_tw_schedule(tw, tw->tw_timeout); + inet_twsk_schedule(tw, &tcp_death_row, tw->tw_timeout); else - tcp_tw_schedule(tw, TCP_TIMEWAIT_LEN); + inet_twsk_schedule(tw, &tcp_death_row, TCP_TIMEWAIT_LEN); return TCP_TW_ACK; } @@ -188,12 +208,12 @@ kill_with_rst: */ if (sysctl_tcp_rfc1337 == 0) { kill: - tcp_tw_deschedule(tw); + inet_twsk_deschedule(tw, &tcp_death_row); inet_twsk_put(tw); return TCP_TW_SUCCESS; } } - tcp_tw_schedule(tw, TCP_TIMEWAIT_LEN); + inet_twsk_schedule(tw, &tcp_death_row, TCP_TIMEWAIT_LEN); if (tmp_opt.saw_tstamp) { tcptw->tw_ts_recent = tmp_opt.rcv_tsval; @@ -243,7 +263,7 @@ kill: * Do not reschedule in the last case. */ if (paws_reject || th->ack) - tcp_tw_schedule(tw, TCP_TIMEWAIT_LEN); + inet_twsk_schedule(tw, &tcp_death_row, TCP_TIMEWAIT_LEN); /* Send ACK. Note, we do not put the bucket, * it will be released by caller. 
@@ -263,10 +283,10 @@ void tcp_time_wait(struct sock *sk, int state, int timeo) const struct tcp_sock *tp = tcp_sk(sk); int recycle_ok = 0; - if (sysctl_tcp_tw_recycle && tp->rx_opt.ts_recent_stamp) + if (tcp_death_row.sysctl_tw_recycle && tp->rx_opt.ts_recent_stamp) recycle_ok = tp->af_specific->remember_stamp(sk); - if (tcp_tw_count < sysctl_tcp_max_tw_buckets) + if (tcp_death_row.tw_count < tcp_death_row.sysctl_max_tw_buckets) tw = inet_twsk_alloc(sk, state); if (tw != NULL) { @@ -306,7 +326,7 @@ void tcp_time_wait(struct sock *sk, int state, int timeo) timeo = TCP_TIMEWAIT_LEN; } - tcp_tw_schedule(tw, timeo); + inet_twsk_schedule(tw, &tcp_death_row, timeo); inet_twsk_put(tw); } else { /* Sorry, if we're out of memory, just CLOSE this @@ -321,26 +341,9 @@ void tcp_time_wait(struct sock *sk, int state, int timeo) tcp_done(sk); } -/* Kill off TIME_WAIT sockets once their lifetime has expired. */ -static int tcp_tw_death_row_slot; - -static void tcp_twkill(unsigned long); - -/* TIME_WAIT reaping mechanism. */ -#define TCP_TWKILL_SLOTS 8 /* Please keep this a power of 2. */ -#define TCP_TWKILL_PERIOD (TCP_TIMEWAIT_LEN/TCP_TWKILL_SLOTS) - -#define TCP_TWKILL_QUOTA 100 - -static struct hlist_head tcp_tw_death_row[TCP_TWKILL_SLOTS]; -static DEFINE_SPINLOCK(tw_death_lock); -static struct timer_list tcp_tw_timer = TIMER_INITIALIZER(tcp_twkill, 0, 0); -static void twkill_work(void *); -static DECLARE_WORK(tcp_twkill_work, twkill_work, NULL); -static u32 twkill_thread_slots; - /* Returns non-zero if quota exceeded. 
*/ -static int tcp_do_twkill_work(int slot, unsigned int quota) +static int inet_twdr_do_twkill_work(struct inet_timewait_death_row *twdr, + const int slot) { struct inet_timewait_sock *tw; struct hlist_node *node; @@ -356,19 +359,19 @@ static int tcp_do_twkill_work(int slot, unsigned int quota) killed = 0; ret = 0; rescan: - inet_twsk_for_each_inmate(tw, node, &tcp_tw_death_row[slot]) { + inet_twsk_for_each_inmate(tw, node, &twdr->cells[slot]) { __inet_twsk_del_dead_node(tw); - spin_unlock(&tw_death_lock); - __inet_twsk_kill(tw, &tcp_hashinfo); + spin_unlock(&twdr->death_lock); + __inet_twsk_kill(tw, twdr->hashinfo); inet_twsk_put(tw); killed++; - spin_lock(&tw_death_lock); - if (killed > quota) { + spin_lock(&twdr->death_lock); + if (killed > INET_TWDR_TWKILL_QUOTA) { ret = 1; break; } - /* While we dropped tw_death_lock, another cpu may have + /* While we dropped twdr->death_lock, another cpu may have * killed off the next TW bucket in the list, therefore * do a fresh re-read of the hlist head node with the * lock reacquired. We still use the hlist traversal @@ -377,67 +380,68 @@ rescan: goto rescan; } - tcp_tw_count -= killed; + twdr->tw_count -= killed; NET_ADD_STATS_BH(LINUX_MIB_TIMEWAITED, killed); return ret; } -static void tcp_twkill(unsigned long dummy) +static void inet_twdr_hangman(unsigned long data) { - int need_timer, ret; + struct inet_timewait_death_row *twdr; + int unsigned need_timer; - spin_lock(&tw_death_lock); + twdr = (struct inet_timewait_death_row *)data; + spin_lock(&twdr->death_lock); - if (tcp_tw_count == 0) + if (twdr->tw_count == 0) goto out; need_timer = 0; - ret = tcp_do_twkill_work(tcp_tw_death_row_slot, TCP_TWKILL_QUOTA); - if (ret) { - twkill_thread_slots |= (1 << tcp_tw_death_row_slot); + if (inet_twdr_do_twkill_work(twdr, twdr->slot)) { + twdr->thread_slots |= (1 << twdr->slot); mb(); - schedule_work(&tcp_twkill_work); + schedule_work(&twdr->twkill_work); need_timer = 1; } else { /* We purged the entire slot, anything left? 
*/ - if (tcp_tw_count) + if (twdr->tw_count) need_timer = 1; } - tcp_tw_death_row_slot = - ((tcp_tw_death_row_slot + 1) & (TCP_TWKILL_SLOTS - 1)); + twdr->slot = ((twdr->slot + 1) & (INET_TWDR_TWKILL_SLOTS - 1)); if (need_timer) - mod_timer(&tcp_tw_timer, jiffies + TCP_TWKILL_PERIOD); + mod_timer(&twdr->tw_timer, jiffies + twdr->period); out: - spin_unlock(&tw_death_lock); + spin_unlock(&twdr->death_lock); } extern void twkill_slots_invalid(void); -static void twkill_work(void *dummy) +static void inet_twdr_twkill_work(void *data) { + struct inet_timewait_death_row *twdr = data; int i; - if ((TCP_TWKILL_SLOTS - 1) > (sizeof(twkill_thread_slots) * 8)) + if ((INET_TWDR_TWKILL_SLOTS - 1) > (sizeof(twdr->thread_slots) * 8)) twkill_slots_invalid(); - while (twkill_thread_slots) { - spin_lock_bh(&tw_death_lock); - for (i = 0; i < TCP_TWKILL_SLOTS; i++) { - if (!(twkill_thread_slots & (1 << i))) + while (twdr->thread_slots) { + spin_lock_bh(&twdr->death_lock); + for (i = 0; i < INET_TWDR_TWKILL_SLOTS; i++) { + if (!(twdr->thread_slots & (1 << i))) continue; - while (tcp_do_twkill_work(i, TCP_TWKILL_QUOTA) != 0) { + while (inet_twdr_do_twkill_work(twdr, i) != 0) { if (need_resched()) { - spin_unlock_bh(&tw_death_lock); + spin_unlock_bh(&twdr->death_lock); schedule(); - spin_lock_bh(&tw_death_lock); + spin_lock_bh(&twdr->death_lock); } } - twkill_thread_slots &= ~(1 << i); + twdr->thread_slots &= ~(1 << i); } - spin_unlock_bh(&tw_death_lock); + spin_unlock_bh(&twdr->death_lock); } } @@ -446,28 +450,22 @@ static void twkill_work(void *dummy) */ /* This is for handling early-kills of TIME_WAIT sockets. 
*/ -void tcp_tw_deschedule(struct inet_timewait_sock *tw) +void inet_twsk_deschedule(struct inet_timewait_sock *tw, + struct inet_timewait_death_row *twdr) { - spin_lock(&tw_death_lock); + spin_lock(&twdr->death_lock); if (inet_twsk_del_dead_node(tw)) { inet_twsk_put(tw); - if (--tcp_tw_count == 0) - del_timer(&tcp_tw_timer); + if (--twdr->tw_count == 0) + del_timer(&twdr->tw_timer); } - spin_unlock(&tw_death_lock); - __inet_twsk_kill(tw, &tcp_hashinfo); + spin_unlock(&twdr->death_lock); + __inet_twsk_kill(tw, twdr->hashinfo); } -/* Short-time timewait calendar */ - -static int tcp_twcal_hand = -1; -static int tcp_twcal_jiffie; -static void tcp_twcal_tick(unsigned long); -static struct timer_list tcp_twcal_timer = - TIMER_INITIALIZER(tcp_twcal_tick, 0, 0); -static struct hlist_head tcp_twcal_row[TCP_TW_RECYCLE_SLOTS]; - -static void tcp_tw_schedule(struct inet_timewait_sock *tw, const int timeo) +static void inet_twsk_schedule(struct inet_timewait_sock *tw, + struct inet_timewait_death_row *twdr, + const int timeo) { struct hlist_head *list; int slot; @@ -496,100 +494,106 @@ static void tcp_tw_schedule(struct inet_timewait_sock *tw, const int timeo) * is greater than TS tick!) and detect old duplicates with help * of PAWS. 
*/ - slot = (timeo + (1<<TCP_TW_RECYCLE_TICK) - 1) >> TCP_TW_RECYCLE_TICK; + slot = (timeo + (1 << INET_TWDR_RECYCLE_TICK) - 1) >> INET_TWDR_RECYCLE_TICK; - spin_lock(&tw_death_lock); + spin_lock(&twdr->death_lock); /* Unlink it, if it was scheduled */ if (inet_twsk_del_dead_node(tw)) - tcp_tw_count--; + twdr->tw_count--; else atomic_inc(&tw->tw_refcnt); - if (slot >= TCP_TW_RECYCLE_SLOTS) { + if (slot >= INET_TWDR_RECYCLE_SLOTS) { /* Schedule to slow timer */ if (timeo >= TCP_TIMEWAIT_LEN) { - slot = TCP_TWKILL_SLOTS-1; + slot = INET_TWDR_TWKILL_SLOTS - 1; } else { - slot = (timeo + TCP_TWKILL_PERIOD-1) / TCP_TWKILL_PERIOD; - if (slot >= TCP_TWKILL_SLOTS) - slot = TCP_TWKILL_SLOTS-1; + slot = (timeo + twdr->period - 1) / twdr->period; + if (slot >= INET_TWDR_TWKILL_SLOTS) + slot = INET_TWDR_TWKILL_SLOTS - 1; } tw->tw_ttd = jiffies + timeo; - slot = (tcp_tw_death_row_slot + slot) & (TCP_TWKILL_SLOTS - 1); - list = &tcp_tw_death_row[slot]; + slot = (twdr->slot + slot) & (INET_TWDR_TWKILL_SLOTS - 1); + list = &twdr->cells[slot]; } else { - tw->tw_ttd = jiffies + (slot << TCP_TW_RECYCLE_TICK); + tw->tw_ttd = jiffies + (slot << INET_TWDR_RECYCLE_TICK); - if (tcp_twcal_hand < 0) { - tcp_twcal_hand = 0; - tcp_twcal_jiffie = jiffies; - tcp_twcal_timer.expires = tcp_twcal_jiffie + (slot<<TCP_TW_RECYCLE_TICK); - add_timer(&tcp_twcal_timer); + if (twdr->twcal_hand < 0) { + twdr->twcal_hand = 0; + twdr->twcal_jiffie = jiffies; + twdr->twcal_timer.expires = twdr->twcal_jiffie + + (slot << INET_TWDR_RECYCLE_TICK); + add_timer(&twdr->twcal_timer); } else { - if (time_after(tcp_twcal_timer.expires, jiffies + (slot<<TCP_TW_RECYCLE_TICK))) - mod_timer(&tcp_twcal_timer, jiffies + (slot<<TCP_TW_RECYCLE_TICK)); - slot = (tcp_twcal_hand + slot)&(TCP_TW_RECYCLE_SLOTS-1); + if (time_after(twdr->twcal_timer.expires, + jiffies + (slot << INET_TWDR_RECYCLE_TICK))) + mod_timer(&twdr->twcal_timer, + jiffies + (slot << INET_TWDR_RECYCLE_TICK)); + slot = (twdr->twcal_hand + slot) & (INET_TWDR_RECYCLE_SLOTS - 1); } - list = &tcp_twcal_row[slot]; + list = &twdr->twcal_row[slot]; } hlist_add_head(&tw->tw_death_node, list); - if (tcp_tw_count++ == 0) - mod_timer(&tcp_tw_timer, jiffies+TCP_TWKILL_PERIOD); - spin_unlock(&tw_death_lock); + if (twdr->tw_count++ == 0)
+ mod_timer(&twdr->tw_timer, jiffies + twdr->period); + spin_unlock(&twdr->death_lock); } -void tcp_twcal_tick(unsigned long dummy) +void inet_twdr_twcal_tick(unsigned long data) { + struct inet_timewait_death_row *twdr; int n, slot; unsigned long j; unsigned long now = jiffies; int killed = 0; int adv = 0; - spin_lock(&tw_death_lock); - if (tcp_twcal_hand < 0) + twdr = (struct inet_timewait_death_row *)data; + + spin_lock(&twdr->death_lock); + if (twdr->twcal_hand < 0) goto out; - slot = tcp_twcal_hand; - j = tcp_twcal_jiffie; + slot = twdr->twcal_hand; + j = twdr->twcal_jiffie; - for (n=0; n<TCP_TW_RECYCLE_SLOTS; n++) { + for (n = 0; n < INET_TWDR_RECYCLE_SLOTS; n++) { if (time_before_eq(j, now)) { struct hlist_node *node, *safe; struct inet_timewait_sock *tw; inet_twsk_for_each_inmate_safe(tw, node, safe, - &tcp_twcal_row[slot]) { + &twdr->twcal_row[slot]) { __inet_twsk_del_dead_node(tw); - __inet_twsk_kill(tw, &tcp_hashinfo); + __inet_twsk_kill(tw, twdr->hashinfo); inet_twsk_put(tw); killed++; } } else { if (!adv) { adv = 1; - tcp_twcal_jiffie = j; - tcp_twcal_hand = slot; + twdr->twcal_jiffie = j; + twdr->twcal_hand = slot; } - if (!hlist_empty(&tcp_twcal_row[slot])) { - mod_timer(&tcp_twcal_timer, j); + if (!hlist_empty(&twdr->twcal_row[slot])) { + mod_timer(&twdr->twcal_timer, j); goto out; } } - j += (1<<TCP_TW_RECYCLE_TICK); - slot = (slot+1)&(TCP_TW_RECYCLE_SLOTS-1); + j += 1 << INET_TWDR_RECYCLE_TICK; + slot = (slot + 1) & (INET_TWDR_RECYCLE_SLOTS - 1); } - tcp_twcal_hand = -1; + twdr->twcal_hand = -1; out: - if ((tcp_tw_count -= killed) == 0) - del_timer(&tcp_tw_timer); + if ((twdr->tw_count -= killed) == 0) + del_timer(&twdr->tw_timer); NET_ADD_STATS_BH(LINUX_MIB_TIMEWAITKILLED, killed); - spin_unlock(&tw_death_lock); + spin_unlock(&twdr->death_lock); } /* This is not only more efficient than what we used to do, it eliminates @@ -929,4 +933,4 @@ EXPORT_SYMBOL(tcp_check_req); EXPORT_SYMBOL(tcp_child_process); EXPORT_SYMBOL(tcp_create_openreq_child); EXPORT_SYMBOL(tcp_timewait_state_process); -EXPORT_SYMBOL(tcp_tw_deschedule); +EXPORT_SYMBOL(inet_twsk_deschedule); diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 0b51ec310ebe..1c21ad66cfad 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -521,7 +521,7 @@ unique: NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED); } else if (tw) { /* Silly. Should hash-dance instead...
*/ - tcp_tw_deschedule(tw); + inet_twsk_deschedule(tw, &tcp_death_row); NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED); inet_twsk_put(tw); @@ -611,7 +611,7 @@ ok: spin_unlock(&head->lock); if (tw) { - tcp_tw_deschedule(tw); + inet_twsk_deschedule(tw, &tcp_death_row); inet_twsk_put(tw); } @@ -1820,8 +1820,9 @@ do_time_wait: sk2 = tcp_v6_lookup_listener(&skb->nh.ipv6h->daddr, ntohs(th->dest), tcp_v6_iif(skb)); if (sk2 != NULL) { - tcp_tw_deschedule((struct inet_timewait_sock *)sk); - inet_twsk_put((struct inet_timewait_sock *)sk); + struct inet_timewait_sock *tw = inet_twsk(sk); + inet_twsk_deschedule(tw, &tcp_death_row); + inet_twsk_put(tw); sk = sk2; goto process; } From 696ab2d3bffc746fb8cf3712f066d42b9886aeed Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 9 Aug 2005 20:45:03 -0700 Subject: [PATCH 368/584] [TIMEWAIT]: Move inet_timewait_death_row routines to net/ipv4/inet_timewait_sock.c Also export the ones that will be used in the next changeset, when DCCP uses this infrastructure. Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. 
Miller --- include/net/inet_timewait_sock.h | 10 ++ include/net/tcp.h | 2 - net/ipv4/inet_timewait_sock.c | 270 +++++++++++++++++++++++++++++ net/ipv4/tcp_minisocks.c | 281 ++----------------------------- 4 files changed, 290 insertions(+), 273 deletions(-) diff --git a/include/net/inet_timewait_sock.h b/include/net/inet_timewait_sock.h index a7e8052e2fbf..3b070352e869 100644 --- a/include/net/inet_timewait_sock.h +++ b/include/net/inet_timewait_sock.h @@ -82,6 +82,10 @@ struct inet_timewait_death_row { int sysctl_max_tw_buckets; }; +extern void inet_twdr_hangman(unsigned long data); +extern void inet_twdr_twkill_work(void *data); +extern void inet_twdr_twcal_tick(unsigned long data); + #if (BITS_PER_LONG == 64) #define INET_TIMEWAIT_ADDRCMP_ALIGN_BYTES 8 #else @@ -206,4 +210,10 @@ extern void __inet_twsk_kill(struct inet_timewait_sock *tw, extern void __inet_twsk_hashdance(struct inet_timewait_sock *tw, struct sock *sk, struct inet_hashinfo *hashinfo); + +extern void inet_twsk_schedule(struct inet_timewait_sock *tw, + struct inet_timewait_death_row *twdr, + const int timeo, const int timewait_len); +extern void inet_twsk_deschedule(struct inet_timewait_sock *tw, + struct inet_timewait_death_row *twdr); #endif /* _INET_TIMEWAIT_SOCK_ */ diff --git a/include/net/tcp.h b/include/net/tcp.h index 4c4cd4fb1ed8..d489ac548e4b 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -44,8 +44,6 @@ extern struct inet_hashinfo tcp_hashinfo; extern atomic_t tcp_orphan_count; extern void tcp_time_wait(struct sock *sk, int state, int timeo); -extern void inet_twsk_deschedule(struct inet_timewait_sock *tw, - struct inet_timewait_death_row *twdr); #define MAX_TCP_HEADER (128 + MAX_HEADER) diff --git a/net/ipv4/inet_timewait_sock.c b/net/ipv4/inet_timewait_sock.c index 22882d95f646..4d1502a49852 100644 --- a/net/ipv4/inet_timewait_sock.c +++ b/net/ipv4/inet_timewait_sock.c @@ -12,6 +12,7 @@ #include <net/inet_hashtables.h> #include <net/inet_timewait_sock.h> +#include <net/ip.h> /* Must be called with locally disabled BHs.
*/ void __inet_twsk_kill(struct inet_timewait_sock *tw, struct inet_hashinfo *hashinfo) @@ -85,6 +86,8 @@ void __inet_twsk_hashdance(struct inet_timewait_sock *tw, struct sock *sk, write_unlock(&ehead->lock); } +EXPORT_SYMBOL_GPL(__inet_twsk_hashdance); + struct inet_timewait_sock *inet_twsk_alloc(const struct sock *sk, const int state) { struct inet_timewait_sock *tw = kmem_cache_alloc(sk->sk_prot_creator->twsk_slab, @@ -112,3 +115,270 @@ struct inet_timewait_sock *inet_twsk_alloc(const struct sock *sk, const int stat return tw; } + +EXPORT_SYMBOL_GPL(inet_twsk_alloc); + +/* Returns non-zero if quota exceeded. */ +static int inet_twdr_do_twkill_work(struct inet_timewait_death_row *twdr, + const int slot) +{ + struct inet_timewait_sock *tw; + struct hlist_node *node; + unsigned int killed; + int ret; + + /* NOTE: compare this to previous version where lock + * was released after detaching chain. It was racy, + * because tw buckets are scheduled in not serialized context + * in 2.3 (with netfilter), and with softnet it is common, because + * soft irqs are not sequenced. + */ + killed = 0; + ret = 0; +rescan: + inet_twsk_for_each_inmate(tw, node, &twdr->cells[slot]) { + __inet_twsk_del_dead_node(tw); + spin_unlock(&twdr->death_lock); + __inet_twsk_kill(tw, twdr->hashinfo); + inet_twsk_put(tw); + killed++; + spin_lock(&twdr->death_lock); + if (killed > INET_TWDR_TWKILL_QUOTA) { + ret = 1; + break; + } + + /* While we dropped twdr->death_lock, another cpu may have + * killed off the next TW bucket in the list, therefore + * do a fresh re-read of the hlist head node with the + * lock reacquired. We still use the hlist traversal + * macro in order to get the prefetches. 
+ */ + goto rescan; + } + + twdr->tw_count -= killed; + NET_ADD_STATS_BH(LINUX_MIB_TIMEWAITED, killed); + + return ret; +} + +void inet_twdr_hangman(unsigned long data) +{ + struct inet_timewait_death_row *twdr; + int unsigned need_timer; + + twdr = (struct inet_timewait_death_row *)data; + spin_lock(&twdr->death_lock); + + if (twdr->tw_count == 0) + goto out; + + need_timer = 0; + if (inet_twdr_do_twkill_work(twdr, twdr->slot)) { + twdr->thread_slots |= (1 << twdr->slot); + mb(); + schedule_work(&twdr->twkill_work); + need_timer = 1; + } else { + /* We purged the entire slot, anything left? */ + if (twdr->tw_count) + need_timer = 1; + } + twdr->slot = ((twdr->slot + 1) & (INET_TWDR_TWKILL_SLOTS - 1)); + if (need_timer) + mod_timer(&twdr->tw_timer, jiffies + twdr->period); +out: + spin_unlock(&twdr->death_lock); +} + +EXPORT_SYMBOL_GPL(inet_twdr_hangman); + +extern void twkill_slots_invalid(void); + +void inet_twdr_twkill_work(void *data) +{ + struct inet_timewait_death_row *twdr = data; + int i; + + if ((INET_TWDR_TWKILL_SLOTS - 1) > (sizeof(twdr->thread_slots) * 8)) + twkill_slots_invalid(); + + while (twdr->thread_slots) { + spin_lock_bh(&twdr->death_lock); + for (i = 0; i < INET_TWDR_TWKILL_SLOTS; i++) { + if (!(twdr->thread_slots & (1 << i))) + continue; + + while (inet_twdr_do_twkill_work(twdr, i) != 0) { + if (need_resched()) { + spin_unlock_bh(&twdr->death_lock); + schedule(); + spin_lock_bh(&twdr->death_lock); + } + } + + twdr->thread_slots &= ~(1 << i); + } + spin_unlock_bh(&twdr->death_lock); + } +} + +EXPORT_SYMBOL_GPL(inet_twdr_twkill_work); + +/* These are always called from BH context. See callers in + * tcp_input.c to verify this. + */ + +/* This is for handling early-kills of TIME_WAIT sockets. 
*/ +void inet_twsk_deschedule(struct inet_timewait_sock *tw, + struct inet_timewait_death_row *twdr) +{ + spin_lock(&twdr->death_lock); + if (inet_twsk_del_dead_node(tw)) { + inet_twsk_put(tw); + if (--twdr->tw_count == 0) + del_timer(&twdr->tw_timer); + } + spin_unlock(&twdr->death_lock); + __inet_twsk_kill(tw, twdr->hashinfo); +} + +EXPORT_SYMBOL(inet_twsk_deschedule); + +void inet_twsk_schedule(struct inet_timewait_sock *tw, + struct inet_timewait_death_row *twdr, + const int timeo, const int timewait_len) +{ + struct hlist_head *list; + int slot; + + /* timeout := RTO * 3.5 + * + * 3.5 = 1+2+0.5 to wait for two retransmits. + * + * RATIONALE: if FIN arrived and we entered TIME-WAIT state, + * our ACK acking that FIN can be lost. If N subsequent retransmitted + * FINs (or previous seqments) are lost (probability of such event + * is p^(N+1), where p is probability to lose single packet and + * time to detect the loss is about RTO*(2^N - 1) with exponential + * backoff). Normal timewait length is calculated so, that we + * waited at least for one retransmitted FIN (maximal RTO is 120sec). + * [ BTW Linux. following BSD, violates this requirement waiting + * only for 60sec, we should wait at least for 240 secs. + * Well, 240 consumes too much of resources 8) + * ] + * This interval is not reduced to catch old duplicate and + * responces to our wandering segments living for two MSLs. + * However, if we use PAWS to detect + * old duplicates, we can reduce the interval to bounds required + * by RTO, rather than MSL. So, if peer understands PAWS, we + * kill tw bucket after 3.5*RTO (it is important that this number + * is greater than TS tick!) and detect old duplicates with help + * of PAWS. 
+ */ + slot = (timeo + (1 << INET_TWDR_RECYCLE_TICK) - 1) >> INET_TWDR_RECYCLE_TICK; + + spin_lock(&twdr->death_lock); + + /* Unlink it, if it was scheduled */ + if (inet_twsk_del_dead_node(tw)) + twdr->tw_count--; + else + atomic_inc(&tw->tw_refcnt); + + if (slot >= INET_TWDR_RECYCLE_SLOTS) { + /* Schedule to slow timer */ + if (timeo >= timewait_len) { + slot = INET_TWDR_TWKILL_SLOTS - 1; + } else { + slot = (timeo + twdr->period - 1) / twdr->period; + if (slot >= INET_TWDR_TWKILL_SLOTS) + slot = INET_TWDR_TWKILL_SLOTS - 1; + } + tw->tw_ttd = jiffies + timeo; + slot = (twdr->slot + slot) & (INET_TWDR_TWKILL_SLOTS - 1); + list = &twdr->cells[slot]; + } else { + tw->tw_ttd = jiffies + (slot << INET_TWDR_RECYCLE_TICK); + + if (twdr->twcal_hand < 0) { + twdr->twcal_hand = 0; + twdr->twcal_jiffie = jiffies; + twdr->twcal_timer.expires = twdr->twcal_jiffie + + (slot << INET_TWDR_RECYCLE_TICK); + add_timer(&twdr->twcal_timer); + } else { + if (time_after(twdr->twcal_timer.expires, + jiffies + (slot << INET_TWDR_RECYCLE_TICK))) + mod_timer(&twdr->twcal_timer, + jiffies + (slot << INET_TWDR_RECYCLE_TICK)); + slot = (twdr->twcal_hand + slot) & (INET_TWDR_RECYCLE_SLOTS - 1); + } + list = &twdr->twcal_row[slot]; + } + + hlist_add_head(&tw->tw_death_node, list); + + if (twdr->tw_count++ == 0) + mod_timer(&twdr->tw_timer, jiffies + twdr->period); + spin_unlock(&twdr->death_lock); +} + +EXPORT_SYMBOL_GPL(inet_twsk_schedule); + +void inet_twdr_twcal_tick(unsigned long data) +{ + struct inet_timewait_death_row *twdr; + int n, slot; + unsigned long j; + unsigned long now = jiffies; + int killed = 0; + int adv = 0; + + twdr = (struct inet_timewait_death_row *)data; + + spin_lock(&twdr->death_lock); + if (twdr->twcal_hand < 0) + goto out; + + slot = twdr->twcal_hand; + j = twdr->twcal_jiffie; + + for (n = 0; n < INET_TWDR_RECYCLE_SLOTS; n++) { + if (time_before_eq(j, now)) { + struct hlist_node *node, *safe; + struct inet_timewait_sock *tw; + + inet_twsk_for_each_inmate_safe(tw, 
node, safe, + &twdr->twcal_row[slot]) { + __inet_twsk_del_dead_node(tw); + __inet_twsk_kill(tw, twdr->hashinfo); + inet_twsk_put(tw); + killed++; + } + } else { + if (!adv) { + adv = 1; + twdr->twcal_jiffie = j; + twdr->twcal_hand = slot; + } + + if (!hlist_empty(&twdr->twcal_row[slot])) { + mod_timer(&twdr->twcal_timer, j); + goto out; + } + } + j += 1 << INET_TWDR_RECYCLE_TICK; + slot = (slot + 1) & (INET_TWDR_RECYCLE_SLOTS - 1); + } + twdr->twcal_hand = -1; + +out: + if ((twdr->tw_count -= killed) == 0) + del_timer(&twdr->tw_timer); + NET_ADD_STATS_BH(LINUX_MIB_TIMEWAITKILLED, killed); + spin_unlock(&twdr->death_lock); +} + +EXPORT_SYMBOL_GPL(inet_twdr_twcal_tick); diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index 81b9a52c50c6..dc085233d512 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -35,12 +35,6 @@ #define SYNC_INIT 1 #endif -/* New-style handling of TIME_WAIT sockets. */ - -static void inet_twdr_hangman(unsigned long data); -static void inet_twdr_twkill_work(void *data); -static void inet_twdr_twcal_tick(unsigned long data); - int sysctl_tcp_syncookies = SYNC_INIT; int sysctl_tcp_abort_on_overflow; @@ -63,10 +57,6 @@ struct inet_timewait_death_row tcp_death_row = { EXPORT_SYMBOL_GPL(tcp_death_row); -static void inet_twsk_schedule(struct inet_timewait_sock *tw, - struct inet_timewait_death_row *twdr, - const int timeo); - static __inline__ int tcp_in_window(u32 seq, u32 end_seq, u32 s_win, u32 e_win) { if (seq == s_win) @@ -173,9 +163,11 @@ kill_with_rst: if (tw->tw_family == AF_INET && tcp_death_row.sysctl_tw_recycle && tcptw->tw_ts_recent_stamp && tcp_v4_tw_remember_stamp(tw)) - inet_twsk_schedule(tw, &tcp_death_row, tw->tw_timeout); + inet_twsk_schedule(tw, &tcp_death_row, tw->tw_timeout, + TCP_TIMEWAIT_LEN); else - inet_twsk_schedule(tw, &tcp_death_row, TCP_TIMEWAIT_LEN); + inet_twsk_schedule(tw, &tcp_death_row, TCP_TIMEWAIT_LEN, + TCP_TIMEWAIT_LEN); return TCP_TW_ACK; } @@ -213,7 +205,8 @@ kill: return 
TCP_TW_SUCCESS; } } - inet_twsk_schedule(tw, &tcp_death_row, TCP_TIMEWAIT_LEN); + inet_twsk_schedule(tw, &tcp_death_row, TCP_TIMEWAIT_LEN, + TCP_TIMEWAIT_LEN); if (tmp_opt.saw_tstamp) { tcptw->tw_ts_recent = tmp_opt.rcv_tsval; @@ -263,7 +256,8 @@ kill: * Do not reschedule in the last case. */ if (paws_reject || th->ack) - inet_twsk_schedule(tw, &tcp_death_row, TCP_TIMEWAIT_LEN); + inet_twsk_schedule(tw, &tcp_death_row, TCP_TIMEWAIT_LEN, + TCP_TIMEWAIT_LEN); /* Send ACK. Note, we do not put the bucket, * it will be released by caller. @@ -326,7 +320,8 @@ void tcp_time_wait(struct sock *sk, int state, int timeo) timeo = TCP_TIMEWAIT_LEN; } - inet_twsk_schedule(tw, &tcp_death_row, timeo); + inet_twsk_schedule(tw, &tcp_death_row, timeo, + TCP_TIMEWAIT_LEN); inet_twsk_put(tw); } else { /* Sorry, if we're out of memory, just CLOSE this @@ -341,261 +336,6 @@ void tcp_time_wait(struct sock *sk, int state, int timeo) tcp_done(sk); } -/* Returns non-zero if quota exceeded. */ -static int inet_twdr_do_twkill_work(struct inet_timewait_death_row *twdr, - const int slot) -{ - struct inet_timewait_sock *tw; - struct hlist_node *node; - unsigned int killed; - int ret; - - /* NOTE: compare this to previous version where lock - * was released after detaching chain. It was racy, - * because tw buckets are scheduled in not serialized context - * in 2.3 (with netfilter), and with softnet it is common, because - * soft irqs are not sequenced. - */ - killed = 0; - ret = 0; -rescan: - inet_twsk_for_each_inmate(tw, node, &twdr->cells[slot]) { - __inet_twsk_del_dead_node(tw); - spin_unlock(&twdr->death_lock); - __inet_twsk_kill(tw, twdr->hashinfo); - inet_twsk_put(tw); - killed++; - spin_lock(&twdr->death_lock); - if (killed > INET_TWDR_TWKILL_QUOTA) { - ret = 1; - break; - } - - /* While we dropped twdr->death_lock, another cpu may have - * killed off the next TW bucket in the list, therefore - * do a fresh re-read of the hlist head node with the - * lock reacquired. 
We still use the hlist traversal - * macro in order to get the prefetches. - */ - goto rescan; - } - - twdr->tw_count -= killed; - NET_ADD_STATS_BH(LINUX_MIB_TIMEWAITED, killed); - - return ret; -} - -static void inet_twdr_hangman(unsigned long data) -{ - struct inet_timewait_death_row *twdr; - int unsigned need_timer; - - twdr = (struct inet_timewait_death_row *)data; - spin_lock(&twdr->death_lock); - - if (twdr->tw_count == 0) - goto out; - - need_timer = 0; - if (inet_twdr_do_twkill_work(twdr, twdr->slot)) { - twdr->thread_slots |= (1 << twdr->slot); - mb(); - schedule_work(&twdr->twkill_work); - need_timer = 1; - } else { - /* We purged the entire slot, anything left? */ - if (twdr->tw_count) - need_timer = 1; - } - twdr->slot = ((twdr->slot + 1) & (INET_TWDR_TWKILL_SLOTS - 1)); - if (need_timer) - mod_timer(&twdr->tw_timer, jiffies + twdr->period); -out: - spin_unlock(&twdr->death_lock); -} - -extern void twkill_slots_invalid(void); - -static void inet_twdr_twkill_work(void *data) -{ - struct inet_timewait_death_row *twdr = data; - int i; - - if ((INET_TWDR_TWKILL_SLOTS - 1) > (sizeof(twdr->thread_slots) * 8)) - twkill_slots_invalid(); - - while (twdr->thread_slots) { - spin_lock_bh(&twdr->death_lock); - for (i = 0; i < INET_TWDR_TWKILL_SLOTS; i++) { - if (!(twdr->thread_slots & (1 << i))) - continue; - - while (inet_twdr_do_twkill_work(twdr, i) != 0) { - if (need_resched()) { - spin_unlock_bh(&twdr->death_lock); - schedule(); - spin_lock_bh(&twdr->death_lock); - } - } - - twdr->thread_slots &= ~(1 << i); - } - spin_unlock_bh(&twdr->death_lock); - } -} - -/* These are always called from BH context. See callers in - * tcp_input.c to verify this. - */ - -/* This is for handling early-kills of TIME_WAIT sockets. 
*/ -void inet_twsk_deschedule(struct inet_timewait_sock *tw, - struct inet_timewait_death_row *twdr) -{ - spin_lock(&twdr->death_lock); - if (inet_twsk_del_dead_node(tw)) { - inet_twsk_put(tw); - if (--twdr->tw_count == 0) - del_timer(&twdr->tw_timer); - } - spin_unlock(&twdr->death_lock); - __inet_twsk_kill(tw, twdr->hashinfo); -} - -static void inet_twsk_schedule(struct inet_timewait_sock *tw, - struct inet_timewait_death_row *twdr, - const int timeo) -{ - struct hlist_head *list; - int slot; - - /* timeout := RTO * 3.5 - * - * 3.5 = 1+2+0.5 to wait for two retransmits. - * - * RATIONALE: if FIN arrived and we entered TIME-WAIT state, - * our ACK acking that FIN can be lost. If N subsequent retransmitted - * FINs (or previous seqments) are lost (probability of such event - * is p^(N+1), where p is probability to lose single packet and - * time to detect the loss is about RTO*(2^N - 1) with exponential - * backoff). Normal timewait length is calculated so, that we - * waited at least for one retransmitted FIN (maximal RTO is 120sec). - * [ BTW Linux. following BSD, violates this requirement waiting - * only for 60sec, we should wait at least for 240 secs. - * Well, 240 consumes too much of resources 8) - * ] - * This interval is not reduced to catch old duplicate and - * responces to our wandering segments living for two MSLs. - * However, if we use PAWS to detect - * old duplicates, we can reduce the interval to bounds required - * by RTO, rather than MSL. So, if peer understands PAWS, we - * kill tw bucket after 3.5*RTO (it is important that this number - * is greater than TS tick!) and detect old duplicates with help - * of PAWS. 
- */ - slot = (timeo + (1 << INET_TWDR_RECYCLE_TICK) - 1) >> INET_TWDR_RECYCLE_TICK; - - spin_lock(&twdr->death_lock); - - /* Unlink it, if it was scheduled */ - if (inet_twsk_del_dead_node(tw)) - twdr->tw_count--; - else - atomic_inc(&tw->tw_refcnt); - - if (slot >= INET_TWDR_RECYCLE_SLOTS) { - /* Schedule to slow timer */ - if (timeo >= TCP_TIMEWAIT_LEN) { - slot = INET_TWDR_TWKILL_SLOTS - 1; - } else { - slot = (timeo + twdr->period - 1) / twdr->period; - if (slot >= INET_TWDR_TWKILL_SLOTS) - slot = INET_TWDR_TWKILL_SLOTS - 1; - } - tw->tw_ttd = jiffies + timeo; - slot = (twdr->slot + slot) & (INET_TWDR_TWKILL_SLOTS - 1); - list = &twdr->cells[slot]; - } else { - tw->tw_ttd = jiffies + (slot << INET_TWDR_RECYCLE_TICK); - - if (twdr->twcal_hand < 0) { - twdr->twcal_hand = 0; - twdr->twcal_jiffie = jiffies; - twdr->twcal_timer.expires = twdr->twcal_jiffie + - (slot << INET_TWDR_RECYCLE_TICK); - add_timer(&twdr->twcal_timer); - } else { - if (time_after(twdr->twcal_timer.expires, - jiffies + (slot << INET_TWDR_RECYCLE_TICK))) - mod_timer(&twdr->twcal_timer, - jiffies + (slot << INET_TWDR_RECYCLE_TICK)); - slot = (twdr->twcal_hand + slot) & (INET_TWDR_RECYCLE_SLOTS - 1); - } - list = &twdr->twcal_row[slot]; - } - - hlist_add_head(&tw->tw_death_node, list); - - if (twdr->tw_count++ == 0) - mod_timer(&twdr->tw_timer, jiffies + twdr->period); - spin_unlock(&twdr->death_lock); -} - -void inet_twdr_twcal_tick(unsigned long data) -{ - struct inet_timewait_death_row *twdr; - int n, slot; - unsigned long j; - unsigned long now = jiffies; - int killed = 0; - int adv = 0; - - twdr = (struct inet_timewait_death_row *)data; - - spin_lock(&twdr->death_lock); - if (twdr->twcal_hand < 0) - goto out; - - slot = twdr->twcal_hand; - j = twdr->twcal_jiffie; - - for (n = 0; n < INET_TWDR_RECYCLE_SLOTS; n++) { - if (time_before_eq(j, now)) { - struct hlist_node *node, *safe; - struct inet_timewait_sock *tw; - - inet_twsk_for_each_inmate_safe(tw, node, safe, - &twdr->twcal_row[slot]) { - 
__inet_twsk_del_dead_node(tw); - __inet_twsk_kill(tw, twdr->hashinfo); - inet_twsk_put(tw); - killed++; - } - } else { - if (!adv) { - adv = 1; - twdr->twcal_jiffie = j; - twdr->twcal_hand = slot; - } - - if (!hlist_empty(&twdr->twcal_row[slot])) { - mod_timer(&twdr->twcal_timer, j); - goto out; - } - } - j += 1 << INET_TWDR_RECYCLE_TICK; - slot = (slot + 1) & (INET_TWDR_RECYCLE_SLOTS - 1); - } - twdr->twcal_hand = -1; - -out: - if ((twdr->tw_count -= killed) == 0) - del_timer(&twdr->tw_timer); - NET_ADD_STATS_BH(LINUX_MIB_TIMEWAITKILLED, killed); - spin_unlock(&twdr->death_lock); -} - /* This is not only more efficient than what we used to do, it eliminates * a lot of code duplication between IPv4/IPv6 SYN recv processing. -DaveM * @@ -933,4 +673,3 @@ EXPORT_SYMBOL(tcp_check_req); EXPORT_SYMBOL(tcp_child_process); EXPORT_SYMBOL(tcp_create_openreq_child); EXPORT_SYMBOL(tcp_timewait_state_process); -EXPORT_SYMBOL(inet_twsk_deschedule); From 64cf1e5d8b5f88d56509260e08fa0d8314277350 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 9 Aug 2005 20:45:21 -0700 Subject: [PATCH 369/584] [DCCP]: Finish the TIMEWAIT minisock support Using most of the infrastructure TCP uses, with a dccp_death_row, etc. As per my current interpretation of the draft what we have with this changeset seems to be all we need (or very close to it 8)). Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. 
Miller --- include/linux/dccp.h | 3 +++ net/dccp/ipv4.c | 15 +++++++---- net/dccp/minisocks.c | 60 ++++++++++++++++++++++++++++++++++++-------- 3 files changed, 63 insertions(+), 15 deletions(-) diff --git a/include/linux/dccp.h b/include/linux/dccp.h index 431d58923ba9..3dccdd5108b5 100644 --- a/include/linux/dccp.h +++ b/include/linux/dccp.h @@ -194,6 +194,7 @@ enum { #include #include +#include #include #include #include @@ -354,6 +355,8 @@ static inline struct dccp_request_sock *dccp_rsk(const struct request_sock *req) return (struct dccp_request_sock *)req; } +extern struct inet_timewait_death_row dccp_death_row; + /* Read about the ECN nonce to see why it is 253 */ #define DCCP_MAX_ACK_VECTOR_LEN 253 diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index 6bccf4dd1e70..f6da9328221e 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -97,7 +97,7 @@ static int __dccp_v4_check_established(struct sock *sk, const __u16 lport, NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED); } else if (tw != NULL) { /* Silly. Should hash-dance instead... 
*/ - dccp_tw_deschedule(tw); + inet_twsk_deschedule(tw, &dccp_death_row); NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED); inet_twsk_put(tw); @@ -201,7 +201,7 @@ ok: spin_unlock(&head->lock); if (tw != NULL) { - dccp_tw_deschedule(tw); + inet_twsk_deschedule(tw, &dccp_death_row); inet_twsk_put(tw); } @@ -1131,8 +1131,9 @@ int dccp_v4_rcv(struct sk_buff *skb) */ if (sk->sk_state == DCCP_TIME_WAIT) { - dccp_pr_debug("sk->sk_state == DCCP_TIME_WAIT: discard_and_relse\n"); - goto discard_and_relse; + dccp_pr_debug("sk->sk_state == DCCP_TIME_WAIT: " + "do_time_wait\n"); + goto do_time_wait; } if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb)) { @@ -1179,6 +1180,10 @@ discard_it: discard_and_relse: sock_put(sk); goto discard_it; + +do_time_wait: + inet_twsk_put((struct inet_timewait_sock *)sk); + goto no_dccp_socket; } static int dccp_v4_init_sock(struct sock *sk) @@ -1290,5 +1295,5 @@ struct proto dccp_v4_prot = { .max_header = MAX_DCCP_HEADER, .obj_size = sizeof(struct dccp_sock), .rsk_prot = &dccp_request_sock_ops, - .twsk_obj_size = sizeof(struct inet_timewait_sock), /* FIXME! 
create dccp_timewait_sock */ + .twsk_obj_size = sizeof(struct inet_timewait_sock), }; diff --git a/net/dccp/minisocks.c b/net/dccp/minisocks.c index e498e389fccc..a6a0b270fb6c 100644 --- a/net/dccp/minisocks.c +++ b/net/dccp/minisocks.c @@ -22,18 +22,58 @@ #include "ccid.h" #include "dccp.h" +struct inet_timewait_death_row dccp_death_row = { + .sysctl_max_tw_buckets = NR_FILE * 2, + .period = DCCP_TIMEWAIT_LEN / INET_TWDR_TWKILL_SLOTS, + .death_lock = SPIN_LOCK_UNLOCKED, + .hashinfo = &dccp_hashinfo, + .tw_timer = TIMER_INITIALIZER(inet_twdr_hangman, 0, + (unsigned long)&dccp_death_row), + .twkill_work = __WORK_INITIALIZER(dccp_death_row.twkill_work, + inet_twdr_twkill_work, + &dccp_death_row), +/* Short-time timewait calendar */ + + .twcal_hand = -1, + .twcal_timer = TIMER_INITIALIZER(inet_twdr_twcal_tick, 0, + (unsigned long)&dccp_death_row), +}; + void dccp_time_wait(struct sock *sk, int state, int timeo) { - /* FIXME: Implement */ - dccp_pr_debug("Want to help? Start here\n"); - dccp_set_state(sk, state); -} + struct inet_timewait_sock *tw = NULL; -/* This is for handling early-kills of TIME_WAIT sockets. */ -void dccp_tw_deschedule(struct inet_timewait_sock *tw) -{ - dccp_pr_debug("Want to help? Start here\n"); - __inet_twsk_kill(tw, &dccp_hashinfo); + if (dccp_death_row.tw_count < dccp_death_row.sysctl_max_tw_buckets) + tw = inet_twsk_alloc(sk, state); + + if (tw != NULL) { + const struct inet_connection_sock *icsk = inet_csk(sk); + const int rto = (icsk->icsk_rto << 2) - (icsk->icsk_rto >> 1); + + /* Linkage updates. */ + __inet_twsk_hashdance(tw, sk, &dccp_hashinfo); + + /* Get the TIME_WAIT timeout firing. */ + if (timeo < rto) + timeo = rto; + + tw->tw_timeout = DCCP_TIMEWAIT_LEN; + if (state == DCCP_TIME_WAIT) + timeo = DCCP_TIMEWAIT_LEN; + + inet_twsk_schedule(tw, &dccp_death_row, timeo, + DCCP_TIMEWAIT_LEN); + inet_twsk_put(tw); + } else { + /* Sorry, if we're out of memory, just CLOSE this + * socket up. 
We've got bigger problems than + * non-graceful socket closings. + */ + if (net_ratelimit()) + printk(KERN_INFO "DCCP: time wait bucket table overflow\n"); + } + + dccp_done(sk); } struct sock *dccp_create_openreq_child(struct sock *sk, @@ -55,7 +95,7 @@ struct sock *dccp_create_openreq_child(struct sock *sk, newdp->dccps_hc_rx_ackpkts = NULL; newdp->dccps_role = DCCP_ROLE_SERVER; - newicsk->icsk_rto = TCP_TIMEOUT_INIT; + newicsk->icsk_rto = DCCP_TIMEOUT_INIT; if (newdp->dccps_options.dccpo_send_ack_vector) { newdp->dccps_hc_rx_ackpkts = dccp_ackpkts_alloc(DCCP_MAX_ACK_VECTOR_LEN, From 64ce207306debd7157f47282be94770407bec01c Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Tue, 9 Aug 2005 20:50:53 -0700 Subject: [PATCH 370/584] [NET]: Make NETDEBUG pure printk wrappers Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/net/sock.h | 8 ++++---- net/dccp/input.c | 2 +- net/dccp/options.c | 8 ++++---- net/ipv4/esp4.c | 12 ++++++------ net/ipv4/icmp.c | 12 +++++------- net/ipv4/igmp.c | 2 +- net/ipv4/ip_fragment.c | 6 +++--- net/ipv4/ip_output.c | 2 +- net/ipv4/ipcomp.c | 4 ++-- net/ipv4/tcp_ipv4.c | 11 +++++------ net/ipv4/udp.c | 32 ++++++++++++++++---------------- net/ipv6/ah6.c | 13 ++++++------- net/ipv6/datagram.c | 4 ++-- net/ipv6/esp6.c | 3 +-- net/ipv6/exthdrs.c | 8 ++++---- net/ipv6/icmp.c | 20 +++++++------------- net/ipv6/ip6_output.c | 2 +- net/ipv6/netfilter.c | 3 +-- net/ipv6/raw.c | 3 +-- net/ipv6/tcp_ipv6.c | 2 +- net/ipv6/udp.c | 7 +++---- 21 files changed, 75 insertions(+), 89 deletions(-) diff --git a/include/net/sock.h b/include/net/sock.h index 8678313a22b4..065df67b6422 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -1316,11 +1316,11 @@ extern int sock_get_timestamp(struct sock *, struct timeval __user *); */ #if 0 -#define NETDEBUG(x) do { } while (0) -#define LIMIT_NETDEBUG(x) do {} while(0) +#define NETDEBUG(fmt, args...) do { } while (0) +#define LIMIT_NETDEBUG(fmt, args...) 
do { } while(0) #else -#define NETDEBUG(x) do { x; } while (0) -#define LIMIT_NETDEBUG(x) do { if (net_ratelimit()) { x; } } while(0) +#define NETDEBUG(fmt, args...) printk(fmt,##args) +#define LIMIT_NETDEBUG(fmt, args...) do { if (net_ratelimit()) printk(fmt,##args); } while(0) #endif /* diff --git a/net/dccp/input.c b/net/dccp/input.c index 76c3401e93a5..bdaecde0bde1 100644 --- a/net/dccp/input.c +++ b/net/dccp/input.c @@ -161,7 +161,7 @@ int dccp_rcv_established(struct sock *sk, struct sk_buff *skb, if (dccp_ackpkts_add(dp->dccps_hc_rx_ackpkts, DCCP_SKB_CB(skb)->dccpd_seq, DCCP_ACKPKTS_STATE_RECEIVED)) { - LIMIT_NETDEBUG(pr_info("DCCP: acknowledgeable packets buffer full!\n")); + LIMIT_NETDEBUG(KERN_INFO "DCCP: acknowledgeable packets buffer full!\n"); ap->dccpap_ack_seqno = DCCP_MAX_SEQNO + 1; inet_csk_schedule_ack(sk); inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK, TCP_DELACK_MIN, TCP_RTO_MAX); diff --git a/net/dccp/options.c b/net/dccp/options.c index 9ca32cba83af..5bf997683a16 100644 --- a/net/dccp/options.c +++ b/net/dccp/options.c @@ -231,7 +231,7 @@ void dccp_insert_option(struct sock *sk, struct sk_buff *skb, unsigned char *to; if (DCCP_SKB_CB(skb)->dccpd_opt_len + len + 2 > DCCP_MAX_OPT_LEN) { - LIMIT_NETDEBUG(pr_info("DCCP: packet too small to insert %d option!\n", option)); + LIMIT_NETDEBUG(KERN_INFO "DCCP: packet too small to insert %d option!\n", option); return; } @@ -299,7 +299,7 @@ void dccp_insert_option_elapsed_time(struct sock *sk, return; if (DCCP_SKB_CB(skb)->dccpd_opt_len + len > DCCP_MAX_OPT_LEN) { - LIMIT_NETDEBUG(pr_info("DCCP: packet too small to insert elapsed time!\n")); + LIMIT_NETDEBUG(KERN_INFO "DCCP: packet too small to insert elapsed time!\n"); return; } @@ -335,7 +335,7 @@ static void dccp_insert_option_ack_vector(struct sock *sk, struct sk_buff *skb) dccp_insert_option_elapsed_time(sk, skb, elapsed_time); if (DCCP_SKB_CB(skb)->dccpd_opt_len + len > DCCP_MAX_OPT_LEN) { - LIMIT_NETDEBUG(pr_info("DCCP: packet too small to insert 
ACK Vector!\n")); + LIMIT_NETDEBUG(KERN_INFO "DCCP: packet too small to insert ACK Vector!\n"); return; } @@ -412,7 +412,7 @@ static void dccp_insert_option_timestamp_echo(struct sock *sk, struct sk_buff *s unsigned char *to; if (DCCP_SKB_CB(skb)->dccpd_opt_len + len > DCCP_MAX_OPT_LEN) { - LIMIT_NETDEBUG(pr_info("DCCP: packet too small to insert timestamp echo!\n")); + LIMIT_NETDEBUG(KERN_INFO "DCCP: packet too small to insert timestamp echo!\n"); return; } diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c index ba57446d5d1f..b31ffc5053d2 100644 --- a/net/ipv4/esp4.c +++ b/net/ipv4/esp4.c @@ -331,8 +331,8 @@ static void esp4_err(struct sk_buff *skb, u32 info) x = xfrm_state_lookup((xfrm_address_t *)&iph->daddr, esph->spi, IPPROTO_ESP, AF_INET); if (!x) return; - NETDEBUG(printk(KERN_DEBUG "pmtu discovery on SA ESP/%08x/%08x\n", - ntohl(esph->spi), ntohl(iph->daddr))); + NETDEBUG(KERN_DEBUG "pmtu discovery on SA ESP/%08x/%08x\n", + ntohl(esph->spi), ntohl(iph->daddr)); xfrm_state_put(x); } @@ -395,10 +395,10 @@ static int esp_init_state(struct xfrm_state *x) if (aalg_desc->uinfo.auth.icv_fullbits/8 != crypto_tfm_alg_digestsize(esp->auth.tfm)) { - NETDEBUG(printk(KERN_INFO "ESP: %s digestsize %u != %hu\n", - x->aalg->alg_name, - crypto_tfm_alg_digestsize(esp->auth.tfm), - aalg_desc->uinfo.auth.icv_fullbits/8)); + NETDEBUG(KERN_INFO "ESP: %s digestsize %u != %hu\n", + x->aalg->alg_name, + crypto_tfm_alg_digestsize(esp->auth.tfm), + aalg_desc->uinfo.auth.icv_fullbits/8); goto error; } diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index badfc5849973..25f66b750fd8 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -627,11 +627,10 @@ static void icmp_unreach(struct sk_buff *skb) break; case ICMP_FRAG_NEEDED: if (ipv4_config.no_pmtu_disc) { - LIMIT_NETDEBUG( - printk(KERN_INFO "ICMP: %u.%u.%u.%u: " + LIMIT_NETDEBUG(KERN_INFO "ICMP: %u.%u.%u.%u: " "fragmentation needed " "and DF set.\n", - NIPQUAD(iph->daddr))); + NIPQUAD(iph->daddr)); } else { info = 
ip_rt_frag_needed(iph, ntohs(icmph->un.frag.mtu)); @@ -640,10 +639,9 @@ static void icmp_unreach(struct sk_buff *skb) } break; case ICMP_SR_FAILED: - LIMIT_NETDEBUG( - printk(KERN_INFO "ICMP: %u.%u.%u.%u: Source " + LIMIT_NETDEBUG(KERN_INFO "ICMP: %u.%u.%u.%u: Source " "Route Failed.\n", - NIPQUAD(iph->daddr))); + NIPQUAD(iph->daddr)); break; default: break; @@ -936,7 +934,7 @@ int icmp_rcv(struct sk_buff *skb) case CHECKSUM_HW: if (!(u16)csum_fold(skb->csum)) break; - LIMIT_NETDEBUG(printk(KERN_DEBUG "icmp v4 hw csum failure\n")); + LIMIT_NETDEBUG(KERN_DEBUG "icmp v4 hw csum failure\n"); case CHECKSUM_NONE: if ((u16)csum_fold(skb_checksum(skb, 0, skb->len, 0))) goto error; diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index 5088f90835ae..44607f4767b8 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -904,7 +904,7 @@ int igmp_rcv(struct sk_buff *skb) case IGMP_MTRACE_RESP: break; default: - NETDEBUG(printk(KERN_DEBUG "New IGMP type=%d, why we do not know about it?\n", ih->type)); + NETDEBUG(KERN_DEBUG "New IGMP type=%d, why we do not know about it?\n", ih->type); } in_dev_put(in_dev); kfree_skb(skb); diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c index eb377ae15305..1ac64c0c5b37 100644 --- a/net/ipv4/ip_fragment.c +++ b/net/ipv4/ip_fragment.c @@ -377,7 +377,7 @@ static struct ipq *ip_frag_create(unsigned hash, struct iphdr *iph, u32 user) return ip_frag_intern(hash, qp); out_nomem: - LIMIT_NETDEBUG(printk(KERN_ERR "ip_frag_create: no memory left !\n")); + LIMIT_NETDEBUG(KERN_ERR "ip_frag_create: no memory left !\n"); return NULL; } @@ -625,8 +625,8 @@ static struct sk_buff *ip_frag_reasm(struct ipq *qp, struct net_device *dev) return head; out_nomem: - LIMIT_NETDEBUG(printk(KERN_ERR "IP: queue_glue: no memory for gluing " - "queue %p\n", qp)); + LIMIT_NETDEBUG(KERN_ERR "IP: queue_glue: no memory for gluing " + "queue %p\n", qp); goto out_fail; out_oversize: if (net_ratelimit()) diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 
633945d27ac2..19f24f778dc8 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -573,7 +573,7 @@ slow_path: */ if ((skb2 = alloc_skb(len+hlen+ll_rs, GFP_ATOMIC)) == NULL) { - NETDEBUG(printk(KERN_INFO "IP: frag: no memory for new fragment!\n")); + NETDEBUG(KERN_INFO "IP: frag: no memory for new fragment!\n"); err = -ENOMEM; goto fail; } diff --git a/net/ipv4/ipcomp.c b/net/ipv4/ipcomp.c index 7ded6e60f43a..dcb7ee6c4858 100644 --- a/net/ipv4/ipcomp.c +++ b/net/ipv4/ipcomp.c @@ -214,8 +214,8 @@ static void ipcomp4_err(struct sk_buff *skb, u32 info) spi, IPPROTO_COMP, AF_INET); if (!x) return; - NETDEBUG(printk(KERN_DEBUG "pmtu discovery on SA IPCOMP/%08x/%u.%u.%u.%u\n", - spi, NIPQUAD(iph->daddr))); + NETDEBUG(KERN_DEBUG "pmtu discovery on SA IPCOMP/%08x/%u.%u.%u.%u\n", + spi, NIPQUAD(iph->daddr)); xfrm_state_put(x); } diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 83f72346274a..32a0ebc589d5 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -990,11 +990,10 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) * to destinations, already remembered * to the moment of synflood. */ - LIMIT_NETDEBUG(printk(KERN_DEBUG "TCP: drop open " - "request from %u.%u." - "%u.%u/%u\n", - NIPQUAD(saddr), - ntohs(skb->h.th->source))); + LIMIT_NETDEBUG(KERN_DEBUG "TCP: drop open " + "request from %u.%u.%u.%u/%u\n", + NIPQUAD(saddr), + ntohs(skb->h.th->source)); dst_release(dst); goto drop_and_free; } @@ -1118,7 +1117,7 @@ static int tcp_v4_checksum_init(struct sk_buff *skb) skb->nh.iph->daddr, skb->csum)) return 0; - LIMIT_NETDEBUG(printk(KERN_DEBUG "hw tcp v4 csum failed\n")); + LIMIT_NETDEBUG(KERN_DEBUG "hw tcp v4 csum failed\n"); skb->ip_summed = CHECKSUM_NONE; } if (skb->len <= 76) { diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index a8135e1f528c..3a5bbbe7dd85 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -629,7 +629,7 @@ back_from_confirm: /* ... which is an evident application bug. 
--ANK */ release_sock(sk); - LIMIT_NETDEBUG(printk(KERN_DEBUG "udp cork app bug 2\n")); + LIMIT_NETDEBUG(KERN_DEBUG "udp cork app bug 2\n"); err = -EINVAL; goto out; } @@ -694,7 +694,7 @@ static int udp_sendpage(struct sock *sk, struct page *page, int offset, if (unlikely(!up->pending)) { release_sock(sk); - LIMIT_NETDEBUG(printk(KERN_DEBUG "udp cork app bug 3\n")); + LIMIT_NETDEBUG(KERN_DEBUG "udp cork app bug 3\n"); return -EINVAL; } @@ -1103,7 +1103,7 @@ static int udp_checksum_init(struct sk_buff *skb, struct udphdr *uh, skb->ip_summed = CHECKSUM_UNNECESSARY; if (!udp_check(uh, ulen, saddr, daddr, skb->csum)) return 0; - LIMIT_NETDEBUG(printk(KERN_DEBUG "udp v4 hw csum failure.\n")); + LIMIT_NETDEBUG(KERN_DEBUG "udp v4 hw csum failure.\n"); skb->ip_summed = CHECKSUM_NONE; } if (skb->ip_summed != CHECKSUM_UNNECESSARY) @@ -1182,13 +1182,13 @@ int udp_rcv(struct sk_buff *skb) return(0); short_packet: - LIMIT_NETDEBUG(printk(KERN_DEBUG "UDP: short packet: From %u.%u.%u.%u:%u %d/%d to %u.%u.%u.%u:%u\n", - NIPQUAD(saddr), - ntohs(uh->source), - ulen, - len, - NIPQUAD(daddr), - ntohs(uh->dest))); + LIMIT_NETDEBUG(KERN_DEBUG "UDP: short packet: From %u.%u.%u.%u:%u %d/%d to %u.%u.%u.%u:%u\n", + NIPQUAD(saddr), + ntohs(uh->source), + ulen, + len, + NIPQUAD(daddr), + ntohs(uh->dest)); no_header: UDP_INC_STATS_BH(UDP_MIB_INERRORS); kfree_skb(skb); @@ -1199,12 +1199,12 @@ csum_error: * RFC1122: OK. Discards the bad packet silently (as far as * the network is concerned, anyway) as per 4.1.3.4 (MUST). */ - LIMIT_NETDEBUG(printk(KERN_DEBUG "UDP: bad checksum. From %d.%d.%d.%d:%d to %d.%d.%d.%d:%d ulen %d\n", - NIPQUAD(saddr), - ntohs(uh->source), - NIPQUAD(daddr), - ntohs(uh->dest), - ulen)); + LIMIT_NETDEBUG(KERN_DEBUG "UDP: bad checksum. 
From %d.%d.%d.%d:%d to %d.%d.%d.%d:%d ulen %d\n", + NIPQUAD(saddr), + ntohs(uh->source), + NIPQUAD(daddr), + ntohs(uh->dest), + ulen); drop: UDP_INC_STATS_BH(UDP_MIB_INERRORS); kfree_skb(skb); diff --git a/net/ipv6/ah6.c b/net/ipv6/ah6.c index 986fdfdccbcd..0ebfad907a03 100644 --- a/net/ipv6/ah6.c +++ b/net/ipv6/ah6.c @@ -131,10 +131,10 @@ static int ipv6_clear_mutable_options(struct ipv6hdr *iph, int len) case NEXTHDR_HOP: case NEXTHDR_DEST: if (!zero_out_mutable_opts(exthdr.opth)) { - LIMIT_NETDEBUG(printk( + LIMIT_NETDEBUG( KERN_WARNING "overrun %sopts\n", nexthdr == NEXTHDR_HOP ? - "hop" : "dest")); + "hop" : "dest"); return -EINVAL; } break; @@ -293,8 +293,7 @@ static int ah6_input(struct xfrm_state *x, struct xfrm_decap_state *decap, struc skb_push(skb, skb->data - skb->nh.raw); ahp->icv(ahp, skb, ah->auth_data); if (memcmp(ah->auth_data, auth_data, ahp->icv_trunc_len)) { - LIMIT_NETDEBUG( - printk(KERN_WARNING "ipsec ah authentication error\n")); + LIMIT_NETDEBUG(KERN_WARNING "ipsec ah authentication error\n"); x->stats.integrity_failed++; goto free_out; } @@ -332,9 +331,9 @@ static void ah6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, if (!x) return; - NETDEBUG(printk(KERN_DEBUG "pmtu discovery on SA AH/%08x/" - "%04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x\n", - ntohl(ah->spi), NIP6(iph->daddr))); + NETDEBUG(KERN_DEBUG "pmtu discovery on SA AH/%08x/" + "%04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x\n", + ntohl(ah->spi), NIP6(iph->daddr)); xfrm_state_put(x); } diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c index 761984f3bd9c..01468fab3d3d 100644 --- a/net/ipv6/datagram.c +++ b/net/ipv6/datagram.c @@ -589,8 +589,8 @@ int datagram_send_ctl(struct msghdr *msg, struct flowi *fl, break; default: - LIMIT_NETDEBUG( - printk(KERN_DEBUG "invalid cmsg type: %d\n", cmsg->cmsg_type)); + LIMIT_NETDEBUG(KERN_DEBUG "invalid cmsg type: %d\n", + cmsg->cmsg_type); err = -EINVAL; break; }; diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c index 324db62515a2..e8bff9d3d96c 
100644 --- a/net/ipv6/esp6.c +++ b/net/ipv6/esp6.c @@ -212,8 +212,7 @@ static int esp6_input(struct xfrm_state *x, struct xfrm_decap_state *decap, stru padlen = nexthdr[0]; if (padlen+2 >= elen) { - LIMIT_NETDEBUG( - printk(KERN_WARNING "ipsec esp packet is garbage padlen=%d, elen=%d\n", padlen+2, elen)); + LIMIT_NETDEBUG(KERN_WARNING "ipsec esp packet is garbage padlen=%d, elen=%d\n", padlen+2, elen); ret = -EINVAL; goto out; } diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c index e0839eafc3a9..5be6da2584ee 100644 --- a/net/ipv6/exthdrs.c +++ b/net/ipv6/exthdrs.c @@ -424,8 +424,8 @@ static int ipv6_hop_ra(struct sk_buff *skb, int optoff) IP6CB(skb)->ra = optoff; return 1; } - LIMIT_NETDEBUG( - printk(KERN_DEBUG "ipv6_hop_ra: wrong RA length %d\n", skb->nh.raw[optoff+1])); + LIMIT_NETDEBUG(KERN_DEBUG "ipv6_hop_ra: wrong RA length %d\n", + skb->nh.raw[optoff+1]); kfree_skb(skb); return 0; } @@ -437,8 +437,8 @@ static int ipv6_hop_jumbo(struct sk_buff *skb, int optoff) u32 pkt_len; if (skb->nh.raw[optoff+1] != 4 || (optoff&3) != 2) { - LIMIT_NETDEBUG( - printk(KERN_DEBUG "ipv6_hop_jumbo: wrong jumbo opt length/alignment %d\n", skb->nh.raw[optoff+1])); + LIMIT_NETDEBUG(KERN_DEBUG "ipv6_hop_jumbo: wrong jumbo opt length/alignment %d\n", + skb->nh.raw[optoff+1]); IP6_INC_STATS_BH(IPSTATS_MIB_INHDRERRORS); goto drop; } diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index ee9f1d36346c..ff685f229b69 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -332,8 +332,7 @@ void icmpv6_send(struct sk_buff *skb, int type, int code, __u32 info, * for now we don't know that. */ if ((addr_type == IPV6_ADDR_ANY) || (addr_type & IPV6_ADDR_MULTICAST)) { - LIMIT_NETDEBUG( - printk(KERN_DEBUG "icmpv6_send: addr_any/mcast source\n")); + LIMIT_NETDEBUG(KERN_DEBUG "icmpv6_send: addr_any/mcast source\n"); return; } @@ -341,8 +340,7 @@ void icmpv6_send(struct sk_buff *skb, int type, int code, __u32 info, * Never answer to a ICMP packet. 
*/ if (is_ineligible(skb)) { - LIMIT_NETDEBUG( - printk(KERN_DEBUG "icmpv6_send: no reply to icmp error\n")); + LIMIT_NETDEBUG(KERN_DEBUG "icmpv6_send: no reply to icmp error\n"); return; } @@ -393,8 +391,7 @@ void icmpv6_send(struct sk_buff *skb, int type, int code, __u32 info, len = skb->len - msg.offset; len = min_t(unsigned int, len, IPV6_MIN_MTU - sizeof(struct ipv6hdr) -sizeof(struct icmp6hdr)); if (len < 0) { - LIMIT_NETDEBUG( - printk(KERN_DEBUG "icmp: len problem\n")); + LIMIT_NETDEBUG(KERN_DEBUG "icmp: len problem\n"); goto out_dst_release; } @@ -584,17 +581,15 @@ static int icmpv6_rcv(struct sk_buff **pskb, unsigned int *nhoffp) skb->ip_summed = CHECKSUM_UNNECESSARY; if (csum_ipv6_magic(saddr, daddr, skb->len, IPPROTO_ICMPV6, skb->csum)) { - LIMIT_NETDEBUG( - printk(KERN_DEBUG "ICMPv6 hw checksum failed\n")); + LIMIT_NETDEBUG(KERN_DEBUG "ICMPv6 hw checksum failed\n"); skb->ip_summed = CHECKSUM_NONE; } } if (skb->ip_summed == CHECKSUM_NONE) { if (csum_ipv6_magic(saddr, daddr, skb->len, IPPROTO_ICMPV6, skb_checksum(skb, 0, skb->len, 0))) { - LIMIT_NETDEBUG( - printk(KERN_DEBUG "ICMPv6 checksum failed [%04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x > %04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x]\n", - NIP6(*saddr), NIP6(*daddr))); + LIMIT_NETDEBUG(KERN_DEBUG "ICMPv6 checksum failed [%04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x > %04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x]\n", + NIP6(*saddr), NIP6(*daddr)); goto discard_it; } } @@ -670,8 +665,7 @@ static int icmpv6_rcv(struct sk_buff **pskb, unsigned int *nhoffp) break; default: - LIMIT_NETDEBUG( - printk(KERN_DEBUG "icmpv6: msg of unknown type\n")); + LIMIT_NETDEBUG(KERN_DEBUG "icmpv6: msg of unknown type\n"); /* informational */ if (type & ICMPV6_INFOMSG_MASK) diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 00f85148b85f..01ef94f7c7f1 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -625,7 +625,7 @@ slow_path: */ if ((frag = alloc_skb(len+hlen+sizeof(struct 
frag_hdr)+LL_RESERVED_SPACE(rt->u.dst.dev), GFP_ATOMIC)) == NULL) { - NETDEBUG(printk(KERN_INFO "IPv6: frag: no memory for new fragment!\n")); + NETDEBUG(KERN_INFO "IPv6: frag: no memory for new fragment!\n"); IP6_INC_STATS(IPSTATS_MIB_FRAGFAILS); err = -ENOMEM; goto fail; diff --git a/net/ipv6/netfilter.c b/net/ipv6/netfilter.c index c8daef97cf56..f8626ebf90fd 100644 --- a/net/ipv6/netfilter.c +++ b/net/ipv6/netfilter.c @@ -28,8 +28,7 @@ int ip6_route_me_harder(struct sk_buff *skb) if (dst->error) { IP6_INC_STATS(IPSTATS_MIB_OUTNOROUTES); - LIMIT_NETDEBUG( - printk(KERN_DEBUG "ip6_route_me_harder: No more route.\n")); + LIMIT_NETDEBUG(KERN_DEBUG "ip6_route_me_harder: No more route.\n"); dst_release(dst); return -EINVAL; } diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index 766e1c7179a2..7a5863298f3f 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -343,8 +343,7 @@ int rawv6_rcv(struct sock *sk, struct sk_buff *skb) if (csum_ipv6_magic(&skb->nh.ipv6h->saddr, &skb->nh.ipv6h->daddr, skb->len, inet->num, skb->csum)) { - LIMIT_NETDEBUG( - printk(KERN_DEBUG "raw v6 hw csum failure.\n")); + LIMIT_NETDEBUG(KERN_DEBUG "raw v6 hw csum failure.\n"); skb->ip_summed = CHECKSUM_NONE; } } diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 1c21ad66cfad..08c55b024704 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1564,7 +1564,7 @@ static int tcp_v6_checksum_init(struct sk_buff *skb) if (!tcp_v6_check(skb->h.th,skb->len,&skb->nh.ipv6h->saddr, &skb->nh.ipv6h->daddr,skb->csum)) return 0; - LIMIT_NETDEBUG(printk(KERN_DEBUG "hw tcp v6 csum failed\n")); + LIMIT_NETDEBUG(KERN_DEBUG "hw tcp v6 csum failed\n"); } if (skb->len <= 76) { if (tcp_v6_check(skb->h.th,skb->len,&skb->nh.ipv6h->saddr, diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 2ffe34cc2ef8..c348307e5773 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -478,8 +478,7 @@ static int udpv6_rcv(struct sk_buff **pskb, unsigned int *nhoffp) /* RFC 2460 section 8.1 says that we SHOULD log this error. 
Well, it is reasonable. */ - LIMIT_NETDEBUG( - printk(KERN_INFO "IPv6: udp checksum is 0\n")); + LIMIT_NETDEBUG(KERN_INFO "IPv6: udp checksum is 0\n"); goto discard; } @@ -494,7 +493,7 @@ static int udpv6_rcv(struct sk_buff **pskb, unsigned int *nhoffp) if (skb->ip_summed==CHECKSUM_HW) { skb->ip_summed = CHECKSUM_UNNECESSARY; if (csum_ipv6_magic(saddr, daddr, ulen, IPPROTO_UDP, skb->csum)) { - LIMIT_NETDEBUG(printk(KERN_DEBUG "udp v6 hw csum failure.\n")); + LIMIT_NETDEBUG(KERN_DEBUG "udp v6 hw csum failure.\n"); skb->ip_summed = CHECKSUM_NONE; } } @@ -826,7 +825,7 @@ back_from_confirm: /* ... which is an evident application bug. --ANK */ release_sock(sk); - LIMIT_NETDEBUG(printk(KERN_DEBUG "udp cork app bug 2\n")); + LIMIT_NETDEBUG(KERN_DEBUG "udp cork app bug 2\n"); err = -EINVAL; goto out; } From 6687e988d9aeaccad6774e6a8304f681f3ec0a03 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 10 Aug 2005 04:03:31 -0300 Subject: [PATCH 371/584] [ICSK]: Move TCP congestion avoidance members to icsk This changeset basically moves tcp_sk()->{ca_ops,ca_state,etc} to inet_csk(), minimal renaming/moving done in this changeset to ease review. Most of it is just changes of struct tcp_sock * to struct sock * parameters. With this we move to a state closer to two interesting goals: 1. Generalisation of net/ipv4/tcp_diag.c, becoming inet_diag.c, being used for any INET transport protocol that has struct inet_hashinfo and are derived from struct inet_connection_sock. Keeps the userspace API, that will just not display DCCP sockets, while newer versions of tools can support DCCP. 2. INET generic transport pluggable Congestion Avoidance infrastructure, using the current TCP CA infrastructure with DCCP. Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. 
Miller --- include/linux/tcp.h | 21 +-- include/net/inet_connection_sock.h | 15 ++ include/net/tcp.h | 74 +++++----- net/ipv4/inet_connection_sock.c | 3 +- net/ipv4/tcp.c | 12 +- net/ipv4/tcp_bic.c | 46 +++--- net/ipv4/tcp_cong.c | 44 +++--- net/ipv4/tcp_diag.c | 16 +-- net/ipv4/tcp_highspeed.c | 17 ++- net/ipv4/tcp_htcp.c | 53 ++++--- net/ipv4/tcp_hybla.c | 31 ++-- net/ipv4/tcp_input.c | 223 ++++++++++++++++------------- net/ipv4/tcp_ipv4.c | 9 +- net/ipv4/tcp_minisocks.c | 5 +- net/ipv4/tcp_output.c | 36 ++--- net/ipv4/tcp_scalable.c | 6 +- net/ipv4/tcp_timer.c | 26 ++-- net/ipv4/tcp_vegas.c | 44 +++--- net/ipv4/tcp_westwood.c | 58 ++++---- net/ipv6/tcp_ipv6.c | 7 +- 20 files changed, 412 insertions(+), 334 deletions(-) diff --git a/include/linux/tcp.h b/include/linux/tcp.h index 620096840744..ac4ca44c75ca 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -258,19 +258,15 @@ struct tcp_sock { __u32 mss_cache; /* Cached effective mss, not including SACKS */ __u16 xmit_size_goal; /* Goal for segmenting output packets */ __u16 ext_header_len; /* Network protocol overhead (IP/IPv6 options) */ - __u8 ca_state; /* State of fast-retransmit machine */ - __u8 keepalive_probes; /* num of allowed keep alive probes */ - __u16 advmss; /* Advertised MSS */ __u32 window_clamp; /* Maximal window to advertise */ __u32 rcv_ssthresh; /* Current window clamp */ __u32 frto_highmark; /* snd_nxt when RTO occurred */ __u8 reordering; /* Packet reordering metric. */ __u8 frto_counter; /* Number of new acks after RTO */ - __u8 nonagle; /* Disable Nagle algorithm? 
*/ - /* ONE BYTE HOLE, TRY TO PACK */ + __u8 keepalive_probes; /* num of allowed keep alive probes */ /* RTT measurement */ __u32 srtt; /* smoothed round trip time << 3 */ @@ -311,8 +307,7 @@ struct tcp_sock { struct tcp_sack_block duplicate_sack[1]; /* D-SACK block */ struct tcp_sack_block selective_acks[4]; /* The SACKS themselves*/ - __u8 probes_out; /* unanswered 0 window probes */ - __u8 ecn_flags; /* ECN status bits. */ + __u16 advmss; /* Advertised MSS */ __u16 prior_ssthresh; /* ssthresh saved at recovery start */ __u32 lost_out; /* Lost packets */ __u32 sacked_out; /* SACK'd packets */ @@ -327,7 +322,7 @@ struct tcp_sock { __u32 urg_seq; /* Seq of received urgent pointer */ __u16 urg_data; /* Saved octet of OOB data and control flags */ __u8 urg_mode; /* In urgent mode */ - /* ONE BYTE HOLE, TRY TO PACK! */ + __u8 ecn_flags; /* ECN status bits. */ __u32 snd_up; /* Urgent pointer */ __u32 total_retrans; /* Total retransmits for entire connection */ @@ -351,11 +346,6 @@ struct tcp_sock { __u32 seq; __u32 time; } rcvq_space; - - /* Pluggable TCP congestion control hook */ - struct tcp_congestion_ops *ca_ops; - u32 ca_priv[16]; -#define TCP_CA_PRIV_SIZE (16*sizeof(u32)) }; static inline struct tcp_sock *tcp_sk(const struct sock *sk) @@ -377,11 +367,6 @@ static inline struct tcp_timewait_sock *tcp_twsk(const struct sock *sk) return (struct tcp_timewait_sock *)sk; } -static inline void *tcp_ca(const struct tcp_sock *tp) -{ - return (void *) tp->ca_priv; -} - #endif #endif /* _LINUX_TCP_H */ diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h index bec19d5cff26..4d7e708c07d1 100644 --- a/include/net/inet_connection_sock.h +++ b/include/net/inet_connection_sock.h @@ -27,6 +27,7 @@ struct inet_bind_bucket; struct inet_hashinfo; +struct tcp_congestion_ops; /** inet_connection_sock - INET connection oriented sock * @@ -35,10 +36,13 @@ struct inet_hashinfo; * @icsk_timeout: Timeout * @icsk_retransmit_timer: Resend (no ack) * @icsk_rto: 
Retransmit timeout + * @icsk_ca_ops Pluggable congestion control hook + * @icsk_ca_state: Congestion control state * @icsk_retransmits: Number of unrecovered [RTO] timeouts * @icsk_pending: Scheduled timer event * @icsk_backoff: Backoff * @icsk_syn_retries: Number of allowed SYN (or equivalent) retries + * @icsk_probes_out: unanswered 0 window probes * @icsk_ack: Delayed ACK control data */ struct inet_connection_sock { @@ -50,10 +54,14 @@ struct inet_connection_sock { struct timer_list icsk_retransmit_timer; struct timer_list icsk_delack_timer; __u32 icsk_rto; + struct tcp_congestion_ops *icsk_ca_ops; + __u8 icsk_ca_state; __u8 icsk_retransmits; __u8 icsk_pending; __u8 icsk_backoff; __u8 icsk_syn_retries; + __u8 icsk_probes_out; + /* 2 BYTES HOLE, TRY TO PACK! */ struct { __u8 pending; /* ACK is pending */ __u8 quick; /* Scheduled number of quick acks */ @@ -65,6 +73,8 @@ struct inet_connection_sock { __u16 last_seg_size; /* Size of last incoming segment */ __u16 rcv_mss; /* MSS used for delayed ACK decisions */ } icsk_ack; + u32 icsk_ca_priv[16]; +#define ICSK_CA_PRIV_SIZE (16 * sizeof(u32)) }; #define ICSK_TIME_RETRANS 1 /* Retransmit timer */ @@ -77,6 +87,11 @@ static inline struct inet_connection_sock *inet_csk(const struct sock *sk) return (struct inet_connection_sock *)sk; } +static inline void *inet_csk_ca(const struct sock *sk) +{ + return (void *)inet_csk(sk)->icsk_ca_priv; +} + extern struct sock *inet_csk_clone(struct sock *sk, const struct request_sock *req, const unsigned int __nocast priority); diff --git a/include/net/tcp.h b/include/net/tcp.h index d489ac548e4b..0b3f7294c5c7 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -669,29 +669,29 @@ struct tcp_congestion_ops { struct list_head list; /* initialize private data (optional) */ - void (*init)(struct tcp_sock *tp); + void (*init)(struct sock *sk); /* cleanup private data (optional) */ - void (*release)(struct tcp_sock *tp); + void (*release)(struct sock *sk); /* return slow start 
threshold (required) */ - u32 (*ssthresh)(struct tcp_sock *tp); + u32 (*ssthresh)(struct sock *sk); /* lower bound for congestion window (optional) */ - u32 (*min_cwnd)(struct tcp_sock *tp); + u32 (*min_cwnd)(struct sock *sk); /* do new cwnd calculation (required) */ - void (*cong_avoid)(struct tcp_sock *tp, u32 ack, + void (*cong_avoid)(struct sock *sk, u32 ack, u32 rtt, u32 in_flight, int good_ack); /* round trip time sample per acked packet (optional) */ - void (*rtt_sample)(struct tcp_sock *tp, u32 usrtt); + void (*rtt_sample)(struct sock *sk, u32 usrtt); /* call before changing ca_state (optional) */ - void (*set_state)(struct tcp_sock *tp, u8 new_state); + void (*set_state)(struct sock *sk, u8 new_state); /* call when cwnd event occurs (optional) */ - void (*cwnd_event)(struct tcp_sock *tp, enum tcp_ca_event ev); + void (*cwnd_event)(struct sock *sk, enum tcp_ca_event ev); /* new value of cwnd after loss (optional) */ - u32 (*undo_cwnd)(struct tcp_sock *tp); + u32 (*undo_cwnd)(struct sock *sk); /* hook for packet ack accounting (optional) */ - void (*pkts_acked)(struct tcp_sock *tp, u32 num_acked); + void (*pkts_acked)(struct sock *sk, u32 num_acked); /* get info for tcp_diag (optional) */ - void (*get_info)(struct tcp_sock *tp, u32 ext, struct sk_buff *skb); + void (*get_info)(struct sock *sk, u32 ext, struct sk_buff *skb); char name[TCP_CA_NAME_MAX]; struct module *owner; @@ -700,30 +700,34 @@ struct tcp_congestion_ops { extern int tcp_register_congestion_control(struct tcp_congestion_ops *type); extern void tcp_unregister_congestion_control(struct tcp_congestion_ops *type); -extern void tcp_init_congestion_control(struct tcp_sock *tp); -extern void tcp_cleanup_congestion_control(struct tcp_sock *tp); +extern void tcp_init_congestion_control(struct sock *sk); +extern void tcp_cleanup_congestion_control(struct sock *sk); extern int tcp_set_default_congestion_control(const char *name); extern void tcp_get_default_congestion_control(char *name); -extern int 
tcp_set_congestion_control(struct tcp_sock *tp, const char *name); +extern int tcp_set_congestion_control(struct sock *sk, const char *name); extern struct tcp_congestion_ops tcp_init_congestion_ops; -extern u32 tcp_reno_ssthresh(struct tcp_sock *tp); -extern void tcp_reno_cong_avoid(struct tcp_sock *tp, u32 ack, +extern u32 tcp_reno_ssthresh(struct sock *sk); +extern void tcp_reno_cong_avoid(struct sock *sk, u32 ack, u32 rtt, u32 in_flight, int flag); -extern u32 tcp_reno_min_cwnd(struct tcp_sock *tp); +extern u32 tcp_reno_min_cwnd(struct sock *sk); extern struct tcp_congestion_ops tcp_reno; -static inline void tcp_set_ca_state(struct tcp_sock *tp, u8 ca_state) +static inline void tcp_set_ca_state(struct sock *sk, const u8 ca_state) { - if (tp->ca_ops->set_state) - tp->ca_ops->set_state(tp, ca_state); - tp->ca_state = ca_state; + struct inet_connection_sock *icsk = inet_csk(sk); + + if (icsk->icsk_ca_ops->set_state) + icsk->icsk_ca_ops->set_state(sk, ca_state); + icsk->icsk_ca_state = ca_state; } -static inline void tcp_ca_event(struct tcp_sock *tp, enum tcp_ca_event event) +static inline void tcp_ca_event(struct sock *sk, const enum tcp_ca_event event) { - if (tp->ca_ops->cwnd_event) - tp->ca_ops->cwnd_event(tp, event); + const struct inet_connection_sock *icsk = inet_csk(sk); + + if (icsk->icsk_ca_ops->cwnd_event) + icsk->icsk_ca_ops->cwnd_event(sk, event); } /* This determines how many packets are "in the network" to the best @@ -749,9 +753,10 @@ static __inline__ unsigned int tcp_packets_in_flight(const struct tcp_sock *tp) * The exception is rate halving phase, when cwnd is decreasing towards * ssthresh. 
*/ -static inline __u32 tcp_current_ssthresh(struct tcp_sock *tp) +static inline __u32 tcp_current_ssthresh(const struct sock *sk) { - if ((1<<tp->ca_state)&(TCPF_CA_CWR|TCPF_CA_Recovery)) + const struct tcp_sock *tp = tcp_sk(sk); + if ((1 << inet_csk(sk)->icsk_ca_state) & (TCPF_CA_CWR | TCPF_CA_Recovery)) return tp->snd_ssthresh; else return max(tp->snd_ssthresh, @@ -768,10 +773,13 @@ static inline void tcp_sync_left_out(struct tcp_sock *tp) } /* Set slow start threshold and cwnd not falling to slow start */ -static inline void __tcp_enter_cwr(struct tcp_sock *tp) +static inline void __tcp_enter_cwr(struct sock *sk) { + const struct inet_connection_sock *icsk = inet_csk(sk); + struct tcp_sock *tp = tcp_sk(sk); + tp->undo_marker = 0; - tp->snd_ssthresh = tp->ca_ops->ssthresh(tp); + tp->snd_ssthresh = icsk->icsk_ca_ops->ssthresh(sk); tp->snd_cwnd = min(tp->snd_cwnd, tcp_packets_in_flight(tp) + 1U); tp->snd_cwnd_cnt = 0; @@ -780,12 +788,14 @@ static inline void __tcp_enter_cwr(struct tcp_sock *tp) TCP_ECN_queue_cwr(tp); } -static inline void tcp_enter_cwr(struct tcp_sock *tp) +static inline void tcp_enter_cwr(struct sock *sk) { + struct tcp_sock *tp = tcp_sk(sk); + tp->prior_ssthresh = 0; - if (tp->ca_state < TCP_CA_CWR) { - __tcp_enter_cwr(tp); - tcp_set_ca_state(tp, TCP_CA_CWR); + if (inet_csk(sk)->icsk_ca_state < TCP_CA_CWR) { + __tcp_enter_cwr(sk); + tcp_set_ca_state(sk, TCP_CA_CWR); } } diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index 026630a15ea0..fe3c6d3d0c91 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -508,7 +508,8 @@ struct sock *inet_csk_clone(struct sock *sk, const struct request_sock *req, newsk->sk_write_space = sk_stream_write_space; newicsk->icsk_retransmits = 0; - newicsk->icsk_backoff = 0; + newicsk->icsk_backoff = 0; + newicsk->icsk_probes_out = 0; /* Deinitialize accept_queue to trap illegal accesses.
*/ memset(&newicsk->icsk_accept_queue, 0, sizeof(newicsk->icsk_accept_queue)); diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 0eed64a1991d..02848e72e9c1 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -1671,11 +1671,11 @@ int tcp_disconnect(struct sock *sk, int flags) tp->write_seq = 1; icsk->icsk_backoff = 0; tp->snd_cwnd = 2; - tp->probes_out = 0; + icsk->icsk_probes_out = 0; tp->packets_out = 0; tp->snd_ssthresh = 0x7fffffff; tp->snd_cwnd_cnt = 0; - tcp_set_ca_state(tp, TCP_CA_Open); + tcp_set_ca_state(sk, TCP_CA_Open); tcp_clear_retrans(tp); inet_csk_delack_init(sk); sk->sk_send_head = NULL; @@ -1718,7 +1718,7 @@ int tcp_setsockopt(struct sock *sk, int level, int optname, char __user *optval, name[val] = 0; lock_sock(sk); - err = tcp_set_congestion_control(tp, name); + err = tcp_set_congestion_control(sk, name); release_sock(sk); return err; } @@ -1886,9 +1886,9 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info) memset(info, 0, sizeof(*info)); info->tcpi_state = sk->sk_state; - info->tcpi_ca_state = tp->ca_state; + info->tcpi_ca_state = icsk->icsk_ca_state; info->tcpi_retransmits = icsk->icsk_retransmits; - info->tcpi_probes = tp->probes_out; + info->tcpi_probes = icsk->icsk_probes_out; info->tcpi_backoff = icsk->icsk_backoff; if (tp->rx_opt.tstamp_ok) @@ -2016,7 +2016,7 @@ int tcp_getsockopt(struct sock *sk, int level, int optname, char __user *optval, len = min_t(unsigned int, len, TCP_CA_NAME_MAX); if (put_user(len, optlen)) return -EFAULT; - if (copy_to_user(optval, tp->ca_ops->name, len)) + if (copy_to_user(optval, icsk->icsk_ca_ops->name, len)) return -EFAULT; return 0; default: diff --git a/net/ipv4/tcp_bic.c b/net/ipv4/tcp_bic.c index ec38d45d6649..b940346de4e7 100644 --- a/net/ipv4/tcp_bic.c +++ b/net/ipv4/tcp_bic.c @@ -86,11 +86,11 @@ static inline void bictcp_reset(struct bictcp *ca) ca->delayed_ack = 2 << ACK_RATIO_SHIFT; } -static void bictcp_init(struct tcp_sock *tp) +static void bictcp_init(struct sock *sk) { - 
bictcp_reset(tcp_ca(tp)); + bictcp_reset(inet_csk_ca(sk)); if (initial_ssthresh) - tp->snd_ssthresh = initial_ssthresh; + tcp_sk(sk)->snd_ssthresh = initial_ssthresh; } /* @@ -156,9 +156,10 @@ static inline void bictcp_update(struct bictcp *ca, u32 cwnd) /* Detect low utilization in congestion avoidance */ -static inline void bictcp_low_utilization(struct tcp_sock *tp, int flag) +static inline void bictcp_low_utilization(struct sock *sk, int flag) { - struct bictcp *ca = tcp_ca(tp); + const struct tcp_sock *tp = tcp_sk(sk); + struct bictcp *ca = inet_csk_ca(sk); u32 dist, delay; /* No time stamp */ @@ -208,12 +209,13 @@ static inline void bictcp_low_utilization(struct tcp_sock *tp, int flag) } -static void bictcp_cong_avoid(struct tcp_sock *tp, u32 ack, +static void bictcp_cong_avoid(struct sock *sk, u32 ack, u32 seq_rtt, u32 in_flight, int data_acked) { - struct bictcp *ca = tcp_ca(tp); + struct tcp_sock *tp = tcp_sk(sk); + struct bictcp *ca = inet_csk_ca(sk); - bictcp_low_utilization(tp, data_acked); + bictcp_low_utilization(sk, data_acked); if (in_flight < tp->snd_cwnd) return; @@ -242,9 +244,10 @@ static void bictcp_cong_avoid(struct tcp_sock *tp, u32 ack, * behave like Reno until low_window is reached, * then increase congestion window slowly */ -static u32 bictcp_recalc_ssthresh(struct tcp_sock *tp) +static u32 bictcp_recalc_ssthresh(struct sock *sk) { - struct bictcp *ca = tcp_ca(tp); + const struct tcp_sock *tp = tcp_sk(sk); + struct bictcp *ca = inet_csk_ca(sk); ca->epoch_start = 0; /* end of epoch */ @@ -269,31 +272,34 @@ static u32 bictcp_recalc_ssthresh(struct tcp_sock *tp) return max((tp->snd_cwnd * beta) / BICTCP_BETA_SCALE, 2U); } -static u32 bictcp_undo_cwnd(struct tcp_sock *tp) +static u32 bictcp_undo_cwnd(struct sock *sk) { - struct bictcp *ca = tcp_ca(tp); - + const struct tcp_sock *tp = tcp_sk(sk); + const struct bictcp *ca = inet_csk_ca(sk); return max(tp->snd_cwnd, ca->last_max_cwnd); } -static u32 bictcp_min_cwnd(struct tcp_sock *tp) +static 
u32 bictcp_min_cwnd(struct sock *sk) { + const struct tcp_sock *tp = tcp_sk(sk); return tp->snd_ssthresh; } -static void bictcp_state(struct tcp_sock *tp, u8 new_state) +static void bictcp_state(struct sock *sk, u8 new_state) { if (new_state == TCP_CA_Loss) - bictcp_reset(tcp_ca(tp)); + bictcp_reset(inet_csk_ca(sk)); } /* Track delayed acknowledgement ratio using sliding window * ratio = (15*ratio + sample) / 16 */ -static void bictcp_acked(struct tcp_sock *tp, u32 cnt) +static void bictcp_acked(struct sock *sk, u32 cnt) { - if (cnt > 0 && tp->ca_state == TCP_CA_Open) { - struct bictcp *ca = tcp_ca(tp); + const struct inet_connection_sock *icsk = inet_csk(sk); + + if (cnt > 0 && icsk->icsk_ca_state == TCP_CA_Open) { + struct bictcp *ca = inet_csk_ca(sk); cnt -= ca->delayed_ack >> ACK_RATIO_SHIFT; ca->delayed_ack += cnt; } @@ -314,7 +320,7 @@ static struct tcp_congestion_ops bictcp = { static int __init bictcp_register(void) { - BUG_ON(sizeof(struct bictcp) > TCP_CA_PRIV_SIZE); + BUG_ON(sizeof(struct bictcp) > ICSK_CA_PRIV_SIZE); return tcp_register_congestion_control(&bictcp); } diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c index 4970d10a7785..bbf2d6624e89 100644 --- a/net/ipv4/tcp_cong.c +++ b/net/ipv4/tcp_cong.c @@ -73,33 +73,36 @@ void tcp_unregister_congestion_control(struct tcp_congestion_ops *ca) EXPORT_SYMBOL_GPL(tcp_unregister_congestion_control); /* Assign choice of congestion control. 
*/ -void tcp_init_congestion_control(struct tcp_sock *tp) +void tcp_init_congestion_control(struct sock *sk) { + struct inet_connection_sock *icsk = inet_csk(sk); struct tcp_congestion_ops *ca; - if (tp->ca_ops != &tcp_init_congestion_ops) + if (icsk->icsk_ca_ops != &tcp_init_congestion_ops) return; rcu_read_lock(); list_for_each_entry_rcu(ca, &tcp_cong_list, list) { if (try_module_get(ca->owner)) { - tp->ca_ops = ca; + icsk->icsk_ca_ops = ca; break; } } rcu_read_unlock(); - if (tp->ca_ops->init) - tp->ca_ops->init(tp); + if (icsk->icsk_ca_ops->init) + icsk->icsk_ca_ops->init(sk); } /* Manage refcounts on socket close. */ -void tcp_cleanup_congestion_control(struct tcp_sock *tp) +void tcp_cleanup_congestion_control(struct sock *sk) { - if (tp->ca_ops->release) - tp->ca_ops->release(tp); - module_put(tp->ca_ops->owner); + struct inet_connection_sock *icsk = inet_csk(sk); + + if (icsk->icsk_ca_ops->release) + icsk->icsk_ca_ops->release(sk); + module_put(icsk->icsk_ca_ops->owner); } /* Used by sysctl to change default congestion control */ @@ -143,14 +146,15 @@ void tcp_get_default_congestion_control(char *name) } /* Change congestion control for socket */ -int tcp_set_congestion_control(struct tcp_sock *tp, const char *name) +int tcp_set_congestion_control(struct sock *sk, const char *name) { + struct inet_connection_sock *icsk = inet_csk(sk); struct tcp_congestion_ops *ca; int err = 0; rcu_read_lock(); ca = tcp_ca_find(name); - if (ca == tp->ca_ops) + if (ca == icsk->icsk_ca_ops) goto out; if (!ca) @@ -160,10 +164,10 @@ int tcp_set_congestion_control(struct tcp_sock *tp, const char *name) err = -EBUSY; else { - tcp_cleanup_congestion_control(tp); - tp->ca_ops = ca; - if (tp->ca_ops->init) - tp->ca_ops->init(tp); + tcp_cleanup_congestion_control(sk); + icsk->icsk_ca_ops = ca; + if (icsk->icsk_ca_ops->init) + icsk->icsk_ca_ops->init(sk); } out: rcu_read_unlock(); @@ -177,9 +181,11 @@ int tcp_set_congestion_control(struct tcp_sock *tp, const char *name) /* This is 
Jacobson's slow start and congestion avoidance. * SIGCOMM '88, p. 328. */ -void tcp_reno_cong_avoid(struct tcp_sock *tp, u32 ack, u32 rtt, u32 in_flight, +void tcp_reno_cong_avoid(struct sock *sk, u32 ack, u32 rtt, u32 in_flight, int flag) { + struct tcp_sock *tp = tcp_sk(sk); + if (in_flight < tp->snd_cwnd) return; @@ -202,15 +208,17 @@ void tcp_reno_cong_avoid(struct tcp_sock *tp, u32 ack, u32 rtt, u32 in_flight, EXPORT_SYMBOL_GPL(tcp_reno_cong_avoid); /* Slow start threshold is half the congestion window (min 2) */ -u32 tcp_reno_ssthresh(struct tcp_sock *tp) +u32 tcp_reno_ssthresh(struct sock *sk) { + const struct tcp_sock *tp = tcp_sk(sk); return max(tp->snd_cwnd >> 1U, 2U); } EXPORT_SYMBOL_GPL(tcp_reno_ssthresh); /* Lower bound on congestion window. */ -u32 tcp_reno_min_cwnd(struct tcp_sock *tp) +u32 tcp_reno_min_cwnd(struct sock *sk) { + const struct tcp_sock *tp = tcp_sk(sk); return tp->snd_ssthresh/2; } EXPORT_SYMBOL_GPL(tcp_reno_min_cwnd); diff --git a/net/ipv4/tcp_diag.c b/net/ipv4/tcp_diag.c index 5f4c74f45e82..4288ecfec9a7 100644 --- a/net/ipv4/tcp_diag.c +++ b/net/ipv4/tcp_diag.c @@ -66,10 +66,10 @@ static int tcpdiag_fill(struct sk_buff *skb, struct sock *sk, if (ext & (1<<(TCPDIAG_INFO-1))) info = TCPDIAG_PUT(skb, TCPDIAG_INFO, sizeof(*info)); - if (ext & (1<<(TCPDIAG_CONG-1))) { - size_t len = strlen(tp->ca_ops->name); + if ((ext & (1 << (TCPDIAG_CONG - 1))) && icsk->icsk_ca_ops) { + size_t len = strlen(icsk->icsk_ca_ops->name); strcpy(TCPDIAG_PUT(skb, TCPDIAG_CONG, len+1), - tp->ca_ops->name); + icsk->icsk_ca_ops->name); } } r->tcpdiag_family = sk->sk_family; @@ -136,18 +136,17 @@ static int tcpdiag_fill(struct sk_buff *skb, struct sock *sk, r->tcpdiag_expires = EXPIRES_IN_MS(icsk->icsk_timeout); } else if (icsk->icsk_pending == ICSK_TIME_PROBE0) { r->tcpdiag_timer = 4; - r->tcpdiag_retrans = tp->probes_out; + r->tcpdiag_retrans = icsk->icsk_probes_out; r->tcpdiag_expires = EXPIRES_IN_MS(icsk->icsk_timeout); } else if (timer_pending(&sk->sk_timer)) 
{ r->tcpdiag_timer = 2; - r->tcpdiag_retrans = tp->probes_out; + r->tcpdiag_retrans = icsk->icsk_probes_out; r->tcpdiag_expires = EXPIRES_IN_MS(sk->sk_timer.expires); } else { r->tcpdiag_timer = 0; r->tcpdiag_expires = 0; } #undef EXPIRES_IN_MS - r->tcpdiag_rqueue = tp->rcv_nxt - tp->copied_seq; r->tcpdiag_wqueue = tp->write_seq - tp->snd_una; r->tcpdiag_uid = sock_i_uid(sk); @@ -163,8 +162,9 @@ static int tcpdiag_fill(struct sk_buff *skb, struct sock *sk, if (info) tcp_get_info(sk, info); - if (sk->sk_state < TCP_TIME_WAIT && tp->ca_ops->get_info) - tp->ca_ops->get_info(tp, ext, skb); + if (sk->sk_state < TCP_TIME_WAIT && + icsk->icsk_ca_ops && icsk->icsk_ca_ops->get_info) + icsk->icsk_ca_ops->get_info(sk, ext, skb); nlh->nlmsg_len = skb->tail - b; return skb->len; diff --git a/net/ipv4/tcp_highspeed.c b/net/ipv4/tcp_highspeed.c index 36c51f8136bf..6acc04bde080 100644 --- a/net/ipv4/tcp_highspeed.c +++ b/net/ipv4/tcp_highspeed.c @@ -98,9 +98,10 @@ struct hstcp { u32 ai; }; -static void hstcp_init(struct tcp_sock *tp) +static void hstcp_init(struct sock *sk) { - struct hstcp *ca = tcp_ca(tp); + struct tcp_sock *tp = tcp_sk(sk); + struct hstcp *ca = inet_csk_ca(sk); ca->ai = 0; @@ -109,10 +110,11 @@ static void hstcp_init(struct tcp_sock *tp) tp->snd_cwnd_clamp = min_t(u32, tp->snd_cwnd_clamp, 0xffffffff/128); } -static void hstcp_cong_avoid(struct tcp_sock *tp, u32 adk, u32 rtt, +static void hstcp_cong_avoid(struct sock *sk, u32 adk, u32 rtt, u32 in_flight, int good) { - struct hstcp *ca = tcp_ca(tp); + struct tcp_sock *tp = tcp_sk(sk); + struct hstcp *ca = inet_csk_ca(sk); if (in_flight < tp->snd_cwnd) return; @@ -143,9 +145,10 @@ static void hstcp_cong_avoid(struct tcp_sock *tp, u32 adk, u32 rtt, } } -static u32 hstcp_ssthresh(struct tcp_sock *tp) +static u32 hstcp_ssthresh(struct sock *sk) { - struct hstcp *ca = tcp_ca(tp); + const struct tcp_sock *tp = tcp_sk(sk); + const struct hstcp *ca = inet_csk_ca(sk); /* Do multiplicative decrease */ return 
max(tp->snd_cwnd - ((tp->snd_cwnd * hstcp_aimd_vals[ca->ai].md) >> 8), 2U); @@ -164,7 +167,7 @@ static struct tcp_congestion_ops tcp_highspeed = { static int __init hstcp_register(void) { - BUG_ON(sizeof(struct hstcp) > TCP_CA_PRIV_SIZE); + BUG_ON(sizeof(struct hstcp) > ICSK_CA_PRIV_SIZE); return tcp_register_congestion_control(&tcp_highspeed); } diff --git a/net/ipv4/tcp_htcp.c b/net/ipv4/tcp_htcp.c index 40168275acf9..e47b37984e95 100644 --- a/net/ipv4/tcp_htcp.c +++ b/net/ipv4/tcp_htcp.c @@ -55,18 +55,21 @@ static inline void htcp_reset(struct htcp *ca) ca->snd_cwnd_cnt2 = 0; } -static u32 htcp_cwnd_undo(struct tcp_sock *tp) +static u32 htcp_cwnd_undo(struct sock *sk) { - struct htcp *ca = tcp_ca(tp); + const struct tcp_sock *tp = tcp_sk(sk); + struct htcp *ca = inet_csk_ca(sk); ca->ccount = ca->undo_ccount; ca->maxRTT = ca->undo_maxRTT; ca->old_maxB = ca->undo_old_maxB; return max(tp->snd_cwnd, (tp->snd_ssthresh<<7)/ca->beta); } -static inline void measure_rtt(struct tcp_sock *tp) +static inline void measure_rtt(struct sock *sk) { - struct htcp *ca = tcp_ca(tp); + const struct inet_connection_sock *icsk = inet_csk(sk); + const struct tcp_sock *tp = tcp_sk(sk); + struct htcp *ca = inet_csk_ca(sk); u32 srtt = tp->srtt>>3; /* keep track of minimum RTT seen so far, minRTT is zero at first */ @@ -74,7 +77,7 @@ static inline void measure_rtt(struct tcp_sock *tp) ca->minRTT = srtt; /* max RTT */ - if (tp->ca_state == TCP_CA_Open && tp->snd_ssthresh < 0xFFFF && ca->ccount > 3) { + if (icsk->icsk_ca_state == TCP_CA_Open && tp->snd_ssthresh < 0xFFFF && ca->ccount > 3) { if (ca->maxRTT < ca->minRTT) ca->maxRTT = ca->minRTT; if (ca->maxRTT < srtt && srtt <= ca->maxRTT+HZ/50) @@ -82,13 +85,16 @@ static inline void measure_rtt(struct tcp_sock *tp) } } -static void measure_achieved_throughput(struct tcp_sock *tp, u32 pkts_acked) +static void measure_achieved_throughput(struct sock *sk, u32 pkts_acked) { - struct htcp *ca = tcp_ca(tp); + const struct inet_connection_sock *icsk 
= inet_csk(sk); + const struct tcp_sock *tp = tcp_sk(sk); + struct htcp *ca = inet_csk_ca(sk); u32 now = tcp_time_stamp; /* achieved throughput calculations */ - if (tp->ca_state != TCP_CA_Open && tp->ca_state != TCP_CA_Disorder) { + if (icsk->icsk_ca_state != TCP_CA_Open && + icsk->icsk_ca_state != TCP_CA_Disorder) { ca->packetcount = 0; ca->lasttime = now; return; @@ -173,9 +179,9 @@ static inline void htcp_alpha_update(struct htcp *ca) * that point do we really have a real sense of maxRTT (the queues en route * were getting just too full now). */ -static void htcp_param_update(struct tcp_sock *tp) +static void htcp_param_update(struct sock *sk) { - struct htcp *ca = tcp_ca(tp); + struct htcp *ca = inet_csk_ca(sk); u32 minRTT = ca->minRTT; u32 maxRTT = ca->maxRTT; @@ -187,17 +193,19 @@ static void htcp_param_update(struct tcp_sock *tp) ca->maxRTT = minRTT + ((maxRTT-minRTT)*95)/100; } -static u32 htcp_recalc_ssthresh(struct tcp_sock *tp) +static u32 htcp_recalc_ssthresh(struct sock *sk) { - struct htcp *ca = tcp_ca(tp); - htcp_param_update(tp); + const struct tcp_sock *tp = tcp_sk(sk); + const struct htcp *ca = inet_csk_ca(sk); + htcp_param_update(sk); return max((tp->snd_cwnd * ca->beta) >> 7, 2U); } -static void htcp_cong_avoid(struct tcp_sock *tp, u32 ack, u32 rtt, +static void htcp_cong_avoid(struct sock *sk, u32 ack, u32 rtt, u32 in_flight, int data_acked) { - struct htcp *ca = tcp_ca(tp); + struct tcp_sock *tp = tcp_sk(sk); + struct htcp *ca = inet_csk_ca(sk); if (in_flight < tp->snd_cwnd) return; @@ -207,7 +215,7 @@ static void htcp_cong_avoid(struct tcp_sock *tp, u32 ack, u32 rtt, if (tp->snd_cwnd < tp->snd_cwnd_clamp) tp->snd_cwnd++; } else { - measure_rtt(tp); + measure_rtt(sk); /* keep track of number of round-trip times since last backoff event */ if (ca->snd_cwnd_cnt2++ > tp->snd_cwnd) { @@ -229,28 +237,29 @@ static void htcp_cong_avoid(struct tcp_sock *tp, u32 ack, u32 rtt, } /* Lower bound on congestion window. 
*/ -static u32 htcp_min_cwnd(struct tcp_sock *tp) +static u32 htcp_min_cwnd(struct sock *sk) { + const struct tcp_sock *tp = tcp_sk(sk); return tp->snd_ssthresh; } -static void htcp_init(struct tcp_sock *tp) +static void htcp_init(struct sock *sk) { - struct htcp *ca = tcp_ca(tp); + struct htcp *ca = inet_csk_ca(sk); memset(ca, 0, sizeof(struct htcp)); ca->alpha = ALPHA_BASE; ca->beta = BETA_MIN; } -static void htcp_state(struct tcp_sock *tp, u8 new_state) +static void htcp_state(struct sock *sk, u8 new_state) { switch (new_state) { case TCP_CA_CWR: case TCP_CA_Recovery: case TCP_CA_Loss: - htcp_reset(tcp_ca(tp)); + htcp_reset(inet_csk_ca(sk)); break; } } @@ -269,7 +278,7 @@ static struct tcp_congestion_ops htcp = { static int __init htcp_register(void) { - BUG_ON(sizeof(struct htcp) > TCP_CA_PRIV_SIZE); + BUG_ON(sizeof(struct htcp) > ICSK_CA_PRIV_SIZE); BUILD_BUG_ON(BETA_MIN >= BETA_MAX); if (!use_bandwidth_switch) htcp.pkts_acked = NULL; diff --git a/net/ipv4/tcp_hybla.c b/net/ipv4/tcp_hybla.c index 13a66342c304..77add63623df 100644 --- a/net/ipv4/tcp_hybla.c +++ b/net/ipv4/tcp_hybla.c @@ -33,19 +33,20 @@ MODULE_PARM_DESC(rtt0, "reference rout trip time (ms)"); /* This is called to refresh values for hybla parameters */ -static inline void hybla_recalc_param (struct tcp_sock *tp) +static inline void hybla_recalc_param (struct sock *sk) { - struct hybla *ca = tcp_ca(tp); + struct hybla *ca = inet_csk_ca(sk); - ca->rho_3ls = max_t(u32, tp->srtt / msecs_to_jiffies(rtt0), 8); + ca->rho_3ls = max_t(u32, tcp_sk(sk)->srtt / msecs_to_jiffies(rtt0), 8); ca->rho = ca->rho_3ls >> 3; ca->rho2_7ls = (ca->rho_3ls * ca->rho_3ls) << 1; ca->rho2 = ca->rho2_7ls >>7; } -static void hybla_init(struct tcp_sock *tp) +static void hybla_init(struct sock *sk) { - struct hybla *ca = tcp_ca(tp); + struct tcp_sock *tp = tcp_sk(sk); + struct hybla *ca = inet_csk_ca(sk); ca->rho = 0; ca->rho2 = 0; @@ -57,17 +58,16 @@ static void hybla_init(struct tcp_sock *tp) tp->snd_cwnd_clamp = 65535; /* 
1st Rho measurement based on initial srtt */ - hybla_recalc_param(tp); + hybla_recalc_param(sk); /* set minimum rtt as this is the 1st ever seen */ ca->minrtt = tp->srtt; tp->snd_cwnd = ca->rho; } -static void hybla_state(struct tcp_sock *tp, u8 ca_state) +static void hybla_state(struct sock *sk, u8 ca_state) { - struct hybla *ca = tcp_ca(tp); - + struct hybla *ca = inet_csk_ca(sk); ca->hybla_en = (ca_state == TCP_CA_Open); } @@ -86,27 +86,28 @@ static inline u32 hybla_fraction(u32 odds) * o Give cwnd a new value based on the model proposed * o remember increments <1 */ -static void hybla_cong_avoid(struct tcp_sock *tp, u32 ack, u32 rtt, +static void hybla_cong_avoid(struct sock *sk, u32 ack, u32 rtt, u32 in_flight, int flag) { - struct hybla *ca = tcp_ca(tp); + struct tcp_sock *tp = tcp_sk(sk); + struct hybla *ca = inet_csk_ca(sk); u32 increment, odd, rho_fractions; int is_slowstart = 0; /* Recalculate rho only if this srtt is the lowest */ if (tp->srtt < ca->minrtt){ - hybla_recalc_param(tp); + hybla_recalc_param(sk); ca->minrtt = tp->srtt; } if (!ca->hybla_en) - return tcp_reno_cong_avoid(tp, ack, rtt, in_flight, flag); + return tcp_reno_cong_avoid(sk, ack, rtt, in_flight, flag); if (in_flight < tp->snd_cwnd) return; if (ca->rho == 0) - hybla_recalc_param(tp); + hybla_recalc_param(sk); rho_fractions = ca->rho_3ls - (ca->rho << 3); @@ -170,7 +171,7 @@ static struct tcp_congestion_ops tcp_hybla = { static int __init hybla_register(void) { - BUG_ON(sizeof(struct hybla) > TCP_CA_PRIV_SIZE); + BUG_ON(sizeof(struct hybla) > ICSK_CA_PRIV_SIZE); return tcp_register_congestion_control(&tcp_hybla); } diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 71d456148de7..fdd9547fb783 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -325,11 +325,12 @@ static void tcp_init_buffer_space(struct sock *sk) /* 5. Recalculate window clamp after socket hit its memory bounds. 
*/ static void tcp_clamp_window(struct sock *sk, struct tcp_sock *tp) { + struct inet_connection_sock *icsk = inet_csk(sk); struct sk_buff *skb; unsigned int app_win = tp->rcv_nxt - tp->copied_seq; int ofo_win = 0; - inet_csk(sk)->icsk_ack.quick = 0; + icsk->icsk_ack.quick = 0; skb_queue_walk(&tp->out_of_order_queue, skb) { ofo_win += skb->len; @@ -350,8 +351,8 @@ static void tcp_clamp_window(struct sock *sk, struct tcp_sock *tp) app_win += ofo_win; if (atomic_read(&sk->sk_rmem_alloc) >= 2 * sk->sk_rcvbuf) app_win >>= 1; - if (app_win > inet_csk(sk)->icsk_ack.rcv_mss) - app_win -= inet_csk(sk)->icsk_ack.rcv_mss; + if (app_win > icsk->icsk_ack.rcv_mss) + app_win -= icsk->icsk_ack.rcv_mss; app_win = max(app_win, 2U*tp->advmss); if (!ofo_win) @@ -549,8 +550,10 @@ static void tcp_event_data_recv(struct sock *sk, struct tcp_sock *tp, struct sk_ * To save cycles in the RFC 1323 implementation it was better to break * it up into three procedures. -- erics */ -static void tcp_rtt_estimator(struct tcp_sock *tp, __u32 mrtt, u32 *usrtt) +static void tcp_rtt_estimator(struct sock *sk, const __u32 mrtt, u32 *usrtt) { + struct tcp_sock *tp = tcp_sk(sk); + const struct inet_connection_sock *icsk = inet_csk(sk); long m = mrtt; /* RTT */ /* The following amusing code comes from Jacobson's @@ -610,8 +613,8 @@ static void tcp_rtt_estimator(struct tcp_sock *tp, __u32 mrtt, u32 *usrtt) tp->rtt_seq = tp->snd_nxt; } - if (tp->ca_ops->rtt_sample) - tp->ca_ops->rtt_sample(tp, *usrtt); + if (icsk->icsk_ca_ops->rtt_sample) + icsk->icsk_ca_ops->rtt_sample(sk, *usrtt); } /* Calculate rto without backoff. This is the second half of Van Jacobson's @@ -663,9 +666,10 @@ void tcp_update_metrics(struct sock *sk) dst_confirm(dst); if (dst && (dst->flags&DST_HOST)) { + const struct inet_connection_sock *icsk = inet_csk(sk); int m; - if (inet_csk(sk)->icsk_backoff || !tp->srtt) { + if (icsk->icsk_backoff || !tp->srtt) { /* This session failed to estimate rtt. Why? 
* Probably, no packets returned in time. * Reset our results. @@ -714,7 +718,7 @@ void tcp_update_metrics(struct sock *sk) tp->snd_cwnd > dst_metric(dst, RTAX_CWND)) dst->metrics[RTAX_CWND-1] = tp->snd_cwnd; } else if (tp->snd_cwnd > tp->snd_ssthresh && - tp->ca_state == TCP_CA_Open) { + icsk->icsk_ca_state == TCP_CA_Open) { /* Cong. avoidance phase, cwnd is reliable. */ if (!dst_metric_locked(dst, RTAX_SSTHRESH)) dst->metrics[RTAX_SSTHRESH-1] = @@ -828,8 +832,10 @@ reset: } } -static void tcp_update_reordering(struct tcp_sock *tp, int metric, int ts) +static void tcp_update_reordering(struct sock *sk, const int metric, + const int ts) { + struct tcp_sock *tp = tcp_sk(sk); if (metric > tp->reordering) { tp->reordering = min(TCP_MAX_REORDERING, metric); @@ -844,7 +850,7 @@ static void tcp_update_reordering(struct tcp_sock *tp, int metric, int ts) NET_INC_STATS_BH(LINUX_MIB_TCPSACKREORDER); #if FASTRETRANS_DEBUG > 1 printk(KERN_DEBUG "Disorder%d %d %u f%u s%u rr%d\n", - tp->rx_opt.sack_ok, tp->ca_state, + tp->rx_opt.sack_ok, inet_csk(sk)->icsk_ca_state, tp->reordering, tp->fackets_out, tp->sacked_out, @@ -906,6 +912,7 @@ static void tcp_update_reordering(struct tcp_sock *tp, int metric, int ts) static int tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_una) { + const struct inet_connection_sock *icsk = inet_csk(sk); struct tcp_sock *tp = tcp_sk(sk); unsigned char *ptr = ack_skb->h.raw + TCP_SKB_CB(ack_skb)->sacked; struct tcp_sack_block *sp = (struct tcp_sack_block *)(ptr+2); @@ -1071,7 +1078,7 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_ * we have to account for reordering! Ugly, * but should help. 
*/ - if (lost_retrans && tp->ca_state == TCP_CA_Recovery) { + if (lost_retrans && icsk->icsk_ca_state == TCP_CA_Recovery) { struct sk_buff *skb; sk_stream_for_retrans_queue(skb, sk) { @@ -1100,8 +1107,8 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_ tp->left_out = tp->sacked_out + tp->lost_out; - if ((reord < tp->fackets_out) && tp->ca_state != TCP_CA_Loss) - tcp_update_reordering(tp, ((tp->fackets_out + 1) - reord), 0); + if ((reord < tp->fackets_out) && icsk->icsk_ca_state != TCP_CA_Loss) + tcp_update_reordering(sk, ((tp->fackets_out + 1) - reord), 0); #if FASTRETRANS_DEBUG > 0 BUG_TRAP((int)tp->sacked_out >= 0); @@ -1118,17 +1125,18 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_ */ void tcp_enter_frto(struct sock *sk) { + const struct inet_connection_sock *icsk = inet_csk(sk); struct tcp_sock *tp = tcp_sk(sk); struct sk_buff *skb; tp->frto_counter = 1; - if (tp->ca_state <= TCP_CA_Disorder || + if (icsk->icsk_ca_state <= TCP_CA_Disorder || tp->snd_una == tp->high_seq || - (tp->ca_state == TCP_CA_Loss && !inet_csk(sk)->icsk_retransmits)) { - tp->prior_ssthresh = tcp_current_ssthresh(tp); - tp->snd_ssthresh = tp->ca_ops->ssthresh(tp); - tcp_ca_event(tp, CA_EVENT_FRTO); + (icsk->icsk_ca_state == TCP_CA_Loss && !icsk->icsk_retransmits)) { + tp->prior_ssthresh = tcp_current_ssthresh(sk); + tp->snd_ssthresh = icsk->icsk_ca_ops->ssthresh(sk); + tcp_ca_event(sk, CA_EVENT_FRTO); } /* Have to clear retransmission markers here to keep the bookkeeping @@ -1145,7 +1153,7 @@ void tcp_enter_frto(struct sock *sk) } tcp_sync_left_out(tp); - tcp_set_ca_state(tp, TCP_CA_Open); + tcp_set_ca_state(sk, TCP_CA_Open); tp->frto_highmark = tp->snd_nxt; } @@ -1191,7 +1199,7 @@ static void tcp_enter_frto_loss(struct sock *sk) tp->reordering = min_t(unsigned int, tp->reordering, sysctl_tcp_reordering); - tcp_set_ca_state(tp, TCP_CA_Loss); + tcp_set_ca_state(sk, TCP_CA_Loss); tp->high_seq = tp->frto_highmark; 
TCP_ECN_queue_cwr(tp); } @@ -1215,16 +1223,17 @@ void tcp_clear_retrans(struct tcp_sock *tp) */ void tcp_enter_loss(struct sock *sk, int how) { + const struct inet_connection_sock *icsk = inet_csk(sk); struct tcp_sock *tp = tcp_sk(sk); struct sk_buff *skb; int cnt = 0; /* Reduce ssthresh if it has not yet been made inside this window. */ - if (tp->ca_state <= TCP_CA_Disorder || tp->snd_una == tp->high_seq || - (tp->ca_state == TCP_CA_Loss && !inet_csk(sk)->icsk_retransmits)) { - tp->prior_ssthresh = tcp_current_ssthresh(tp); - tp->snd_ssthresh = tp->ca_ops->ssthresh(tp); - tcp_ca_event(tp, CA_EVENT_LOSS); + if (icsk->icsk_ca_state <= TCP_CA_Disorder || tp->snd_una == tp->high_seq || + (icsk->icsk_ca_state == TCP_CA_Loss && !icsk->icsk_retransmits)) { + tp->prior_ssthresh = tcp_current_ssthresh(sk); + tp->snd_ssthresh = icsk->icsk_ca_ops->ssthresh(sk); + tcp_ca_event(sk, CA_EVENT_LOSS); } tp->snd_cwnd = 1; tp->snd_cwnd_cnt = 0; @@ -1255,7 +1264,7 @@ void tcp_enter_loss(struct sock *sk, int how) tp->reordering = min_t(unsigned int, tp->reordering, sysctl_tcp_reordering); - tcp_set_ca_state(tp, TCP_CA_Loss); + tcp_set_ca_state(sk, TCP_CA_Loss); tp->high_seq = tp->snd_nxt; TCP_ECN_queue_cwr(tp); } @@ -1272,13 +1281,14 @@ static int tcp_check_sack_reneging(struct sock *sk) */ if ((skb = skb_peek(&sk->sk_write_queue)) != NULL && (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)) { + struct inet_connection_sock *icsk = inet_csk(sk); NET_INC_STATS_BH(LINUX_MIB_TCPSACKRENEGING); tcp_enter_loss(sk, 1); - inet_csk(sk)->icsk_retransmits++; + icsk->icsk_retransmits++; tcp_retransmit_skb(sk, skb_peek(&sk->sk_write_queue)); inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, - inet_csk(sk)->icsk_rto, TCP_RTO_MAX); + icsk->icsk_rto, TCP_RTO_MAX); return 1; } return 0; @@ -1431,8 +1441,9 @@ static int tcp_time_to_recover(struct sock *sk, struct tcp_sock *tp) * in assumption of absent reordering, interpret this as reordering. * The only another reason could be bug in receiver TCP. 
*/ -static void tcp_check_reno_reordering(struct tcp_sock *tp, int addend) +static void tcp_check_reno_reordering(struct sock *sk, const int addend) { + struct tcp_sock *tp = tcp_sk(sk); u32 holes; holes = max(tp->lost_out, 1U); @@ -1440,16 +1451,17 @@ static void tcp_check_reno_reordering(struct tcp_sock *tp, int addend) if ((tp->sacked_out + holes) > tp->packets_out) { tp->sacked_out = tp->packets_out - holes; - tcp_update_reordering(tp, tp->packets_out+addend, 0); + tcp_update_reordering(sk, tp->packets_out + addend, 0); } } /* Emulate SACKs for SACKless connection: account for a new dupack. */ -static void tcp_add_reno_sack(struct tcp_sock *tp) +static void tcp_add_reno_sack(struct sock *sk) { + struct tcp_sock *tp = tcp_sk(sk); tp->sacked_out++; - tcp_check_reno_reordering(tp, 0); + tcp_check_reno_reordering(sk, 0); tcp_sync_left_out(tp); } @@ -1464,7 +1476,7 @@ static void tcp_remove_reno_sacks(struct sock *sk, struct tcp_sock *tp, int acke else tp->sacked_out -= acked-1; } - tcp_check_reno_reordering(tp, acked); + tcp_check_reno_reordering(sk, acked); tcp_sync_left_out(tp); } @@ -1538,14 +1550,16 @@ static inline void tcp_moderate_cwnd(struct tcp_sock *tp) } /* Decrease cwnd each second ack. */ -static void tcp_cwnd_down(struct tcp_sock *tp) +static void tcp_cwnd_down(struct sock *sk) { + const struct inet_connection_sock *icsk = inet_csk(sk); + struct tcp_sock *tp = tcp_sk(sk); int decr = tp->snd_cwnd_cnt + 1; tp->snd_cwnd_cnt = decr&1; decr >>= 1; - if (decr && tp->snd_cwnd > tp->ca_ops->min_cwnd(tp)) + if (decr && tp->snd_cwnd > icsk->icsk_ca_ops->min_cwnd(sk)) tp->snd_cwnd -= decr; tp->snd_cwnd = min(tp->snd_cwnd, tcp_packets_in_flight(tp)+1); @@ -1579,11 +1593,15 @@ static void DBGUNDO(struct sock *sk, struct tcp_sock *tp, const char *msg) #define DBGUNDO(x...) 
do { } while (0) #endif -static void tcp_undo_cwr(struct tcp_sock *tp, int undo) +static void tcp_undo_cwr(struct sock *sk, const int undo) { + struct tcp_sock *tp = tcp_sk(sk); + if (tp->prior_ssthresh) { - if (tp->ca_ops->undo_cwnd) - tp->snd_cwnd = tp->ca_ops->undo_cwnd(tp); + const struct inet_connection_sock *icsk = inet_csk(sk); + + if (icsk->icsk_ca_ops->undo_cwnd) + tp->snd_cwnd = icsk->icsk_ca_ops->undo_cwnd(sk); else tp->snd_cwnd = max(tp->snd_cwnd, tp->snd_ssthresh<<1); @@ -1611,9 +1629,9 @@ static int tcp_try_undo_recovery(struct sock *sk, struct tcp_sock *tp) /* Happy end! We did not retransmit anything * or our original transmission succeeded. */ - DBGUNDO(sk, tp, tp->ca_state == TCP_CA_Loss ? "loss" : "retrans"); - tcp_undo_cwr(tp, 1); - if (tp->ca_state == TCP_CA_Loss) + DBGUNDO(sk, tp, inet_csk(sk)->icsk_ca_state == TCP_CA_Loss ? "loss" : "retrans"); + tcp_undo_cwr(sk, 1); + if (inet_csk(sk)->icsk_ca_state == TCP_CA_Loss) NET_INC_STATS_BH(LINUX_MIB_TCPLOSSUNDO); else NET_INC_STATS_BH(LINUX_MIB_TCPFULLUNDO); @@ -1626,7 +1644,7 @@ static int tcp_try_undo_recovery(struct sock *sk, struct tcp_sock *tp) tcp_moderate_cwnd(tp); return 1; } - tcp_set_ca_state(tp, TCP_CA_Open); + tcp_set_ca_state(sk, TCP_CA_Open); return 0; } @@ -1635,7 +1653,7 @@ static void tcp_try_undo_dsack(struct sock *sk, struct tcp_sock *tp) { if (tp->undo_marker && !tp->undo_retrans) { DBGUNDO(sk, tp, "D-SACK"); - tcp_undo_cwr(tp, 1); + tcp_undo_cwr(sk, 1); tp->undo_marker = 0; NET_INC_STATS_BH(LINUX_MIB_TCPDSACKUNDO); } @@ -1656,10 +1674,10 @@ static int tcp_try_undo_partial(struct sock *sk, struct tcp_sock *tp, if (tp->retrans_out == 0) tp->retrans_stamp = 0; - tcp_update_reordering(tp, tcp_fackets_out(tp)+acked, 1); + tcp_update_reordering(sk, tcp_fackets_out(tp) + acked, 1); DBGUNDO(sk, tp, "Hoe"); - tcp_undo_cwr(tp, 0); + tcp_undo_cwr(sk, 0); NET_INC_STATS_BH(LINUX_MIB_TCPPARTIALUNDO); /* So... Do not make Hoe's retransmit yet. 
@@ -1682,22 +1700,23 @@ static int tcp_try_undo_loss(struct sock *sk, struct tcp_sock *tp) DBGUNDO(sk, tp, "partial loss"); tp->lost_out = 0; tp->left_out = tp->sacked_out; - tcp_undo_cwr(tp, 1); + tcp_undo_cwr(sk, 1); NET_INC_STATS_BH(LINUX_MIB_TCPLOSSUNDO); inet_csk(sk)->icsk_retransmits = 0; tp->undo_marker = 0; if (!IsReno(tp)) - tcp_set_ca_state(tp, TCP_CA_Open); + tcp_set_ca_state(sk, TCP_CA_Open); return 1; } return 0; } -static inline void tcp_complete_cwr(struct tcp_sock *tp) +static inline void tcp_complete_cwr(struct sock *sk) { + struct tcp_sock *tp = tcp_sk(sk); tp->snd_cwnd = min(tp->snd_cwnd, tp->snd_ssthresh); tp->snd_cwnd_stamp = tcp_time_stamp; - tcp_ca_event(tp, CA_EVENT_COMPLETE_CWR); + tcp_ca_event(sk, CA_EVENT_COMPLETE_CWR); } static void tcp_try_to_open(struct sock *sk, struct tcp_sock *tp, int flag) @@ -1708,21 +1727,21 @@ static void tcp_try_to_open(struct sock *sk, struct tcp_sock *tp, int flag) tp->retrans_stamp = 0; if (flag&FLAG_ECE) - tcp_enter_cwr(tp); + tcp_enter_cwr(sk); - if (tp->ca_state != TCP_CA_CWR) { + if (inet_csk(sk)->icsk_ca_state != TCP_CA_CWR) { int state = TCP_CA_Open; if (tp->left_out || tp->retrans_out || tp->undo_marker) state = TCP_CA_Disorder; - if (tp->ca_state != state) { - tcp_set_ca_state(tp, state); + if (inet_csk(sk)->icsk_ca_state != state) { + tcp_set_ca_state(sk, state); tp->high_seq = tp->snd_nxt; } tcp_moderate_cwnd(tp); } else { - tcp_cwnd_down(tp); + tcp_cwnd_down(sk); } } @@ -1741,6 +1760,7 @@ static void tcp_fastretrans_alert(struct sock *sk, u32 prior_snd_una, int prior_packets, int flag) { + struct inet_connection_sock *icsk = inet_csk(sk); struct tcp_sock *tp = tcp_sk(sk); int is_dupack = (tp->snd_una == prior_snd_una && !(flag&FLAG_NOT_DUP)); @@ -1764,7 +1784,7 @@ tcp_fastretrans_alert(struct sock *sk, u32 prior_snd_una, /* C. Process data loss notification, provided it is valid. 
*/ if ((flag&FLAG_DATA_LOST) && before(tp->snd_una, tp->high_seq) && - tp->ca_state != TCP_CA_Open && + icsk->icsk_ca_state != TCP_CA_Open && tp->fackets_out > tp->reordering) { tcp_mark_head_lost(sk, tp, tp->fackets_out-tp->reordering, tp->high_seq); NET_INC_STATS_BH(LINUX_MIB_TCPLOSS); @@ -1775,14 +1795,14 @@ tcp_fastretrans_alert(struct sock *sk, u32 prior_snd_una, /* E. Check state exit conditions. State can be terminated * when high_seq is ACKed. */ - if (tp->ca_state == TCP_CA_Open) { + if (icsk->icsk_ca_state == TCP_CA_Open) { if (!sysctl_tcp_frto) BUG_TRAP(tp->retrans_out == 0); tp->retrans_stamp = 0; } else if (!before(tp->snd_una, tp->high_seq)) { - switch (tp->ca_state) { + switch (icsk->icsk_ca_state) { case TCP_CA_Loss: - inet_csk(sk)->icsk_retransmits = 0; + icsk->icsk_retransmits = 0; if (tcp_try_undo_recovery(sk, tp)) return; break; @@ -1791,8 +1811,8 @@ tcp_fastretrans_alert(struct sock *sk, u32 prior_snd_una, /* CWR is to be held something *above* high_seq * is ACKed for CWR bit to reach receiver. */ if (tp->snd_una != tp->high_seq) { - tcp_complete_cwr(tp); - tcp_set_ca_state(tp, TCP_CA_Open); + tcp_complete_cwr(sk); + tcp_set_ca_state(sk, TCP_CA_Open); } break; @@ -1803,7 +1823,7 @@ tcp_fastretrans_alert(struct sock *sk, u32 prior_snd_una, * catching for all duplicate ACKs. */ IsReno(tp) || tp->snd_una != tp->high_seq) { tp->undo_marker = 0; - tcp_set_ca_state(tp, TCP_CA_Open); + tcp_set_ca_state(sk, TCP_CA_Open); } break; @@ -1812,17 +1832,17 @@ tcp_fastretrans_alert(struct sock *sk, u32 prior_snd_una, tcp_reset_reno_sack(tp); if (tcp_try_undo_recovery(sk, tp)) return; - tcp_complete_cwr(tp); + tcp_complete_cwr(sk); break; } } /* F. Process state. 
*/ - switch (tp->ca_state) { + switch (icsk->icsk_ca_state) { case TCP_CA_Recovery: if (prior_snd_una == tp->snd_una) { if (IsReno(tp) && is_dupack) - tcp_add_reno_sack(tp); + tcp_add_reno_sack(sk); } else { int acked = prior_packets - tp->packets_out; if (IsReno(tp)) @@ -1832,13 +1852,13 @@ tcp_fastretrans_alert(struct sock *sk, u32 prior_snd_una, break; case TCP_CA_Loss: if (flag&FLAG_DATA_ACKED) - inet_csk(sk)->icsk_retransmits = 0; + icsk->icsk_retransmits = 0; if (!tcp_try_undo_loss(sk, tp)) { tcp_moderate_cwnd(tp); tcp_xmit_retransmit_queue(sk); return; } - if (tp->ca_state != TCP_CA_Open) + if (icsk->icsk_ca_state != TCP_CA_Open) return; /* Loss is undone; fall through to processing in Open state. */ default: @@ -1846,10 +1866,10 @@ tcp_fastretrans_alert(struct sock *sk, u32 prior_snd_una, if (tp->snd_una != prior_snd_una) tcp_reset_reno_sack(tp); if (is_dupack) - tcp_add_reno_sack(tp); + tcp_add_reno_sack(sk); } - if (tp->ca_state == TCP_CA_Disorder) + if (icsk->icsk_ca_state == TCP_CA_Disorder) tcp_try_undo_dsack(sk, tp); if (!tcp_time_to_recover(sk, tp)) { @@ -1869,20 +1889,20 @@ tcp_fastretrans_alert(struct sock *sk, u32 prior_snd_una, tp->undo_marker = tp->snd_una; tp->undo_retrans = tp->retrans_out; - if (tp->ca_state < TCP_CA_CWR) { + if (icsk->icsk_ca_state < TCP_CA_CWR) { if (!(flag&FLAG_ECE)) - tp->prior_ssthresh = tcp_current_ssthresh(tp); - tp->snd_ssthresh = tp->ca_ops->ssthresh(tp); + tp->prior_ssthresh = tcp_current_ssthresh(sk); + tp->snd_ssthresh = icsk->icsk_ca_ops->ssthresh(sk); TCP_ECN_queue_cwr(tp); } tp->snd_cwnd_cnt = 0; - tcp_set_ca_state(tp, TCP_CA_Recovery); + tcp_set_ca_state(sk, TCP_CA_Recovery); } if (is_dupack || tcp_head_timedout(sk, tp)) tcp_update_scoreboard(sk, tp); - tcp_cwnd_down(tp); + tcp_cwnd_down(sk); tcp_xmit_retransmit_queue(sk); } @@ -1908,7 +1928,7 @@ static void tcp_ack_saw_tstamp(struct sock *sk, u32 *usrtt, int flag) */ struct tcp_sock *tp = tcp_sk(sk); const __u32 seq_rtt = tcp_time_stamp - 
tp->rx_opt.rcv_tsecr; - tcp_rtt_estimator(tp, seq_rtt, usrtt); + tcp_rtt_estimator(sk, seq_rtt, usrtt); tcp_set_rto(sk); inet_csk(sk)->icsk_backoff = 0; tcp_bound_rto(sk); @@ -1928,7 +1948,7 @@ static void tcp_ack_no_tstamp(struct sock *sk, u32 seq_rtt, u32 *usrtt, int flag if (flag & FLAG_RETRANS_DATA_ACKED) return; - tcp_rtt_estimator(tcp_sk(sk), seq_rtt, usrtt); + tcp_rtt_estimator(sk, seq_rtt, usrtt); tcp_set_rto(sk); inet_csk(sk)->icsk_backoff = 0; tcp_bound_rto(sk); @@ -1945,11 +1965,12 @@ static inline void tcp_ack_update_rtt(struct sock *sk, const int flag, tcp_ack_no_tstamp(sk, seq_rtt, usrtt, flag); } -static inline void tcp_cong_avoid(struct tcp_sock *tp, u32 ack, u32 rtt, +static inline void tcp_cong_avoid(struct sock *sk, u32 ack, u32 rtt, u32 in_flight, int good) { - tp->ca_ops->cong_avoid(tp, ack, rtt, in_flight, good); - tp->snd_cwnd_stamp = tcp_time_stamp; + const struct inet_connection_sock *icsk = inet_csk(sk); + icsk->icsk_ca_ops->cong_avoid(sk, ack, rtt, in_flight, good); + tcp_sk(sk)->snd_cwnd_stamp = tcp_time_stamp; } /* Restart timer after forward progress on connection. 
@@ -2098,11 +2119,12 @@ static int tcp_clean_rtx_queue(struct sock *sk, __s32 *seq_rtt_p, s32 *seq_usrtt } if (acked&FLAG_ACKED) { + const struct inet_connection_sock *icsk = inet_csk(sk); tcp_ack_update_rtt(sk, acked, seq_rtt, seq_usrtt); tcp_ack_packets_out(sk, tp); - if (tp->ca_ops->pkts_acked) - tp->ca_ops->pkts_acked(tp, pkts_acked); + if (icsk->icsk_ca_ops->pkts_acked) + icsk->icsk_ca_ops->pkts_acked(sk, pkts_acked); } #if FASTRETRANS_DEBUG > 0 @@ -2110,19 +2132,20 @@ static int tcp_clean_rtx_queue(struct sock *sk, __s32 *seq_rtt_p, s32 *seq_usrtt BUG_TRAP((int)tp->lost_out >= 0); BUG_TRAP((int)tp->retrans_out >= 0); if (!tp->packets_out && tp->rx_opt.sack_ok) { + const struct inet_connection_sock *icsk = inet_csk(sk); if (tp->lost_out) { printk(KERN_DEBUG "Leak l=%u %d\n", - tp->lost_out, tp->ca_state); + tp->lost_out, icsk->icsk_ca_state); tp->lost_out = 0; } if (tp->sacked_out) { printk(KERN_DEBUG "Leak s=%u %d\n", - tp->sacked_out, tp->ca_state); + tp->sacked_out, icsk->icsk_ca_state); tp->sacked_out = 0; } if (tp->retrans_out) { printk(KERN_DEBUG "Leak r=%u %d\n", - tp->retrans_out, tp->ca_state); + tp->retrans_out, icsk->icsk_ca_state); tp->retrans_out = 0; } } @@ -2152,16 +2175,17 @@ static void tcp_ack_probe(struct sock *sk) } } -static inline int tcp_ack_is_dubious(struct tcp_sock *tp, int flag) +static inline int tcp_ack_is_dubious(const struct sock *sk, const int flag) { return (!(flag & FLAG_NOT_DUP) || (flag & FLAG_CA_ALERT) || - tp->ca_state != TCP_CA_Open); + inet_csk(sk)->icsk_ca_state != TCP_CA_Open); } -static inline int tcp_may_raise_cwnd(struct tcp_sock *tp, int flag) +static inline int tcp_may_raise_cwnd(const struct sock *sk, const int flag) { + const struct tcp_sock *tp = tcp_sk(sk); return (!(flag & FLAG_ECE) || tp->snd_cwnd < tp->snd_ssthresh) && - !((1<<tp->ca_state)&(TCPF_CA_Recovery|TCPF_CA_CWR)); + !((1 << inet_csk(sk)->icsk_ca_state) & (TCPF_CA_Recovery | TCPF_CA_CWR)); } /* Check that window update is acceptable.
@@ -2251,6 +2275,7 @@ static void tcp_process_frto(struct sock *sk, u32 prior_snd_una) /* This routine deals with incoming acks, but not outgoing ones. */ static int tcp_ack(struct sock *sk, struct sk_buff *skb, int flag) { + struct inet_connection_sock *icsk = inet_csk(sk); struct tcp_sock *tp = tcp_sk(sk); u32 prior_snd_una = tp->snd_una; u32 ack_seq = TCP_SKB_CB(skb)->seq; @@ -2278,7 +2303,7 @@ static int tcp_ack(struct sock *sk, struct sk_buff *skb, int flag) tp->snd_una = ack; flag |= FLAG_WIN_UPDATE; - tcp_ca_event(tp, CA_EVENT_FAST_ACK); + tcp_ca_event(sk, CA_EVENT_FAST_ACK); NET_INC_STATS_BH(LINUX_MIB_TCPHPACKS); } else { @@ -2295,7 +2320,7 @@ static int tcp_ack(struct sock *sk, struct sk_buff *skb, int flag) if (TCP_ECN_rcv_ecn_echo(tp, skb->h.th)) flag |= FLAG_ECE; - tcp_ca_event(tp, CA_EVENT_SLOW_ACK); + tcp_ca_event(sk, CA_EVENT_SLOW_ACK); } /* We passed data and got it acked, remove any soft error @@ -2311,19 +2336,19 @@ static int tcp_ack(struct sock *sk, struct sk_buff *skb, int flag) /* See if we can take anything off of the retransmit queue. */ flag |= tcp_clean_rtx_queue(sk, &seq_rtt, - tp->ca_ops->rtt_sample ? &seq_usrtt : NULL); + icsk->icsk_ca_ops->rtt_sample ? &seq_usrtt : NULL); if (tp->frto_counter) tcp_process_frto(sk, prior_snd_una); - if (tcp_ack_is_dubious(tp, flag)) { + if (tcp_ack_is_dubious(sk, flag)) { /* Advanve CWND, if state allows this. 
*/ - if ((flag & FLAG_DATA_ACKED) && tcp_may_raise_cwnd(tp, flag)) - tcp_cong_avoid(tp, ack, seq_rtt, prior_in_flight, 0); + if ((flag & FLAG_DATA_ACKED) && tcp_may_raise_cwnd(sk, flag)) + tcp_cong_avoid(sk, ack, seq_rtt, prior_in_flight, 0); tcp_fastretrans_alert(sk, prior_snd_una, prior_packets, flag); } else { if ((flag & FLAG_DATA_ACKED)) - tcp_cong_avoid(tp, ack, seq_rtt, prior_in_flight, 1); + tcp_cong_avoid(sk, ack, seq_rtt, prior_in_flight, 1); } if ((flag & FLAG_FORWARD_PROGRESS) || !(flag&FLAG_NOT_DUP)) @@ -2332,7 +2357,7 @@ static int tcp_ack(struct sock *sk, struct sk_buff *skb, int flag) return 1; no_queue: - tp->probes_out = 0; + icsk->icsk_probes_out = 0; /* If this ack opens up a zero window, clear backoff. It was * being used to time the probes, and is probably far higher than @@ -3301,12 +3326,12 @@ void tcp_cwnd_application_limited(struct sock *sk) { struct tcp_sock *tp = tcp_sk(sk); - if (tp->ca_state == TCP_CA_Open && + if (inet_csk(sk)->icsk_ca_state == TCP_CA_Open && sk->sk_socket && !test_bit(SOCK_NOSPACE, &sk->sk_socket->flags)) { /* Limited by application or receiver window. */ u32 win_used = max(tp->snd_cwnd_used, 2U); if (win_used < tp->snd_cwnd) { - tp->snd_ssthresh = tcp_current_ssthresh(tp); + tp->snd_ssthresh = tcp_current_ssthresh(sk); tp->snd_cwnd = (tp->snd_cwnd + win_used) >> 1; } tp->snd_cwnd_used = 0; @@ -3935,7 +3960,7 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb, tcp_init_metrics(sk); - tcp_init_congestion_control(tp); + tcp_init_congestion_control(sk); /* Prevent spurious tcp_cwnd_restart() on first data * packet. @@ -4212,7 +4237,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb, tcp_init_metrics(sk); - tcp_init_congestion_control(tp); + tcp_init_congestion_control(sk); /* Prevent spurious tcp_cwnd_restart() on * first data packet. 
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 32a0ebc589d5..97bbf595230d 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1409,13 +1409,14 @@ struct tcp_func ipv4_specific = { */ static int tcp_v4_init_sock(struct sock *sk) { + struct inet_connection_sock *icsk = inet_csk(sk); struct tcp_sock *tp = tcp_sk(sk); skb_queue_head_init(&tp->out_of_order_queue); tcp_init_xmit_timers(sk); tcp_prequeue_init(tp); - inet_csk(sk)->icsk_rto = TCP_TIMEOUT_INIT; + icsk->icsk_rto = TCP_TIMEOUT_INIT; tp->mdev = TCP_TIMEOUT_INIT; /* So many TCP implementations out there (incorrectly) count the @@ -1433,7 +1434,7 @@ static int tcp_v4_init_sock(struct sock *sk) tp->mss_cache = 536; tp->reordering = sysctl_tcp_reordering; - tp->ca_ops = &tcp_init_congestion_ops; + icsk->icsk_ca_ops = &tcp_init_congestion_ops; sk->sk_state = TCP_CLOSE; @@ -1456,7 +1457,7 @@ int tcp_v4_destroy_sock(struct sock *sk) tcp_clear_xmit_timers(sk); - tcp_cleanup_congestion_control(tp); + tcp_cleanup_congestion_control(sk); /* Cleanup up the write buffer. 
*/ sk_stream_writequeue_purge(sk); @@ -1883,7 +1884,7 @@ static void get_tcp4_sock(struct sock *sp, char *tmpbuf, int i) jiffies_to_clock_t(timer_expires - jiffies), icsk->icsk_retransmits, sock_i_uid(sp), - tp->probes_out, + icsk->icsk_probes_out, sock_i_ino(sp), atomic_read(&sp->sk_refcnt), sp, icsk->icsk_rto, diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index dc085233d512..a88db28b0af7 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -384,9 +384,9 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req, newtp->frto_counter = 0; newtp->frto_highmark = 0; - newtp->ca_ops = &tcp_reno; + newicsk->icsk_ca_ops = &tcp_reno; - tcp_set_ca_state(newtp, TCP_CA_Open); + tcp_set_ca_state(newsk, TCP_CA_Open); tcp_init_xmit_timers(newsk); skb_queue_head_init(&newtp->out_of_order_queue); newtp->rcv_wup = treq->rcv_isn + 1; @@ -399,7 +399,6 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req, newtp->rx_opt.dsack = 0; newtp->rx_opt.eff_sacks = 0; - newtp->probes_out = 0; newtp->rx_opt.num_sacks = 0; newtp->urg_data = 0; diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index f458eacb5ef2..267b0fcbfc9c 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -112,9 +112,9 @@ static void tcp_cwnd_restart(struct sock *sk, struct dst_entry *dst) u32 restart_cwnd = tcp_init_cwnd(tp, dst); u32 cwnd = tp->snd_cwnd; - tcp_ca_event(tp, CA_EVENT_CWND_RESTART); + tcp_ca_event(sk, CA_EVENT_CWND_RESTART); - tp->snd_ssthresh = tcp_current_ssthresh(tp); + tp->snd_ssthresh = tcp_current_ssthresh(sk); restart_cwnd = min(restart_cwnd, cwnd); while ((delta -= inet_csk(sk)->icsk_rto) > 0 && cwnd > restart_cwnd) @@ -265,6 +265,7 @@ static __inline__ u16 tcp_select_window(struct sock *sk) static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb) { if (skb != NULL) { + const struct inet_connection_sock *icsk = inet_csk(sk); struct inet_sock *inet = inet_sk(sk); struct tcp_sock 
*tp = tcp_sk(sk); struct tcp_skb_cb *tcb = TCP_SKB_CB(skb); @@ -280,7 +281,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb) #define SYSCTL_FLAG_SACK 0x4 /* If congestion control is doing timestamping */ - if (tp->ca_ops->rtt_sample) + if (icsk->icsk_ca_ops->rtt_sample) do_gettimeofday(&skb->stamp); sysctl_flags = 0; @@ -308,7 +309,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb) } if (tcp_packets_in_flight(tp) == 0) - tcp_ca_event(tp, CA_EVENT_TX_START); + tcp_ca_event(sk, CA_EVENT_TX_START); th = (struct tcphdr *) skb_push(skb, tcp_header_size); skb->h.th = th; @@ -366,7 +367,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb) if (err <= 0) return err; - tcp_enter_cwr(tp); + tcp_enter_cwr(sk); /* NET_XMIT_CN is special. It does not guarantee, * that this packet is lost. It tells that device @@ -905,12 +906,13 @@ static int tso_fragment(struct sock *sk, struct sk_buff *skb, unsigned int len, */ static int tcp_tso_should_defer(struct sock *sk, struct tcp_sock *tp, struct sk_buff *skb) { + const struct inet_connection_sock *icsk = inet_csk(sk); u32 send_win, cong_win, limit, in_flight; if (TCP_SKB_CB(skb)->flags & TCPCB_FLAG_FIN) return 0; - if (tp->ca_state != TCP_CA_Open) + if (icsk->icsk_ca_state != TCP_CA_Open) return 0; in_flight = tcp_packets_in_flight(tp); @@ -1287,6 +1289,7 @@ static void tcp_retrans_try_collapse(struct sock *sk, struct sk_buff *skb, int m */ void tcp_simple_retransmit(struct sock *sk) { + const struct inet_connection_sock *icsk = inet_csk(sk); struct tcp_sock *tp = tcp_sk(sk); struct sk_buff *skb; unsigned int mss = tcp_current_mss(sk, 0); @@ -1317,12 +1320,12 @@ void tcp_simple_retransmit(struct sock *sk) * in network, but units changed and effective * cwnd/ssthresh really reduced now. 
*/ - if (tp->ca_state != TCP_CA_Loss) { + if (icsk->icsk_ca_state != TCP_CA_Loss) { tp->high_seq = tp->snd_nxt; - tp->snd_ssthresh = tcp_current_ssthresh(tp); + tp->snd_ssthresh = tcp_current_ssthresh(sk); tp->prior_ssthresh = 0; tp->undo_marker = 0; - tcp_set_ca_state(tp, TCP_CA_Loss); + tcp_set_ca_state(sk, TCP_CA_Loss); } tcp_xmit_retransmit_queue(sk); } @@ -1462,6 +1465,7 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb) */ void tcp_xmit_retransmit_queue(struct sock *sk) { + const struct inet_connection_sock *icsk = inet_csk(sk); struct tcp_sock *tp = tcp_sk(sk); struct sk_buff *skb; int packet_cnt = tp->lost_out; @@ -1485,7 +1489,7 @@ void tcp_xmit_retransmit_queue(struct sock *sk) if (!(sacked&(TCPCB_SACKED_ACKED|TCPCB_SACKED_RETRANS))) { if (tcp_retransmit_skb(sk, skb)) return; - if (tp->ca_state != TCP_CA_Loss) + if (icsk->icsk_ca_state != TCP_CA_Loss) NET_INC_STATS_BH(LINUX_MIB_TCPFASTRETRANS); else NET_INC_STATS_BH(LINUX_MIB_TCPSLOWSTARTRETRANS); @@ -1507,7 +1511,7 @@ void tcp_xmit_retransmit_queue(struct sock *sk) /* OK, demanded retransmission is finished. */ /* Forward retransmissions are possible only during Recovery. */ - if (tp->ca_state != TCP_CA_Recovery) + if (icsk->icsk_ca_state != TCP_CA_Recovery) return; /* No forward retransmissions in Reno are possible. */ @@ -2028,7 +2032,7 @@ void tcp_send_probe0(struct sock *sk) if (tp->packets_out || !sk->sk_send_head) { /* Cancel probe timer, if it is not required. */ - tp->probes_out = 0; + icsk->icsk_probes_out = 0; icsk->icsk_backoff = 0; return; } @@ -2036,19 +2040,19 @@ void tcp_send_probe0(struct sock *sk) if (err <= 0) { if (icsk->icsk_backoff < sysctl_tcp_retries2) icsk->icsk_backoff++; - tp->probes_out++; + icsk->icsk_probes_out++; inet_csk_reset_xmit_timer(sk, ICSK_TIME_PROBE0, min(icsk->icsk_rto << icsk->icsk_backoff, TCP_RTO_MAX), TCP_RTO_MAX); } else { /* If packet was not sent due to local congestion, - * do not backoff and do not remember probes_out. 
+ * do not backoff and do not remember icsk_probes_out. * Let local senders to fight for local resources. * * Use accumulated backoff yet. */ - if (!tp->probes_out) - tp->probes_out=1; + if (!icsk->icsk_probes_out) + icsk->icsk_probes_out = 1; inet_csk_reset_xmit_timer(sk, ICSK_TIME_PROBE0, min(icsk->icsk_rto << icsk->icsk_backoff, TCP_RESOURCE_PROBE_INTERVAL), diff --git a/net/ipv4/tcp_scalable.c b/net/ipv4/tcp_scalable.c index 70e108e15c71..327770bf5522 100644 --- a/net/ipv4/tcp_scalable.c +++ b/net/ipv4/tcp_scalable.c @@ -16,9 +16,10 @@ #define TCP_SCALABLE_AI_CNT 50U #define TCP_SCALABLE_MD_SCALE 3 -static void tcp_scalable_cong_avoid(struct tcp_sock *tp, u32 ack, u32 rtt, +static void tcp_scalable_cong_avoid(struct sock *sk, u32 ack, u32 rtt, u32 in_flight, int flag) { + struct tcp_sock *tp = tcp_sk(sk); if (in_flight < tp->snd_cwnd) return; @@ -35,8 +36,9 @@ static void tcp_scalable_cong_avoid(struct tcp_sock *tp, u32 ack, u32 rtt, tp->snd_cwnd_stamp = tcp_time_stamp; } -static u32 tcp_scalable_ssthresh(struct tcp_sock *tp) +static u32 tcp_scalable_ssthresh(struct sock *sk) { + const struct tcp_sock *tp = tcp_sk(sk); return max(tp->snd_cwnd - (tp->snd_cwnd>>TCP_SCALABLE_MD_SCALE), 2U); } diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index 72cec6981830..415ee47ac1c5 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -233,11 +233,12 @@ out_unlock: static void tcp_probe_timer(struct sock *sk) { + struct inet_connection_sock *icsk = inet_csk(sk); struct tcp_sock *tp = tcp_sk(sk); int max_probes; if (tp->packets_out || !sk->sk_send_head) { - tp->probes_out = 0; + icsk->icsk_probes_out = 0; return; } @@ -248,7 +249,7 @@ static void tcp_probe_timer(struct sock *sk) * FIXME: We ought not to do it, Solaris 2.5 actually has fixing * this behaviour in Solaris down as a bug fix. [AC] * - * Let me to explain. probes_out is zeroed by incoming ACKs + * Let me to explain. icsk_probes_out is zeroed by incoming ACKs * even if they advertise zero window. 
Hence, connection is killed only * if we received no ACKs for normal connection timeout. It is not killed * only because window stays zero for some time, window may be zero @@ -259,16 +260,15 @@ static void tcp_probe_timer(struct sock *sk) max_probes = sysctl_tcp_retries2; if (sock_flag(sk, SOCK_DEAD)) { - const struct inet_connection_sock *icsk = inet_csk(sk); const int alive = ((icsk->icsk_rto << icsk->icsk_backoff) < TCP_RTO_MAX); max_probes = tcp_orphan_retries(sk, alive); - if (tcp_out_of_resources(sk, alive || tp->probes_out <= max_probes)) + if (tcp_out_of_resources(sk, alive || icsk->icsk_probes_out <= max_probes)) return; } - if (tp->probes_out > max_probes) { + if (icsk->icsk_probes_out > max_probes) { tcp_write_err(sk); } else { /* Only send another probe if we didn't close things up. */ @@ -319,19 +319,20 @@ static void tcp_retransmit_timer(struct sock *sk) goto out; if (icsk->icsk_retransmits == 0) { - if (tp->ca_state == TCP_CA_Disorder || tp->ca_state == TCP_CA_Recovery) { + if (icsk->icsk_ca_state == TCP_CA_Disorder || + icsk->icsk_ca_state == TCP_CA_Recovery) { if (tp->rx_opt.sack_ok) { - if (tp->ca_state == TCP_CA_Recovery) + if (icsk->icsk_ca_state == TCP_CA_Recovery) NET_INC_STATS_BH(LINUX_MIB_TCPSACKRECOVERYFAIL); else NET_INC_STATS_BH(LINUX_MIB_TCPSACKFAILURES); } else { - if (tp->ca_state == TCP_CA_Recovery) + if (icsk->icsk_ca_state == TCP_CA_Recovery) NET_INC_STATS_BH(LINUX_MIB_TCPRENORECOVERYFAIL); else NET_INC_STATS_BH(LINUX_MIB_TCPRENOFAILURES); } - } else if (tp->ca_state == TCP_CA_Loss) { + } else if (icsk->icsk_ca_state == TCP_CA_Loss) { NET_INC_STATS_BH(LINUX_MIB_TCPLOSSFAILURES); } else { NET_INC_STATS_BH(LINUX_MIB_TCPTIMEOUTS); @@ -449,6 +450,7 @@ void tcp_set_keepalive(struct sock *sk, int val) static void tcp_keepalive_timer (unsigned long data) { struct sock *sk = (struct sock *) data; + struct inet_connection_sock *icsk = inet_csk(sk); struct tcp_sock *tp = tcp_sk(sk); __u32 elapsed; @@ -490,14 +492,14 @@ static void 
tcp_keepalive_timer (unsigned long data) elapsed = tcp_time_stamp - tp->rcv_tstamp; if (elapsed >= keepalive_time_when(tp)) { - if ((!tp->keepalive_probes && tp->probes_out >= sysctl_tcp_keepalive_probes) || - (tp->keepalive_probes && tp->probes_out >= tp->keepalive_probes)) { + if ((!tp->keepalive_probes && icsk->icsk_probes_out >= sysctl_tcp_keepalive_probes) || + (tp->keepalive_probes && icsk->icsk_probes_out >= tp->keepalive_probes)) { tcp_send_active_reset(sk, GFP_ATOMIC); tcp_write_err(sk); goto out; } if (tcp_write_wakeup(sk) <= 0) { - tp->probes_out++; + icsk->icsk_probes_out++; elapsed = keepalive_intvl_when(tp); } else { /* If keepalive was lost due to local congestion, diff --git a/net/ipv4/tcp_vegas.c b/net/ipv4/tcp_vegas.c index 9bd443db5193..054de24efee2 100644 --- a/net/ipv4/tcp_vegas.c +++ b/net/ipv4/tcp_vegas.c @@ -82,9 +82,10 @@ struct vegas { * Instead we must wait until the completion of an RTT during * which we actually receive ACKs. */ -static inline void vegas_enable(struct tcp_sock *tp) +static inline void vegas_enable(struct sock *sk) { - struct vegas *vegas = tcp_ca(tp); + const struct tcp_sock *tp = tcp_sk(sk); + struct vegas *vegas = inet_csk_ca(sk); /* Begin taking Vegas samples next time we send something. */ vegas->doing_vegas_now = 1; @@ -97,19 +98,19 @@ static inline void vegas_enable(struct tcp_sock *tp) } /* Stop taking Vegas samples for now. */ -static inline void vegas_disable(struct tcp_sock *tp) +static inline void vegas_disable(struct sock *sk) { - struct vegas *vegas = tcp_ca(tp); + struct vegas *vegas = inet_csk_ca(sk); vegas->doing_vegas_now = 0; } -static void tcp_vegas_init(struct tcp_sock *tp) +static void tcp_vegas_init(struct sock *sk) { - struct vegas *vegas = tcp_ca(tp); + struct vegas *vegas = inet_csk_ca(sk); vegas->baseRTT = 0x7fffffff; - vegas_enable(tp); + vegas_enable(sk); } /* Do RTT sampling needed for Vegas. 
@@ -120,9 +121,9 @@ static void tcp_vegas_init(struct tcp_sock *tp) * o min-filter RTT samples from a much longer window (forever for now) * to find the propagation delay (baseRTT) */ -static void tcp_vegas_rtt_calc(struct tcp_sock *tp, u32 usrtt) +static void tcp_vegas_rtt_calc(struct sock *sk, u32 usrtt) { - struct vegas *vegas = tcp_ca(tp); + struct vegas *vegas = inet_csk_ca(sk); u32 vrtt = usrtt + 1; /* Never allow zero rtt or baseRTT */ /* Filter to find propagation delay: */ @@ -136,13 +137,13 @@ static void tcp_vegas_rtt_calc(struct tcp_sock *tp, u32 usrtt) vegas->cntRTT++; } -static void tcp_vegas_state(struct tcp_sock *tp, u8 ca_state) +static void tcp_vegas_state(struct sock *sk, u8 ca_state) { if (ca_state == TCP_CA_Open) - vegas_enable(tp); + vegas_enable(sk); else - vegas_disable(tp); + vegas_disable(sk); } /* @@ -154,20 +155,21 @@ static void tcp_vegas_state(struct tcp_sock *tp, u8 ca_state) * packets, _then_ we can make Vegas calculations * again. */ -static void tcp_vegas_cwnd_event(struct tcp_sock *tp, enum tcp_ca_event event) +static void tcp_vegas_cwnd_event(struct sock *sk, enum tcp_ca_event event) { if (event == CA_EVENT_CWND_RESTART || event == CA_EVENT_TX_START) - tcp_vegas_init(tp); + tcp_vegas_init(sk); } -static void tcp_vegas_cong_avoid(struct tcp_sock *tp, u32 ack, +static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack, u32 seq_rtt, u32 in_flight, int flag) { - struct vegas *vegas = tcp_ca(tp); + struct tcp_sock *tp = tcp_sk(sk); + struct vegas *vegas = inet_csk_ca(sk); if (!vegas->doing_vegas_now) - return tcp_reno_cong_avoid(tp, ack, seq_rtt, in_flight, flag); + return tcp_reno_cong_avoid(sk, ack, seq_rtt, in_flight, flag); /* The key players are v_beg_snd_una and v_beg_snd_nxt. * @@ -219,7 +221,7 @@ static void tcp_vegas_cong_avoid(struct tcp_sock *tp, u32 ack, * but that's not too awful, since we're taking the min, * rather than averaging. 
*/ - tcp_vegas_rtt_calc(tp, seq_rtt*1000); + tcp_vegas_rtt_calc(sk, seq_rtt * 1000); /* We do the Vegas calculations only if we got enough RTT * samples that we can be reasonably sure that we got @@ -359,10 +361,10 @@ static void tcp_vegas_cong_avoid(struct tcp_sock *tp, u32 ack, } /* Extract info for Tcp socket info provided via netlink. */ -static void tcp_vegas_get_info(struct tcp_sock *tp, u32 ext, +static void tcp_vegas_get_info(struct sock *sk, u32 ext, struct sk_buff *skb) { - const struct vegas *ca = tcp_ca(tp); + const struct vegas *ca = inet_csk_ca(sk); if (ext & (1<<(TCPDIAG_VEGASINFO-1))) { struct tcpvegas_info *info; @@ -393,7 +395,7 @@ static struct tcp_congestion_ops tcp_vegas = { static int __init tcp_vegas_register(void) { - BUG_ON(sizeof(struct vegas) > TCP_CA_PRIV_SIZE); + BUG_ON(sizeof(struct vegas) > ICSK_CA_PRIV_SIZE); tcp_register_congestion_control(&tcp_vegas); return 0; } diff --git a/net/ipv4/tcp_westwood.c b/net/ipv4/tcp_westwood.c index ef827242c940..d8a5a2b92e37 100644 --- a/net/ipv4/tcp_westwood.c +++ b/net/ipv4/tcp_westwood.c @@ -40,9 +40,9 @@ struct westwood { * way as soon as possible. It will reasonably happen within the first * RTT period of the connection lifetime. */ -static void tcp_westwood_init(struct tcp_sock *tp) +static void tcp_westwood_init(struct sock *sk) { - struct westwood *w = tcp_ca(tp); + struct westwood *w = inet_csk_ca(sk); w->bk = 0; w->bw_ns_est = 0; @@ -51,7 +51,7 @@ static void tcp_westwood_init(struct tcp_sock *tp) w->cumul_ack = 0; w->rtt_min = w->rtt = TCP_WESTWOOD_INIT_RTT; w->rtt_win_sx = tcp_time_stamp; - w->snd_una = tp->snd_una; + w->snd_una = tcp_sk(sk)->snd_una; } /* @@ -74,11 +74,11 @@ static inline void westwood_filter(struct westwood *w, u32 delta) * Called after processing group of packets. * but all westwood needs is the last sample of srtt. 
*/ -static void tcp_westwood_pkts_acked(struct tcp_sock *tp, u32 cnt) +static void tcp_westwood_pkts_acked(struct sock *sk, u32 cnt) { - struct westwood *w = tcp_ca(tp); + struct westwood *w = inet_csk_ca(sk); if (cnt > 0) - w->rtt = tp->srtt >> 3; + w->rtt = tcp_sk(sk)->srtt >> 3; } /* @@ -86,9 +86,9 @@ static void tcp_westwood_pkts_acked(struct tcp_sock *tp, u32 cnt) * It updates RTT evaluation window if it is the right moment to do * it. If so it calls filter for evaluating bandwidth. */ -static void westwood_update_window(struct tcp_sock *tp) +static void westwood_update_window(struct sock *sk) { - struct westwood *w = tcp_ca(tp); + struct westwood *w = inet_csk_ca(sk); s32 delta = tcp_time_stamp - w->rtt_win_sx; /* @@ -114,11 +114,12 @@ static void westwood_update_window(struct tcp_sock *tp) * header prediction is successful. In such case in fact update is * straight forward and doesn't need any particular care. */ -static inline void westwood_fast_bw(struct tcp_sock *tp) +static inline void westwood_fast_bw(struct sock *sk) { - struct westwood *w = tcp_ca(tp); + const struct tcp_sock *tp = tcp_sk(sk); + struct westwood *w = inet_csk_ca(sk); - westwood_update_window(tp); + westwood_update_window(sk); w->bk += tp->snd_una - w->snd_una; w->snd_una = tp->snd_una; @@ -130,9 +131,10 @@ static inline void westwood_fast_bw(struct tcp_sock *tp) * This function evaluates cumul_ack for evaluating bk in case of * delayed or partial acks. 
*/ -static inline u32 westwood_acked_count(struct tcp_sock *tp) +static inline u32 westwood_acked_count(struct sock *sk) { - struct westwood *w = tcp_ca(tp); + const struct tcp_sock *tp = tcp_sk(sk); + struct westwood *w = inet_csk_ca(sk); w->cumul_ack = tp->snd_una - w->snd_una; @@ -160,9 +162,10 @@ static inline u32 westwood_acked_count(struct tcp_sock *tp) return w->cumul_ack; } -static inline u32 westwood_bw_rttmin(const struct tcp_sock *tp) +static inline u32 westwood_bw_rttmin(const struct sock *sk) { - struct westwood *w = tcp_ca(tp); + const struct tcp_sock *tp = tcp_sk(sk); + const struct westwood *w = inet_csk_ca(sk); return max_t(u32, (w->bw_est * w->rtt_min) / tp->mss_cache, 2); } @@ -172,31 +175,32 @@ static inline u32 westwood_bw_rttmin(const struct tcp_sock *tp) * in packets we use mss_cache). Rttmin is guaranteed to be >= 2 * so avoids ever returning 0. */ -static u32 tcp_westwood_cwnd_min(struct tcp_sock *tp) +static u32 tcp_westwood_cwnd_min(struct sock *sk) { - return westwood_bw_rttmin(tp); + return westwood_bw_rttmin(sk); } -static void tcp_westwood_event(struct tcp_sock *tp, enum tcp_ca_event event) +static void tcp_westwood_event(struct sock *sk, enum tcp_ca_event event) { - struct westwood *w = tcp_ca(tp); + struct tcp_sock *tp = tcp_sk(sk); + struct westwood *w = inet_csk_ca(sk); switch(event) { case CA_EVENT_FAST_ACK: - westwood_fast_bw(tp); + westwood_fast_bw(sk); break; case CA_EVENT_COMPLETE_CWR: - tp->snd_cwnd = tp->snd_ssthresh = westwood_bw_rttmin(tp); + tp->snd_cwnd = tp->snd_ssthresh = westwood_bw_rttmin(sk); break; case CA_EVENT_FRTO: - tp->snd_ssthresh = westwood_bw_rttmin(tp); + tp->snd_ssthresh = westwood_bw_rttmin(sk); break; case CA_EVENT_SLOW_ACK: - westwood_update_window(tp); - w->bk += westwood_acked_count(tp); + westwood_update_window(sk); + w->bk += westwood_acked_count(sk); w->rtt_min = min(w->rtt, w->rtt_min); break; @@ -208,10 +212,10 @@ static void tcp_westwood_event(struct tcp_sock *tp, enum tcp_ca_event event) /* 
Extract info for Tcp socket info provided via netlink. */ -static void tcp_westwood_info(struct tcp_sock *tp, u32 ext, +static void tcp_westwood_info(struct sock *sk, u32 ext, struct sk_buff *skb) { - const struct westwood *ca = tcp_ca(tp); + const struct westwood *ca = inet_csk_ca(sk); if (ext & (1<<(TCPDIAG_VEGASINFO-1))) { struct rtattr *rta; struct tcpvegas_info *info; @@ -242,7 +246,7 @@ static struct tcp_congestion_ops tcp_westwood = { static int __init tcp_westwood_register(void) { - BUG_ON(sizeof(struct westwood) > TCP_CA_PRIV_SIZE); + BUG_ON(sizeof(struct westwood) > ICSK_CA_PRIV_SIZE); return tcp_register_congestion_control(&tcp_westwood); } diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 08c55b024704..3312cb8742e2 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -2010,13 +2010,14 @@ static struct tcp_func ipv6_mapped = { */ static int tcp_v6_init_sock(struct sock *sk) { + struct inet_connection_sock *icsk = inet_csk(sk); struct tcp_sock *tp = tcp_sk(sk); skb_queue_head_init(&tp->out_of_order_queue); tcp_init_xmit_timers(sk); tcp_prequeue_init(tp); - inet_csk(sk)->icsk_rto = TCP_TIMEOUT_INIT; + icsk->icsk_rto = TCP_TIMEOUT_INIT; tp->mdev = TCP_TIMEOUT_INIT; /* So many TCP implementations out there (incorrectly) count the @@ -2038,7 +2039,7 @@ static int tcp_v6_init_sock(struct sock *sk) sk->sk_state = TCP_CLOSE; tp->af_specific = &ipv6_specific; - tp->ca_ops = &tcp_init_congestion_ops; + icsk->icsk_ca_ops = &tcp_init_congestion_ops; sk->sk_write_space = sk_stream_write_space; sock_set_flag(sk, SOCK_USE_WRITE_QUEUE); @@ -2135,7 +2136,7 @@ static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i) jiffies_to_clock_t(timer_expires - jiffies), icsk->icsk_retransmits, sock_i_uid(sp), - tp->probes_out, + icsk->icsk_probes_out, sock_i_ino(sp), atomic_read(&sp->sk_refcnt), sp, icsk->icsk_rto, From 540722ffc3a0d7e11d97a13e1ce6f3bc23b061c1 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 10 Aug 2005 05:54:28 
-0300 Subject: [PATCH 372/584] [TCPDIAG]: Implement cheapest way of supporting DCCPDIAG_GETSOCK With ugly ifdefs, etc, but this actually: 1. keeps the existing ABI, i.e. no need to recompile the iproute2 utilities if not interested in DCCP. 2. Provides all the tcp_diag functionality in DCCP, with just a small patch that makes iproute2 support DCCP. Of course I'll get this cleaned-up in time, but for now I think it's OK to be this way to quickly get this functionality. iproute2-ss050808 patch at: http://vger.kernel.org/~acme/iproute2-ss050808.dccp.patch Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. Miller --- include/linux/tcp_diag.h | 1 + net/dccp/ipv4.c | 2 + net/ipv4/tcp_diag.c | 86 ++++++++++++++++++++++++++++------------ 3 files changed, 64 insertions(+), 25 deletions(-) diff --git a/include/linux/tcp_diag.h b/include/linux/tcp_diag.h index 7a5996743946..190494ebcfb8 100644 --- a/include/linux/tcp_diag.h +++ b/include/linux/tcp_diag.h @@ -3,6 +3,7 @@ /* Just some random number */ #define TCPDIAG_GETSOCK 18 +#define DCCPDIAG_GETSOCK 19 /* Socket identity */ struct tcpdiag_sockid diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index f6da9328221e..d3770aed3b15 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -34,6 +34,8 @@ struct inet_hashinfo __cacheline_aligned dccp_hashinfo = { .port_rover = 1024 - 1, }; +EXPORT_SYMBOL_GPL(dccp_hashinfo); + static int dccp_v4_get_port(struct sock *sk, const unsigned short snum) { return inet_csk_get_port(&dccp_hashinfo, sk, snum); diff --git a/net/ipv4/tcp_diag.c b/net/ipv4/tcp_diag.c index 4288ecfec9a7..f5fc84aaa9b4 100644 --- a/net/ipv4/tcp_diag.c +++ b/net/ipv4/tcp_diag.c @@ -45,11 +45,15 @@ static struct sock *tcpnl; #define TCPDIAG_PUT(skb, attrtype, attrlen) \ RTA_DATA(__RTA_PUT(skb, attrtype, attrlen)) +#if defined(CONFIG_IP_DCCP) || defined(CONFIG_IP_DCCP_MODULE) +extern struct inet_hashinfo dccp_hashinfo; +#endif + static int tcpdiag_fill(struct sk_buff *skb, struct sock *sk, - int ext, u32 pid, 
u32 seq, u16 nlmsg_flags) + int ext, u32 pid, u32 seq, u16 nlmsg_flags, + const struct nlmsghdr *unlh) { const struct inet_sock *inet = inet_sk(sk); - struct tcp_sock *tp = tcp_sk(sk); const struct inet_connection_sock *icsk = inet_csk(sk); struct tcpdiagmsg *r; struct nlmsghdr *nlh; @@ -57,7 +61,7 @@ static int tcpdiag_fill(struct sk_buff *skb, struct sock *sk, struct tcpdiag_meminfo *minfo = NULL; unsigned char *b = skb->tail; - nlh = NLMSG_PUT(skb, pid, seq, TCPDIAG_GETSOCK, sizeof(*r)); + nlh = NLMSG_PUT(skb, pid, seq, unlh->nlmsg_type, sizeof(*r)); nlh->nlmsg_flags = nlmsg_flags; r = NLMSG_DATA(nlh); if (sk->sk_state != TCP_TIME_WAIT) { @@ -147,8 +151,20 @@ static int tcpdiag_fill(struct sk_buff *skb, struct sock *sk, r->tcpdiag_expires = 0; } #undef EXPIRES_IN_MS - r->tcpdiag_rqueue = tp->rcv_nxt - tp->copied_seq; - r->tcpdiag_wqueue = tp->write_seq - tp->snd_una; + /* + * Ahem... for now we'll have some knowledge about TCP -acme + * But this is just one of two small exceptions, both in this + * function, so lets close our eyes for some 15 lines or so... 8) + * -acme + */ + if (sk->sk_protocol == IPPROTO_TCP) { + const struct tcp_sock *tp = tcp_sk(sk); + + r->tcpdiag_rqueue = tp->rcv_nxt - tp->copied_seq; + r->tcpdiag_wqueue = tp->write_seq - tp->snd_una; + } else + r->tcpdiag_rqueue = r->tcpdiag_wqueue = 0; + r->tcpdiag_uid = sock_i_uid(sk); r->tcpdiag_inode = sock_i_ino(sk); @@ -159,8 +175,13 @@ static int tcpdiag_fill(struct sk_buff *skb, struct sock *sk, minfo->tcpdiag_tmem = atomic_read(&sk->sk_wmem_alloc); } - if (info) - tcp_get_info(sk, info); + /* Ahem... 
for now we'll have some knowledge about TCP -acme */ + if (info) { + if (sk->sk_protocol == IPPROTO_TCP) + tcp_get_info(sk, info); + else + memset(info, 0, sizeof(*info)); + } if (sk->sk_state < TCP_TIME_WAIT && icsk->icsk_ca_ops && icsk->icsk_ca_ops->get_info) @@ -194,9 +215,13 @@ static int tcpdiag_get_exact(struct sk_buff *in_skb, const struct nlmsghdr *nlh) struct sock *sk; struct tcpdiagreq *req = NLMSG_DATA(nlh); struct sk_buff *rep; - + struct inet_hashinfo *hashinfo = &tcp_hashinfo; +#if defined(CONFIG_IP_DCCP) || defined(CONFIG_IP_DCCP_MODULE) + if (nlh->nlmsg_type == DCCPDIAG_GETSOCK) + hashinfo = &dccp_hashinfo; +#endif if (req->tcpdiag_family == AF_INET) { - sk = inet_lookup(&tcp_hashinfo, req->id.tcpdiag_dst[0], + sk = inet_lookup(hashinfo, req->id.tcpdiag_dst[0], req->id.tcpdiag_dport, req->id.tcpdiag_src[0], req->id.tcpdiag_sport, req->id.tcpdiag_if); } @@ -230,7 +255,7 @@ static int tcpdiag_get_exact(struct sk_buff *in_skb, const struct nlmsghdr *nlh) if (tcpdiag_fill(rep, sk, req->tcpdiag_ext, NETLINK_CB(in_skb).pid, - nlh->nlmsg_seq, 0) <= 0) + nlh->nlmsg_seq, 0, nlh) <= 0) BUG(); err = netlink_unicast(tcpnl, rep, NETLINK_CB(in_skb).pid, MSG_DONTWAIT); @@ -436,12 +461,13 @@ static int tcpdiag_dump_sock(struct sk_buff *skb, struct sock *sk, } return tcpdiag_fill(skb, sk, r->tcpdiag_ext, NETLINK_CB(cb->skb).pid, - cb->nlh->nlmsg_seq, NLM_F_MULTI); + cb->nlh->nlmsg_seq, NLM_F_MULTI, cb->nlh); } static int tcpdiag_fill_req(struct sk_buff *skb, struct sock *sk, struct request_sock *req, - u32 pid, u32 seq) + u32 pid, u32 seq, + const struct nlmsghdr *unlh) { const struct inet_request_sock *ireq = inet_rsk(req); struct inet_sock *inet = inet_sk(sk); @@ -450,7 +476,7 @@ static int tcpdiag_fill_req(struct sk_buff *skb, struct sock *sk, struct nlmsghdr *nlh; long tmo; - nlh = NLMSG_PUT(skb, pid, seq, TCPDIAG_GETSOCK, sizeof(*r)); + nlh = NLMSG_PUT(skb, pid, seq, unlh->nlmsg_type, sizeof(*r)); nlh->nlmsg_flags = NLM_F_MULTI; r = NLMSG_DATA(nlh); @@ -526,7 
+552,7 @@ static int tcpdiag_dump_reqs(struct sk_buff *skb, struct sock *sk, entry.userlocks = sk->sk_userlocks; } - for (j = s_j; j < TCP_SYNQ_HSIZE; j++) { + for (j = s_j; j < lopt->nr_table_entries; j++) { struct request_sock *req, *head = lopt->syn_table[j]; reqnum = 0; @@ -561,7 +587,7 @@ static int tcpdiag_dump_reqs(struct sk_buff *skb, struct sock *sk, err = tcpdiag_fill_req(skb, sk, req, NETLINK_CB(cb->skb).pid, - cb->nlh->nlmsg_seq); + cb->nlh->nlmsg_seq, cb->nlh); if (err < 0) { cb->args[3] = j + 1; cb->args[4] = reqnum; @@ -583,20 +609,26 @@ static int tcpdiag_dump(struct sk_buff *skb, struct netlink_callback *cb) int i, num; int s_i, s_num; struct tcpdiagreq *r = NLMSG_DATA(cb->nlh); + struct inet_hashinfo *hashinfo; s_i = cb->args[1]; s_num = num = cb->args[2]; - + hashinfo = &tcp_hashinfo; +#if defined(CONFIG_IP_DCCP) || defined(CONFIG_IP_DCCP_MODULE) + if (cb->nlh->nlmsg_type == DCCPDIAG_GETSOCK) + hashinfo = &dccp_hashinfo; +#endif if (cb->args[0] == 0) { if (!(r->tcpdiag_states&(TCPF_LISTEN|TCPF_SYN_RECV))) goto skip_listen_ht; - inet_listen_lock(&tcp_hashinfo); + + inet_listen_lock(hashinfo); for (i = s_i; i < INET_LHTABLE_SIZE; i++) { struct sock *sk; struct hlist_node *node; num = 0; - sk_for_each(sk, node, &tcp_hashinfo.listening_hash[i]) { + sk_for_each(sk, node, &hashinfo->listening_hash[i]) { struct inet_sock *inet = inet_sk(sk); if (num < s_num) { @@ -614,7 +646,7 @@ static int tcpdiag_dump(struct sk_buff *skb, struct netlink_callback *cb) goto syn_recv; if (tcpdiag_dump_sock(skb, sk, cb) < 0) { - inet_listen_unlock(&tcp_hashinfo); + inet_listen_unlock(hashinfo); goto done; } @@ -623,7 +655,7 @@ syn_recv: goto next_listen; if (tcpdiag_dump_reqs(skb, sk, cb) < 0) { - inet_listen_unlock(&tcp_hashinfo); + inet_listen_unlock(hashinfo); goto done; } @@ -637,7 +669,7 @@ next_listen: cb->args[3] = 0; cb->args[4] = 0; } - inet_listen_unlock(&tcp_hashinfo); + inet_listen_unlock(hashinfo); skip_listen_ht: cb->args[0] = 1; s_i = num = s_num = 0; @@ 
-646,8 +678,8 @@ skip_listen_ht: if (!(r->tcpdiag_states&~(TCPF_LISTEN|TCPF_SYN_RECV))) return skb->len; - for (i = s_i; i < tcp_hashinfo.ehash_size; i++) { - struct inet_ehash_bucket *head = &tcp_hashinfo.ehash[i]; + for (i = s_i; i < hashinfo->ehash_size; i++) { + struct inet_ehash_bucket *head = &hashinfo->ehash[i]; struct sock *sk; struct hlist_node *node; @@ -679,7 +711,7 @@ next_normal: if (r->tcpdiag_states&TCPF_TIME_WAIT) { sk_for_each(sk, node, - &tcp_hashinfo.ehash[i + tcp_hashinfo.ehash_size].chain) { + &hashinfo->ehash[i + hashinfo->ehash_size].chain) { struct inet_sock *inet = inet_sk(sk); if (num < s_num) @@ -719,7 +751,11 @@ tcpdiag_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) if (!(nlh->nlmsg_flags&NLM_F_REQUEST)) return 0; - if (nlh->nlmsg_type != TCPDIAG_GETSOCK) + if (nlh->nlmsg_type != TCPDIAG_GETSOCK +#if defined(CONFIG_IP_DCCP) || defined(CONFIG_IP_DCCP_MODULE) + && nlh->nlmsg_type != DCCPDIAG_GETSOCK +#endif + ) goto err_inval; if (NLMSG_LENGTH(sizeof(struct tcpdiagreq)) > skb->len) From 8c60f3fab55712f23f2bd557ceedfbb00c649f37 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 10 Aug 2005 12:59:38 -0300 Subject: [PATCH 373/584] [CCID3]: Separate most of the packet history code This also changes the list_for_each_entry_safe_continue behaviour to match its kerneldoc comment, that is, to start after the pos passed. Also adds several helper functions from previously open coded fragments, making the code more clear. 
Signed-off-by: Arnaldo Carvalho de Melo --- include/linux/list.h | 3 +- net/dccp/Makefile | 3 +- net/dccp/ccids/ccid3.c | 327 ++++++++++++++++---------------------- net/dccp/ccids/ccid3.h | 18 --- net/dccp/packet_history.c | 198 +++++++++++++++++++++++ net/dccp/packet_history.h | 182 +++++++++++++++++++++ 6 files changed, 518 insertions(+), 213 deletions(-) create mode 100644 net/dccp/packet_history.c create mode 100644 net/dccp/packet_history.h diff --git a/include/linux/list.h b/include/linux/list.h index 597094e0fdb5..0f2435f92db3 100644 --- a/include/linux/list.h +++ b/include/linux/list.h @@ -427,7 +427,8 @@ static inline void list_splice_init(struct list_head *list, * @member: the name of the list_struct within the struct. */ #define list_for_each_entry_safe_continue(pos, n, head, member) \ - for (pos = n, n = list_entry(n->member.next, typeof(*n), member); \ + for (pos = list_entry(pos->member.next, typeof(*pos), member), \ + n = list_entry(pos->member.next, typeof(*pos), member); \ &pos->member != (head); \ pos = n, n = list_entry(n->member.next, typeof(*n), member)) diff --git a/net/dccp/Makefile b/net/dccp/Makefile index c6e6ba55c36b..25a50bdbf1bb 100644 --- a/net/dccp/Makefile +++ b/net/dccp/Makefile @@ -1,5 +1,6 @@ obj-$(CONFIG_IP_DCCP) += dccp.o -dccp-y := ccid.o input.o ipv4.o minisocks.o options.o output.o proto.o timer.o +dccp-y := ccid.o input.o ipv4.o minisocks.o options.o output.o proto.o \ + timer.o packet_history.o obj-y += ccids/ diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c index df4adfeaafac..15c25f622000 100644 --- a/net/dccp/ccids/ccid3.c +++ b/net/dccp/ccids/ccid3.c @@ -34,8 +34,10 @@ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
*/ +#include #include "../ccid.h" #include "../dccp.h" +#include "../packet_history.h" #include "ccid3.h" #ifdef CCID3_DEBUG @@ -82,61 +84,11 @@ enum ccid3_options { static int ccid3_debug; -static kmem_cache_t *ccid3_tx_hist_slab; -static kmem_cache_t *ccid3_rx_hist_slab; +struct dccp_tx_hist *ccid3_tx_hist; +struct dccp_rx_hist *ccid3_rx_hist; + static kmem_cache_t *ccid3_loss_interval_hist_slab; -static inline struct ccid3_tx_hist_entry *ccid3_tx_hist_entry_new(int prio) -{ - struct ccid3_tx_hist_entry *entry = kmem_cache_alloc(ccid3_tx_hist_slab, prio); - - if (entry != NULL) - entry->ccid3htx_sent = 0; - - return entry; -} - -static inline void ccid3_tx_hist_entry_delete(struct ccid3_tx_hist_entry *entry) -{ - if (entry != NULL) - kmem_cache_free(ccid3_tx_hist_slab, entry); -} - -static inline struct ccid3_rx_hist_entry *ccid3_rx_hist_entry_new(struct sock *sk, - struct sk_buff *skb, - int prio) -{ - struct ccid3_rx_hist_entry *entry = kmem_cache_alloc(ccid3_rx_hist_slab, prio); - - if (entry != NULL) { - const struct dccp_hdr *dh = dccp_hdr(skb); - - entry->ccid3hrx_seqno = DCCP_SKB_CB(skb)->dccpd_seq; - entry->ccid3hrx_win_count = dh->dccph_ccval; - entry->ccid3hrx_type = dh->dccph_type; - entry->ccid3hrx_ndp = dccp_sk(sk)->dccps_options_received.dccpor_ndp; - do_gettimeofday(&(entry->ccid3hrx_tstamp)); - } - - return entry; -} - -static inline void ccid3_rx_hist_entry_delete(struct ccid3_rx_hist_entry *entry) -{ - if (entry != NULL) - kmem_cache_free(ccid3_rx_hist_slab, entry); -} - -static void ccid3_rx_history_delete(struct list_head *hist) -{ - struct ccid3_rx_hist_entry *entry, *next; - - list_for_each_entry_safe(entry, next, hist, ccid3hrx_node) { - list_del_init(&entry->ccid3hrx_node); - kmem_cache_free(ccid3_rx_hist_slab, entry); - } -} - static inline struct ccid3_loss_interval_hist_entry *ccid3_loss_interval_hist_entry_new(int prio) { return kmem_cache_alloc(ccid3_loss_interval_hist_slab, prio); @@ -982,7 +934,7 @@ static int 
ccid3_hc_tx_send_packet(struct sock *sk, { struct dccp_sock *dp = dccp_sk(sk); struct ccid3_hc_tx_sock *hctx = dp->dccps_hc_tx_ccid_private; - struct ccid3_tx_hist_entry *new_packet = NULL; + struct dccp_tx_hist_entry *new_packet; struct timeval now; long delay; int rc = -ENOTCONN; @@ -997,12 +949,9 @@ static int ccid3_hc_tx_send_packet(struct sock *sk, goto out; /* See if last packet allocated was not sent */ - if (!list_empty(&hctx->ccid3hctx_hist)) - new_packet = list_entry(hctx->ccid3hctx_hist.next, - struct ccid3_tx_hist_entry, ccid3htx_node); - - if (new_packet == NULL || new_packet->ccid3htx_sent) { - new_packet = ccid3_tx_hist_entry_new(SLAB_ATOMIC); + new_packet = dccp_tx_hist_head(&hctx->ccid3hctx_hist); + if (new_packet == NULL || new_packet->dccphtx_sent) { + new_packet = dccp_tx_hist_entry_new(ccid3_tx_hist, SLAB_ATOMIC); rc = -ENOBUFS; if (new_packet == NULL) { @@ -1011,7 +960,7 @@ static int ccid3_hc_tx_send_packet(struct sock *sk, goto out; } - list_add(&new_packet->ccid3htx_node, &hctx->ccid3hctx_hist); + dccp_tx_hist_add_entry(&hctx->ccid3hctx_hist, new_packet); } do_gettimeofday(&now); @@ -1054,7 +1003,9 @@ static int ccid3_hc_tx_send_packet(struct sock *sk, /* Can we send? 
if so add options and add to packet history */ if (rc == 0) - new_packet->ccid3htx_win_count = DCCP_SKB_CB(skb)->dccpd_ccval = hctx->ccid3hctx_last_win_count; + new_packet->dccphtx_win_count = + DCCP_SKB_CB(skb)->dccpd_ccval = + hctx->ccid3hctx_last_win_count; out: return rc; } @@ -1063,7 +1014,6 @@ static void ccid3_hc_tx_packet_sent(struct sock *sk, int more, int len) { struct dccp_sock *dp = dccp_sk(sk); struct ccid3_hc_tx_sock *hctx = dp->dccps_hc_tx_ccid_private; - struct ccid3_tx_hist_entry *packet = NULL; struct timeval now; // ccid3_pr_debug("%s, sk=%p, more=%d, len=%d\n", dccp_role(sk), sk, more, len); @@ -1080,20 +1030,23 @@ static void ccid3_hc_tx_packet_sent(struct sock *sk, int more, int len) /* check if we have sent a data packet */ if (len > 0) { unsigned long quarter_rtt; + struct dccp_tx_hist_entry *packet; - if (list_empty(&hctx->ccid3hctx_hist)) { + packet = dccp_tx_hist_head(&hctx->ccid3hctx_hist); + if (packet == NULL) { printk(KERN_CRIT "%s: packet doesn't exists in history!\n", __FUNCTION__); return; } - packet = list_entry(hctx->ccid3hctx_hist.next, struct ccid3_tx_hist_entry, ccid3htx_node); - if (packet->ccid3htx_sent) { + if (packet->dccphtx_sent) { printk(KERN_CRIT "%s: no unsent packet in history!\n", __FUNCTION__); return; } - packet->ccid3htx_tstamp = now; - packet->ccid3htx_seqno = dp->dccps_gss; - // ccid3_pr_debug("%s, sk=%p, seqno=%llu inserted!\n", dccp_role(sk), sk, packet->ccid3htx_seqno); - + packet->dccphtx_tstamp = now; + packet->dccphtx_seqno = dp->dccps_gss; +#if 0 + ccid3_pr_debug("%s, sk=%p, seqno=%llu inserted!\n", + dccp_role(sk), sk, packet->dccphtx_seqno); +#endif /* * Check if win_count have changed */ /* COMPLIANCE_BEGIN @@ -1106,18 +1059,18 @@ static void ccid3_hc_tx_packet_sent(struct sock *sk, int more, int len) min_t(unsigned long, quarter_rtt, 5)) % 16; ccid3_pr_debug("%s, sk=%p, window changed from %u to %u!\n", dccp_role(sk), sk, - packet->ccid3htx_win_count, + packet->dccphtx_win_count, 
hctx->ccid3hctx_last_win_count); } /* COMPLIANCE_END */ #if 0 ccid3_pr_debug("%s, sk=%p, packet sent (%llu,%u)\n", dccp_role(sk), sk, - packet->ccid3htx_seqno, - packet->ccid3htx_win_count); + packet->dccphtx_seqno, + packet->dccphtx_win_count); #endif hctx->ccid3hctx_idle = 0; - packet->ccid3htx_sent = 1; + packet->dccphtx_sent = 1; } else ccid3_pr_debug("%s, sk=%p, seqno=%llu NOT inserted!\n", dccp_role(sk), sk, dp->dccps_gss); @@ -1152,7 +1105,7 @@ static void ccid3_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb) struct dccp_sock *dp = dccp_sk(sk); struct ccid3_hc_tx_sock *hctx = dp->dccps_hc_tx_ccid_private; struct ccid3_options_received *opt_recv; - struct ccid3_tx_hist_entry *entry, *next, *packet; + struct dccp_tx_hist_entry *packet; unsigned long next_tmout; u16 t_elapsed; u32 pinv; @@ -1191,13 +1144,8 @@ static void ccid3_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb) /* Calculate new round trip sample by * R_sample = (now - t_recvdata) - t_delay */ /* get t_recvdata from history */ - packet = NULL; - list_for_each_entry_safe(entry, next, &hctx->ccid3hctx_hist, ccid3htx_node) - if (entry->ccid3htx_seqno == DCCP_SKB_CB(skb)->dccpd_ack_seq) { - packet = entry; - break; - } - + packet = dccp_tx_hist_find_entry(&hctx->ccid3hctx_hist, + DCCP_SKB_CB(skb)->dccpd_ack_seq); if (packet == NULL) { ccid3_pr_debug("%s, sk=%p, seqno %llu(%s) does't exist in history!\n", dccp_role(sk), sk, DCCP_SKB_CB(skb)->dccpd_ack_seq, @@ -1206,7 +1154,7 @@ static void ccid3_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb) } /* Update RTT */ - r_sample = now_delta(packet->ccid3htx_tstamp); + r_sample = now_delta(packet->dccphtx_tstamp); /* FIXME: */ // r_sample -= usecs_to_jiffies(t_elapsed * 10); @@ -1273,10 +1221,9 @@ static void ccid3_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb) ccid3_calc_new_delta(hctx); /* remove all packets older than the one acked from history */ - list_for_each_entry_safe_continue(entry, next, &hctx->ccid3hctx_hist, 
ccid3htx_node) { - list_del_init(&entry->ccid3htx_node); - ccid3_tx_hist_entry_delete(entry); - } + dccp_tx_hist_purge_older(ccid3_tx_hist, + &hctx->ccid3hctx_hist, packet); + if (hctx->ccid3hctx_x < 10) { ccid3_pr_debug("ccid3_hc_tx_packet_recv hctx->ccid3hctx_x < 10\n"); hctx->ccid3hctx_x = 10; @@ -1285,7 +1232,8 @@ static void ccid3_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb) /* Schedule no feedback timer to expire in max(4 * R, 2 * s / X) */ next_tmout = max(inet_csk(sk)->icsk_rto, - 2 * (hctx->ccid3hctx_s * 100000) / (hctx->ccid3hctx_x/10)); + (2 * (hctx->ccid3hctx_s * 100000) / + (hctx->ccid3hctx_x / 10))); /* maths with 100000 and 10 is to prevent overflow with 32 bit */ ccid3_pr_debug("%s, sk=%p, Scheduled no feedback timer to expire in %lu jiffies (%luus)\n", @@ -1408,7 +1356,6 @@ static void ccid3_hc_tx_exit(struct sock *sk) { struct dccp_sock *dp = dccp_sk(sk); struct ccid3_hc_tx_sock *hctx = dp->dccps_hc_tx_ccid_private; - struct ccid3_tx_hist_entry *entry, *next; ccid3_pr_debug("%s, sk=%p\n", dccp_role(sk), sk); BUG_ON(hctx == NULL); @@ -1417,10 +1364,7 @@ static void ccid3_hc_tx_exit(struct sock *sk) sk_stop_timer(sk, &hctx->ccid3hctx_no_feedback_timer); /* Empty packet history */ - list_for_each_entry_safe(entry, next, &hctx->ccid3hctx_hist, ccid3htx_node) { - list_del_init(&entry->ccid3htx_node); - ccid3_tx_hist_entry_delete(entry); - } + dccp_tx_hist_purge(ccid3_tx_hist, &hctx->ccid3hctx_hist); kfree(dp->dccps_hc_tx_ccid_private); dp->dccps_hc_tx_ccid_private = NULL; @@ -1462,39 +1406,40 @@ static inline void ccid3_hc_rx_set_state(struct sock *sk, enum ccid3_hc_rx_state hcrx->ccid3hcrx_state = state; } -static int ccid3_hc_rx_add_hist(struct sock *sk, struct ccid3_rx_hist_entry *packet) +static int ccid3_hc_rx_add_hist(struct sock *sk, + struct dccp_rx_hist_entry *packet) { struct dccp_sock *dp = dccp_sk(sk); struct ccid3_hc_rx_sock *hcrx = dp->dccps_hc_rx_ccid_private; - struct ccid3_rx_hist_entry *entry, *next; + struct 
dccp_rx_hist_entry *entry, *next, *iter; u8 num_later = 0; - if (list_empty(&hcrx->ccid3hcrx_hist)) - list_add(&packet->ccid3hrx_node, &hcrx->ccid3hcrx_hist); + iter = dccp_rx_hist_head(&hcrx->ccid3hcrx_hist); + if (iter == NULL) + dccp_rx_hist_add_entry(&hcrx->ccid3hcrx_hist, packet); else { - u64 seqno = packet->ccid3hrx_seqno; - struct ccid3_rx_hist_entry *iter = list_entry(hcrx->ccid3hcrx_hist.next, - struct ccid3_rx_hist_entry, - ccid3hrx_node); - if (after48(seqno, iter->ccid3hrx_seqno)) - list_add(&packet->ccid3hrx_node, &hcrx->ccid3hcrx_hist); + const u64 seqno = packet->dccphrx_seqno; + + if (after48(seqno, iter->dccphrx_seqno)) + dccp_rx_hist_add_entry(&hcrx->ccid3hcrx_hist, packet); else { - if (iter->ccid3hrx_type == DCCP_PKT_DATA || - iter->ccid3hrx_type == DCCP_PKT_DATAACK) + if (dccp_rx_hist_entry_data_packet(iter)) num_later = 1; - list_for_each_entry_continue(iter, &hcrx->ccid3hcrx_hist, ccid3hrx_node) { - if (after48(seqno, iter->ccid3hrx_seqno)) { - list_add(&packet->ccid3hrx_node, &iter->ccid3hrx_node); + list_for_each_entry_continue(iter, + &hcrx->ccid3hcrx_hist, + dccphrx_node) { + if (after48(seqno, iter->dccphrx_seqno)) { + dccp_rx_hist_add_entry(&iter->dccphrx_node, + packet); goto trim_history; } - if (iter->ccid3hrx_type == DCCP_PKT_DATA || - iter->ccid3hrx_type == DCCP_PKT_DATAACK) + if (dccp_rx_hist_entry_data_packet(iter)) num_later++; if (num_later == TFRC_RECV_NUM_LATE_LOSS) { - ccid3_rx_hist_entry_delete(packet); + dccp_rx_hist_entry_delete(ccid3_rx_hist, packet); ccid3_pr_debug("%s, sk=%p, packet(%llu) already lost!\n", dccp_role(sk), sk, seqno); return 1; @@ -1502,7 +1447,8 @@ static int ccid3_hc_rx_add_hist(struct sock *sk, struct ccid3_rx_hist_entry *pac } if (num_later < TFRC_RECV_NUM_LATE_LOSS) - list_add_tail(&packet->ccid3hrx_node, &hcrx->ccid3hcrx_hist); + dccp_rx_hist_add_entry(&hcrx->ccid3hcrx_hist, + packet); /* FIXME: else what? should we destroy the packet like above? 
*/ } } @@ -1512,12 +1458,12 @@ trim_history: num_later = TFRC_RECV_NUM_LATE_LOSS + 1; if (!list_empty(&hcrx->ccid3hcrx_loss_interval_hist)) { - list_for_each_entry_safe(entry, next, &hcrx->ccid3hcrx_hist, ccid3hrx_node) { + list_for_each_entry_safe(entry, next, &hcrx->ccid3hcrx_hist, + dccphrx_node) { if (num_later == 0) { - list_del_init(&entry->ccid3hrx_node); - ccid3_rx_hist_entry_delete(entry); - } else if (entry->ccid3hrx_type == DCCP_PKT_DATA || - entry->ccid3hrx_type == DCCP_PKT_DATAACK) + list_del_init(&entry->dccphrx_node); + dccp_rx_hist_entry_delete(ccid3_rx_hist, entry); + } else if (dccp_rx_hist_entry_data_packet(entry)) --num_later; } } else { @@ -1528,7 +1474,8 @@ trim_history: * We have no loss interval history so we need at least one * rtt:s of data packets to approximate rtt. */ - list_for_each_entry_safe(entry, next, &hcrx->ccid3hcrx_hist, ccid3hrx_node) { + list_for_each_entry_safe(entry, next, &hcrx->ccid3hcrx_hist, + dccphrx_node) { if (num_later == 0) { switch (step) { case 0: @@ -1540,10 +1487,11 @@ trim_history: step = 2; /* OK, find next data packet */ num_later = 1; - win_count = entry->ccid3hrx_win_count; + win_count = entry->dccphrx_win_count; break; case 2: - tmp = win_count - entry->ccid3hrx_win_count; + tmp = (win_count - + entry->dccphrx_win_count); if (tmp < 0) tmp += TFRC_WIN_COUNT_LIMIT; if (tmp > TFRC_WIN_COUNT_PER_RTT + 1) { @@ -1554,12 +1502,11 @@ trim_history: num_later = 1; break; case 3: - list_del_init(&entry->ccid3hrx_node); - ccid3_rx_hist_entry_delete(entry); + list_del_init(&entry->dccphrx_node); + dccp_rx_hist_entry_delete(ccid3_rx_hist, entry); break; } - } else if (entry->ccid3hrx_type == DCCP_PKT_DATA || - entry->ccid3hrx_type == DCCP_PKT_DATAACK) + } else if (dccp_rx_hist_entry_data_packet(entry)) --num_later; } } @@ -1571,7 +1518,7 @@ static void ccid3_hc_rx_send_feedback(struct sock *sk) { struct dccp_sock *dp = dccp_sk(sk); struct ccid3_hc_rx_sock *hcrx = dp->dccps_hc_rx_ccid_private; - struct 
ccid3_rx_hist_entry *entry, *packet; + struct dccp_rx_hist_entry *packet; ccid3_pr_debug("%s, sk=%p\n", dccp_role(sk), sk); @@ -1594,14 +1541,7 @@ static void ccid3_hc_rx_send_feedback(struct sock *sk) return; } - packet = NULL; - list_for_each_entry(entry, &hcrx->ccid3hcrx_hist, ccid3hrx_node) - if (entry->ccid3hrx_type == DCCP_PKT_DATA || - entry->ccid3hrx_type == DCCP_PKT_DATAACK) { - packet = entry; - break; - } - + packet = dccp_rx_hist_find_data_packet(&hcrx->ccid3hcrx_hist); if (packet == NULL) { printk(KERN_CRIT "%s: %s, sk=%p, no data packet in history!\n", __FUNCTION__, dccp_role(sk), sk); @@ -1610,12 +1550,12 @@ static void ccid3_hc_rx_send_feedback(struct sock *sk) } do_gettimeofday(&(hcrx->ccid3hcrx_tstamp_last_feedback)); - hcrx->ccid3hcrx_last_counter = packet->ccid3hrx_win_count; - hcrx->ccid3hcrx_seqno_last_counter = packet->ccid3hrx_seqno; + hcrx->ccid3hcrx_last_counter = packet->dccphrx_win_count; + hcrx->ccid3hcrx_seqno_last_counter = packet->dccphrx_seqno; hcrx->ccid3hcrx_bytes_recv = 0; /* Convert to multiples of 10us */ - hcrx->ccid3hcrx_elapsed_time = now_delta(packet->ccid3hrx_tstamp) / 10; + hcrx->ccid3hcrx_elapsed_time = now_delta(packet->dccphrx_tstamp) / 10; if (hcrx->ccid3hcrx_p == 0) hcrx->ccid3hcrx_pinv = ~0; else @@ -1686,7 +1626,7 @@ static u32 ccid3_hc_rx_calc_first_li(struct sock *sk) { struct dccp_sock *dp = dccp_sk(sk); struct ccid3_hc_rx_sock *hcrx = dp->dccps_hc_rx_ccid_private; - struct ccid3_rx_hist_entry *entry, *next, *tail = NULL; + struct dccp_rx_hist_entry *entry, *next, *tail = NULL; u32 rtt, delta, x_recv, fval, p, tmp2; struct timeval tstamp, tmp_tv; int interval = 0; @@ -1694,19 +1634,19 @@ static u32 ccid3_hc_rx_calc_first_li(struct sock *sk) int step = 0; u64 tmp1; - list_for_each_entry_safe(entry, next, &hcrx->ccid3hcrx_hist, ccid3hrx_node) { - if (entry->ccid3hrx_type == DCCP_PKT_DATA || - entry->ccid3hrx_type == DCCP_PKT_DATAACK) { + list_for_each_entry_safe(entry, next, &hcrx->ccid3hcrx_hist, + dccphrx_node) 
{ + if (dccp_rx_hist_entry_data_packet(entry)) { tail = entry; switch (step) { case 0: - tstamp = entry->ccid3hrx_tstamp; - win_count = entry->ccid3hrx_win_count; + tstamp = entry->dccphrx_tstamp; + win_count = entry->dccphrx_win_count; step = 1; break; case 1: - interval = win_count - entry->ccid3hrx_win_count; + interval = win_count - entry->dccphrx_win_count; if (interval < 0) interval += TFRC_WIN_COUNT_LIMIT; if (interval > 4) @@ -1728,7 +1668,7 @@ static u32 ccid3_hc_rx_calc_first_li(struct sock *sk) interval = 1; } found: - timeval_sub(tstamp,tail->ccid3hrx_tstamp,&tmp_tv); + timeval_sub(tstamp,tail->dccphrx_tstamp,&tmp_tv); rtt = (tmp_tv.tv_sec * USEC_IN_SEC + tmp_tv.tv_usec) * 4 / interval; ccid3_pr_debug("%s, sk=%p, approximated RTT to %uus\n", dccp_role(sk), sk, rtt); @@ -1797,34 +1737,33 @@ static void ccid3_hc_rx_detect_loss(struct sock *sk) { struct dccp_sock *dp = dccp_sk(sk); struct ccid3_hc_rx_sock *hcrx = dp->dccps_hc_rx_ccid_private; - struct ccid3_rx_hist_entry *entry, *a_next, *b_next, *packet; - struct ccid3_rx_hist_entry *a_loss = NULL; - struct ccid3_rx_hist_entry *b_loss = NULL; + struct dccp_rx_hist_entry *entry, *next, *packet; + struct dccp_rx_hist_entry *a_loss = NULL; + struct dccp_rx_hist_entry *b_loss = NULL; u64 seq_loss = DCCP_MAX_SEQNO + 1; u8 win_loss = 0; u8 num_later = TFRC_RECV_NUM_LATE_LOSS; - list_for_each_entry_safe(entry, b_next, &hcrx->ccid3hcrx_hist, ccid3hrx_node) { + list_for_each_entry_safe(entry, next, &hcrx->ccid3hcrx_hist, + dccphrx_node) { if (num_later == 0) { b_loss = entry; break; - } else if (entry->ccid3hrx_type == DCCP_PKT_DATA || - entry->ccid3hrx_type == DCCP_PKT_DATAACK) + } else if (dccp_rx_hist_entry_data_packet(entry)) --num_later; } if (b_loss == NULL) goto out_update_li; - a_next = b_next; num_later = 1; - list_for_each_entry_safe_continue(entry, a_next, &hcrx->ccid3hcrx_hist, ccid3hrx_node) { + list_for_each_entry_safe_continue(entry, next, &hcrx->ccid3hcrx_hist, + dccphrx_node) { if (num_later == 0) 
{ a_loss = entry; break; - } else if (entry->ccid3hrx_type == DCCP_PKT_DATA || - entry->ccid3hrx_type == DCCP_PKT_DATAACK) + } else if (dccp_rx_hist_entry_data_packet(entry)) --num_later; } @@ -1844,12 +1783,13 @@ static void ccid3_hc_rx_detect_loss(struct sock *sk) /* Locate a lost data packet */ entry = packet = b_loss; - list_for_each_entry_safe_continue(entry, b_next, &hcrx->ccid3hcrx_hist, ccid3hrx_node) { - u64 delta = dccp_delta_seqno(entry->ccid3hrx_seqno, packet->ccid3hrx_seqno); + list_for_each_entry_safe_continue(entry, next, &hcrx->ccid3hcrx_hist, + dccphrx_node) { + u64 delta = dccp_delta_seqno(entry->dccphrx_seqno, + packet->dccphrx_seqno); if (delta != 0) { - if (packet->ccid3hrx_type == DCCP_PKT_DATA || - packet->ccid3hrx_type == DCCP_PKT_DATAACK) + if (dccp_rx_hist_entry_data_packet(packet)) --delta; /* * FIXME: check this, probably this % usage is because @@ -1858,10 +1798,12 @@ static void ccid3_hc_rx_detect_loss(struct sock *sk) */ #if 0 if (delta % DCCP_NDP_LIMIT != - (packet->ccid3hrx_ndp - entry->ccid3hrx_ndp) % DCCP_NDP_LIMIT) + (packet->dccphrx_ndp - + entry->dccphrx_ndp) % DCCP_NDP_LIMIT) #endif - if (delta != packet->ccid3hrx_ndp - entry->ccid3hrx_ndp) { - seq_loss = entry->ccid3hrx_seqno; + if (delta != + packet->dccphrx_ndp - entry->dccphrx_ndp) { + seq_loss = entry->dccphrx_seqno; dccp_inc_seqno(&seq_loss); } } @@ -1871,7 +1813,7 @@ static void ccid3_hc_rx_detect_loss(struct sock *sk) } if (seq_loss != DCCP_MAX_SEQNO + 1) - win_loss = a_loss->ccid3hrx_win_count; + win_loss = a_loss->dccphrx_win_count; out_update_li: ccid3_hc_rx_update_li(sk, seq_loss, win_loss); @@ -1920,7 +1862,7 @@ static void ccid3_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb) { struct dccp_sock *dp = dccp_sk(sk); struct ccid3_hc_rx_sock *hcrx = dp->dccps_hc_rx_ccid_private; - struct ccid3_rx_hist_entry *packet; + struct dccp_rx_hist_entry *packet; struct timeval now; u8 win_count; u32 p_prev; @@ -1964,14 +1906,16 @@ static void 
ccid3_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb) return; } - packet = ccid3_rx_hist_entry_new(sk, skb, SLAB_ATOMIC); + packet = dccp_rx_hist_entry_new(ccid3_rx_hist, + dp->dccps_options_received.dccpor_ndp, + skb, SLAB_ATOMIC); if (packet == NULL) { ccid3_pr_debug("%s, sk=%p, Not enough mem to add rx packet to history (consider it lost)!", dccp_role(sk), sk); return; } - win_count = packet->ccid3hrx_win_count; + win_count = packet->dccphrx_win_count; ins = ccid3_hc_rx_add_hist(sk, packet); @@ -2060,7 +2004,7 @@ static void ccid3_hc_rx_exit(struct sock *sk) ccid3_hc_rx_set_state(sk, TFRC_RSTATE_TERM); /* Empty packet history */ - ccid3_rx_history_delete(&hcrx->ccid3hcrx_hist); + dccp_rx_hist_purge(ccid3_rx_hist, &hcrx->ccid3hcrx_hist); /* Empty loss interval history */ ccid3_loss_interval_history_delete(&hcrx->ccid3hcrx_loss_interval_hist); @@ -2093,41 +2037,38 @@ MODULE_PARM_DESC(ccid3_debug, "Enable debug messages"); static __init int ccid3_module_init(void) { - int rc = -ENOMEM; + int rc = -ENOBUFS; - ccid3_tx_hist_slab = kmem_cache_create("dccp_ccid3_tx_history", - sizeof(struct ccid3_tx_hist_entry), 0, - SLAB_HWCACHE_ALIGN, NULL, NULL); - if (ccid3_tx_hist_slab == NULL) + ccid3_rx_hist = dccp_rx_hist_new("ccid3"); + if (ccid3_rx_hist == NULL) goto out; - ccid3_rx_hist_slab = kmem_cache_create("dccp_ccid3_rx_history", - sizeof(struct ccid3_rx_hist_entry), 0, - SLAB_HWCACHE_ALIGN, NULL, NULL); - if (ccid3_rx_hist_slab == NULL) - goto out_free_tx_history; + ccid3_tx_hist = dccp_tx_hist_new("ccid3"); + if (ccid3_tx_hist == NULL) + goto out_free_rx; - ccid3_loss_interval_hist_slab = kmem_cache_create("dccp_ccid3_loss_interval_history", - sizeof(struct ccid3_loss_interval_hist_entry), 0, - SLAB_HWCACHE_ALIGN, NULL, NULL); + ccid3_loss_interval_hist_slab = kmem_cache_create("li_hist_ccid3", + sizeof(struct ccid3_loss_interval_hist_entry), + 0, SLAB_HWCACHE_ALIGN, + NULL, NULL); if (ccid3_loss_interval_hist_slab == NULL) - goto out_free_rx_history; + goto 
out_free_tx; rc = ccid_register(&ccid3); if (rc != 0) goto out_free_loss_interval_history; - out: return rc; + out_free_loss_interval_history: kmem_cache_destroy(ccid3_loss_interval_hist_slab); ccid3_loss_interval_hist_slab = NULL; -out_free_rx_history: - kmem_cache_destroy(ccid3_rx_hist_slab); - ccid3_rx_hist_slab = NULL; -out_free_tx_history: - kmem_cache_destroy(ccid3_tx_hist_slab); - ccid3_tx_hist_slab = NULL; +out_free_tx: + dccp_tx_hist_delete(ccid3_tx_hist); + ccid3_tx_hist = NULL; +out_free_rx: + dccp_rx_hist_delete(ccid3_rx_hist); + ccid3_rx_hist = NULL; goto out; } module_init(ccid3_module_init); @@ -2136,13 +2077,13 @@ static __exit void ccid3_module_exit(void) { ccid_unregister(&ccid3); - if (ccid3_tx_hist_slab != NULL) { - kmem_cache_destroy(ccid3_tx_hist_slab); - ccid3_tx_hist_slab = NULL; + if (ccid3_tx_hist != NULL) { + dccp_tx_hist_delete(ccid3_tx_hist); + ccid3_tx_hist = NULL; } - if (ccid3_rx_hist_slab != NULL) { - kmem_cache_destroy(ccid3_rx_hist_slab); - ccid3_rx_hist_slab = NULL; + if (ccid3_rx_hist != NULL) { + dccp_rx_hist_delete(ccid3_rx_hist); + ccid3_rx_hist = NULL; } if (ccid3_loss_interval_hist_slab != NULL) { kmem_cache_destroy(ccid3_loss_interval_hist_slab); diff --git a/net/dccp/ccids/ccid3.h b/net/dccp/ccids/ccid3.h index 5d6b623e64da..d2705fb74195 100644 --- a/net/dccp/ccids/ccid3.h +++ b/net/dccp/ccids/ccid3.h @@ -38,15 +38,6 @@ #include #include -#include - -struct ccid3_tx_hist_entry { - struct list_head ccid3htx_node; - u64 ccid3htx_seqno:48, - ccid3htx_win_count:8, - ccid3htx_sent:1; - struct timeval ccid3htx_tstamp; -}; struct ccid3_options_received { u64 ccid3or_seqno:48, @@ -102,15 +93,6 @@ struct ccid3_loss_interval_hist_entry { u32 ccid3lih_interval; }; -struct ccid3_rx_hist_entry { - struct list_head ccid3hrx_node; - u64 ccid3hrx_seqno:48, - ccid3hrx_win_count:4, - ccid3hrx_type:4; - u32 ccid3hrx_ndp; /* In fact it is from 8 to 24 bits */ - struct timeval ccid3hrx_tstamp; -}; - struct ccid3_hc_rx_sock { u64 
ccid3hcrx_seqno_last_counter:48, ccid3hcrx_state:8, diff --git a/net/dccp/packet_history.c b/net/dccp/packet_history.c new file mode 100644 index 000000000000..6b414898f0c4 --- /dev/null +++ b/net/dccp/packet_history.c @@ -0,0 +1,198 @@ +/* + * net/dccp/packet_history.c + * + * Copyright (c) 2005 The University of Waikato, Hamilton, New Zealand. + * + * An implementation of the DCCP protocol + * + * This code has been developed by the University of Waikato WAND + * research group. For further information please see http://www.wand.net.nz/ + * or e-mail Ian McDonald - iam4@cs.waikato.ac.nz + * + * This code also uses code from Lulea University, rereleased as GPL by its + * authors: + * Copyright (c) 2003 Nils-Erik Mattsson, Joacim Haggmark, Magnus Erixzon + * + * Changes to meet Linux coding standards, to make it meet latest ccid3 draft + * and to make it work as a loadable module in the DCCP stack written by + * Arnaldo Carvalho de Melo . + * + * Copyright (c) 2005 Arnaldo Carvalho de Melo + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */ + +#include +#include + +#include "packet_history.h" + +struct dccp_rx_hist *dccp_rx_hist_new(const char *name) +{ + struct dccp_rx_hist *hist = kmalloc(sizeof(*hist), GFP_ATOMIC); + static const char dccp_rx_hist_mask[] = "rx_hist_%s"; + char *slab_name; + + if (hist == NULL) + goto out; + + slab_name = kmalloc(strlen(name) + sizeof(dccp_rx_hist_mask) - 1, + GFP_ATOMIC); + if (slab_name == NULL) + goto out_free_hist; + + sprintf(slab_name, dccp_rx_hist_mask, name); + hist->dccprxh_slab = kmem_cache_create(slab_name, + sizeof(struct dccp_rx_hist_entry), + 0, SLAB_HWCACHE_ALIGN, + NULL, NULL); + if (hist->dccprxh_slab == NULL) + goto out_free_slab_name; +out: + return hist; +out_free_slab_name: + kfree(slab_name); +out_free_hist: + kfree(hist); + hist = NULL; + goto out; +} + +EXPORT_SYMBOL_GPL(dccp_rx_hist_new); + +void dccp_rx_hist_delete(struct dccp_rx_hist *hist) +{ + const char* name = kmem_cache_name(hist->dccprxh_slab); + + kmem_cache_destroy(hist->dccprxh_slab); + kfree(name); + kfree(hist); +} + +EXPORT_SYMBOL_GPL(dccp_rx_hist_delete); + +void dccp_rx_hist_purge(struct dccp_rx_hist *hist, struct list_head *list) +{ + struct dccp_rx_hist_entry *entry, *next; + + list_for_each_entry_safe(entry, next, list, dccphrx_node) { + list_del_init(&entry->dccphrx_node); + kmem_cache_free(hist->dccprxh_slab, entry); + } +} + +EXPORT_SYMBOL_GPL(dccp_rx_hist_purge); + +struct dccp_rx_hist_entry * + dccp_rx_hist_find_data_packet(const struct list_head *list) +{ + struct dccp_rx_hist_entry *entry, *packet = NULL; + + list_for_each_entry(entry, list, dccphrx_node) + if (entry->dccphrx_type == DCCP_PKT_DATA || + entry->dccphrx_type == DCCP_PKT_DATAACK) { + packet = entry; + break; + } + + return packet; +} + +EXPORT_SYMBOL_GPL(dccp_rx_hist_find_data_packet); + +struct dccp_tx_hist *dccp_tx_hist_new(const char *name) +{ + struct dccp_tx_hist *hist = kmalloc(sizeof(*hist), GFP_ATOMIC); + static const char dccp_tx_hist_mask[] = "tx_hist_%s"; + char *slab_name; + + if (hist 
== NULL) + goto out; + + slab_name = kmalloc(strlen(name) + sizeof(dccp_tx_hist_mask) - 1, + GFP_ATOMIC); + if (slab_name == NULL) + goto out_free_hist; + + sprintf(slab_name, dccp_tx_hist_mask, name); + hist->dccptxh_slab = kmem_cache_create(slab_name, + sizeof(struct dccp_tx_hist_entry), + 0, SLAB_HWCACHE_ALIGN, + NULL, NULL); + if (hist->dccptxh_slab == NULL) + goto out_free_slab_name; +out: + return hist; +out_free_slab_name: + kfree(slab_name); +out_free_hist: + kfree(hist); + hist = NULL; + goto out; +} + +EXPORT_SYMBOL_GPL(dccp_tx_hist_new); + +void dccp_tx_hist_delete(struct dccp_tx_hist *hist) +{ + const char* name = kmem_cache_name(hist->dccptxh_slab); + + kmem_cache_destroy(hist->dccptxh_slab); + kfree(name); + kfree(hist); +} + +EXPORT_SYMBOL_GPL(dccp_tx_hist_delete); + +struct dccp_tx_hist_entry *dccp_tx_hist_find_entry(const struct list_head *list, + const u64 seq) +{ + struct dccp_tx_hist_entry *packet = NULL, *entry; + + list_for_each_entry(entry, list, dccphtx_node) + if (entry->dccphtx_seqno == seq) { + packet = entry; + break; + } + + return packet; +} + +EXPORT_SYMBOL_GPL(dccp_tx_hist_find_entry); + +void dccp_tx_hist_purge_older(struct dccp_tx_hist *hist, struct list_head *list, + struct dccp_tx_hist_entry *packet) +{ + struct dccp_tx_hist_entry *next; + + list_for_each_entry_safe_continue(packet, next, list, dccphtx_node) { + list_del_init(&packet->dccphtx_node); + dccp_tx_hist_entry_delete(hist, packet); + } +} + +EXPORT_SYMBOL_GPL(dccp_tx_hist_purge_older); + +void dccp_tx_hist_purge(struct dccp_tx_hist *hist, struct list_head *list) +{ + struct dccp_tx_hist_entry *entry, *next; + + list_for_each_entry_safe(entry, next, list, dccphtx_node) { + list_del_init(&entry->dccphtx_node); + dccp_tx_hist_entry_delete(hist, entry); + } +} + +EXPORT_SYMBOL_GPL(dccp_tx_hist_purge); diff --git a/net/dccp/packet_history.h b/net/dccp/packet_history.h new file mode 100644 index 000000000000..565dc96506e9 --- /dev/null +++ b/net/dccp/packet_history.h @@ -0,0 
+1,182 @@ +/* + * net/dccp/packet_history.h + * + * Copyright (c) 2005 The University of Waikato, Hamilton, New Zealand. + * + * An implementation of the DCCP protocol + * + * This code has been developed by the University of Waikato WAND + * research group. For further information please see http://www.wand.net.nz/ + * or e-mail Ian McDonald - iam4@cs.waikato.ac.nz + * + * This code also uses code from Lulea University, rereleased as GPL by its + * authors: + * Copyright (c) 2003 Nils-Erik Mattsson, Joacim Haggmark, Magnus Erixzon + * + * Changes to meet Linux coding standards, to make it meet latest ccid3 draft + * and to make it work as a loadable module in the DCCP stack written by + * Arnaldo Carvalho de Melo . + * + * Copyright (c) 2005 Arnaldo Carvalho de Melo + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
+ */ + +#ifndef _DCCP_PKT_HIST_ +#define _DCCP_PKT_HIST_ + +#include +#include +#include +#include + +#include "dccp.h" + +struct dccp_tx_hist_entry { + struct list_head dccphtx_node; + u64 dccphtx_seqno:48, + dccphtx_win_count:8, + dccphtx_sent:1; + struct timeval dccphtx_tstamp; +}; + +struct dccp_rx_hist_entry { + struct list_head dccphrx_node; + u64 dccphrx_seqno:48, + dccphrx_win_count:4, + dccphrx_type:4; + u32 dccphrx_ndp; /* In fact it is from 8 to 24 bits */ + struct timeval dccphrx_tstamp; +}; + +struct dccp_tx_hist { + kmem_cache_t *dccptxh_slab; +}; + +extern struct dccp_tx_hist *dccp_tx_hist_new(const char *name); +extern void dccp_tx_hist_delete(struct dccp_tx_hist *hist); + +struct dccp_rx_hist { + kmem_cache_t *dccprxh_slab; +}; + +extern struct dccp_rx_hist *dccp_rx_hist_new(const char *name); +extern void dccp_rx_hist_delete(struct dccp_rx_hist *hist); +extern struct dccp_rx_hist_entry * + dccp_rx_hist_find_data_packet(const struct list_head *list); + +static inline struct dccp_tx_hist_entry * + dccp_tx_hist_entry_new(struct dccp_tx_hist *hist, + const int prio) +{ + struct dccp_tx_hist_entry *entry = kmem_cache_alloc(hist->dccptxh_slab, + prio); + + if (entry != NULL) + entry->dccphtx_sent = 0; + + return entry; +} + +static inline void dccp_tx_hist_entry_delete(struct dccp_tx_hist *hist, + struct dccp_tx_hist_entry *entry) +{ + if (entry != NULL) + kmem_cache_free(hist->dccptxh_slab, entry); +} + +extern struct dccp_tx_hist_entry * + dccp_tx_hist_find_entry(const struct list_head *list, + const u64 seq); + +static inline void dccp_tx_hist_add_entry(struct list_head *list, + struct dccp_tx_hist_entry *entry) +{ + list_add(&entry->dccphtx_node, list); +} + +extern void dccp_tx_hist_purge_older(struct dccp_tx_hist *hist, + struct list_head *list, + struct dccp_tx_hist_entry *next); + +extern void dccp_tx_hist_purge(struct dccp_tx_hist *hist, + struct list_head *list); + +static inline struct dccp_tx_hist_entry *dccp_tx_hist_head(struct list_head 
*list) +{ + struct dccp_tx_hist_entry *head = NULL; + + if (!list_empty(list)) + head = list_entry(list->next, struct dccp_tx_hist_entry, + dccphtx_node); + return head; +} + +static inline struct dccp_rx_hist_entry * + dccp_rx_hist_entry_new(struct dccp_rx_hist *hist, + const u32 ndp, + const struct sk_buff *skb, + const int prio) +{ + struct dccp_rx_hist_entry *entry = kmem_cache_alloc(hist->dccprxh_slab, + prio); + + if (entry != NULL) { + const struct dccp_hdr *dh = dccp_hdr(skb); + + entry->dccphrx_seqno = DCCP_SKB_CB(skb)->dccpd_seq; + entry->dccphrx_win_count = dh->dccph_ccval; + entry->dccphrx_type = dh->dccph_type; + entry->dccphrx_ndp = ndp; + do_gettimeofday(&(entry->dccphrx_tstamp)); + } + + return entry; +} + +static inline void dccp_rx_hist_entry_delete(struct dccp_rx_hist *hist, + struct dccp_rx_hist_entry *entry) +{ + if (entry != NULL) + kmem_cache_free(hist->dccprxh_slab, entry); +} + +extern void dccp_rx_hist_purge(struct dccp_rx_hist *hist, + struct list_head *list); + +static inline void dccp_rx_hist_add_entry(struct list_head *list, + struct dccp_rx_hist_entry *entry) +{ + list_add(&entry->dccphrx_node, list); +} + +static inline struct dccp_rx_hist_entry *dccp_rx_hist_head(struct list_head *list) +{ + struct dccp_rx_hist_entry *head = NULL; + + if (!list_empty(list)) + head = list_entry(list->next, struct dccp_rx_hist_entry, + dccphrx_node); + return head; +} + +static inline int + dccp_rx_hist_entry_data_packet(const struct dccp_rx_hist_entry *entry) +{ + return entry->dccphrx_type == DCCP_PKT_DATA || + entry->dccphrx_type == DCCP_PKT_DATAACK; +} + +#endif /* _DCCP_PKT_HIST_ */ From cef07fd6029c20f95571d09cefce45ee3276a920 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 10 Aug 2005 13:29:27 -0300 Subject: [PATCH 374/584] [CCID3]: Ditch USEC_IN_SEC as time.h has USEC_PER_SEC That is equivalent, no need to have a private one. 
Signed-off-by: Arnaldo Carvalho de Melo --- net/dccp/ccids/ccid3.c | 35 +++++++++++++++++++---------------- 1 file changed, 19 insertions(+), 16 deletions(-) diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c index 15c25f622000..80f12c990c00 100644 --- a/net/dccp/ccids/ccid3.c +++ b/net/dccp/ccids/ccid3.c @@ -55,12 +55,10 @@ extern int ccid3_debug; #define TFRC_STD_PACKET_SIZE 256 #define TFRC_MAX_PACKET_SIZE 65535 -#define USEC_IN_SEC 1000000 - -#define TFRC_INITIAL_TIMEOUT (2 * USEC_IN_SEC) +#define TFRC_INITIAL_TIMEOUT (2 * USEC_PER_SEC) /* two seconds as per CCID3 spec 11 */ -#define TFRC_OPSYS_HALF_TIME_GRAN (USEC_IN_SEC / (2 * HZ)) +#define TFRC_OPSYS_HALF_TIME_GRAN (USEC_PER_SEC / (2 * HZ)) /* above is in usecs - half the scheduling granularity as per RFC3448 4.6 */ #define TFRC_WIN_COUNT_PER_RTT 4 @@ -155,20 +153,23 @@ static inline void ccid3_hc_tx_set_state(struct sock *sk, enum ccid3_hc_tx_state hctx->ccid3hctx_state = state; } -static void timeval_sub(struct timeval large, struct timeval small, struct timeval *result) { - +static void timeval_sub(struct timeval large, struct timeval small, + struct timeval *result) +{ result->tv_sec = large.tv_sec-small.tv_sec; if (large.tv_usec < small.tv_usec) { (result->tv_sec)--; - result->tv_usec = USEC_IN_SEC+large.tv_usec-small.tv_usec; + result->tv_usec = USEC_PER_SEC + + large.tv_usec - small.tv_usec; } else result->tv_usec = large.tv_usec-small.tv_usec; } -static inline void timeval_fix(struct timeval *tv) { - if (tv->tv_usec >= USEC_IN_SEC) { +static inline void timeval_fix(struct timeval *tv) +{ + if (tv->tv_usec >= USEC_PER_SEC) { tv->tv_sec++; - tv->tv_usec -= USEC_IN_SEC; + tv->tv_usec -= USEC_PER_SEC; } } @@ -1185,7 +1186,8 @@ static void ccid3_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb) r_sample); /* Update timeout interval */ - inet_csk(sk)->icsk_rto = max_t(u32, 4 * hctx->ccid3hctx_rtt, USEC_IN_SEC); + inet_csk(sk)->icsk_rto = max_t(u32, 4 * hctx->ccid3hctx_rtt, + USEC_PER_SEC); 
/* Update receive rate */ hctx->ccid3hctx_x_recv = x_recv; /* x_recv in bytes per second */ @@ -1210,7 +1212,7 @@ static void ccid3_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb) /* Update next send time */ if (hctx->ccid3hctx_t_ipi > (hctx->ccid3hctx_t_nom).tv_usec) { - (hctx->ccid3hctx_t_nom).tv_usec += USEC_IN_SEC; + hctx->ccid3hctx_t_nom.tv_usec += USEC_PER_SEC; (hctx->ccid3hctx_t_nom).tv_sec--; } /* FIXME - if no feedback then t_ipi can go > 1 second */ @@ -1344,7 +1346,7 @@ static int ccid3_hc_tx_init(struct sock *sk) hctx->ccid3hctx_x = hctx->ccid3hctx_s; /* set transmission rate to 1 packet per second */ hctx->ccid3hctx_rtt = 4; /* See ccid3_hc_tx_packet_sent win_count calculatation */ - inet_csk(sk)->icsk_rto = USEC_IN_SEC; + inet_csk(sk)->icsk_rto = USEC_PER_SEC; hctx->ccid3hctx_state = TFRC_SSTATE_NO_SENT; INIT_LIST_HEAD(&hctx->ccid3hctx_hist); init_timer(&hctx->ccid3hctx_no_feedback_timer); @@ -1531,7 +1533,8 @@ static void ccid3_hc_rx_send_feedback(struct sock *sk) if (delta == 0) delta = 1; /* to prevent divide by zero */ - hcrx->ccid3hcrx_x_recv = (hcrx->ccid3hcrx_bytes_recv * USEC_IN_SEC) / delta; + hcrx->ccid3hcrx_x_recv = (hcrx->ccid3hcrx_bytes_recv * + USEC_PER_SEC) / delta; } break; default: @@ -1669,7 +1672,7 @@ static u32 ccid3_hc_rx_calc_first_li(struct sock *sk) } found: timeval_sub(tstamp,tail->dccphrx_tstamp,&tmp_tv); - rtt = (tmp_tv.tv_sec * USEC_IN_SEC + tmp_tv.tv_usec) * 4 / interval; + rtt = (tmp_tv.tv_sec * USEC_PER_SEC + tmp_tv.tv_usec) * 4 / interval; ccid3_pr_debug("%s, sk=%p, approximated RTT to %uus\n", dccp_role(sk), sk, rtt); if (rtt == 0) @@ -1679,7 +1682,7 @@ found: if (delta == 0) delta = 1; - x_recv = (hcrx->ccid3hcrx_bytes_recv * USEC_IN_SEC) / delta; + x_recv = (hcrx->ccid3hcrx_bytes_recv * USEC_PER_SEC) / delta; tmp1 = (u64)x_recv * (u64)rtt; do_div(tmp1,10000000); From 07dc3f0718d2c88c3094a0aadeeb4744effc5439 Mon Sep 17 00:00:00 2001 From: Benjamin LaHaise Date: Wed, 10 Aug 2005 14:16:04 -0700 Subject: [PATCH 
375/584] [NET]: Make use of ->private_data in sockfd_lookup Please consider the patch below which makes use of file->private_data to store the pointer to the socket, which avoids touching several unused cachelines in the dentry and inode in sockfd_lookup. Signed-off-by: Benjamin LaHaise Signed-off-by: David S. Miller --- net/socket.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/net/socket.c b/net/socket.c index 6f2a17881972..5f76ab8a1594 100644 --- a/net/socket.c +++ b/net/socket.c @@ -404,6 +404,7 @@ int sock_map_fd(struct socket *sock) file->f_mode = FMODE_READ | FMODE_WRITE; file->f_flags = O_RDWR; file->f_pos = 0; + file->private_data = sock; fd_install(fd, file); } @@ -436,6 +437,9 @@ struct socket *sockfd_lookup(int fd, int *err) return NULL; } + if (file->f_op == &socket_file_ops) + return file->private_data; /* set in sock_map_fd */ + inode = file->f_dentry->d_inode; if (!S_ISSOCK(inode->i_mode)) { *err = -ENOTSOCK; From 14ab9b867aa6c107b4886bdc5b23f277ab10792e Mon Sep 17 00:00:00 2001 From: Peter Hagervall Date: Wed, 10 Aug 2005 14:18:16 -0700 Subject: [PATCH 376/584] [BNX2]: Possible sparse fixes, take two This patch contains the following possible cleanups/fixes: - use C99 struct initializers - make a few arrays and structs static - remove a few uses of literal 0 as NULL pointer - use convenience function instead of cast+dereference in bnx2_ioctl() - remove superfluous casts to u8 * in calls to readl/writel Signed-off-by: Peter Hagervall Acked-by: Michael Chan Signed-off-by: David S. 
Miller --- drivers/net/bnx2.c | 39 +++++++++++++++++++-------------------- 1 file changed, 19 insertions(+), 20 deletions(-) diff --git a/drivers/net/bnx2.c b/drivers/net/bnx2.c index 8acc655ec1e8..3a9d6a8b90a2 100644 --- a/drivers/net/bnx2.c +++ b/drivers/net/bnx2.c @@ -52,7 +52,6 @@ static struct { { "HP NC370i Multifunction Gigabit Server Adapter" }, { "Broadcom NetXtreme II BCM5706 1000Base-SX" }, { "HP NC370F Multifunction Gigabit Server Adapter" }, - { 0 }, }; static struct pci_device_id bnx2_pci_tbl[] = { @@ -3507,11 +3506,11 @@ bnx2_test_registers(struct bnx2 *bp) rw_mask = reg_tbl[i].rw_mask; ro_mask = reg_tbl[i].ro_mask; - save_val = readl((u8 *) bp->regview + offset); + save_val = readl(bp->regview + offset); - writel(0, (u8 *) bp->regview + offset); + writel(0, bp->regview + offset); - val = readl((u8 *) bp->regview + offset); + val = readl(bp->regview + offset); if ((val & rw_mask) != 0) { goto reg_test_err; } @@ -3520,9 +3519,9 @@ bnx2_test_registers(struct bnx2 *bp) goto reg_test_err; } - writel(0xffffffff, (u8 *) bp->regview + offset); + writel(0xffffffff, bp->regview + offset); - val = readl((u8 *) bp->regview + offset); + val = readl(bp->regview + offset); if ((val & rw_mask) != rw_mask) { goto reg_test_err; } @@ -3531,11 +3530,11 @@ bnx2_test_registers(struct bnx2 *bp) goto reg_test_err; } - writel(save_val, (u8 *) bp->regview + offset); + writel(save_val, bp->regview + offset); continue; reg_test_err: - writel(save_val, (u8 *) bp->regview + offset); + writel(save_val, bp->regview + offset); ret = -ENODEV; break; } @@ -4698,7 +4697,7 @@ bnx2_set_rx_csum(struct net_device *dev, u32 data) #define BNX2_NUM_STATS 45 -struct { +static struct { char string[ETH_GSTRING_LEN]; } bnx2_stats_str_arr[BNX2_NUM_STATS] = { { "rx_bytes" }, @@ -4750,7 +4749,7 @@ struct { #define STATS_OFFSET32(offset_name) (offsetof(struct statistics_block, offset_name) / 4) -unsigned long bnx2_stats_offset_arr[BNX2_NUM_STATS] = { +static unsigned long 
bnx2_stats_offset_arr[BNX2_NUM_STATS] = { STATS_OFFSET32(stat_IfHCInOctets_hi), STATS_OFFSET32(stat_IfHCInBadOctets_hi), STATS_OFFSET32(stat_IfHCOutOctets_hi), @@ -4801,7 +4800,7 @@ unsigned long bnx2_stats_offset_arr[BNX2_NUM_STATS] = { /* stat_IfHCInBadOctets and stat_Dot3StatsCarrierSenseErrors are * skipped because of errata. */ -u8 bnx2_5706_stats_len_arr[BNX2_NUM_STATS] = { +static u8 bnx2_5706_stats_len_arr[BNX2_NUM_STATS] = { 8,0,8,8,8,8,8,8,8,8, 4,0,4,4,4,4,4,4,4,4, 4,4,4,4,4,4,4,4,4,4, @@ -4811,7 +4810,7 @@ u8 bnx2_5706_stats_len_arr[BNX2_NUM_STATS] = { #define BNX2_NUM_TESTS 6 -struct { +static struct { char string[ETH_GSTRING_LEN]; } bnx2_tests_str_arr[BNX2_NUM_TESTS] = { { "register_test (offline)" }, @@ -4910,7 +4909,7 @@ bnx2_get_ethtool_stats(struct net_device *dev, struct bnx2 *bp = dev->priv; int i; u32 *hw_stats = (u32 *) bp->stats_blk; - u8 *stats_len_arr = 0; + u8 *stats_len_arr = NULL; if (hw_stats == NULL) { memset(buf, 0, sizeof(u64) * BNX2_NUM_STATS); @@ -5012,7 +5011,7 @@ static struct ethtool_ops bnx2_ethtool_ops = { static int bnx2_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) { - struct mii_ioctl_data *data = (struct mii_ioctl_data *)&ifr->ifr_data; + struct mii_ioctl_data *data = if_mii(ifr); struct bnx2 *bp = dev->priv; int err; @@ -5505,12 +5504,12 @@ bnx2_resume(struct pci_dev *pdev) } static struct pci_driver bnx2_pci_driver = { - name: DRV_MODULE_NAME, - id_table: bnx2_pci_tbl, - probe: bnx2_init_one, - remove: __devexit_p(bnx2_remove_one), - suspend: bnx2_suspend, - resume: bnx2_resume, + .name = DRV_MODULE_NAME, + .id_table = bnx2_pci_tbl, + .probe = bnx2_init_one, + .remove = __devexit_p(bnx2_remove_one), + .suspend = bnx2_suspend, + .resume = bnx2_resume, }; static int __init bnx2_init(void) From e41aac41e3856c87fee52c5b8bca71705d15449b Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 11 Aug 2005 14:37:16 -0700 Subject: [PATCH 377/584] [TCPDIAG]: Introduce CONFIG_IP_TCPDIAG_DCCP Similar to 
CONFIG_IP_TCPDIAG_IPV6 Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. Miller --- net/ipv4/Kconfig | 9 ++++++--- net/ipv4/tcp_diag.c | 8 ++++---- 2 files changed, 10 insertions(+), 7 deletions(-) diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig index 0b3d9f1d8069..c844954c1ad5 100644 --- a/net/ipv4/Kconfig +++ b/net/ipv4/Kconfig @@ -419,15 +419,18 @@ config IP_TCPDIAG ---help--- Support for TCP socket monitoring interface used by native Linux tools such as ss. ss is included in iproute2, currently downloadable - at . If you want IPv6 support - and have selected IPv6 as a module, you need to build this as a - module too. + at . If you want IPv6 or DCCP + support and have selected IPv6 or DCCP as a module, you need to build + this as a module too. If unsure, say Y. config IP_TCPDIAG_IPV6 def_bool (IP_TCPDIAG=y && IPV6=y) || (IP_TCPDIAG=m && IPV6) +config IP_TCPDIAG_DCCP + def_bool (IP_TCPDIAG=y && IP_DCCP=y) || (IP_TCPDIAG=m && IP_DCCP) + config TCP_CONG_ADVANCED bool "TCP: advanced congestion control" ---help--- diff --git a/net/ipv4/tcp_diag.c b/net/ipv4/tcp_diag.c index f5fc84aaa9b4..8bf495c698f8 100644 --- a/net/ipv4/tcp_diag.c +++ b/net/ipv4/tcp_diag.c @@ -45,7 +45,7 @@ static struct sock *tcpnl; #define TCPDIAG_PUT(skb, attrtype, attrlen) \ RTA_DATA(__RTA_PUT(skb, attrtype, attrlen)) -#if defined(CONFIG_IP_DCCP) || defined(CONFIG_IP_DCCP_MODULE) +#ifdef CONFIG_IP_TCPDIAG_DCCP extern struct inet_hashinfo dccp_hashinfo; #endif @@ -216,7 +216,7 @@ static int tcpdiag_get_exact(struct sk_buff *in_skb, const struct nlmsghdr *nlh) struct tcpdiagreq *req = NLMSG_DATA(nlh); struct sk_buff *rep; struct inet_hashinfo *hashinfo = &tcp_hashinfo; -#if defined(CONFIG_IP_DCCP) || defined(CONFIG_IP_DCCP_MODULE) +#ifdef CONFIG_IP_TCPDIAG_DCCP if (nlh->nlmsg_type == DCCPDIAG_GETSOCK) hashinfo = &dccp_hashinfo; #endif @@ -614,7 +614,7 @@ static int tcpdiag_dump(struct sk_buff *skb, struct netlink_callback *cb) s_i = cb->args[1]; s_num = num = cb->args[2]; hashinfo 
= &tcp_hashinfo; -#if defined(CONFIG_IP_DCCP) || defined(CONFIG_IP_DCCP_MODULE) +#ifdef CONFIG_IP_TCPDIAG_DCCP if (cb->nlh->nlmsg_type == DCCPDIAG_GETSOCK) hashinfo = &dccp_hashinfo; #endif @@ -752,7 +752,7 @@ tcpdiag_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) return 0; if (nlh->nlmsg_type != TCPDIAG_GETSOCK -#if defined(CONFIG_IP_DCCP) || defined(CONFIG_IP_DCCP_MODULE) +#ifdef CONFIG_IP_TCPDIAG_DCCP && nlh->nlmsg_type != DCCPDIAG_GETSOCK #endif ) From 622439270c74f3ad4f69d1417aca4bb3b79514f4 Mon Sep 17 00:00:00 2001 From: Harald Welte Date: Thu, 11 Aug 2005 15:30:45 -0700 Subject: [PATCH 378/584] [NETFILTER]: Fix compilation when no PROC_FS enabled Signed-off-by: Harald Welte Signed-off-by: David S. Miller --- net/netfilter/nf_log.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/netfilter/nf_log.c b/net/netfilter/nf_log.c index 573e76a770d9..3e76bd0824a2 100644 --- a/net/netfilter/nf_log.c +++ b/net/netfilter/nf_log.c @@ -167,12 +167,12 @@ int __init netfilter_log_init(void) { #ifdef CONFIG_PROC_FS struct proc_dir_entry *pde; + pde = create_proc_entry("nf_log", S_IRUGO, proc_net_netfilter); -#endif if (!pde) return -1; pde->proc_fops = &nflog_file_ops; - +#endif return 0; } From 5917ed961def82a4dba9198d11a75f79d115a8cb Mon Sep 17 00:00:00 2001 From: Harald Welte Date: Thu, 11 Aug 2005 15:31:15 -0700 Subject: [PATCH 379/584] [NETFILTER]: Fix NF_QUEUE_NR() macro I obviously wanted to use bitwise-or, not logical or. Signed-off-by: Harald Welte Signed-off-by: David S. 
Miller --- include/linux/netfilter.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h index ac3c61411d4b..189ba67ba603 100644 --- a/include/linux/netfilter.h +++ b/include/linux/netfilter.h @@ -29,7 +29,7 @@ #define NF_VERDICT_QMASK 0xffff0000 #define NF_VERDICT_QBITS 16 -#define NF_QUEUE_NR(x) ((x << NF_VERDICT_QBITS) & NF_VERDICT_QMASK || NF_QUEUE) +#define NF_QUEUE_NR(x) ((x << NF_VERDICT_QBITS) & NF_VERDICT_QMASK | NF_QUEUE) /* only for userspace compatibility */ #ifndef __KERNEL__ From 0a242efc4fb859b2da506cdf8f3366231602e4ff Mon Sep 17 00:00:00 2001 From: Denis Vlasenko Date: Thu, 11 Aug 2005 15:32:53 -0700 Subject: [PATCH 380/584] [NET]: Deinline netif_carrier_{on,off}(). # grep -r 'netif_carrier_o[nf]' linux-2.6.12 | wc -l 246 # size vmlinux.org vmlinux.carrier text data bss dec hex filename 4339634 1054414 259296 5653344 564360 vmlinux.org 4337710 1054414 259296 5651420 563bdc vmlinux.carrier And this ain't an allyesconfig kernel! Signed-off-by: David S. 
Miller --- include/linux/netdevice.h | 14 ++------------ net/sched/sch_generic.c | 16 ++++++++++++++++ 2 files changed, 18 insertions(+), 12 deletions(-) diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 296cf93a65e0..d8e52edfd526 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -699,19 +699,9 @@ static inline int netif_carrier_ok(const struct net_device *dev) extern void __netdev_watchdog_up(struct net_device *dev); -static inline void netif_carrier_on(struct net_device *dev) -{ - if (test_and_clear_bit(__LINK_STATE_NOCARRIER, &dev->state)) - linkwatch_fire_event(dev); - if (netif_running(dev)) - __netdev_watchdog_up(dev); -} +extern void netif_carrier_on(struct net_device *dev); -static inline void netif_carrier_off(struct net_device *dev) -{ - if (!test_and_set_bit(__LINK_STATE_NOCARRIER, &dev->state)) - linkwatch_fire_event(dev); -} +extern void netif_carrier_off(struct net_device *dev); /* Hot-plugging. */ static inline int netif_device_present(struct net_device *dev) diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index 0d066c965342..99ceb91f0150 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -238,6 +238,20 @@ static void dev_watchdog_down(struct net_device *dev) spin_unlock_bh(&dev->xmit_lock); } +void netif_carrier_on(struct net_device *dev) +{ + if (test_and_clear_bit(__LINK_STATE_NOCARRIER, &dev->state)) + linkwatch_fire_event(dev); + if (netif_running(dev)) + __netdev_watchdog_up(dev); +} + +void netif_carrier_off(struct net_device *dev) +{ + if (!test_and_set_bit(__LINK_STATE_NOCARRIER, &dev->state)) + linkwatch_fire_event(dev); +} + /* "NOOP" scheduler: the best scheduler, recommended for all interfaces under all circumstances. It is difficult to invent anything faster or cheaper. 
@@ -600,6 +614,8 @@ void dev_shutdown(struct net_device *dev) } EXPORT_SYMBOL(__netdev_watchdog_up); +EXPORT_SYMBOL(netif_carrier_on); +EXPORT_SYMBOL(netif_carrier_off); EXPORT_SYMBOL(noop_qdisc); EXPORT_SYMBOL(noop_qdisc_ops); EXPORT_SYMBOL(qdisc_create_dflt); From b766b305d3f2d8be173e5d9853534ea1afdbabba Mon Sep 17 00:00:00 2001 From: Harald Welte Date: Fri, 12 Aug 2005 11:36:44 -0700 Subject: [PATCH 381/584] [NETFILTER]: Fix gcc-3.4.x warning about implicit operator precedence Fix gcc-3.4.x warning about implicit operator precedence in NF_QUEUE_NR() Signed-off-by: Harald Welte Signed-off-by: David S. Miller --- include/linux/netfilter.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h index 189ba67ba603..be365e70ee99 100644 --- a/include/linux/netfilter.h +++ b/include/linux/netfilter.h @@ -29,7 +29,7 @@ #define NF_VERDICT_QMASK 0xffff0000 #define NF_VERDICT_QBITS 16 -#define NF_QUEUE_NR(x) ((x << NF_VERDICT_QBITS) & NF_VERDICT_QMASK | NF_QUEUE) +#define NF_QUEUE_NR(x) (((x << NF_VERDICT_QBITS) & NF_VERDICT_QMASK) | NF_QUEUE) /* only for userspace compatibility */ #ifndef __KERNEL__ From 505cbfc577f3fa778005e2800b869eca25727d5f Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 12 Aug 2005 09:19:38 -0300 Subject: [PATCH 382/584] [IPV6]: Generalise the tcp_v6_lookup routines In the same way as was done with the v4 counterparts, this will be moved to inet6_hashtables.c. Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S.
Miller --- include/linux/ipv6.h | 5 ++ include/net/inet6_hashtables.h | 26 ++++++ net/ipv4/Kconfig | 3 - net/ipv4/tcp_diag.c | 40 ++++------ net/ipv6/tcp_ipv6.c | 139 ++++++++++++++++++--------------- 5 files changed, 122 insertions(+), 91 deletions(-) create mode 100644 include/net/inet6_hashtables.h diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index 777339b68464..3c7dbc6a0a70 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -193,6 +193,11 @@ struct inet6_skb_parm { #define IP6CB(skb) ((struct inet6_skb_parm*)((skb)->cb)) +static inline int inet6_iif(const struct sk_buff *skb) +{ + return IP6CB(skb)->iif; +} + struct tcp6_request_sock { struct tcp_request_sock req; struct in6_addr loc_addr; diff --git a/include/net/inet6_hashtables.h b/include/net/inet6_hashtables.h new file mode 100644 index 000000000000..297c2b16967a --- /dev/null +++ b/include/net/inet6_hashtables.h @@ -0,0 +1,26 @@ +/* + * INET An implementation of the TCP/IP protocol suite for the LINUX + * operating system. INET is implemented using the BSD Socket + * interface as the means of communication with the user level. + * + * Authors: Lotsa people, from code originally in tcp + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#ifndef _INET6_HASHTABLES_H +#define _INET6_HASHTABLES_H + +#include + +struct in6_addr; +struct inet_hashinfo; + +extern struct sock *inet6_lookup(struct inet_hashinfo *hashinfo, + const struct in6_addr *saddr, const u16 sport, + const struct in6_addr *daddr, const u16 dport, + const int dif); +#endif /* _INET6_HASHTABLES_H */ diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig index c844954c1ad5..a79b4f9c10c5 100644 --- a/net/ipv4/Kconfig +++ b/net/ipv4/Kconfig @@ -425,9 +425,6 @@ config IP_TCPDIAG If unsure, say Y. 
-config IP_TCPDIAG_IPV6 - def_bool (IP_TCPDIAG=y && IPV6=y) || (IP_TCPDIAG=m && IPV6) - config IP_TCPDIAG_DCCP def_bool (IP_TCPDIAG=y && IP_DCCP=y) || (IP_TCPDIAG=m && IP_DCCP) diff --git a/net/ipv4/tcp_diag.c b/net/ipv4/tcp_diag.c index 8bf495c698f8..b812191b2f5c 100644 --- a/net/ipv4/tcp_diag.c +++ b/net/ipv4/tcp_diag.c @@ -24,6 +24,10 @@ #include #include #include +#include +#include +#include +#include #include #include @@ -102,7 +106,7 @@ static int tcpdiag_fill(struct sk_buff *skb, struct sock *sk, r->tcpdiag_wqueue = 0; r->tcpdiag_uid = 0; r->tcpdiag_inode = 0; -#ifdef CONFIG_IP_TCPDIAG_IPV6 +#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE) if (r->tcpdiag_family == AF_INET6) { const struct tcp6_timewait_sock *tcp6tw = tcp6_twsk(sk); @@ -121,7 +125,7 @@ static int tcpdiag_fill(struct sk_buff *skb, struct sock *sk, r->id.tcpdiag_src[0] = inet->rcv_saddr; r->id.tcpdiag_dst[0] = inet->daddr; -#ifdef CONFIG_IP_TCPDIAG_IPV6 +#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE) if (r->tcpdiag_family == AF_INET6) { struct ipv6_pinfo *np = inet6_sk(sk); @@ -196,19 +200,6 @@ nlmsg_failure: return -1; } -#ifdef CONFIG_IP_TCPDIAG_IPV6 -extern struct sock *tcp_v6_lookup(struct in6_addr *saddr, u16 sport, - struct in6_addr *daddr, u16 dport, - int dif); -#else -static inline struct sock *tcp_v6_lookup(struct in6_addr *saddr, u16 sport, - struct in6_addr *daddr, u16 dport, - int dif) -{ - return NULL; -} -#endif - static int tcpdiag_get_exact(struct sk_buff *in_skb, const struct nlmsghdr *nlh) { int err; @@ -225,11 +216,14 @@ static int tcpdiag_get_exact(struct sk_buff *in_skb, const struct nlmsghdr *nlh) req->id.tcpdiag_dport, req->id.tcpdiag_src[0], req->id.tcpdiag_sport, req->id.tcpdiag_if); } -#ifdef CONFIG_IP_TCPDIAG_IPV6 +#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE) else if (req->tcpdiag_family == AF_INET6) { - sk = tcp_v6_lookup((struct in6_addr*)req->id.tcpdiag_dst, req->id.tcpdiag_dport, - (struct in6_addr*)req->id.tcpdiag_src, 
req->id.tcpdiag_sport, - req->id.tcpdiag_if); + sk = inet6_lookup(hashinfo, + (struct in6_addr*)req->id.tcpdiag_dst, + req->id.tcpdiag_dport, + (struct in6_addr*)req->id.tcpdiag_src, + req->id.tcpdiag_sport, + req->id.tcpdiag_if); } #endif else { @@ -440,7 +434,7 @@ static int tcpdiag_dump_sock(struct sk_buff *skb, struct sock *sk, struct inet_sock *inet = inet_sk(sk); entry.family = sk->sk_family; -#ifdef CONFIG_IP_TCPDIAG_IPV6 +#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE) if (entry.family == AF_INET6) { struct ipv6_pinfo *np = inet6_sk(sk); @@ -502,7 +496,7 @@ static int tcpdiag_fill_req(struct sk_buff *skb, struct sock *sk, r->tcpdiag_wqueue = 0; r->tcpdiag_uid = sock_i_uid(sk); r->tcpdiag_inode = 0; -#ifdef CONFIG_IP_TCPDIAG_IPV6 +#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE) if (r->tcpdiag_family == AF_INET6) { ipv6_addr_copy((struct in6_addr *)r->id.tcpdiag_src, &tcp6_rsk(req)->loc_addr); @@ -567,13 +561,13 @@ static int tcpdiag_dump_reqs(struct sk_buff *skb, struct sock *sk, if (bc) { entry.saddr = -#ifdef CONFIG_IP_TCPDIAG_IPV6 +#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE) (entry.family == AF_INET6) ? tcp6_rsk(req)->loc_addr.s6_addr32 : #endif &ireq->loc_addr; entry.daddr = -#ifdef CONFIG_IP_TCPDIAG_IPV6 +#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE) (entry.family == AF_INET6) ? tcp6_rsk(req)->rmt_addr.s6_addr32 : #endif diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 3312cb8742e2..2bc7fafe7668 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -76,26 +76,27 @@ static struct tcp_func ipv6_mapped; static struct tcp_func ipv6_specific; /* I have no idea if this is a good hash for v6 or not. 
-DaveM */ -static __inline__ int tcp_v6_hashfn(struct in6_addr *laddr, u16 lport, - struct in6_addr *faddr, u16 fport) +static inline int inet6_ehashfn(const struct in6_addr *laddr, const u16 lport, + const struct in6_addr *faddr, const u16 fport, + const int ehash_size) { int hashent = (lport ^ fport); hashent ^= (laddr->s6_addr32[3] ^ faddr->s6_addr32[3]); hashent ^= hashent>>16; hashent ^= hashent>>8; - return (hashent & (tcp_hashinfo.ehash_size - 1)); + return (hashent & (ehash_size - 1)); } -static __inline__ int tcp_v6_sk_hashfn(struct sock *sk) +static inline int inet6_sk_ehashfn(const struct sock *sk, const int ehash_size) { - struct inet_sock *inet = inet_sk(sk); - struct ipv6_pinfo *np = inet6_sk(sk); - struct in6_addr *laddr = &np->rcv_saddr; - struct in6_addr *faddr = &np->daddr; - __u16 lport = inet->num; - __u16 fport = inet->dport; - return tcp_v6_hashfn(laddr, lport, faddr, fport); + const struct inet_sock *inet = inet_sk(sk); + const struct ipv6_pinfo *np = inet6_sk(sk); + const struct in6_addr *laddr = &np->rcv_saddr; + const struct in6_addr *faddr = &np->daddr; + const __u16 lport = inet->num; + const __u16 fport = inet->dport; + return inet6_ehashfn(laddr, lport, faddr, fport, ehash_size); } static inline int tcp_v6_bind_conflict(const struct sock *sk, @@ -231,7 +232,7 @@ static __inline__ void __tcp_v6_hash(struct sock *sk) lock = &tcp_hashinfo.lhash_lock; inet_listen_wlock(&tcp_hashinfo); } else { - sk->sk_hashent = tcp_v6_sk_hashfn(sk); + sk->sk_hashent = inet6_sk_ehashfn(sk, tcp_hashinfo.ehash_size); list = &tcp_hashinfo.ehash[sk->sk_hashent].chain; lock = &tcp_hashinfo.ehash[sk->sk_hashent].lock; write_lock(lock); @@ -258,7 +259,10 @@ static void tcp_v6_hash(struct sock *sk) } } -static struct sock *tcp_v6_lookup_listener(struct in6_addr *daddr, unsigned short hnum, int dif) +static struct sock *inet6_lookup_listener(struct inet_hashinfo *hashinfo, + const struct in6_addr *daddr, + const unsigned short hnum, + const int dif) { struct sock 
*sk; struct hlist_node *node; @@ -266,8 +270,8 @@ static struct sock *tcp_v6_lookup_listener(struct in6_addr *daddr, unsigned shor int score, hiscore; hiscore=0; - read_lock(&tcp_hashinfo.lhash_lock); - sk_for_each(sk, node, &tcp_hashinfo.listening_hash[inet_lhashfn(hnum)]) { + read_lock(&hashinfo->lhash_lock); + sk_for_each(sk, node, &hashinfo->listening_hash[inet_lhashfn(hnum)]) { if (inet_sk(sk)->num == hnum && sk->sk_family == PF_INET6) { struct ipv6_pinfo *np = inet6_sk(sk); @@ -294,7 +298,7 @@ static struct sock *tcp_v6_lookup_listener(struct in6_addr *daddr, unsigned shor } if (result) sock_hold(result); - read_unlock(&tcp_hashinfo.lhash_lock); + read_unlock(&hashinfo->lhash_lock); return result; } @@ -304,9 +308,13 @@ static struct sock *tcp_v6_lookup_listener(struct in6_addr *daddr, unsigned shor * The sockhash lock must be held as a reader here. */ -static inline struct sock *__tcp_v6_lookup_established(struct in6_addr *saddr, u16 sport, - struct in6_addr *daddr, u16 hnum, - int dif) +static inline struct sock * + __inet6_lookup_established(struct inet_hashinfo *hashinfo, + const struct in6_addr *saddr, + const u16 sport, + const struct in6_addr *daddr, + const u16 hnum, + const int dif) { struct sock *sk; const struct hlist_node *node; @@ -314,8 +322,9 @@ static inline struct sock *__tcp_v6_lookup_established(struct in6_addr *saddr, u /* Optimize here for direct hit, only listening connections can * have wildcards anyways. */ - const int hash = tcp_v6_hashfn(daddr, hnum, saddr, sport); - struct inet_ehash_bucket *head = &tcp_hashinfo.ehash[hash]; + const int hash = inet6_ehashfn(daddr, hnum, saddr, sport, + hashinfo->ehash_size); + struct inet_ehash_bucket *head = &hashinfo->ehash[hash]; read_lock(&head->lock); sk_for_each(sk, node, &head->chain) { @@ -324,7 +333,7 @@ static inline struct sock *__tcp_v6_lookup_established(struct in6_addr *saddr, u goto hit; /* You sunk my battleship! */ } /* Must check for a TIME_WAIT'er before going to listener hash. 
*/ - sk_for_each(sk, node, &(head + tcp_hashinfo.ehash_size)->chain) { + sk_for_each(sk, node, &(head + hashinfo->ehash_size)->chain) { const struct inet_timewait_sock *tw = inet_twsk(sk); if(*((__u32 *)&(tw->tw_dport)) == ports && @@ -347,34 +356,36 @@ hit: } -static inline struct sock *__tcp_v6_lookup(struct in6_addr *saddr, u16 sport, - struct in6_addr *daddr, u16 hnum, - int dif) +static inline struct sock *__inet6_lookup(struct inet_hashinfo *hashinfo, + const struct in6_addr *saddr, + const u16 sport, + const struct in6_addr *daddr, + const u16 hnum, + const int dif) { - struct sock *sk; - - sk = __tcp_v6_lookup_established(saddr, sport, daddr, hnum, dif); - + struct sock *sk = __inet6_lookup_established(hashinfo, saddr, sport, + daddr, hnum, dif); if (sk) return sk; - return tcp_v6_lookup_listener(daddr, hnum, dif); + return inet6_lookup_listener(hashinfo, daddr, hnum, dif); } -inline struct sock *tcp_v6_lookup(struct in6_addr *saddr, u16 sport, - struct in6_addr *daddr, u16 dport, - int dif) +inline struct sock *inet6_lookup(struct inet_hashinfo *hashinfo, + const struct in6_addr *saddr, const u16 sport, + const struct in6_addr *daddr, const u16 dport, + const int dif) { struct sock *sk; local_bh_disable(); - sk = __tcp_v6_lookup(saddr, sport, daddr, ntohs(dport), dif); + sk = __inet6_lookup(hashinfo, saddr, sport, daddr, ntohs(dport), dif); local_bh_enable(); return sk; } -EXPORT_SYMBOL_GPL(tcp_v6_lookup); +EXPORT_SYMBOL_GPL(inet6_lookup); /* @@ -454,16 +465,17 @@ static __u32 tcp_v6_init_sequence(struct sock *sk, struct sk_buff *skb) } } -static int __tcp_v6_check_established(struct sock *sk, __u16 lport, +static int __tcp_v6_check_established(struct sock *sk, const __u16 lport, struct inet_timewait_sock **twp) { struct inet_sock *inet = inet_sk(sk); - struct ipv6_pinfo *np = inet6_sk(sk); - struct in6_addr *daddr = &np->rcv_saddr; - struct in6_addr *saddr = &np->daddr; - int dif = sk->sk_bound_dev_if; + const struct ipv6_pinfo *np = inet6_sk(sk); + const 
struct in6_addr *daddr = &np->rcv_saddr; + const struct in6_addr *saddr = &np->daddr; + const int dif = sk->sk_bound_dev_if; const u32 ports = INET_COMBINED_PORTS(inet->dport, lport); - const int hash = tcp_v6_hashfn(daddr, inet->num, saddr, inet->dport); + const int hash = inet6_ehashfn(daddr, inet->num, saddr, inet->dport, + tcp_hashinfo.ehash_size); struct inet_ehash_bucket *head = &tcp_hashinfo.ehash[hash]; struct sock *sk2; const struct hlist_node *node; @@ -637,11 +649,6 @@ out: } } -static __inline__ int tcp_v6_iif(struct sk_buff *skb) -{ - return IP6CB(skb)->iif; -} - static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) { @@ -833,14 +840,15 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, int type, int code, int offset, __u32 info) { struct ipv6hdr *hdr = (struct ipv6hdr*)skb->data; - struct tcphdr *th = (struct tcphdr *)(skb->data+offset); + const struct tcphdr *th = (struct tcphdr *)(skb->data+offset); struct ipv6_pinfo *np; struct sock *sk; int err; struct tcp_sock *tp; __u32 seq; - sk = tcp_v6_lookup(&hdr->daddr, th->dest, &hdr->saddr, th->source, skb->dev->ifindex); + sk = inet6_lookup(&tcp_hashinfo, &hdr->daddr, th->dest, &hdr->saddr, + th->source, skb->dev->ifindex); if (sk == NULL) { ICMP6_INC_STATS_BH(__in6_dev_get(skb->dev), ICMP6_MIB_INERRORS); @@ -927,7 +935,7 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, goto out; req = tcp_v6_search_req(sk, &prev, th->dest, &hdr->daddr, - &hdr->saddr, tcp_v6_iif(skb)); + &hdr->saddr, inet6_iif(skb)); if (!req) goto out; @@ -1138,7 +1146,7 @@ static void tcp_v6_send_reset(struct sk_buff *skb) buff->csum); fl.proto = IPPROTO_TCP; - fl.oif = tcp_v6_iif(skb); + fl.oif = inet6_iif(skb); fl.fl_ip_dport = t1->dest; fl.fl_ip_sport = t1->source; @@ -1207,7 +1215,7 @@ static void tcp_v6_send_ack(struct sk_buff *skb, u32 seq, u32 ack, u32 win, u32 buff->csum); fl.proto = IPPROTO_TCP; - fl.oif = tcp_v6_iif(skb); + fl.oif = inet6_iif(skb); 
fl.fl_ip_dport = t1->dest; fl.fl_ip_sport = t1->source; @@ -1245,20 +1253,18 @@ static void tcp_v6_reqsk_send_ack(struct sk_buff *skb, struct request_sock *req) static struct sock *tcp_v6_hnd_req(struct sock *sk,struct sk_buff *skb) { struct request_sock *req, **prev; - struct tcphdr *th = skb->h.th; + const struct tcphdr *th = skb->h.th; struct sock *nsk; /* Find possible connection requests. */ req = tcp_v6_search_req(sk, &prev, th->source, &skb->nh.ipv6h->saddr, - &skb->nh.ipv6h->daddr, tcp_v6_iif(skb)); + &skb->nh.ipv6h->daddr, inet6_iif(skb)); if (req) return tcp_check_req(sk, skb, req, prev); - nsk = __tcp_v6_lookup_established(&skb->nh.ipv6h->saddr, - th->source, - &skb->nh.ipv6h->daddr, - ntohs(th->dest), - tcp_v6_iif(skb)); + nsk = __inet6_lookup_established(&tcp_hashinfo, &skb->nh.ipv6h->saddr, + th->source, &skb->nh.ipv6h->daddr, + ntohs(th->dest), inet6_iif(skb)); if (nsk) { if (nsk->sk_state != TCP_TIME_WAIT) { @@ -1346,7 +1352,7 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb) /* So that link locals have meaning */ if (!sk->sk_bound_dev_if && ipv6_addr_type(&treq->rmt_addr) & IPV6_ADDR_LINKLOCAL) - treq->iif = tcp_v6_iif(skb); + treq->iif = inet6_iif(skb); if (isn == 0) isn = tcp_v6_init_sequence(sk,skb); @@ -1411,7 +1417,7 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb, newsk->sk_backlog_rcv = tcp_v4_do_rcv; newnp->pktoptions = NULL; newnp->opt = NULL; - newnp->mcast_oif = tcp_v6_iif(skb); + newnp->mcast_oif = inet6_iif(skb); newnp->mcast_hops = skb->nh.ipv6h->hop_limit; /* @@ -1516,7 +1522,7 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb, skb_set_owner_r(newnp->pktoptions, newsk); } newnp->opt = NULL; - newnp->mcast_oif = tcp_v6_iif(skb); + newnp->mcast_oif = inet6_iif(skb); newnp->mcast_hops = skb->nh.ipv6h->hop_limit; /* Clone native IPv6 options from listening socket (if any) @@ -1691,7 +1697,7 @@ ipv6_pktoptions: if (TCP_SKB_CB(opt_skb)->end_seq == 
tp->rcv_nxt && !((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))) { if (np->rxopt.bits.rxinfo) - np->mcast_oif = tcp_v6_iif(opt_skb); + np->mcast_oif = inet6_iif(opt_skb); if (np->rxopt.bits.rxhlim) np->mcast_hops = opt_skb->nh.ipv6h->hop_limit; if (ipv6_opt_accepted(sk, opt_skb)) { @@ -1746,8 +1752,9 @@ static int tcp_v6_rcv(struct sk_buff **pskb, unsigned int *nhoffp) TCP_SKB_CB(skb)->flags = ipv6_get_dsfield(skb->nh.ipv6h); TCP_SKB_CB(skb)->sacked = 0; - sk = __tcp_v6_lookup(&skb->nh.ipv6h->saddr, th->source, - &skb->nh.ipv6h->daddr, ntohs(th->dest), tcp_v6_iif(skb)); + sk = __inet6_lookup(&tcp_hashinfo, &skb->nh.ipv6h->saddr, th->source, + &skb->nh.ipv6h->daddr, ntohs(th->dest), + inet6_iif(skb)); if (!sk) goto no_tcp_socket; @@ -1818,7 +1825,9 @@ do_time_wait: { struct sock *sk2; - sk2 = tcp_v6_lookup_listener(&skb->nh.ipv6h->daddr, ntohs(th->dest), tcp_v6_iif(skb)); + sk2 = inet6_lookup_listener(&tcp_hashinfo, + &skb->nh.ipv6h->daddr, + ntohs(th->dest), inet6_iif(skb)); if (sk2 != NULL) { struct inet_timewait_sock *tw = inet_twsk(sk); inet_twsk_deschedule(tw, &tcp_death_row); From 5324a040ccc708998e61ea93e669b81312f0ae11 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 12 Aug 2005 09:26:18 -0300 Subject: [PATCH 383/584] [INET6_HASHTABLES]: Move inet6_lookup functions to net/ipv6/inet6_hashtables.c Doing this we allow tcp_diag to support IPV6 even if tcp_diag is compiled statically and IPV6 is compiled as a module, removing the previous restriction while not building any IPV6 code if it is not selected. Now to work on the tcpdiag_register infrastructure and then to rename the whole thing to inetdiag, reflecting its by then completely generic nature. Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. 
Miller --- include/net/inet6_hashtables.h | 106 ++++++++++++++++++++++- net/ipv4/Kconfig | 4 +- net/ipv6/Makefile | 2 + net/ipv6/inet6_hashtables.c | 81 +++++++++++++++++ net/ipv6/tcp_ipv6.c | 154 +-------------------------------- 5 files changed, 190 insertions(+), 157 deletions(-) create mode 100644 net/ipv6/inet6_hashtables.c diff --git a/include/net/inet6_hashtables.h b/include/net/inet6_hashtables.h index 297c2b16967a..03df3b157960 100644 --- a/include/net/inet6_hashtables.h +++ b/include/net/inet6_hashtables.h @@ -14,13 +14,117 @@ #ifndef _INET6_HASHTABLES_H #define _INET6_HASHTABLES_H +#include + +#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE) +#include +#include #include -struct in6_addr; +#include + struct inet_hashinfo; +/* I have no idea if this is a good hash for v6 or not. -DaveM */ +static inline int inet6_ehashfn(const struct in6_addr *laddr, const u16 lport, + const struct in6_addr *faddr, const u16 fport, + const int ehash_size) +{ + int hashent = (lport ^ fport); + + hashent ^= (laddr->s6_addr32[3] ^ faddr->s6_addr32[3]); + hashent ^= hashent >> 16; + hashent ^= hashent >> 8; + return (hashent & (ehash_size - 1)); +} + +static inline int inet6_sk_ehashfn(const struct sock *sk, const int ehash_size) +{ + const struct inet_sock *inet = inet_sk(sk); + const struct ipv6_pinfo *np = inet6_sk(sk); + const struct in6_addr *laddr = &np->rcv_saddr; + const struct in6_addr *faddr = &np->daddr; + const __u16 lport = inet->num; + const __u16 fport = inet->dport; + return inet6_ehashfn(laddr, lport, faddr, fport, ehash_size); +} + +/* + * Sockets in TCP_CLOSE state are _always_ taken out of the hash, so + * we need not check it for TCP lookups anymore, thanks Alexey. -DaveM + * + * The sockhash lock must be held as a reader here. 
+ */ +static inline struct sock * + __inet6_lookup_established(struct inet_hashinfo *hashinfo, + const struct in6_addr *saddr, + const u16 sport, + const struct in6_addr *daddr, + const u16 hnum, + const int dif) +{ + struct sock *sk; + const struct hlist_node *node; + const __u32 ports = INET_COMBINED_PORTS(sport, hnum); + /* Optimize here for direct hit, only listening connections can + * have wildcards anyways. + */ + const int hash = inet6_ehashfn(daddr, hnum, saddr, sport, + hashinfo->ehash_size); + struct inet_ehash_bucket *head = &hashinfo->ehash[hash]; + + read_lock(&head->lock); + sk_for_each(sk, node, &head->chain) { + /* For IPV6 do the cheaper port and family tests first. */ + if (INET6_MATCH(sk, saddr, daddr, ports, dif)) + goto hit; /* You sunk my battleship! */ + } + /* Must check for a TIME_WAIT'er before going to listener hash. */ + sk_for_each(sk, node, &(head + hashinfo->ehash_size)->chain) { + const struct inet_timewait_sock *tw = inet_twsk(sk); + + if(*((__u32 *)&(tw->tw_dport)) == ports && + sk->sk_family == PF_INET6) { + const struct tcp6_timewait_sock *tcp6tw = tcp6_twsk(sk); + + if (ipv6_addr_equal(&tcp6tw->tw_v6_daddr, saddr) && + ipv6_addr_equal(&tcp6tw->tw_v6_rcv_saddr, daddr) && + (!sk->sk_bound_dev_if || sk->sk_bound_dev_if == dif)) + goto hit; + } + } + read_unlock(&head->lock); + return NULL; + +hit: + sock_hold(sk); + read_unlock(&head->lock); + return sk; +} + +extern struct sock *inet6_lookup_listener(struct inet_hashinfo *hashinfo, + const struct in6_addr *daddr, + const unsigned short hnum, + const int dif); + +static inline struct sock *__inet6_lookup(struct inet_hashinfo *hashinfo, + const struct in6_addr *saddr, + const u16 sport, + const struct in6_addr *daddr, + const u16 hnum, + const int dif) +{ + struct sock *sk = __inet6_lookup_established(hashinfo, saddr, sport, + daddr, hnum, dif); + if (sk) + return sk; + + return inet6_lookup_listener(hashinfo, daddr, hnum, dif); +} + extern struct sock *inet6_lookup(struct 
inet_hashinfo *hashinfo, const struct in6_addr *saddr, const u16 sport, const struct in6_addr *daddr, const u16 dport, const int dif); +#endif /* defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE) */ #endif /* _INET6_HASHTABLES_H */ diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig index a79b4f9c10c5..960c02faf440 100644 --- a/net/ipv4/Kconfig +++ b/net/ipv4/Kconfig @@ -419,9 +419,7 @@ config IP_TCPDIAG ---help--- Support for TCP socket monitoring interface used by native Linux tools such as ss. ss is included in iproute2, currently downloadable - at . If you want IPv6 or DCCP - support and have selected IPv6 or DCCP as a module, you need to build - this as a module too. + at . If unsure, say Y. diff --git a/net/ipv6/Makefile b/net/ipv6/Makefile index 5bccea2d81b4..6460eec834b7 100644 --- a/net/ipv6/Makefile +++ b/net/ipv6/Makefile @@ -23,3 +23,5 @@ obj-$(CONFIG_NETFILTER) += netfilter/ obj-$(CONFIG_IPV6_TUNNEL) += ip6_tunnel.o obj-y += exthdrs_core.o + +obj-$(subst m,y,$(CONFIG_IPV6)) += inet6_hashtables.o diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c new file mode 100644 index 000000000000..01d5f46d4e40 --- /dev/null +++ b/net/ipv6/inet6_hashtables.c @@ -0,0 +1,81 @@ +/* + * INET An implementation of the TCP/IP protocol suite for the LINUX + * operating system. INET is implemented using the BSD Socket + * interface as the means of communication with the user level. + * + * Generic INET6 transport hashtables + * + * Authors: Lotsa people, from code originally in tcp + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
+ */ + +#include + +#include + +#include +#include +#include + +struct sock *inet6_lookup_listener(struct inet_hashinfo *hashinfo, + const struct in6_addr *daddr, + const unsigned short hnum, const int dif) +{ + struct sock *sk; + const struct hlist_node *node; + struct sock *result = NULL; + int score, hiscore = 0; + + read_lock(&hashinfo->lhash_lock); + sk_for_each(sk, node, &hashinfo->listening_hash[inet_lhashfn(hnum)]) { + if (inet_sk(sk)->num == hnum && sk->sk_family == PF_INET6) { + const struct ipv6_pinfo *np = inet6_sk(sk); + + score = 1; + if (!ipv6_addr_any(&np->rcv_saddr)) { + if (!ipv6_addr_equal(&np->rcv_saddr, daddr)) + continue; + score++; + } + if (sk->sk_bound_dev_if) { + if (sk->sk_bound_dev_if != dif) + continue; + score++; + } + if (score == 3) { + result = sk; + break; + } + if (score > hiscore) { + hiscore = score; + result = sk; + } + } + } + if (result) + sock_hold(result); + read_unlock(&hashinfo->lhash_lock); + return result; +} + +EXPORT_SYMBOL_GPL(inet6_lookup_listener); + +struct sock *inet6_lookup(struct inet_hashinfo *hashinfo, + const struct in6_addr *saddr, const u16 sport, + const struct in6_addr *daddr, const u16 dport, + const int dif) +{ + struct sock *sk; + + local_bh_disable(); + sk = __inet6_lookup(hashinfo, saddr, sport, daddr, ntohs(dport), dif); + local_bh_enable(); + + return sk; +} + +EXPORT_SYMBOL_GPL(inet6_lookup); diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 2bc7fafe7668..fb291b81cf63 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -47,6 +47,7 @@ #include #include +#include #include #include #include @@ -75,30 +76,6 @@ static int tcp_v6_xmit(struct sk_buff *skb, int ipfragok); static struct tcp_func ipv6_mapped; static struct tcp_func ipv6_specific; -/* I have no idea if this is a good hash for v6 or not. 
-DaveM */ -static inline int inet6_ehashfn(const struct in6_addr *laddr, const u16 lport, - const struct in6_addr *faddr, const u16 fport, - const int ehash_size) -{ - int hashent = (lport ^ fport); - - hashent ^= (laddr->s6_addr32[3] ^ faddr->s6_addr32[3]); - hashent ^= hashent>>16; - hashent ^= hashent>>8; - return (hashent & (ehash_size - 1)); -} - -static inline int inet6_sk_ehashfn(const struct sock *sk, const int ehash_size) -{ - const struct inet_sock *inet = inet_sk(sk); - const struct ipv6_pinfo *np = inet6_sk(sk); - const struct in6_addr *laddr = &np->rcv_saddr; - const struct in6_addr *faddr = &np->daddr; - const __u16 lport = inet->num; - const __u16 fport = inet->dport; - return inet6_ehashfn(laddr, lport, faddr, fport, ehash_size); -} - static inline int tcp_v6_bind_conflict(const struct sock *sk, const struct inet_bind_bucket *tb) { @@ -259,135 +236,6 @@ static void tcp_v6_hash(struct sock *sk) } } -static struct sock *inet6_lookup_listener(struct inet_hashinfo *hashinfo, - const struct in6_addr *daddr, - const unsigned short hnum, - const int dif) -{ - struct sock *sk; - struct hlist_node *node; - struct sock *result = NULL; - int score, hiscore; - - hiscore=0; - read_lock(&hashinfo->lhash_lock); - sk_for_each(sk, node, &hashinfo->listening_hash[inet_lhashfn(hnum)]) { - if (inet_sk(sk)->num == hnum && sk->sk_family == PF_INET6) { - struct ipv6_pinfo *np = inet6_sk(sk); - - score = 1; - if (!ipv6_addr_any(&np->rcv_saddr)) { - if (!ipv6_addr_equal(&np->rcv_saddr, daddr)) - continue; - score++; - } - if (sk->sk_bound_dev_if) { - if (sk->sk_bound_dev_if != dif) - continue; - score++; - } - if (score == 3) { - result = sk; - break; - } - if (score > hiscore) { - hiscore = score; - result = sk; - } - } - } - if (result) - sock_hold(result); - read_unlock(&hashinfo->lhash_lock); - return result; -} - -/* Sockets in TCP_CLOSE state are _always_ taken out of the hash, so - * we need not check it for TCP lookups anymore, thanks Alexey. 
-DaveM - * - * The sockhash lock must be held as a reader here. - */ - -static inline struct sock * - __inet6_lookup_established(struct inet_hashinfo *hashinfo, - const struct in6_addr *saddr, - const u16 sport, - const struct in6_addr *daddr, - const u16 hnum, - const int dif) -{ - struct sock *sk; - const struct hlist_node *node; - const __u32 ports = INET_COMBINED_PORTS(sport, hnum); - /* Optimize here for direct hit, only listening connections can - * have wildcards anyways. - */ - const int hash = inet6_ehashfn(daddr, hnum, saddr, sport, - hashinfo->ehash_size); - struct inet_ehash_bucket *head = &hashinfo->ehash[hash]; - - read_lock(&head->lock); - sk_for_each(sk, node, &head->chain) { - /* For IPV6 do the cheaper port and family tests first. */ - if (INET6_MATCH(sk, saddr, daddr, ports, dif)) - goto hit; /* You sunk my battleship! */ - } - /* Must check for a TIME_WAIT'er before going to listener hash. */ - sk_for_each(sk, node, &(head + hashinfo->ehash_size)->chain) { - const struct inet_timewait_sock *tw = inet_twsk(sk); - - if(*((__u32 *)&(tw->tw_dport)) == ports && - sk->sk_family == PF_INET6) { - const struct tcp6_timewait_sock *tcp6tw = tcp6_twsk(sk); - - if (ipv6_addr_equal(&tcp6tw->tw_v6_daddr, saddr) && - ipv6_addr_equal(&tcp6tw->tw_v6_rcv_saddr, daddr) && - (!sk->sk_bound_dev_if || sk->sk_bound_dev_if == dif)) - goto hit; - } - } - read_unlock(&head->lock); - return NULL; - -hit: - sock_hold(sk); - read_unlock(&head->lock); - return sk; -} - - -static inline struct sock *__inet6_lookup(struct inet_hashinfo *hashinfo, - const struct in6_addr *saddr, - const u16 sport, - const struct in6_addr *daddr, - const u16 hnum, - const int dif) -{ - struct sock *sk = __inet6_lookup_established(hashinfo, saddr, sport, - daddr, hnum, dif); - if (sk) - return sk; - - return inet6_lookup_listener(hashinfo, daddr, hnum, dif); -} - -inline struct sock *inet6_lookup(struct inet_hashinfo *hashinfo, - const struct in6_addr *saddr, const u16 sport, - const struct 
in6_addr *daddr, const u16 dport, - const int dif) -{ - struct sock *sk; - - local_bh_disable(); - sk = __inet6_lookup(hashinfo, saddr, sport, daddr, ntohs(dport), dif); - local_bh_enable(); - - return sk; -} - -EXPORT_SYMBOL_GPL(inet6_lookup); - - /* * Open request hash tables. */ From 4f5736c4c7cf6f9bd8db82b712cfdd51c87e06b9 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 12 Aug 2005 09:27:49 -0300 Subject: [PATCH 384/584] [TCPDIAG]: Introduce inet_diag_{register,unregister} Next changeset will rename tcp_diag to inet_diag and move the tcp_diag code out of it and into a new tcp_diag.c, similar to the net/dccp/diag.c introduced in this changeset, completing the transition to a generic inet_diag infrastructure. Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. Miller --- include/linux/tcp_diag.h | 19 +++++ net/dccp/Kconfig | 5 ++ net/dccp/Makefile | 4 + net/dccp/diag.c | 47 ++++++++++++ net/ipv4/Kconfig | 3 - net/ipv4/tcp_diag.c | 153 ++++++++++++++++++++++++++++----------- 6 files changed, 186 insertions(+), 45 deletions(-) create mode 100644 net/dccp/diag.c diff --git a/include/linux/tcp_diag.h b/include/linux/tcp_diag.h index 190494ebcfb8..910c34ba19c0 100644 --- a/include/linux/tcp_diag.h +++ b/include/linux/tcp_diag.h @@ -5,6 +5,8 @@ #define TCPDIAG_GETSOCK 18 #define DCCPDIAG_GETSOCK 19 +#define INET_DIAG_GETSOCK_MAX 24 + /* Socket identity */ struct tcpdiag_sockid { @@ -125,4 +127,21 @@ struct tcpvegas_info { __u32 tcpv_minrtt; }; +#ifdef __KERNEL__ +struct sock; +struct inet_hashinfo; + +struct inet_diag_handler { + struct inet_hashinfo *idiag_hashinfo; + void (*idiag_get_info)(struct sock *sk, + struct tcpdiagmsg *r, + void *info); + __u16 idiag_info_size; + __u16 idiag_type; +}; + +extern int inet_diag_register(const struct inet_diag_handler *handler); +extern void inet_diag_unregister(const struct inet_diag_handler *handler); +#endif /* __KERNEL__ */ + #endif /* _TCP_DIAG_H_ */ diff --git a/net/dccp/Kconfig 
b/net/dccp/Kconfig index 90460bc629b3..ff5b5459b97a 100644 --- a/net/dccp/Kconfig +++ b/net/dccp/Kconfig @@ -19,6 +19,11 @@ config IP_DCCP If in doubt, say N. +config IP_DCCP_DIAG + depends on IP_DCCP && IP_TCPDIAG + def_tristate y if (IP_DCCP = y && IP_TCPDIAG = y) + def_tristate m + source "net/dccp/ccids/Kconfig" endmenu diff --git a/net/dccp/Makefile b/net/dccp/Makefile index 25a50bdbf1bb..5741fffc436f 100644 --- a/net/dccp/Makefile +++ b/net/dccp/Makefile @@ -3,4 +3,8 @@ obj-$(CONFIG_IP_DCCP) += dccp.o dccp-y := ccid.o input.o ipv4.o minisocks.o options.o output.o proto.o \ timer.o packet_history.o +obj-$(CONFIG_IP_DCCP_DIAG) += dccp_diag.o + obj-y += ccids/ + +dccp_diag-y := diag.o diff --git a/net/dccp/diag.c b/net/dccp/diag.c new file mode 100644 index 000000000000..4d9037c56ddc --- /dev/null +++ b/net/dccp/diag.c @@ -0,0 +1,47 @@ +/* + * net/dccp/diag.c + * + * An implementation of the DCCP protocol + * Arnaldo Carvalho de Melo + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ */ + +#include + +#include +#include + +#include "dccp.h" + +static void dccp_diag_get_info(struct sock *sk, struct tcpdiagmsg *r, + void *_info) +{ + r->tcpdiag_rqueue = r->tcpdiag_wqueue = 0; +} + +static struct inet_diag_handler dccp_diag_handler = { + .idiag_hashinfo = &dccp_hashinfo, + .idiag_get_info = dccp_diag_get_info, + .idiag_type = DCCPDIAG_GETSOCK, + .idiag_info_size = 0, +}; + +static int __init dccp_diag_init(void) +{ + return inet_diag_register(&dccp_diag_handler); +} + +static void __exit dccp_diag_fini(void) +{ + inet_diag_unregister(&dccp_diag_handler); +} + +module_init(dccp_diag_init); +module_exit(dccp_diag_fini); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Arnaldo Carvalho de Melo "); +MODULE_DESCRIPTION("DCCP inet_diag handler"); diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig index 960c02faf440..1e6db2a896b9 100644 --- a/net/ipv4/Kconfig +++ b/net/ipv4/Kconfig @@ -423,9 +423,6 @@ config IP_TCPDIAG If unsure, say Y. -config IP_TCPDIAG_DCCP - def_bool (IP_TCPDIAG=y && IP_DCCP=y) || (IP_TCPDIAG=m && IP_DCCP) - config TCP_CONG_ADVANCED bool "TCP: advanced congestion control" ---help--- diff --git a/net/ipv4/tcp_diag.c b/net/ipv4/tcp_diag.c index b812191b2f5c..b13b71cb9ced 100644 --- a/net/ipv4/tcp_diag.c +++ b/net/ipv4/tcp_diag.c @@ -34,6 +34,8 @@ #include +static const struct inet_diag_handler **inet_diag_table; + struct tcpdiag_entry { u32 *saddr; @@ -61,18 +63,24 @@ static int tcpdiag_fill(struct sk_buff *skb, struct sock *sk, const struct inet_connection_sock *icsk = inet_csk(sk); struct tcpdiagmsg *r; struct nlmsghdr *nlh; - struct tcp_info *info = NULL; + void *info = NULL; struct tcpdiag_meminfo *minfo = NULL; unsigned char *b = skb->tail; + const struct inet_diag_handler *handler; + + handler = inet_diag_table[unlh->nlmsg_type]; + BUG_ON(handler == NULL); nlh = NLMSG_PUT(skb, pid, seq, unlh->nlmsg_type, sizeof(*r)); nlh->nlmsg_flags = nlmsg_flags; + r = NLMSG_DATA(nlh); if (sk->sk_state != TCP_TIME_WAIT) { if (ext & 
(1<<(TCPDIAG_MEMINFO-1))) minfo = TCPDIAG_PUT(skb, TCPDIAG_MEMINFO, sizeof(*minfo)); if (ext & (1<<(TCPDIAG_INFO-1))) - info = TCPDIAG_PUT(skb, TCPDIAG_INFO, sizeof(*info)); + info = TCPDIAG_PUT(skb, TCPDIAG_INFO, + handler->idiag_info_size); if ((ext & (1 << (TCPDIAG_CONG - 1))) && icsk->icsk_ca_ops) { size_t len = strlen(icsk->icsk_ca_ops->name); @@ -155,19 +163,6 @@ static int tcpdiag_fill(struct sk_buff *skb, struct sock *sk, r->tcpdiag_expires = 0; } #undef EXPIRES_IN_MS - /* - * Ahem... for now we'll have some knowledge about TCP -acme - * But this is just one of two small exceptions, both in this - * function, so lets close our eyes for some 15 lines or so... 8) - * -acme - */ - if (sk->sk_protocol == IPPROTO_TCP) { - const struct tcp_sock *tp = tcp_sk(sk); - - r->tcpdiag_rqueue = tp->rcv_nxt - tp->copied_seq; - r->tcpdiag_wqueue = tp->write_seq - tp->snd_una; - } else - r->tcpdiag_rqueue = r->tcpdiag_wqueue = 0; r->tcpdiag_uid = sock_i_uid(sk); r->tcpdiag_inode = sock_i_ino(sk); @@ -179,13 +174,7 @@ static int tcpdiag_fill(struct sk_buff *skb, struct sock *sk, minfo->tcpdiag_tmem = atomic_read(&sk->sk_wmem_alloc); } - /* Ahem... 
for now we'll have some knowledge about TCP -acme */ - if (info) { - if (sk->sk_protocol == IPPROTO_TCP) - tcp_get_info(sk, info); - else - memset(info, 0, sizeof(*info)); - } + handler->idiag_get_info(sk, r, info); if (sk->sk_state < TCP_TIME_WAIT && icsk->icsk_ca_ops && icsk->icsk_ca_ops->get_info) @@ -206,11 +195,13 @@ static int tcpdiag_get_exact(struct sk_buff *in_skb, const struct nlmsghdr *nlh) struct sock *sk; struct tcpdiagreq *req = NLMSG_DATA(nlh); struct sk_buff *rep; - struct inet_hashinfo *hashinfo = &tcp_hashinfo; -#ifdef CONFIG_IP_TCPDIAG_DCCP - if (nlh->nlmsg_type == DCCPDIAG_GETSOCK) - hashinfo = &dccp_hashinfo; -#endif + struct inet_hashinfo *hashinfo; + const struct inet_diag_handler *handler; + + handler = inet_diag_table[nlh->nlmsg_type]; + BUG_ON(handler == NULL); + hashinfo = handler->idiag_hashinfo; + if (req->tcpdiag_family == AF_INET) { sk = inet_lookup(hashinfo, req->id.tcpdiag_dst[0], req->id.tcpdiag_dport, req->id.tcpdiag_src[0], @@ -241,9 +232,10 @@ static int tcpdiag_get_exact(struct sk_buff *in_skb, const struct nlmsghdr *nlh) goto out; err = -ENOMEM; - rep = alloc_skb(NLMSG_SPACE(sizeof(struct tcpdiagmsg)+ - sizeof(struct tcpdiag_meminfo)+ - sizeof(struct tcp_info)+64), GFP_KERNEL); + rep = alloc_skb(NLMSG_SPACE((sizeof(struct tcpdiagmsg) + + sizeof(struct tcpdiag_meminfo) + + handler->idiag_info_size + 64)), + GFP_KERNEL); if (!rep) goto out; @@ -603,15 +595,16 @@ static int tcpdiag_dump(struct sk_buff *skb, struct netlink_callback *cb) int i, num; int s_i, s_num; struct tcpdiagreq *r = NLMSG_DATA(cb->nlh); + const struct inet_diag_handler *handler; struct inet_hashinfo *hashinfo; + handler = inet_diag_table[cb->nlh->nlmsg_type]; + BUG_ON(handler == NULL); + hashinfo = handler->idiag_hashinfo; + s_i = cb->args[1]; s_num = num = cb->args[2]; - hashinfo = &tcp_hashinfo; -#ifdef CONFIG_IP_TCPDIAG_DCCP - if (cb->nlh->nlmsg_type == DCCPDIAG_GETSOCK) - hashinfo = &dccp_hashinfo; -#endif + if (cb->args[0] == 0) { if 
(!(r->tcpdiag_states&(TCPF_LISTEN|TCPF_SYN_RECV))) goto skip_listen_ht; @@ -745,13 +738,12 @@ tcpdiag_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) if (!(nlh->nlmsg_flags&NLM_F_REQUEST)) return 0; - if (nlh->nlmsg_type != TCPDIAG_GETSOCK -#ifdef CONFIG_IP_TCPDIAG_DCCP - && nlh->nlmsg_type != DCCPDIAG_GETSOCK -#endif - ) + if (nlh->nlmsg_type >= INET_DIAG_GETSOCK_MAX) goto err_inval; + if (inet_diag_table[nlh->nlmsg_type] == NULL) + return -ENOENT; + if (NLMSG_LENGTH(sizeof(struct tcpdiagreq)) > skb->len) goto err_inval; @@ -803,18 +795,95 @@ static void tcpdiag_rcv(struct sock *sk, int len) } } +static void tcp_diag_get_info(struct sock *sk, struct tcpdiagmsg *r, + void *_info) +{ + const struct tcp_sock *tp = tcp_sk(sk); + struct tcp_info *info = _info; + + r->tcpdiag_rqueue = tp->rcv_nxt - tp->copied_seq; + r->tcpdiag_wqueue = tp->write_seq - tp->snd_una; + if (info != NULL) + tcp_get_info(sk, info); +} + +static struct inet_diag_handler tcp_diag_handler = { + .idiag_hashinfo = &tcp_hashinfo, + .idiag_get_info = tcp_diag_get_info, + .idiag_type = TCPDIAG_GETSOCK, + .idiag_info_size = sizeof(struct tcp_info), +}; + +static DEFINE_SPINLOCK(inet_diag_register_lock); + +int inet_diag_register(const struct inet_diag_handler *h) +{ + const __u16 type = h->idiag_type; + int err = -EINVAL; + + if (type >= INET_DIAG_GETSOCK_MAX) + goto out; + + spin_lock(&inet_diag_register_lock); + err = -EEXIST; + if (inet_diag_table[type] == NULL) { + inet_diag_table[type] = h; + err = 0; + } + spin_unlock(&inet_diag_register_lock); +out: + return err; +} +EXPORT_SYMBOL_GPL(inet_diag_register); + +void inet_diag_unregister(const struct inet_diag_handler *h) +{ + const __u16 type = h->idiag_type; + + if (type >= INET_DIAG_GETSOCK_MAX) + return; + + spin_lock(&inet_diag_register_lock); + inet_diag_table[type] = NULL; + spin_unlock(&inet_diag_register_lock); + + synchronize_rcu(); +} +EXPORT_SYMBOL_GPL(inet_diag_unregister); + static int __init tcpdiag_init(void) { + const int 
inet_diag_table_size = (INET_DIAG_GETSOCK_MAX * + sizeof(struct inet_diag_handler *)); + int err = -ENOMEM; + + inet_diag_table = kmalloc(inet_diag_table_size, GFP_KERNEL); + if (!inet_diag_table) + goto out; + + memset(inet_diag_table, 0, inet_diag_table_size); + tcpnl = netlink_kernel_create(NETLINK_TCPDIAG, tcpdiag_rcv, THIS_MODULE); if (tcpnl == NULL) - return -ENOMEM; - return 0; + goto out_free_table; + + err = inet_diag_register(&tcp_diag_handler); + if (err) + goto out_sock_release; +out: + return err; +out_sock_release: + sock_release(tcpnl->sk_socket); +out_free_table: + kfree(inet_diag_table); + goto out; } static void __exit tcpdiag_exit(void) { sock_release(tcpnl->sk_socket); + kfree(inet_diag_table); } module_init(tcpdiag_init); From 73c1f4a033675f168df7e98bbeeafca3c644b8a6 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 12 Aug 2005 12:51:49 -0300 Subject: [PATCH 385/584] [TCPDIAG]: Just rename everything to inet_diag Next changeset will rename tcp_diag.[ch] to inet_diag.[ch]. I'm taking this longer route so as to ease review, making clear the changes made all along the way. Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S.
Miller --- include/linux/netlink.h | 2 +- include/linux/tcp_diag.h | 131 ++++++------- include/net/tcp.h | 2 +- net/dccp/Kconfig | 4 +- net/dccp/diag.c | 4 +- net/ipv4/Kconfig | 10 +- net/ipv4/Makefile | 2 +- net/ipv4/tcp_diag.c | 391 ++++++++++++++++++++------------------- net/ipv4/tcp_vegas.c | 4 +- net/ipv4/tcp_westwood.c | 4 +- 10 files changed, 273 insertions(+), 281 deletions(-) diff --git a/include/linux/netlink.h b/include/linux/netlink.h index 1c50fea8995b..d5e09bcd80f9 100644 --- a/include/linux/netlink.h +++ b/include/linux/netlink.h @@ -8,7 +8,7 @@ #define NETLINK_W1 1 /* 1-wire subsystem */ #define NETLINK_USERSOCK 2 /* Reserved for user mode socket protocols */ #define NETLINK_FIREWALL 3 /* Firewalling hook */ -#define NETLINK_TCPDIAG 4 /* TCP socket monitoring */ +#define NETLINK_INET_DIAG 4 /* INET socket monitoring */ #define NETLINK_NFLOG 5 /* netfilter/iptables ULOG */ #define NETLINK_XFRM 6 /* ipsec */ #define NETLINK_SELINUX 7 /* SELinux event notifications */ diff --git a/include/linux/tcp_diag.h b/include/linux/tcp_diag.h index 910c34ba19c0..a4606e5810e5 100644 --- a/include/linux/tcp_diag.h +++ b/include/linux/tcp_diag.h @@ -1,5 +1,5 @@ -#ifndef _TCP_DIAG_H_ -#define _TCP_DIAG_H_ 1 +#ifndef _INET_DIAG_H_ +#define _INET_DIAG_H_ 1 /* Just some random number */ #define TCPDIAG_GETSOCK 18 @@ -8,39 +8,36 @@ #define INET_DIAG_GETSOCK_MAX 24 /* Socket identity */ -struct tcpdiag_sockid -{ - __u16 tcpdiag_sport; - __u16 tcpdiag_dport; - __u32 tcpdiag_src[4]; - __u32 tcpdiag_dst[4]; - __u32 tcpdiag_if; - __u32 tcpdiag_cookie[2]; -#define TCPDIAG_NOCOOKIE (~0U) +struct inet_diag_sockid { + __u16 idiag_sport; + __u16 idiag_dport; + __u32 idiag_src[4]; + __u32 idiag_dst[4]; + __u32 idiag_if; + __u32 idiag_cookie[2]; +#define INET_DIAG_NOCOOKIE (~0U) }; /* Request structure */ -struct tcpdiagreq -{ - __u8 tcpdiag_family; /* Family of addresses. 
*/ - __u8 tcpdiag_src_len; - __u8 tcpdiag_dst_len; - __u8 tcpdiag_ext; /* Query extended information */ +struct inet_diag_req { + __u8 idiag_family; /* Family of addresses. */ + __u8 idiag_src_len; + __u8 idiag_dst_len; + __u8 idiag_ext; /* Query extended information */ - struct tcpdiag_sockid id; + struct inet_diag_sockid id; - __u32 tcpdiag_states; /* States to dump */ - __u32 tcpdiag_dbs; /* Tables to dump (NI) */ + __u32 idiag_states; /* States to dump */ + __u32 idiag_dbs; /* Tables to dump (NI) */ }; -enum -{ - TCPDIAG_REQ_NONE, - TCPDIAG_REQ_BYTECODE, +enum { + INET_DIAG_REQ_NONE, + INET_DIAG_REQ_BYTECODE, }; -#define TCPDIAG_REQ_MAX TCPDIAG_REQ_BYTECODE +#define INET_DIAG_REQ_MAX INET_DIAG_REQ_BYTECODE /* Bytecode is sequence of 4 byte commands followed by variable arguments. * All the commands identified by "code" are conditional jumps forward: @@ -48,28 +45,25 @@ enum * length of the command and its arguments. */ -struct tcpdiag_bc_op -{ +struct inet_diag_bc_op { unsigned char code; unsigned char yes; unsigned short no; }; -enum -{ - TCPDIAG_BC_NOP, - TCPDIAG_BC_JMP, - TCPDIAG_BC_S_GE, - TCPDIAG_BC_S_LE, - TCPDIAG_BC_D_GE, - TCPDIAG_BC_D_LE, - TCPDIAG_BC_AUTO, - TCPDIAG_BC_S_COND, - TCPDIAG_BC_D_COND, +enum { + INET_DIAG_BC_NOP, + INET_DIAG_BC_JMP, + INET_DIAG_BC_S_GE, + INET_DIAG_BC_S_LE, + INET_DIAG_BC_D_GE, + INET_DIAG_BC_D_LE, + INET_DIAG_BC_AUTO, + INET_DIAG_BC_S_COND, + INET_DIAG_BC_D_COND, }; -struct tcpdiag_hostcond -{ +struct inet_diag_hostcond { __u8 family; __u8 prefix_len; int port; @@ -78,47 +72,44 @@ struct tcpdiag_hostcond /* Base info structure. It contains socket identity (addrs/ports/cookie) * and, alas, the information shown by netstat. 
*/ -struct tcpdiagmsg -{ - __u8 tcpdiag_family; - __u8 tcpdiag_state; - __u8 tcpdiag_timer; - __u8 tcpdiag_retrans; +struct inet_diag_msg { + __u8 idiag_family; + __u8 idiag_state; + __u8 idiag_timer; + __u8 idiag_retrans; - struct tcpdiag_sockid id; + struct inet_diag_sockid id; - __u32 tcpdiag_expires; - __u32 tcpdiag_rqueue; - __u32 tcpdiag_wqueue; - __u32 tcpdiag_uid; - __u32 tcpdiag_inode; + __u32 idiag_expires; + __u32 idiag_rqueue; + __u32 idiag_wqueue; + __u32 idiag_uid; + __u32 idiag_inode; }; /* Extensions */ -enum -{ - TCPDIAG_NONE, - TCPDIAG_MEMINFO, - TCPDIAG_INFO, - TCPDIAG_VEGASINFO, - TCPDIAG_CONG, +enum { + INET_DIAG_NONE, + INET_DIAG_MEMINFO, + INET_DIAG_INFO, + INET_DIAG_VEGASINFO, + INET_DIAG_CONG, }; -#define TCPDIAG_MAX TCPDIAG_CONG +#define INET_DIAG_MAX INET_DIAG_CONG -/* TCPDIAG_MEM */ +/* INET_DIAG_MEM */ -struct tcpdiag_meminfo -{ - __u32 tcpdiag_rmem; - __u32 tcpdiag_wmem; - __u32 tcpdiag_fmem; - __u32 tcpdiag_tmem; +struct inet_diag_meminfo { + __u32 idiag_rmem; + __u32 idiag_wmem; + __u32 idiag_fmem; + __u32 idiag_tmem; }; -/* TCPDIAG_VEGASINFO */ +/* INET_DIAG_VEGASINFO */ struct tcpvegas_info { __u32 tcpv_enabled; @@ -134,7 +125,7 @@ struct inet_hashinfo; struct inet_diag_handler { struct inet_hashinfo *idiag_hashinfo; void (*idiag_get_info)(struct sock *sk, - struct tcpdiagmsg *r, + struct inet_diag_msg *r, void *info); __u16 idiag_info_size; __u16 idiag_type; @@ -144,4 +135,4 @@ extern int inet_diag_register(const struct inet_diag_handler *handler); extern void inet_diag_unregister(const struct inet_diag_handler *handler); #endif /* __KERNEL__ */ -#endif /* _TCP_DIAG_H_ */ +#endif /* _INET_DIAG_H_ */ diff --git a/include/net/tcp.h b/include/net/tcp.h index 0b3f7294c5c7..fef122782b44 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -690,7 +690,7 @@ struct tcp_congestion_ops { u32 (*undo_cwnd)(struct sock *sk); /* hook for packet ack accounting (optional) */ void (*pkts_acked)(struct sock *sk, u32 num_acked); - /* get info 
for tcp_diag (optional) */ + /* get info for inet_diag (optional) */ void (*get_info)(struct sock *sk, u32 ext, struct sk_buff *skb); char name[TCP_CA_NAME_MAX]; diff --git a/net/dccp/Kconfig b/net/dccp/Kconfig index ff5b5459b97a..efce4f346fd0 100644 --- a/net/dccp/Kconfig +++ b/net/dccp/Kconfig @@ -20,8 +20,8 @@ config IP_DCCP If in doubt, say N. config IP_DCCP_DIAG - depends on IP_DCCP && IP_TCPDIAG - def_tristate y if (IP_DCCP = y && IP_TCPDIAG = y) + depends on IP_DCCP && IP_INET_DIAG + def_tristate y if (IP_DCCP = y && IP_INET_DIAG = y) def_tristate m source "net/dccp/ccids/Kconfig" diff --git a/net/dccp/diag.c b/net/dccp/diag.c index 4d9037c56ddc..9f07eff2e3b6 100644 --- a/net/dccp/diag.c +++ b/net/dccp/diag.c @@ -16,10 +16,10 @@ #include "dccp.h" -static void dccp_diag_get_info(struct sock *sk, struct tcpdiagmsg *r, +static void dccp_diag_get_info(struct sock *sk, struct inet_diag_msg *r, void *_info) { - r->tcpdiag_rqueue = r->tcpdiag_wqueue = 0; + r->idiag_rqueue = r->idiag_wqueue = 0; } static struct inet_diag_handler dccp_diag_handler = { diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig index 1e6db2a896b9..019e88d8f29e 100644 --- a/net/ipv4/Kconfig +++ b/net/ipv4/Kconfig @@ -413,13 +413,13 @@ config INET_TUNNEL If unsure, say Y. -config IP_TCPDIAG - tristate "IP: TCP socket monitoring interface" +config IP_INET_DIAG + tristate "IP: INET socket monitoring interface" default y ---help--- - Support for TCP socket monitoring interface used by native Linux - tools such as ss. ss is included in iproute2, currently downloadable - at . + Support for INET (TCP, DCCP, etc) socket monitoring interface used by + native Linux tools such as ss. ss is included in iproute2, currently + downloadable at . If unsure, say Y. 
diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile index ea0e1d87dc7e..9b1c894039a9 100644 --- a/net/ipv4/Makefile +++ b/net/ipv4/Makefile @@ -30,7 +30,7 @@ obj-$(CONFIG_IP_ROUTE_MULTIPATH_WRANDOM) += multipath_wrandom.o obj-$(CONFIG_IP_ROUTE_MULTIPATH_DRR) += multipath_drr.o obj-$(CONFIG_NETFILTER) += netfilter/ obj-$(CONFIG_IP_VS) += ipvs/ -obj-$(CONFIG_IP_TCPDIAG) += tcp_diag.o +obj-$(CONFIG_IP_INET_DIAG) += tcp_diag.o obj-$(CONFIG_IP_ROUTE_MULTIPATH_CACHED) += multipath.o obj-$(CONFIG_TCP_CONG_BIC) += tcp_bic.o obj-$(CONFIG_TCP_CONG_WESTWOOD) += tcp_westwood.o diff --git a/net/ipv4/tcp_diag.c b/net/ipv4/tcp_diag.c index b13b71cb9ced..24abe82e23a0 100644 --- a/net/ipv4/tcp_diag.c +++ b/net/ipv4/tcp_diag.c @@ -1,7 +1,7 @@ /* - * tcp_diag.c Module for monitoring TCP sockets. + * inet_diag.c Module for monitoring INET transport protocols sockets. * - * Version: $Id: tcp_diag.c,v 1.3 2002/02/01 22:01:04 davem Exp $ + * Version: $Id: inet_diag.c,v 1.3 2002/02/01 22:01:04 davem Exp $ * * Authors: Alexey Kuznetsov, * @@ -36,8 +36,7 @@ static const struct inet_diag_handler **inet_diag_table; -struct tcpdiag_entry -{ +struct inet_diag_entry { u32 *saddr; u32 *daddr; u16 sport; @@ -46,25 +45,21 @@ struct tcpdiag_entry u16 userlocks; }; -static struct sock *tcpnl; +static struct sock *idiagnl; -#define TCPDIAG_PUT(skb, attrtype, attrlen) \ +#define INET_DIAG_PUT(skb, attrtype, attrlen) \ RTA_DATA(__RTA_PUT(skb, attrtype, attrlen)) -#ifdef CONFIG_IP_TCPDIAG_DCCP -extern struct inet_hashinfo dccp_hashinfo; -#endif - -static int tcpdiag_fill(struct sk_buff *skb, struct sock *sk, +static int inet_diag_fill(struct sk_buff *skb, struct sock *sk, int ext, u32 pid, u32 seq, u16 nlmsg_flags, const struct nlmsghdr *unlh) { const struct inet_sock *inet = inet_sk(sk); const struct inet_connection_sock *icsk = inet_csk(sk); - struct tcpdiagmsg *r; + struct inet_diag_msg *r; struct nlmsghdr *nlh; void *info = NULL; - struct tcpdiag_meminfo *minfo = NULL; + struct inet_diag_meminfo 
*minfo = NULL; unsigned char *b = skb->tail; const struct inet_diag_handler *handler; @@ -76,51 +71,52 @@ static int tcpdiag_fill(struct sk_buff *skb, struct sock *sk, r = NLMSG_DATA(nlh); if (sk->sk_state != TCP_TIME_WAIT) { - if (ext & (1<<(TCPDIAG_MEMINFO-1))) - minfo = TCPDIAG_PUT(skb, TCPDIAG_MEMINFO, sizeof(*minfo)); - if (ext & (1<<(TCPDIAG_INFO-1))) - info = TCPDIAG_PUT(skb, TCPDIAG_INFO, + if (ext & (1 << (INET_DIAG_MEMINFO - 1))) + minfo = INET_DIAG_PUT(skb, INET_DIAG_MEMINFO, + sizeof(*minfo)); + if (ext & (1 << (INET_DIAG_INFO - 1))) + info = INET_DIAG_PUT(skb, INET_DIAG_INFO, handler->idiag_info_size); - if ((ext & (1 << (TCPDIAG_CONG - 1))) && icsk->icsk_ca_ops) { + if ((ext & (1 << (INET_DIAG_CONG - 1))) && icsk->icsk_ca_ops) { size_t len = strlen(icsk->icsk_ca_ops->name); - strcpy(TCPDIAG_PUT(skb, TCPDIAG_CONG, len+1), + strcpy(INET_DIAG_PUT(skb, INET_DIAG_CONG, len + 1), icsk->icsk_ca_ops->name); } } - r->tcpdiag_family = sk->sk_family; - r->tcpdiag_state = sk->sk_state; - r->tcpdiag_timer = 0; - r->tcpdiag_retrans = 0; + r->idiag_family = sk->sk_family; + r->idiag_state = sk->sk_state; + r->idiag_timer = 0; + r->idiag_retrans = 0; - r->id.tcpdiag_if = sk->sk_bound_dev_if; - r->id.tcpdiag_cookie[0] = (u32)(unsigned long)sk; - r->id.tcpdiag_cookie[1] = (u32)(((unsigned long)sk >> 31) >> 1); + r->id.idiag_if = sk->sk_bound_dev_if; + r->id.idiag_cookie[0] = (u32)(unsigned long)sk; + r->id.idiag_cookie[1] = (u32)(((unsigned long)sk >> 31) >> 1); - if (r->tcpdiag_state == TCP_TIME_WAIT) { + if (r->idiag_state == TCP_TIME_WAIT) { const struct inet_timewait_sock *tw = inet_twsk(sk); long tmo = tw->tw_ttd - jiffies; if (tmo < 0) tmo = 0; - r->id.tcpdiag_sport = tw->tw_sport; - r->id.tcpdiag_dport = tw->tw_dport; - r->id.tcpdiag_src[0] = tw->tw_rcv_saddr; - r->id.tcpdiag_dst[0] = tw->tw_daddr; - r->tcpdiag_state = tw->tw_substate; - r->tcpdiag_timer = 3; - r->tcpdiag_expires = (tmo*1000+HZ-1)/HZ; - r->tcpdiag_rqueue = 0; - r->tcpdiag_wqueue = 0; - 
r->tcpdiag_uid = 0; - r->tcpdiag_inode = 0; + r->id.idiag_sport = tw->tw_sport; + r->id.idiag_dport = tw->tw_dport; + r->id.idiag_src[0] = tw->tw_rcv_saddr; + r->id.idiag_dst[0] = tw->tw_daddr; + r->idiag_state = tw->tw_substate; + r->idiag_timer = 3; + r->idiag_expires = (tmo * 1000 + HZ - 1) / HZ; + r->idiag_rqueue = 0; + r->idiag_wqueue = 0; + r->idiag_uid = 0; + r->idiag_inode = 0; #if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE) - if (r->tcpdiag_family == AF_INET6) { + if (r->idiag_family == AF_INET6) { const struct tcp6_timewait_sock *tcp6tw = tcp6_twsk(sk); - ipv6_addr_copy((struct in6_addr *)r->id.tcpdiag_src, + ipv6_addr_copy((struct in6_addr *)r->id.idiag_src, &tcp6tw->tw_v6_rcv_saddr); - ipv6_addr_copy((struct in6_addr *)r->id.tcpdiag_dst, + ipv6_addr_copy((struct in6_addr *)r->id.idiag_dst, &tcp6tw->tw_v6_daddr); } #endif @@ -128,18 +124,18 @@ static int tcpdiag_fill(struct sk_buff *skb, struct sock *sk, return skb->len; } - r->id.tcpdiag_sport = inet->sport; - r->id.tcpdiag_dport = inet->dport; - r->id.tcpdiag_src[0] = inet->rcv_saddr; - r->id.tcpdiag_dst[0] = inet->daddr; + r->id.idiag_sport = inet->sport; + r->id.idiag_dport = inet->dport; + r->id.idiag_src[0] = inet->rcv_saddr; + r->id.idiag_dst[0] = inet->daddr; #if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE) - if (r->tcpdiag_family == AF_INET6) { + if (r->idiag_family == AF_INET6) { struct ipv6_pinfo *np = inet6_sk(sk); - ipv6_addr_copy((struct in6_addr *)r->id.tcpdiag_src, + ipv6_addr_copy((struct in6_addr *)r->id.idiag_src, &np->rcv_saddr); - ipv6_addr_copy((struct in6_addr *)r->id.tcpdiag_dst, + ipv6_addr_copy((struct in6_addr *)r->id.idiag_dst, &np->daddr); } #endif @@ -147,31 +143,31 @@ static int tcpdiag_fill(struct sk_buff *skb, struct sock *sk, #define EXPIRES_IN_MS(tmo) ((tmo-jiffies)*1000+HZ-1)/HZ if (icsk->icsk_pending == ICSK_TIME_RETRANS) { - r->tcpdiag_timer = 1; - r->tcpdiag_retrans = icsk->icsk_retransmits; - r->tcpdiag_expires = EXPIRES_IN_MS(icsk->icsk_timeout); 
+ r->idiag_timer = 1; + r->idiag_retrans = icsk->icsk_retransmits; + r->idiag_expires = EXPIRES_IN_MS(icsk->icsk_timeout); } else if (icsk->icsk_pending == ICSK_TIME_PROBE0) { - r->tcpdiag_timer = 4; - r->tcpdiag_retrans = icsk->icsk_probes_out; - r->tcpdiag_expires = EXPIRES_IN_MS(icsk->icsk_timeout); + r->idiag_timer = 4; + r->idiag_retrans = icsk->icsk_probes_out; + r->idiag_expires = EXPIRES_IN_MS(icsk->icsk_timeout); } else if (timer_pending(&sk->sk_timer)) { - r->tcpdiag_timer = 2; - r->tcpdiag_retrans = icsk->icsk_probes_out; - r->tcpdiag_expires = EXPIRES_IN_MS(sk->sk_timer.expires); + r->idiag_timer = 2; + r->idiag_retrans = icsk->icsk_probes_out; + r->idiag_expires = EXPIRES_IN_MS(sk->sk_timer.expires); } else { - r->tcpdiag_timer = 0; - r->tcpdiag_expires = 0; + r->idiag_timer = 0; + r->idiag_expires = 0; } #undef EXPIRES_IN_MS - r->tcpdiag_uid = sock_i_uid(sk); - r->tcpdiag_inode = sock_i_ino(sk); + r->idiag_uid = sock_i_uid(sk); + r->idiag_inode = sock_i_ino(sk); if (minfo) { - minfo->tcpdiag_rmem = atomic_read(&sk->sk_rmem_alloc); - minfo->tcpdiag_wmem = sk->sk_wmem_queued; - minfo->tcpdiag_fmem = sk->sk_forward_alloc; - minfo->tcpdiag_tmem = atomic_read(&sk->sk_wmem_alloc); + minfo->idiag_rmem = atomic_read(&sk->sk_rmem_alloc); + minfo->idiag_wmem = sk->sk_wmem_queued; + minfo->idiag_fmem = sk->sk_forward_alloc; + minfo->idiag_tmem = atomic_read(&sk->sk_wmem_alloc); } handler->idiag_get_info(sk, r, info); @@ -189,11 +185,11 @@ nlmsg_failure: return -1; } -static int tcpdiag_get_exact(struct sk_buff *in_skb, const struct nlmsghdr *nlh) +static int inet_diag_get_exact(struct sk_buff *in_skb, const struct nlmsghdr *nlh) { int err; struct sock *sk; - struct tcpdiagreq *req = NLMSG_DATA(nlh); + struct inet_diag_req *req = NLMSG_DATA(nlh); struct sk_buff *rep; struct inet_hashinfo *hashinfo; const struct inet_diag_handler *handler; @@ -202,19 +198,19 @@ static int tcpdiag_get_exact(struct sk_buff *in_skb, const struct nlmsghdr *nlh) BUG_ON(handler == 
NULL); hashinfo = handler->idiag_hashinfo; - if (req->tcpdiag_family == AF_INET) { - sk = inet_lookup(hashinfo, req->id.tcpdiag_dst[0], - req->id.tcpdiag_dport, req->id.tcpdiag_src[0], - req->id.tcpdiag_sport, req->id.tcpdiag_if); + if (req->idiag_family == AF_INET) { + sk = inet_lookup(hashinfo, req->id.idiag_dst[0], + req->id.idiag_dport, req->id.idiag_src[0], + req->id.idiag_sport, req->id.idiag_if); } #if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE) - else if (req->tcpdiag_family == AF_INET6) { + else if (req->idiag_family == AF_INET6) { sk = inet6_lookup(hashinfo, - (struct in6_addr*)req->id.tcpdiag_dst, - req->id.tcpdiag_dport, - (struct in6_addr*)req->id.tcpdiag_src, - req->id.tcpdiag_sport, - req->id.tcpdiag_if); + (struct in6_addr *)req->id.idiag_dst, + req->id.idiag_dport, + (struct in6_addr *)req->id.idiag_src, + req->id.idiag_sport, + req->id.idiag_if); } #endif else { @@ -225,26 +221,27 @@ static int tcpdiag_get_exact(struct sk_buff *in_skb, const struct nlmsghdr *nlh) return -ENOENT; err = -ESTALE; - if ((req->id.tcpdiag_cookie[0] != TCPDIAG_NOCOOKIE || - req->id.tcpdiag_cookie[1] != TCPDIAG_NOCOOKIE) && - ((u32)(unsigned long)sk != req->id.tcpdiag_cookie[0] || - (u32)((((unsigned long)sk) >> 31) >> 1) != req->id.tcpdiag_cookie[1])) + if ((req->id.idiag_cookie[0] != INET_DIAG_NOCOOKIE || + req->id.idiag_cookie[1] != INET_DIAG_NOCOOKIE) && + ((u32)(unsigned long)sk != req->id.idiag_cookie[0] || + (u32)((((unsigned long)sk) >> 31) >> 1) != req->id.idiag_cookie[1])) goto out; err = -ENOMEM; - rep = alloc_skb(NLMSG_SPACE((sizeof(struct tcpdiagmsg) + - sizeof(struct tcpdiag_meminfo) + + rep = alloc_skb(NLMSG_SPACE((sizeof(struct inet_diag_msg) + + sizeof(struct inet_diag_meminfo) + handler->idiag_info_size + 64)), GFP_KERNEL); if (!rep) goto out; - if (tcpdiag_fill(rep, sk, req->tcpdiag_ext, + if (inet_diag_fill(rep, sk, req->idiag_ext, NETLINK_CB(in_skb).pid, nlh->nlmsg_seq, 0, nlh) <= 0) BUG(); - err = netlink_unicast(tcpnl, rep, 
NETLINK_CB(in_skb).pid, MSG_DONTWAIT); + err = netlink_unicast(idiagnl, rep, NETLINK_CB(in_skb).pid, + MSG_DONTWAIT); if (err > 0) err = 0; @@ -285,42 +282,42 @@ static int bitstring_match(const u32 *a1, const u32 *a2, int bits) } -static int tcpdiag_bc_run(const void *bc, int len, - const struct tcpdiag_entry *entry) +static int inet_diag_bc_run(const void *bc, int len, + const struct inet_diag_entry *entry) { while (len > 0) { int yes = 1; - const struct tcpdiag_bc_op *op = bc; + const struct inet_diag_bc_op *op = bc; switch (op->code) { - case TCPDIAG_BC_NOP: + case INET_DIAG_BC_NOP: break; - case TCPDIAG_BC_JMP: + case INET_DIAG_BC_JMP: yes = 0; break; - case TCPDIAG_BC_S_GE: + case INET_DIAG_BC_S_GE: yes = entry->sport >= op[1].no; break; - case TCPDIAG_BC_S_LE: + case INET_DIAG_BC_S_LE: yes = entry->dport <= op[1].no; break; - case TCPDIAG_BC_D_GE: + case INET_DIAG_BC_D_GE: yes = entry->dport >= op[1].no; break; - case TCPDIAG_BC_D_LE: + case INET_DIAG_BC_D_LE: yes = entry->dport <= op[1].no; break; - case TCPDIAG_BC_AUTO: + case INET_DIAG_BC_AUTO: yes = !(entry->userlocks & SOCK_BINDPORT_LOCK); break; - case TCPDIAG_BC_S_COND: - case TCPDIAG_BC_D_COND: + case INET_DIAG_BC_S_COND: + case INET_DIAG_BC_D_COND: { - struct tcpdiag_hostcond *cond = (struct tcpdiag_hostcond*)(op+1); + struct inet_diag_hostcond *cond = (struct inet_diag_hostcond*)(op+1); u32 *addr; if (cond->port != -1 && - cond->port != (op->code == TCPDIAG_BC_S_COND ? + cond->port != (op->code == INET_DIAG_BC_S_COND ? 
entry->sport : entry->dport)) { yes = 0; break; @@ -329,7 +326,7 @@ static int tcpdiag_bc_run(const void *bc, int len, if (cond->prefix_len == 0) break; - if (op->code == TCPDIAG_BC_S_COND) + if (op->code == INET_DIAG_BC_S_COND) addr = entry->saddr; else addr = entry->daddr; @@ -362,7 +359,7 @@ static int tcpdiag_bc_run(const void *bc, int len, static int valid_cc(const void *bc, int len, int cc) { while (len >= 0) { - const struct tcpdiag_bc_op *op = bc; + const struct inet_diag_bc_op *op = bc; if (cc > len) return 0; @@ -376,33 +373,33 @@ static int valid_cc(const void *bc, int len, int cc) return 0; } -static int tcpdiag_bc_audit(const void *bytecode, int bytecode_len) +static int inet_diag_bc_audit(const void *bytecode, int bytecode_len) { const unsigned char *bc = bytecode; int len = bytecode_len; while (len > 0) { - struct tcpdiag_bc_op *op = (struct tcpdiag_bc_op*)bc; + struct inet_diag_bc_op *op = (struct inet_diag_bc_op*)bc; //printk("BC: %d %d %d {%d} / %d\n", op->code, op->yes, op->no, op[1].no, len); switch (op->code) { - case TCPDIAG_BC_AUTO: - case TCPDIAG_BC_S_COND: - case TCPDIAG_BC_D_COND: - case TCPDIAG_BC_S_GE: - case TCPDIAG_BC_S_LE: - case TCPDIAG_BC_D_GE: - case TCPDIAG_BC_D_LE: + case INET_DIAG_BC_AUTO: + case INET_DIAG_BC_S_COND: + case INET_DIAG_BC_D_COND: + case INET_DIAG_BC_S_GE: + case INET_DIAG_BC_S_LE: + case INET_DIAG_BC_D_GE: + case INET_DIAG_BC_D_LE: if (op->yes < 4 || op->yes > len+4) return -EINVAL; - case TCPDIAG_BC_JMP: + case INET_DIAG_BC_JMP: if (op->no < 4 || op->no > len+4) return -EINVAL; if (op->no < len && !valid_cc(bytecode, bytecode_len, len-op->no)) return -EINVAL; break; - case TCPDIAG_BC_NOP: + case INET_DIAG_BC_NOP: if (op->yes < 4 || op->yes > len+4) return -EINVAL; break; @@ -415,13 +412,13 @@ static int tcpdiag_bc_audit(const void *bytecode, int bytecode_len) return len == 0 ? 
0 : -EINVAL; } -static int tcpdiag_dump_sock(struct sk_buff *skb, struct sock *sk, +static int inet_diag_dump_sock(struct sk_buff *skb, struct sock *sk, struct netlink_callback *cb) { - struct tcpdiagreq *r = NLMSG_DATA(cb->nlh); + struct inet_diag_req *r = NLMSG_DATA(cb->nlh); if (cb->nlh->nlmsg_len > 4 + NLMSG_SPACE(sizeof(*r))) { - struct tcpdiag_entry entry; + struct inet_diag_entry entry; struct rtattr *bc = (struct rtattr *)(r + 1); struct inet_sock *inet = inet_sk(sk); @@ -442,15 +439,15 @@ static int tcpdiag_dump_sock(struct sk_buff *skb, struct sock *sk, entry.dport = ntohs(inet->dport); entry.userlocks = sk->sk_userlocks; - if (!tcpdiag_bc_run(RTA_DATA(bc), RTA_PAYLOAD(bc), &entry)) + if (!inet_diag_bc_run(RTA_DATA(bc), RTA_PAYLOAD(bc), &entry)) return 0; } - return tcpdiag_fill(skb, sk, r->tcpdiag_ext, NETLINK_CB(cb->skb).pid, + return inet_diag_fill(skb, sk, r->idiag_ext, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq, NLM_F_MULTI, cb->nlh); } -static int tcpdiag_fill_req(struct sk_buff *skb, struct sock *sk, +static int inet_diag_fill_req(struct sk_buff *skb, struct sock *sk, struct request_sock *req, u32 pid, u32 seq, const struct nlmsghdr *unlh) @@ -458,7 +455,7 @@ static int tcpdiag_fill_req(struct sk_buff *skb, struct sock *sk, const struct inet_request_sock *ireq = inet_rsk(req); struct inet_sock *inet = inet_sk(sk); unsigned char *b = skb->tail; - struct tcpdiagmsg *r; + struct inet_diag_msg *r; struct nlmsghdr *nlh; long tmo; @@ -466,33 +463,33 @@ static int tcpdiag_fill_req(struct sk_buff *skb, struct sock *sk, nlh->nlmsg_flags = NLM_F_MULTI; r = NLMSG_DATA(nlh); - r->tcpdiag_family = sk->sk_family; - r->tcpdiag_state = TCP_SYN_RECV; - r->tcpdiag_timer = 1; - r->tcpdiag_retrans = req->retrans; + r->idiag_family = sk->sk_family; + r->idiag_state = TCP_SYN_RECV; + r->idiag_timer = 1; + r->idiag_retrans = req->retrans; - r->id.tcpdiag_if = sk->sk_bound_dev_if; - r->id.tcpdiag_cookie[0] = (u32)(unsigned long)req; - r->id.tcpdiag_cookie[1] = 
(u32)(((unsigned long)req >> 31) >> 1); + r->id.idiag_if = sk->sk_bound_dev_if; + r->id.idiag_cookie[0] = (u32)(unsigned long)req; + r->id.idiag_cookie[1] = (u32)(((unsigned long)req >> 31) >> 1); tmo = req->expires - jiffies; if (tmo < 0) tmo = 0; - r->id.tcpdiag_sport = inet->sport; - r->id.tcpdiag_dport = ireq->rmt_port; - r->id.tcpdiag_src[0] = ireq->loc_addr; - r->id.tcpdiag_dst[0] = ireq->rmt_addr; - r->tcpdiag_expires = jiffies_to_msecs(tmo), - r->tcpdiag_rqueue = 0; - r->tcpdiag_wqueue = 0; - r->tcpdiag_uid = sock_i_uid(sk); - r->tcpdiag_inode = 0; + r->id.idiag_sport = inet->sport; + r->id.idiag_dport = ireq->rmt_port; + r->id.idiag_src[0] = ireq->loc_addr; + r->id.idiag_dst[0] = ireq->rmt_addr; + r->idiag_expires = jiffies_to_msecs(tmo); + r->idiag_rqueue = 0; + r->idiag_wqueue = 0; + r->idiag_uid = sock_i_uid(sk); + r->idiag_inode = 0; #if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE) - if (r->tcpdiag_family == AF_INET6) { - ipv6_addr_copy((struct in6_addr *)r->id.tcpdiag_src, + if (r->idiag_family == AF_INET6) { + ipv6_addr_copy((struct in6_addr *)r->id.idiag_src, &tcp6_rsk(req)->loc_addr); - ipv6_addr_copy((struct in6_addr *)r->id.tcpdiag_dst, + ipv6_addr_copy((struct in6_addr *)r->id.idiag_dst, &tcp6_rsk(req)->rmt_addr); } #endif @@ -505,11 +502,11 @@ nlmsg_failure: return -1; } -static int tcpdiag_dump_reqs(struct sk_buff *skb, struct sock *sk, +static int inet_diag_dump_reqs(struct sk_buff *skb, struct sock *sk, struct netlink_callback *cb) { - struct tcpdiag_entry entry; - struct tcpdiagreq *r = NLMSG_DATA(cb->nlh); + struct inet_diag_entry entry; + struct inet_diag_req *r = NLMSG_DATA(cb->nlh); struct inet_connection_sock *icsk = inet_csk(sk); struct listen_sock *lopt; struct rtattr *bc = NULL; @@ -547,8 +544,8 @@ static int tcpdiag_dump_reqs(struct sk_buff *skb, struct sock *sk, if (reqnum < s_reqnum) continue; - if (r->id.tcpdiag_dport != ireq->rmt_port && - r->id.tcpdiag_dport) + if (r->id.idiag_dport != ireq->rmt_port && + 
r->id.idiag_dport) continue; if (bc) { @@ -566,12 +563,12 @@ static int tcpdiag_dump_reqs(struct sk_buff *skb, struct sock *sk, &ireq->rmt_addr; entry.dport = ntohs(ireq->rmt_port); - if (!tcpdiag_bc_run(RTA_DATA(bc), + if (!inet_diag_bc_run(RTA_DATA(bc), RTA_PAYLOAD(bc), &entry)) continue; } - err = tcpdiag_fill_req(skb, sk, req, + err = inet_diag_fill_req(skb, sk, req, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq, cb->nlh); if (err < 0) { @@ -590,11 +587,11 @@ out: return err; } -static int tcpdiag_dump(struct sk_buff *skb, struct netlink_callback *cb) +static int inet_diag_dump(struct sk_buff *skb, struct netlink_callback *cb) { int i, num; int s_i, s_num; - struct tcpdiagreq *r = NLMSG_DATA(cb->nlh); + struct inet_diag_req *r = NLMSG_DATA(cb->nlh); const struct inet_diag_handler *handler; struct inet_hashinfo *hashinfo; @@ -606,7 +603,7 @@ static int tcpdiag_dump(struct sk_buff *skb, struct netlink_callback *cb) s_num = num = cb->args[2]; if (cb->args[0] == 0) { - if (!(r->tcpdiag_states&(TCPF_LISTEN|TCPF_SYN_RECV))) + if (!(r->idiag_states & (TCPF_LISTEN | TCPF_SYN_RECV))) goto skip_listen_ht; inet_listen_lock(hashinfo); @@ -623,25 +620,25 @@ static int tcpdiag_dump(struct sk_buff *skb, struct netlink_callback *cb) continue; } - if (r->id.tcpdiag_sport != inet->sport && - r->id.tcpdiag_sport) + if (r->id.idiag_sport != inet->sport && + r->id.idiag_sport) goto next_listen; - if (!(r->tcpdiag_states&TCPF_LISTEN) || - r->id.tcpdiag_dport || + if (!(r->idiag_states & TCPF_LISTEN) || + r->id.idiag_dport || cb->args[3] > 0) goto syn_recv; - if (tcpdiag_dump_sock(skb, sk, cb) < 0) { + if (inet_diag_dump_sock(skb, sk, cb) < 0) { inet_listen_unlock(hashinfo); goto done; } syn_recv: - if (!(r->tcpdiag_states&TCPF_SYN_RECV)) + if (!(r->idiag_states & TCPF_SYN_RECV)) goto next_listen; - if (tcpdiag_dump_reqs(skb, sk, cb) < 0) { + if (inet_diag_dump_reqs(skb, sk, cb) < 0) { inet_listen_unlock(hashinfo); goto done; } @@ -662,7 +659,7 @@ skip_listen_ht: s_i = num = s_num = 0; 
} - if (!(r->tcpdiag_states&~(TCPF_LISTEN|TCPF_SYN_RECV))) + if (!(r->idiag_states & ~(TCPF_LISTEN | TCPF_SYN_RECV))) return skb->len; for (i = s_i; i < hashinfo->ehash_size; i++) { @@ -681,14 +678,14 @@ skip_listen_ht: if (num < s_num) goto next_normal; - if (!(r->tcpdiag_states & (1 << sk->sk_state))) + if (!(r->idiag_states & (1 << sk->sk_state))) goto next_normal; - if (r->id.tcpdiag_sport != inet->sport && - r->id.tcpdiag_sport) + if (r->id.idiag_sport != inet->sport && + r->id.idiag_sport) goto next_normal; - if (r->id.tcpdiag_dport != inet->dport && r->id.tcpdiag_dport) + if (r->id.idiag_dport != inet->dport && r->id.idiag_dport) goto next_normal; - if (tcpdiag_dump_sock(skb, sk, cb) < 0) { + if (inet_diag_dump_sock(skb, sk, cb) < 0) { read_unlock_bh(&head->lock); goto done; } @@ -696,20 +693,20 @@ next_normal: ++num; } - if (r->tcpdiag_states&TCPF_TIME_WAIT) { + if (r->idiag_states & TCPF_TIME_WAIT) { sk_for_each(sk, node, &hashinfo->ehash[i + hashinfo->ehash_size].chain) { struct inet_sock *inet = inet_sk(sk); if (num < s_num) goto next_dying; - if (r->id.tcpdiag_sport != inet->sport && - r->id.tcpdiag_sport) + if (r->id.idiag_sport != inet->sport && + r->id.idiag_sport) goto next_dying; - if (r->id.tcpdiag_dport != inet->dport && - r->id.tcpdiag_dport) + if (r->id.idiag_dport != inet->dport && + r->id.idiag_dport) goto next_dying; - if (tcpdiag_dump_sock(skb, sk, cb) < 0) { + if (inet_diag_dump_sock(skb, sk, cb) < 0) { read_unlock_bh(&head->lock); goto done; } @@ -726,14 +723,14 @@ done: return skb->len; } -static int tcpdiag_dump_done(struct netlink_callback *cb) +static int inet_diag_dump_done(struct netlink_callback *cb) { return 0; } static __inline__ int -tcpdiag_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) +inet_diag_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) { if (!(nlh->nlmsg_flags&NLM_F_REQUEST)) return 0; @@ -744,24 +741,28 @@ tcpdiag_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) if (inet_diag_table[nlh->nlmsg_type] == NULL) 
return -ENOENT; - if (NLMSG_LENGTH(sizeof(struct tcpdiagreq)) > skb->len) + if (NLMSG_LENGTH(sizeof(struct inet_diag_req)) > skb->len) goto err_inval; if (nlh->nlmsg_flags&NLM_F_DUMP) { - if (nlh->nlmsg_len > 4 + NLMSG_SPACE(sizeof(struct tcpdiagreq))) { - struct rtattr *rta = (struct rtattr*)(NLMSG_DATA(nlh) + sizeof(struct tcpdiagreq)); - if (rta->rta_type != TCPDIAG_REQ_BYTECODE || + if (nlh->nlmsg_len > + (4 + NLMSG_SPACE(sizeof(struct inet_diag_req)))) { + struct rtattr *rta = (void *)(NLMSG_DATA(nlh) + + sizeof(struct inet_diag_req)); + if (rta->rta_type != INET_DIAG_REQ_BYTECODE || rta->rta_len < 8 || - rta->rta_len > nlh->nlmsg_len - NLMSG_SPACE(sizeof(struct tcpdiagreq))) + rta->rta_len > + (nlh->nlmsg_len - + NLMSG_SPACE(sizeof(struct inet_diag_req)))) goto err_inval; - if (tcpdiag_bc_audit(RTA_DATA(rta), RTA_PAYLOAD(rta))) + if (inet_diag_bc_audit(RTA_DATA(rta), RTA_PAYLOAD(rta))) goto err_inval; } - return netlink_dump_start(tcpnl, skb, nlh, - tcpdiag_dump, - tcpdiag_dump_done); + return netlink_dump_start(idiagnl, skb, nlh, + inet_diag_dump, + inet_diag_dump_done); } else { - return tcpdiag_get_exact(skb, nlh); + return inet_diag_get_exact(skb, nlh); } err_inval: @@ -769,7 +770,7 @@ err_inval: } -static inline void tcpdiag_rcv_skb(struct sk_buff *skb) +static inline void inet_diag_rcv_skb(struct sk_buff *skb) { int err; struct nlmsghdr * nlh; @@ -778,31 +779,31 @@ static inline void tcpdiag_rcv_skb(struct sk_buff *skb) nlh = (struct nlmsghdr *)skb->data; if (nlh->nlmsg_len < sizeof(*nlh) || skb->len < nlh->nlmsg_len) return; - err = tcpdiag_rcv_msg(skb, nlh); + err = inet_diag_rcv_msg(skb, nlh); if (err || nlh->nlmsg_flags & NLM_F_ACK) netlink_ack(skb, nlh, err); } } -static void tcpdiag_rcv(struct sock *sk, int len) +static void inet_diag_rcv(struct sock *sk, int len) { struct sk_buff *skb; unsigned int qlen = skb_queue_len(&sk->sk_receive_queue); while (qlen-- && (skb = skb_dequeue(&sk->sk_receive_queue))) { - tcpdiag_rcv_skb(skb); + 
inet_diag_rcv_skb(skb); kfree_skb(skb); } } -static void tcp_diag_get_info(struct sock *sk, struct tcpdiagmsg *r, +static void tcp_diag_get_info(struct sock *sk, struct inet_diag_msg *r, void *_info) { const struct tcp_sock *tp = tcp_sk(sk); struct tcp_info *info = _info; - r->tcpdiag_rqueue = tp->rcv_nxt - tp->copied_seq; - r->tcpdiag_wqueue = tp->write_seq - tp->snd_una; + r->idiag_rqueue = tp->rcv_nxt - tp->copied_seq; + r->idiag_wqueue = tp->write_seq - tp->snd_una; if (info != NULL) tcp_get_info(sk, info); } @@ -851,7 +852,7 @@ void inet_diag_unregister(const struct inet_diag_handler *h) } EXPORT_SYMBOL_GPL(inet_diag_unregister); -static int __init tcpdiag_init(void) +static int __init inet_diag_init(void) { const int inet_diag_table_size = (INET_DIAG_GETSOCK_MAX * sizeof(struct inet_diag_handler *)); @@ -863,9 +864,9 @@ static int __init tcpdiag_init(void) memset(inet_diag_table, 0, inet_diag_table_size); - tcpnl = netlink_kernel_create(NETLINK_TCPDIAG, tcpdiag_rcv, - THIS_MODULE); - if (tcpnl == NULL) + idiagnl = netlink_kernel_create(NETLINK_INET_DIAG, inet_diag_rcv, + THIS_MODULE); + if (idiagnl == NULL) goto out_free_table; err = inet_diag_register(&tcp_diag_handler); @@ -874,18 +875,18 @@ static int __init tcpdiag_init(void) out: return err; out_sock_release: - sock_release(tcpnl->sk_socket); + sock_release(idiagnl->sk_socket); out_free_table: kfree(inet_diag_table); goto out; } -static void __exit tcpdiag_exit(void) +static void __exit inet_diag_exit(void) { - sock_release(tcpnl->sk_socket); + sock_release(idiagnl->sk_socket); kfree(inet_diag_table); } -module_init(tcpdiag_init); -module_exit(tcpdiag_exit); +module_init(inet_diag_init); +module_exit(inet_diag_exit); MODULE_LICENSE("GPL"); diff --git a/net/ipv4/tcp_vegas.c b/net/ipv4/tcp_vegas.c index 054de24efee2..8cef9dc11fb7 100644 --- a/net/ipv4/tcp_vegas.c +++ b/net/ipv4/tcp_vegas.c @@ -365,10 +365,10 @@ static void tcp_vegas_get_info(struct sock *sk, u32 ext, struct sk_buff *skb) { const struct 
vegas *ca = inet_csk_ca(sk); - if (ext & (1<<(TCPDIAG_VEGASINFO-1))) { + if (ext & (1 << (INET_DIAG_VEGASINFO - 1))) { struct tcpvegas_info *info; - info = RTA_DATA(__RTA_PUT(skb, TCPDIAG_VEGASINFO, + info = RTA_DATA(__RTA_PUT(skb, INET_DIAG_VEGASINFO, sizeof(*info))); info->tcpv_enabled = ca->doing_vegas_now; diff --git a/net/ipv4/tcp_westwood.c b/net/ipv4/tcp_westwood.c index d8a5a2b92e37..395100317875 100644 --- a/net/ipv4/tcp_westwood.c +++ b/net/ipv4/tcp_westwood.c @@ -216,11 +216,11 @@ static void tcp_westwood_info(struct sock *sk, u32 ext, struct sk_buff *skb) { const struct westwood *ca = inet_csk_ca(sk); - if (ext & (1<<(TCPDIAG_VEGASINFO-1))) { + if (ext & (1 << (INET_DIAG_VEGASINFO - 1))) { struct rtattr *rta; struct tcpvegas_info *info; - rta = __RTA_PUT(skb, TCPDIAG_VEGASINFO, sizeof(*info)); + rta = __RTA_PUT(skb, INET_DIAG_VEGASINFO, sizeof(*info)); info = RTA_DATA(rta); info->tcpv_enabled = 1; info->tcpv_rttcnt = 0; From a8c2190ee7da1a1dc68ff1a6b5f03feb61e523a5 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 12 Aug 2005 12:56:38 -0300 Subject: [PATCH 386/584] [INET_DIAG]: Rename tcp_diag.[ch] to inet_diag.[ch] Next changeset will introduce net/ipv4/tcp_diag.c, moving the code that was put transitionally in inet_diag.c. Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S.
Miller --- include/linux/{tcp_diag.h => inet_diag.h} | 0 net/dccp/diag.c | 2 +- net/ipv4/Makefile | 2 +- net/ipv4/{tcp_diag.c => inet_diag.c} | 23 ++++++++++++----------- net/ipv4/tcp_vegas.c | 2 +- net/ipv4/tcp_westwood.c | 2 +- 6 files changed, 16 insertions(+), 15 deletions(-) rename include/linux/{tcp_diag.h => inet_diag.h} (100%) rename net/ipv4/{tcp_diag.c => inet_diag.c} (97%) diff --git a/include/linux/tcp_diag.h b/include/linux/inet_diag.h similarity index 100% rename from include/linux/tcp_diag.h rename to include/linux/inet_diag.h diff --git a/net/dccp/diag.c b/net/dccp/diag.c index 9f07eff2e3b6..0b10c176c35a 100644 --- a/net/dccp/diag.c +++ b/net/dccp/diag.c @@ -12,7 +12,7 @@ #include #include -#include +#include #include "dccp.h" diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile index 9b1c894039a9..fe5accbb56bf 100644 --- a/net/ipv4/Makefile +++ b/net/ipv4/Makefile @@ -30,7 +30,7 @@ obj-$(CONFIG_IP_ROUTE_MULTIPATH_WRANDOM) += multipath_wrandom.o obj-$(CONFIG_IP_ROUTE_MULTIPATH_DRR) += multipath_drr.o obj-$(CONFIG_NETFILTER) += netfilter/ obj-$(CONFIG_IP_VS) += ipvs/ -obj-$(CONFIG_IP_INET_DIAG) += tcp_diag.o +obj-$(CONFIG_IP_INET_DIAG) += inet_diag.o obj-$(CONFIG_IP_ROUTE_MULTIPATH_CACHED) += multipath.o obj-$(CONFIG_TCP_CONG_BIC) += tcp_bic.o obj-$(CONFIG_TCP_CONG_WESTWOOD) += tcp_westwood.o diff --git a/net/ipv4/tcp_diag.c b/net/ipv4/inet_diag.c similarity index 97% rename from net/ipv4/tcp_diag.c rename to net/ipv4/inet_diag.c index 24abe82e23a0..3bd510941da0 100644 --- a/net/ipv4/tcp_diag.c +++ b/net/ipv4/inet_diag.c @@ -32,7 +32,7 @@ #include #include -#include +#include static const struct inet_diag_handler **inet_diag_table; @@ -140,7 +140,7 @@ static int inet_diag_fill(struct sk_buff *skb, struct sock *sk, } #endif -#define EXPIRES_IN_MS(tmo) ((tmo-jiffies)*1000+HZ-1)/HZ +#define EXPIRES_IN_MS(tmo) ((tmo - jiffies) * 1000 + HZ - 1) / HZ if (icsk->icsk_pending == ICSK_TIME_RETRANS) { r->idiag_timer = 1; @@ -311,11 +311,11 @@ static int 
inet_diag_bc_run(const void *bc, int len, yes = !(entry->userlocks & SOCK_BINDPORT_LOCK); break; case INET_DIAG_BC_S_COND: - case INET_DIAG_BC_D_COND: - { - struct inet_diag_hostcond *cond = (struct inet_diag_hostcond*)(op+1); + case INET_DIAG_BC_D_COND: { + struct inet_diag_hostcond *cond; u32 *addr; + cond = (struct inet_diag_hostcond *)(op + 1); if (cond->port != -1 && cond->port != (op->code == INET_DIAG_BC_S_COND ? entry->sport : entry->dport)) { @@ -337,7 +337,8 @@ static int inet_diag_bc_run(const void *bc, int len, cond->family == AF_INET) { if (addr[0] == 0 && addr[1] == 0 && addr[2] == htonl(0xffff) && - bitstring_match(addr+3, cond->addr, cond->prefix_len)) + bitstring_match(addr + 3, cond->addr, + cond->prefix_len)) break; } yes = 0; @@ -379,7 +380,7 @@ static int inet_diag_bc_audit(const void *bytecode, int bytecode_len) int len = bytecode_len; while (len > 0) { - struct inet_diag_bc_op *op = (struct inet_diag_bc_op*)bc; + struct inet_diag_bc_op *op = (struct inet_diag_bc_op *)bc; //printk("BC: %d %d %d {%d} / %d\n", op->code, op->yes, op->no, op[1].no, len); switch (op->code) { @@ -390,17 +391,17 @@ static int inet_diag_bc_audit(const void *bytecode, int bytecode_len) case INET_DIAG_BC_S_LE: case INET_DIAG_BC_D_GE: case INET_DIAG_BC_D_LE: - if (op->yes < 4 || op->yes > len+4) + if (op->yes < 4 || op->yes > len + 4) return -EINVAL; case INET_DIAG_BC_JMP: - if (op->no < 4 || op->no > len+4) + if (op->no < 4 || op->no > len + 4) return -EINVAL; if (op->no < len && - !valid_cc(bytecode, bytecode_len, len-op->no)) + !valid_cc(bytecode, bytecode_len, len - op->no)) return -EINVAL; break; case INET_DIAG_BC_NOP: - if (op->yes < 4 || op->yes > len+4) + if (op->yes < 4 || op->yes > len + 4) return -EINVAL; break; default: diff --git a/net/ipv4/tcp_vegas.c b/net/ipv4/tcp_vegas.c index 8cef9dc11fb7..93c5f92070f9 100644 --- a/net/ipv4/tcp_vegas.c +++ b/net/ipv4/tcp_vegas.c @@ -35,7 +35,7 @@ #include #include #include -#include +#include #include diff --git 
a/net/ipv4/tcp_westwood.c b/net/ipv4/tcp_westwood.c index 395100317875..0c340c3756c2 100644 --- a/net/ipv4/tcp_westwood.c +++ b/net/ipv4/tcp_westwood.c @@ -8,7 +8,7 @@ #include #include #include -#include +#include #include /* TCP Westwood structure */ From 17b085eacef81a6286bd478f2ec75e04abb091cb Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 12 Aug 2005 12:59:17 -0300 Subject: [PATCH 387/584] [INET_DIAG]: Move the tcp_diag interface to the proper place With this the previous setup is back, i.e. tcp_diag can be built as a module, as dccp_diag and both share the infrastructure available in inet_diag. If one selects CONFIG_INET_DIAG as module CONFIG_INET_TCP_DIAG will also be built as a module, as will CONFIG_INET_DCCP_DIAG, if CONFIG_IP_DCCP was selected static or as a module, if CONFIG_INET_DIAG is y, being statically linked CONFIG_INET_TCP_DIAG will follow suit and CONFIG_INET_DCCP_DIAG will be built in the same manner as CONFIG_IP_DCCP. Now to aim at UDP, converting it to use inet_hashinfo, so that we can use iproute2 for UDP sockets as well. Ah, just to show an example of this new infrastructure working for DCCP :-) [root@qemu ~]# ./ss -dane State Recv-Q Send-Q Local Address:Port Peer Address:Port LISTEN 0 0 *:5001 *:* ino:942 sk:cfd503a0 ESTAB 0 0 127.0.0.1:5001 127.0.0.1:32770 ino:943 sk:cfd50a60 ESTAB 0 0 127.0.0.1:32770 127.0.0.1:5001 ino:947 sk:cfd50700 TIME-WAIT 0 0 127.0.0.1:32769 127.0.0.1:5001 timer:(timewait,3.430ms,0) ino:0 sk:cf209620 Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. 
Miller --- include/net/tcp.h | 2 +- net/dccp/Kconfig | 6 ++--- net/dccp/Makefile | 6 ++--- net/ipv4/Kconfig | 8 +++++-- net/ipv4/Makefile | 3 ++- net/ipv4/inet_diag.c | 27 +--------------------- net/ipv4/tcp_diag.c | 54 ++++++++++++++++++++++++++++++++++++++++++++ 7 files changed, 70 insertions(+), 36 deletions(-) create mode 100644 net/ipv4/tcp_diag.c diff --git a/include/net/tcp.h b/include/net/tcp.h index fef122782b44..d958260af23c 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -479,7 +479,7 @@ static inline void tcp_clear_xmit_timers(struct sock *sk) extern unsigned int tcp_sync_mss(struct sock *sk, u32 pmtu); extern unsigned int tcp_current_mss(struct sock *sk, int large); -/* tcp_diag.c */ +/* tcp.c */ extern void tcp_get_info(struct sock *, struct tcp_info *); /* Read 'sendfile()'-style from a TCP socket */ diff --git a/net/dccp/Kconfig b/net/dccp/Kconfig index efce4f346fd0..6760830c490f 100644 --- a/net/dccp/Kconfig +++ b/net/dccp/Kconfig @@ -19,9 +19,9 @@ config IP_DCCP If in doubt, say N. -config IP_DCCP_DIAG - depends on IP_DCCP && IP_INET_DIAG - def_tristate y if (IP_DCCP = y && IP_INET_DIAG = y) +config INET_DCCP_DIAG + depends on IP_DCCP && INET_DIAG + def_tristate y if (IP_DCCP = y && INET_DIAG = y) def_tristate m source "net/dccp/ccids/Kconfig" diff --git a/net/dccp/Makefile b/net/dccp/Makefile index 5741fffc436f..44a867f29184 100644 --- a/net/dccp/Makefile +++ b/net/dccp/Makefile @@ -3,8 +3,8 @@ obj-$(CONFIG_IP_DCCP) += dccp.o dccp-y := ccid.o input.o ipv4.o minisocks.o options.o output.o proto.o \ timer.o packet_history.o -obj-$(CONFIG_IP_DCCP_DIAG) += dccp_diag.o - -obj-y += ccids/ +obj-$(CONFIG_INET_DCCP_DIAG) += dccp_diag.o dccp_diag-y := diag.o + +obj-y += ccids/ diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig index 019e88d8f29e..e55136ae09f4 100644 --- a/net/ipv4/Kconfig +++ b/net/ipv4/Kconfig @@ -413,8 +413,8 @@ config INET_TUNNEL If unsure, say Y. 
-config IP_INET_DIAG - tristate "IP: INET socket monitoring interface" +config INET_DIAG + tristate "INET: socket monitoring interface" default y ---help--- Support for INET (TCP, DCCP, etc) socket monitoring interface used by @@ -423,6 +423,10 @@ config IP_INET_DIAG If unsure, say Y. +config INET_TCP_DIAG + depends on INET_DIAG + def_tristate INET_DIAG + config TCP_CONG_ADVANCED bool "TCP: advanced congestion control" ---help--- diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile index fe5accbb56bf..f0435d00db6b 100644 --- a/net/ipv4/Makefile +++ b/net/ipv4/Makefile @@ -30,8 +30,9 @@ obj-$(CONFIG_IP_ROUTE_MULTIPATH_WRANDOM) += multipath_wrandom.o obj-$(CONFIG_IP_ROUTE_MULTIPATH_DRR) += multipath_drr.o obj-$(CONFIG_NETFILTER) += netfilter/ obj-$(CONFIG_IP_VS) += ipvs/ -obj-$(CONFIG_IP_INET_DIAG) += inet_diag.o +obj-$(CONFIG_INET_DIAG) += inet_diag.o obj-$(CONFIG_IP_ROUTE_MULTIPATH_CACHED) += multipath.o +obj-$(CONFIG_INET_TCP_DIAG) += tcp_diag.o obj-$(CONFIG_TCP_CONG_BIC) += tcp_bic.o obj-$(CONFIG_TCP_CONG_WESTWOOD) += tcp_westwood.o obj-$(CONFIG_TCP_CONG_HSTCP) += tcp_highspeed.o diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c index 3bd510941da0..1880ad8575d8 100644 --- a/net/ipv4/inet_diag.c +++ b/net/ipv4/inet_diag.c @@ -797,25 +797,6 @@ static void inet_diag_rcv(struct sock *sk, int len) } } -static void tcp_diag_get_info(struct sock *sk, struct inet_diag_msg *r, - void *_info) -{ - const struct tcp_sock *tp = tcp_sk(sk); - struct tcp_info *info = _info; - - r->idiag_rqueue = tp->rcv_nxt - tp->copied_seq; - r->idiag_wqueue = tp->write_seq - tp->snd_una; - if (info != NULL) - tcp_get_info(sk, info); -} - -static struct inet_diag_handler tcp_diag_handler = { - .idiag_hashinfo = &tcp_hashinfo, - .idiag_get_info = tcp_diag_get_info, - .idiag_type = TCPDIAG_GETSOCK, - .idiag_info_size = sizeof(struct tcp_info), -}; - static DEFINE_SPINLOCK(inet_diag_register_lock); int inet_diag_register(const struct inet_diag_handler *h) @@ -864,19 +845,13 @@ static int 
__init inet_diag_init(void) goto out; memset(inet_diag_table, 0, inet_diag_table_size); - idiagnl = netlink_kernel_create(NETLINK_INET_DIAG, inet_diag_rcv, THIS_MODULE); if (idiagnl == NULL) goto out_free_table; - - err = inet_diag_register(&tcp_diag_handler); - if (err) - goto out_sock_release; + err = 0; out: return err; -out_sock_release: - sock_release(idiagnl->sk_socket); out_free_table: kfree(inet_diag_table); goto out; diff --git a/net/ipv4/tcp_diag.c b/net/ipv4/tcp_diag.c new file mode 100644 index 000000000000..c148c1081880 --- /dev/null +++ b/net/ipv4/tcp_diag.c @@ -0,0 +1,54 @@ +/* + * tcp_diag.c Module for monitoring TCP transport protocols sockets. + * + * Version: $Id: tcp_diag.c,v 1.3 2002/02/01 22:01:04 davem Exp $ + * + * Authors: Alexey Kuznetsov, + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
+ */ + +#include + +#include +#include + +#include + +#include + +static void tcp_diag_get_info(struct sock *sk, struct inet_diag_msg *r, + void *_info) +{ + const struct tcp_sock *tp = tcp_sk(sk); + struct tcp_info *info = _info; + + r->idiag_rqueue = tp->rcv_nxt - tp->copied_seq; + r->idiag_wqueue = tp->write_seq - tp->snd_una; + if (info != NULL) + tcp_get_info(sk, info); +} + +static struct inet_diag_handler tcp_diag_handler = { + .idiag_hashinfo = &tcp_hashinfo, + .idiag_get_info = tcp_diag_get_info, + .idiag_type = TCPDIAG_GETSOCK, + .idiag_info_size = sizeof(struct tcp_info), +}; + +static int __init tcp_diag_init(void) +{ + return inet_diag_register(&tcp_diag_handler); +} + +static void __exit tcp_diag_exit(void) +{ + inet_diag_unregister(&tcp_diag_handler); +} + +module_init(tcp_diag_init); +module_exit(tcp_diag_exit); +MODULE_LICENSE("GPL"); From 0ba2c6e8c0fb5cde5a23a213c2e7cb851b85c310 Mon Sep 17 00:00:00 2001 From: Harald Welte Date: Sat, 13 Aug 2005 13:55:44 -0700 Subject: [PATCH 388/584] [NETFILTER]: introduce and use aligned_u64 data type As proposed by Andi Kleen, this is required esp. for x86_64 architecture, where 64bit code needs 8byte aligned 64bit data types, but 32bit userspace apps will only align to 4bytes. Signed-off-by: Harald Welte Signed-off-by: David S. Miller --- include/linux/netfilter/nfnetlink_log.h | 5 +++-- include/linux/netfilter/nfnetlink_queue.h | 5 +++-- include/linux/types.h | 3 +++ 3 files changed, 9 insertions(+), 4 deletions(-) diff --git a/include/linux/netfilter/nfnetlink_log.h b/include/linux/netfilter/nfnetlink_log.h index a61836a083e7..b04b03880595 100644 --- a/include/linux/netfilter/nfnetlink_log.h +++ b/include/linux/netfilter/nfnetlink_log.h @@ -5,6 +5,7 @@ * and not any kind of function definitions. It is shared between kernel and * userspace. 
Don't put kernel specific stuff in here */ +#include #include enum nfulnl_msg_types { @@ -27,8 +28,8 @@ struct nfulnl_msg_packet_hw { } __attribute__ ((packed)); struct nfulnl_msg_packet_timestamp { - u_int64_t sec; - u_int64_t usec; + aligned_u64 sec; + aligned_u64 usec; } __attribute__ ((packed)); #define NFULNL_PREFIXLEN 30 /* just like old log target */ diff --git a/include/linux/netfilter/nfnetlink_queue.h b/include/linux/netfilter/nfnetlink_queue.h index 2d8d2b2cfcaa..9e774373244c 100644 --- a/include/linux/netfilter/nfnetlink_queue.h +++ b/include/linux/netfilter/nfnetlink_queue.h @@ -1,6 +1,7 @@ #ifndef _NFNETLINK_QUEUE_H #define _NFNETLINK_QUEUE_H +#include #include enum nfqnl_msg_types { @@ -24,8 +25,8 @@ struct nfqnl_msg_packet_hw { } __attribute__ ((packed)); struct nfqnl_msg_packet_timestamp { - u_int64_t sec; - u_int64_t usec; + aligned_u64 sec; + aligned_u64 usec; } __attribute__ ((packed)); enum nfqnl_attr_type { diff --git a/include/linux/types.h b/include/linux/types.h index dcb13f865df9..2b678c22ca4a 100644 --- a/include/linux/types.h +++ b/include/linux/types.h @@ -123,6 +123,9 @@ typedef __u64 u_int64_t; typedef __s64 int64_t; #endif +/* this is a special 64bit data type that is 8-byte aligned */ +#define aligned_u64 unsigned long long __attribute__((aligned(8))) + /* * The type used for indexing onto a disc or disc partition. * If required, asm/types.h can override it and define From 9d810fd2d28a9d672eca3136476af1a54a380bb2 Mon Sep 17 00:00:00 2001 From: Harald Welte Date: Sat, 13 Aug 2005 13:56:26 -0700 Subject: [PATCH 389/584] [NETFILTER]: Add new iptables "connbytes" match This patch adds a new "connbytes" match that utilizes the CONFIG_IP_NF_CT_ACCT per-connection byte and packet counters. Using it you can do things like packet classification on average packet size within a connection. Signed-off-by: Harald Welte Signed-off-by: David S.
Miller --- include/linux/netfilter_ipv4/ipt_connbytes.h | 25 +++ net/ipv4/netfilter/Kconfig | 11 +- net/ipv4/netfilter/Makefile | 1 + net/ipv4/netfilter/ipt_connbytes.c | 166 +++++++++++++++++++ 4 files changed, 202 insertions(+), 1 deletion(-) create mode 100644 include/linux/netfilter_ipv4/ipt_connbytes.h create mode 100644 net/ipv4/netfilter/ipt_connbytes.c diff --git a/include/linux/netfilter_ipv4/ipt_connbytes.h b/include/linux/netfilter_ipv4/ipt_connbytes.h new file mode 100644 index 000000000000..abaa65afd4e9 --- /dev/null +++ b/include/linux/netfilter_ipv4/ipt_connbytes.h @@ -0,0 +1,25 @@ +#ifndef _IPT_CONNBYTES_H +#define _IPT_CONNBYTES_H + +enum ipt_connbytes_what { + IPT_CONNBYTES_WHAT_PKTS, + IPT_CONNBYTES_WHAT_BYTES, + IPT_CONNBYTES_WHAT_AVGPKT, +}; + +enum ipt_connbytes_direction { + IPT_CONNBYTES_DIR_ORIGINAL, + IPT_CONNBYTES_DIR_REPLY, + IPT_CONNBYTES_DIR_BOTH, +}; + +struct ipt_connbytes_info +{ + struct { + aligned_u64 from; /* count to be matched */ + aligned_u64 to; /* count to be matched */ + } count; + u_int8_t what; /* ipt_connbytes_what */ + u_int8_t direction; /* ipt_connbytes_direction */ +}; +#endif diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig index 9f5e1d769b5f..3f7e6e49cbdd 100644 --- a/net/ipv4/netfilter/Kconfig +++ b/net/ipv4/netfilter/Kconfig @@ -386,6 +386,16 @@ config IP_NF_MATCH_CONNMARK . The module will be called ipt_connmark.o. If unsure, say `N'. +config IP_NF_MATCH_CONNBYTES + tristate 'Connection byte/packet counter match support' + depends on IP_NF_CT_ACCT && IP_NF_IPTABLES + help + This option adds a `connbytes' match, which allows you to match the + number of bytes and/or packets for each direction within a connection. + + If you want to compile it as a module, say M here and read + . If unsure, say `N'. 
+ +config IP_NF_MATCH_HASHLIMIT tristate 'hashlimit match support' depends on IP_NF_IPTABLES @@ -723,6 +733,5 @@ config IP_NF_CONNTRACK_NETLINK help This option enables support for a netlink-based userspace interface - endmenu diff --git a/net/ipv4/netfilter/Makefile b/net/ipv4/netfilter/Makefile index 58aa7c616e1f..7c8ae858aa43 100644 --- a/net/ipv4/netfilter/Makefile +++ b/net/ipv4/netfilter/Makefile @@ -59,6 +59,7 @@ obj-$(CONFIG_IP_NF_MATCH_TTL) += ipt_ttl.o obj-$(CONFIG_IP_NF_MATCH_STATE) += ipt_state.o obj-$(CONFIG_IP_NF_MATCH_CONNMARK) += ipt_connmark.o obj-$(CONFIG_IP_NF_MATCH_CONNTRACK) += ipt_conntrack.o +obj-$(CONFIG_IP_NF_MATCH_CONNBYTES) += ipt_connbytes.o obj-$(CONFIG_IP_NF_MATCH_TCPMSS) += ipt_tcpmss.o obj-$(CONFIG_IP_NF_MATCH_REALM) += ipt_realm.o obj-$(CONFIG_IP_NF_MATCH_ADDRTYPE) += ipt_addrtype.o diff --git a/net/ipv4/netfilter/ipt_connbytes.c b/net/ipv4/netfilter/ipt_connbytes.c new file mode 100644 index 000000000000..0dfb52c0e808 --- /dev/null +++ b/net/ipv4/netfilter/ipt_connbytes.c @@ -0,0 +1,166 @@ +/* Kernel module to match connection tracking byte counter. + * GPL (C) 2002 Martin Devera (devik@cdi.cz). + * + * 2004-07-20 Harald Welte + * - reimplemented to use per-connection accounting counters + * - add functionality to match number of packets + * - add functionality to match average packet size + * - add support to match directions separately + * + */ +#include +#include +#include +#include +#include + +#include +#include + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Harald Welte "); +MODULE_DESCRIPTION("iptables match for matching number of pkts/bytes per connection"); + +/* 64bit divisor, dividend and result. dynamic precision */ +static u_int64_t div64_64(u_int64_t divisor, u_int64_t dividend) +{ + u_int64_t result = divisor; + + if (dividend > 0xffffffff) { + int first_bit = find_first_bit((unsigned long *) &dividend, sizeof(dividend)); + /* calculate number of bits to shift. shift exactly enough + * bits to make dividend fit in 32bits.
*/ + int num_shift = (64 - 32 - first_bit); + /* first bit has to be < 32, since dividend was > 0xffffffff */ + result = result >> num_shift; + dividend = dividend >> num_shift; + } + + do_div(divisor, dividend); + + return divisor; +} + +static int +match(const struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + const void *matchinfo, + int offset, + int *hotdrop) +{ + const struct ipt_connbytes_info *sinfo = matchinfo; + enum ip_conntrack_info ctinfo; + struct ip_conntrack *ct; + u_int64_t what = 0; /* initialize to make gcc happy */ + + if (!(ct = ip_conntrack_get((struct sk_buff *)skb, &ctinfo))) + return 0; /* no match */ + + switch (sinfo->what) { + case IPT_CONNBYTES_WHAT_PKTS: + switch (sinfo->direction) { + case IPT_CONNBYTES_DIR_ORIGINAL: + what = ct->counters[IP_CT_DIR_ORIGINAL].packets; + break; + case IPT_CONNBYTES_DIR_REPLY: + what = ct->counters[IP_CT_DIR_REPLY].packets; + break; + case IPT_CONNBYTES_DIR_BOTH: + what = ct->counters[IP_CT_DIR_ORIGINAL].packets; + what += ct->counters[IP_CT_DIR_REPLY].packets; + break; + } + break; + case IPT_CONNBYTES_WHAT_BYTES: + switch (sinfo->direction) { + case IPT_CONNBYTES_DIR_ORIGINAL: + what = ct->counters[IP_CT_DIR_ORIGINAL].bytes; + break; + case IPT_CONNBYTES_DIR_REPLY: + what = ct->counters[IP_CT_DIR_REPLY].bytes; + break; + case IPT_CONNBYTES_DIR_BOTH: + what = ct->counters[IP_CT_DIR_ORIGINAL].bytes; + what += ct->counters[IP_CT_DIR_REPLY].bytes; + break; + } + break; + case IPT_CONNBYTES_WHAT_AVGPKT: + switch (sinfo->direction) { + case IPT_CONNBYTES_DIR_ORIGINAL: + what = div64_64(ct->counters[IP_CT_DIR_ORIGINAL].bytes, + ct->counters[IP_CT_DIR_ORIGINAL].packets); + break; + case IPT_CONNBYTES_DIR_REPLY: + what = div64_64(ct->counters[IP_CT_DIR_REPLY].bytes, + ct->counters[IP_CT_DIR_REPLY].packets); + break; + case IPT_CONNBYTES_DIR_BOTH: + { + u_int64_t bytes; + u_int64_t pkts; + bytes = ct->counters[IP_CT_DIR_ORIGINAL].bytes + + ct->counters[IP_CT_DIR_REPLY].bytes; + 
pkts = ct->counters[IP_CT_DIR_ORIGINAL].packets+ + ct->counters[IP_CT_DIR_REPLY].packets; + + /* FIXME_THEORETICAL: what to do if sum + * overflows ? */ + + what = div64_64(bytes, pkts); + } + break; + } + break; + } + + if (sinfo->count.to) + return (what <= sinfo->count.to && what >= sinfo->count.from); + else + return (what >= sinfo->count.from); +} + +static int check(const char *tablename, + const struct ipt_ip *ip, + void *matchinfo, + unsigned int matchsize, + unsigned int hook_mask) +{ + const struct ipt_connbytes_info *sinfo = matchinfo; + + if (matchsize != IPT_ALIGN(sizeof(struct ipt_connbytes_info))) + return 0; + + if (sinfo->what != IPT_CONNBYTES_WHAT_PKTS && + sinfo->what != IPT_CONNBYTES_WHAT_BYTES && + sinfo->what != IPT_CONNBYTES_WHAT_AVGPKT) + return 0; + + if (sinfo->direction != IPT_CONNBYTES_DIR_ORIGINAL && + sinfo->direction != IPT_CONNBYTES_DIR_REPLY && + sinfo->direction != IPT_CONNBYTES_DIR_BOTH) + return 0; + + return 1; +} + +static struct ipt_match state_match = { + .name = "connbytes", + .match = &match, + .checkentry = &check, + .me = THIS_MODULE +}; + +static int __init init(void) +{ + return ipt_register_match(&state_match); +} + +static void __exit fini(void) +{ + ipt_unregister_match(&state_match); +} + +module_init(init); +module_exit(fini); From 8ffde671730df0b392ca478643b88ef7153244c0 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Sat, 13 Aug 2005 13:57:58 -0700 Subject: [PATCH 390/584] [NETFILTER]: Fix div64_64 in ipt_connbytes Signed-off-by: Patrick McHardy Signed-off-by: David S. 
Miller --- net/ipv4/netfilter/ipt_connbytes.c | 22 +++++++++------------- 1 file changed, 9 insertions(+), 13 deletions(-) diff --git a/net/ipv4/netfilter/ipt_connbytes.c b/net/ipv4/netfilter/ipt_connbytes.c index 0dfb52c0e808..47128c073d85 100644 --- a/net/ipv4/netfilter/ipt_connbytes.c +++ b/net/ipv4/netfilter/ipt_connbytes.c @@ -22,23 +22,19 @@ MODULE_AUTHOR("Harald Welte "); MODULE_DESCRIPTION("iptables match for matching number of pkts/bytes per connection"); /* 64bit divisor, dividend and result. dynamic precision */ -static u_int64_t div64_64(u_int64_t divisor, u_int64_t dividend) +static u_int64_t div64_64(u_int64_t dividend, u_int64_t divisor) { - u_int64_t result = divisor; + u_int32_t d = divisor; - if (dividend > 0xffffffff) { - int first_bit = find_first_bit((unsigned long *) &dividend, sizeof(dividend)); - /* calculate number of bits to shift. shift exactly enough - * bits to make dividend fit in 32bits. */ - int num_shift = (64 - 32 - first_bit); - /* first bit has to be < 32, since dividend was > 0xffffffff */ - result = result >> num_shift; - dividend = dividend >> num_shift; + if (divisor > 0xffffffffULL) { + unsigned int shift = fls(divisor >> 32); + + d = divisor >> shift; + dividend >>= shift; } - do_div(divisor, dividend); - - return divisor; + do_div(dividend, d); + return dividend; } static int From 25ed891019b84498c83903ecf53df7ce35e9cff6 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Sat, 13 Aug 2005 13:58:21 -0700 Subject: [PATCH 391/584] [NETFILTER]: Nicer names for ipt_connbytes constants Signed-off-by: Patrick McHardy Signed-off-by: David S. 
Miller --- include/linux/netfilter_ipv4/ipt_connbytes.h | 6 +++--- net/ipv4/netfilter/ipt_connbytes.c | 12 ++++++------ 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/include/linux/netfilter_ipv4/ipt_connbytes.h b/include/linux/netfilter_ipv4/ipt_connbytes.h index abaa65afd4e9..9e5532f8d8ac 100644 --- a/include/linux/netfilter_ipv4/ipt_connbytes.h +++ b/include/linux/netfilter_ipv4/ipt_connbytes.h @@ -2,9 +2,9 @@ #define _IPT_CONNBYTES_H enum ipt_connbytes_what { - IPT_CONNBYTES_WHAT_PKTS, - IPT_CONNBYTES_WHAT_BYTES, - IPT_CONNBYTES_WHAT_AVGPKT, + IPT_CONNBYTES_PKTS, + IPT_CONNBYTES_BYTES, + IPT_CONNBYTES_AVGPKT, }; enum ipt_connbytes_direction { diff --git a/net/ipv4/netfilter/ipt_connbytes.c b/net/ipv4/netfilter/ipt_connbytes.c index 47128c073d85..df4a42c6da22 100644 --- a/net/ipv4/netfilter/ipt_connbytes.c +++ b/net/ipv4/netfilter/ipt_connbytes.c @@ -54,7 +54,7 @@ match(const struct sk_buff *skb, return 0; /* no match */ switch (sinfo->what) { - case IPT_CONNBYTES_WHAT_PKTS: + case IPT_CONNBYTES_PKTS: switch (sinfo->direction) { case IPT_CONNBYTES_DIR_ORIGINAL: what = ct->counters[IP_CT_DIR_ORIGINAL].packets; @@ -68,7 +68,7 @@ match(const struct sk_buff *skb, break; } break; - case IPT_CONNBYTES_WHAT_BYTES: + case IPT_CONNBYTES_BYTES: switch (sinfo->direction) { case IPT_CONNBYTES_DIR_ORIGINAL: what = ct->counters[IP_CT_DIR_ORIGINAL].bytes; @@ -82,7 +82,7 @@ match(const struct sk_buff *skb, break; } break; - case IPT_CONNBYTES_WHAT_AVGPKT: + case IPT_CONNBYTES_AVGPKT: switch (sinfo->direction) { case IPT_CONNBYTES_DIR_ORIGINAL: what = div64_64(ct->counters[IP_CT_DIR_ORIGINAL].bytes, @@ -128,9 +128,9 @@ static int check(const char *tablename, if (matchsize != IPT_ALIGN(sizeof(struct ipt_connbytes_info))) return 0; - if (sinfo->what != IPT_CONNBYTES_WHAT_PKTS && - sinfo->what != IPT_CONNBYTES_WHAT_BYTES && - sinfo->what != IPT_CONNBYTES_WHAT_AVGPKT) + if (sinfo->what != IPT_CONNBYTES_PKTS && + sinfo->what != IPT_CONNBYTES_BYTES && + sinfo->what != 
IPT_CONNBYTES_AVGPKT) return 0; if (sinfo->direction != IPT_CONNBYTES_DIR_ORIGINAL && From a61bbcf28a8cb0ba56f8193d512f7222e711a294 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Sun, 14 Aug 2005 17:24:31 -0700 Subject: [PATCH 392/584] [NET]: Store skb->timestamp as offset to a base timestamp Reduces skb size by 8 bytes on 64-bit. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- drivers/atm/ambassador.c | 2 +- drivers/atm/atmtcp.c | 2 +- drivers/atm/eni.c | 2 +- drivers/atm/firestream.c | 2 +- drivers/atm/fore200e.c | 2 +- drivers/atm/he.c | 2 +- drivers/atm/horizon.c | 2 +- drivers/atm/idt77252.c | 8 +++--- drivers/atm/lanai.c | 2 +- drivers/atm/nicstar.c | 10 +++---- drivers/atm/zatm.c | 2 +- include/linux/skbuff.h | 47 ++++++++++++++++++++++++++++++-- include/net/bluetooth/hci_core.h | 2 +- include/net/neighbour.h | 9 +++++- include/net/sock.h | 13 +++++---- net/bluetooth/hci_core.c | 2 +- net/bluetooth/hci_event.c | 2 +- net/bluetooth/hci_sock.c | 8 ++++-- net/bridge/netfilter/ebt_ulog.c | 2 +- net/core/dev.c | 28 +++++++++++++------ net/core/neighbour.c | 7 +++-- net/core/skbuff.c | 8 ++++-- net/econet/af_econet.c | 2 +- net/ipv4/arp.c | 4 ++- net/ipv4/ip_fragment.c | 4 +-- net/ipv4/netfilter/ip_queue.c | 4 +-- net/ipv4/netfilter/ipt_ULOG.c | 8 +++--- net/ipv4/tcp_input.c | 10 +++++-- net/ipv4/tcp_output.c | 4 +-- net/ipv6/ndisc.c | 4 ++- net/ipv6/netfilter/ip6_queue.c | 4 +-- net/ipv6/reassembly.c | 4 +-- net/ipx/af_ipx.c | 4 +-- net/netfilter/nfnetlink_log.c | 6 ++-- net/netfilter/nfnetlink_queue.c | 6 ++-- net/packet/af_packet.c | 8 +++--- net/sctp/input.c | 4 +-- net/sctp/sm_make_chunk.c | 9 +++--- net/sunrpc/svcsock.c | 11 +++++--- 39 files changed, 172 insertions(+), 88 deletions(-) diff --git a/drivers/atm/ambassador.c b/drivers/atm/ambassador.c index 73c6b85299c1..d74a7c5e75dd 100644 --- a/drivers/atm/ambassador.c +++ b/drivers/atm/ambassador.c @@ -513,7 +513,7 @@ static void rx_complete (amb_dev * dev, rx_out * rx) { // VC layer 
stats atomic_inc(&atm_vcc->stats->rx); - do_gettimeofday(&skb->stamp); + __net_timestamp(skb); // end of our responsability atm_vcc->push (atm_vcc, skb); return; diff --git a/drivers/atm/atmtcp.c b/drivers/atm/atmtcp.c index f2f01cb82cb4..57f1810fdccd 100644 --- a/drivers/atm/atmtcp.c +++ b/drivers/atm/atmtcp.c @@ -325,7 +325,7 @@ static int atmtcp_c_send(struct atm_vcc *vcc,struct sk_buff *skb) result = -ENOBUFS; goto done; } - do_gettimeofday(&new_skb->stamp); + __net_timestamp(new_skb); memcpy(skb_put(new_skb,skb->len),skb->data,skb->len); out_vcc->push(out_vcc,new_skb); atomic_inc(&vcc->stats->tx); diff --git a/drivers/atm/eni.c b/drivers/atm/eni.c index 10da36934769..c13c4d736ef5 100644 --- a/drivers/atm/eni.c +++ b/drivers/atm/eni.c @@ -537,7 +537,7 @@ static int rx_aal0(struct atm_vcc *vcc) return 0; } skb_put(skb,length); - skb->stamp = eni_vcc->timestamp; + skb_set_timestamp(skb, &eni_vcc->timestamp); DPRINTK("got len %ld\n",length); if (do_rx_dma(vcc,skb,1,length >> 2,length >> 2)) return 1; eni_vcc->rxing++; diff --git a/drivers/atm/firestream.c b/drivers/atm/firestream.c index b078fa548ebf..58219744f5db 100644 --- a/drivers/atm/firestream.c +++ b/drivers/atm/firestream.c @@ -815,7 +815,7 @@ static void process_incoming (struct fs_dev *dev, struct queue *q) skb_put (skb, qe->p1 & 0xffff); ATM_SKB(skb)->vcc = atm_vcc; atomic_inc(&atm_vcc->stats->rx); - do_gettimeofday(&skb->stamp); + __net_timestamp(skb); fs_dprintk (FS_DEBUG_ALLOC, "Free rec-skb: %p (pushed)\n", skb); atm_vcc->push (atm_vcc, skb); fs_dprintk (FS_DEBUG_ALLOC, "Free rec-d: %p\n", pe); diff --git a/drivers/atm/fore200e.c b/drivers/atm/fore200e.c index 5f702199543a..2bf723a7b6e6 100644 --- a/drivers/atm/fore200e.c +++ b/drivers/atm/fore200e.c @@ -1176,7 +1176,7 @@ fore200e_push_rpd(struct fore200e* fore200e, struct atm_vcc* vcc, struct rpd* rp return -ENOMEM; } - do_gettimeofday(&skb->stamp); + __net_timestamp(skb); #ifdef FORE200E_52BYTE_AAL0_SDU if (cell_header) { diff --git 
a/drivers/atm/he.c b/drivers/atm/he.c index 28250c9b32d6..fde9334059af 100644 --- a/drivers/atm/he.c +++ b/drivers/atm/he.c @@ -1886,7 +1886,7 @@ he_service_rbrq(struct he_dev *he_dev, int group) if (rx_skb_reserve > 0) skb_reserve(skb, rx_skb_reserve); - do_gettimeofday(&skb->stamp); + __net_timestamp(skb); for (iov = he_vcc->iov_head; iov < he_vcc->iov_tail; ++iov) { diff --git a/drivers/atm/horizon.c b/drivers/atm/horizon.c index 924a2c8988bd..0cded0468003 100644 --- a/drivers/atm/horizon.c +++ b/drivers/atm/horizon.c @@ -1034,7 +1034,7 @@ static void rx_schedule (hrz_dev * dev, int irq) { struct atm_vcc * vcc = ATM_SKB(skb)->vcc; // VC layer stats atomic_inc(&vcc->stats->rx); - do_gettimeofday(&skb->stamp); + __net_timestamp(skb); // end of our responsability vcc->push (vcc, skb); } diff --git a/drivers/atm/idt77252.c b/drivers/atm/idt77252.c index 30b7e990ed0b..b4a76cade646 100644 --- a/drivers/atm/idt77252.c +++ b/drivers/atm/idt77252.c @@ -1101,7 +1101,7 @@ dequeue_rx(struct idt77252_dev *card, struct rsq_entry *rsqe) cell, ATM_CELL_PAYLOAD); ATM_SKB(sb)->vcc = vcc; - do_gettimeofday(&sb->stamp); + __net_timestamp(sb); vcc->push(vcc, sb); atomic_inc(&vcc->stats->rx); @@ -1179,7 +1179,7 @@ dequeue_rx(struct idt77252_dev *card, struct rsq_entry *rsqe) skb_trim(skb, len); ATM_SKB(skb)->vcc = vcc; - do_gettimeofday(&skb->stamp); + __net_timestamp(skb); vcc->push(vcc, skb); atomic_inc(&vcc->stats->rx); @@ -1201,7 +1201,7 @@ dequeue_rx(struct idt77252_dev *card, struct rsq_entry *rsqe) skb_trim(skb, len); ATM_SKB(skb)->vcc = vcc; - do_gettimeofday(&skb->stamp); + __net_timestamp(skb); vcc->push(vcc, skb); atomic_inc(&vcc->stats->rx); @@ -1340,7 +1340,7 @@ idt77252_rx_raw(struct idt77252_dev *card) ATM_CELL_PAYLOAD); ATM_SKB(sb)->vcc = vcc; - do_gettimeofday(&sb->stamp); + __net_timestamp(sb); vcc->push(vcc, sb); atomic_inc(&vcc->stats->rx); diff --git a/drivers/atm/lanai.c b/drivers/atm/lanai.c index ffe3afa723b8..51ec14787293 100644 --- a/drivers/atm/lanai.c +++ 
b/drivers/atm/lanai.c @@ -1427,7 +1427,7 @@ static void vcc_rx_aal5(struct lanai_vcc *lvcc, int endptr) skb_put(skb, size); vcc_rx_memcpy(skb->data, lvcc, size); ATM_SKB(skb)->vcc = lvcc->rx.atmvcc; - do_gettimeofday(&skb->stamp); + __net_timestamp(skb); lvcc->rx.atmvcc->push(lvcc->rx.atmvcc, skb); atomic_inc(&lvcc->rx.atmvcc->stats->rx); out: diff --git a/drivers/atm/nicstar.c b/drivers/atm/nicstar.c index a0e3bd861f1c..c57e20dcb0f8 100644 --- a/drivers/atm/nicstar.c +++ b/drivers/atm/nicstar.c @@ -2213,7 +2213,7 @@ static void dequeue_rx(ns_dev *card, ns_rsqe *rsqe) memcpy(sb->tail, cell, ATM_CELL_PAYLOAD); skb_put(sb, ATM_CELL_PAYLOAD); ATM_SKB(sb)->vcc = vcc; - do_gettimeofday(&sb->stamp); + __net_timestamp(sb); vcc->push(vcc, sb); atomic_inc(&vcc->stats->rx); cell += ATM_CELL_PAYLOAD; @@ -2346,7 +2346,7 @@ static void dequeue_rx(ns_dev *card, ns_rsqe *rsqe) skb->destructor = ns_sb_destructor; #endif /* NS_USE_DESTRUCTORS */ ATM_SKB(skb)->vcc = vcc; - do_gettimeofday(&skb->stamp); + __net_timestamp(skb); vcc->push(vcc, skb); atomic_inc(&vcc->stats->rx); } @@ -2373,7 +2373,7 @@ static void dequeue_rx(ns_dev *card, ns_rsqe *rsqe) sb->destructor = ns_sb_destructor; #endif /* NS_USE_DESTRUCTORS */ ATM_SKB(sb)->vcc = vcc; - do_gettimeofday(&sb->stamp); + __net_timestamp(sb); vcc->push(vcc, sb); atomic_inc(&vcc->stats->rx); } @@ -2398,7 +2398,7 @@ static void dequeue_rx(ns_dev *card, ns_rsqe *rsqe) memcpy(skb->data, sb->data, NS_SMBUFSIZE); skb_put(skb, len - NS_SMBUFSIZE); ATM_SKB(skb)->vcc = vcc; - do_gettimeofday(&skb->stamp); + __net_timestamp(skb); vcc->push(vcc, skb); atomic_inc(&vcc->stats->rx); } @@ -2505,7 +2505,7 @@ static void dequeue_rx(ns_dev *card, ns_rsqe *rsqe) #ifdef NS_USE_DESTRUCTORS hb->destructor = ns_hb_destructor; #endif /* NS_USE_DESTRUCTORS */ - do_gettimeofday(&hb->stamp); + __net_timestamp(hb); vcc->push(vcc, hb); atomic_inc(&vcc->stats->rx); } diff --git a/drivers/atm/zatm.c b/drivers/atm/zatm.c index 85fee9530fa9..c4b75ecf9460 100644 --- 
a/drivers/atm/zatm.c +++ b/drivers/atm/zatm.c @@ -400,7 +400,7 @@ unsigned long *x; EVENT("error code 0x%x/0x%x\n",(here[3] & uPD98401_AAL5_ES) >> uPD98401_AAL5_ES_SHIFT,error); skb = ((struct rx_buffer_head *) bus_to_virt(here[2]))->skb; - do_gettimeofday(&skb->stamp); + __net_timestamp(skb); #if 0 printk("[-3..0] 0x%08lx 0x%08lx 0x%08lx 0x%08lx\n",((unsigned *) skb->data)[-3], ((unsigned *) skb->data)[-2],((unsigned *) skb->data)[-1], diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 60b32151f76a..32635c401d4d 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -155,13 +155,20 @@ struct skb_shared_info { #define SKB_DATAREF_SHIFT 16 #define SKB_DATAREF_MASK ((1 << SKB_DATAREF_SHIFT) - 1) +extern struct timeval skb_tv_base; + +struct skb_timeval { + u32 off_sec; + u32 off_usec; +}; + /** * struct sk_buff - socket buffer * @next: Next buffer in list * @prev: Previous buffer in list * @list: List we are on * @sk: Socket we are owned by - * @stamp: Time we arrived + * @tstamp: Time we arrived stored as offset to skb_tv_base * @dev: Device we arrived on/are leaving by * @input_dev: Device we arrived on * @h: Transport layer header @@ -202,7 +209,7 @@ struct sk_buff { struct sk_buff *prev; struct sock *sk; - struct timeval stamp; + struct skb_timeval tstamp; struct net_device *dev; struct net_device *input_dev; @@ -1213,6 +1220,42 @@ static inline void *skb_header_pointer(const struct sk_buff *skb, int offset, extern void skb_init(void); extern void skb_add_mtu(int mtu); +/** + * skb_get_timestamp - get timestamp from a skb + * @skb: skb to get stamp from + * @stamp: pointer to struct timeval to store stamp in + * + * Timestamps are stored in the skb as offsets to a base timestamp. + * This function converts the offset back to a struct timeval and stores + * it in stamp. 
+ */ +static inline void skb_get_timestamp(struct sk_buff *skb, struct timeval *stamp) +{ + stamp->tv_sec = skb->tstamp.off_sec; + stamp->tv_usec = skb->tstamp.off_usec; + if (skb->tstamp.off_sec) { + stamp->tv_sec += skb_tv_base.tv_sec; + stamp->tv_usec += skb_tv_base.tv_usec; + } +} + +/** + * skb_set_timestamp - set timestamp of a skb + * @skb: skb to set stamp of + * @stamp: pointer to struct timeval to get stamp from + * + * Timestamps are stored in the skb as offsets to a base timestamp. + * This function converts a struct timeval to an offset and stores + * it in the skb. + */ +static inline void skb_set_timestamp(struct sk_buff *skb, struct timeval *stamp) +{ + skb->tstamp.off_sec = stamp->tv_sec - skb_tv_base.tv_sec; + skb->tstamp.off_usec = stamp->tv_usec - skb_tv_base.tv_usec; +} + +extern void __net_timestamp(struct sk_buff *skb); + #ifdef CONFIG_NETFILTER static inline void nf_conntrack_put(struct nf_conntrack *nfct) { diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index 6d63a47c731b..7f933f302078 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -404,7 +404,7 @@ static inline int hci_recv_frame(struct sk_buff *skb) bt_cb(skb)->incoming = 1; /* Time stamp */ - do_gettimeofday(&skb->stamp); + __net_timestamp(skb); /* Queue frame for rx task */ skb_queue_tail(&hdev->rx_q, skb); diff --git a/include/net/neighbour.h b/include/net/neighbour.h index 89809891e5ab..34c07731933d 100644 --- a/include/net/neighbour.h +++ b/include/net/neighbour.h @@ -363,7 +363,14 @@ __neigh_lookup_errno(struct neigh_table *tbl, const void *pkey, return neigh_create(tbl, pkey, dev); } -#define LOCALLY_ENQUEUED -2 +struct neighbour_cb { + unsigned long sched_next; + unsigned int flags; +}; + +#define LOCALLY_ENQUEUED 0x1 + +#define NEIGH_CB(skb) ((struct neighbour_cb *)(skb)->cb) #endif #endif diff --git a/include/net/sock.h b/include/net/sock.h index 065df67b6422..d59428877078 100644 --- a/include/net/sock.h 
+++ b/include/net/sock.h @@ -1282,16 +1282,19 @@ static inline int sock_intr_errno(long timeo) static __inline__ void sock_recv_timestamp(struct msghdr *msg, struct sock *sk, struct sk_buff *skb) { - struct timeval *stamp = &skb->stamp; + struct timeval stamp; + + skb_get_timestamp(skb, &stamp); if (sock_flag(sk, SOCK_RCVTSTAMP)) { /* Race occurred between timestamp enabling and packet receiving. Fill in the current time for now. */ - if (stamp->tv_sec == 0) - do_gettimeofday(stamp); + if (stamp.tv_sec == 0) + do_gettimeofday(&stamp); + skb_set_timestamp(skb, &stamp); put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMP, sizeof(struct timeval), - stamp); + &stamp); } else - sk->sk_stamp = *stamp; + sk->sk_stamp = stamp; } /** diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index 4f9e11b533a3..55dc42eac92c 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -999,7 +999,7 @@ static int hci_send_frame(struct sk_buff *skb) if (atomic_read(&hdev->promisc)) { /* Time stamp */ - do_gettimeofday(&skb->stamp); + __net_timestamp(skb); hci_send_to_sock(hdev, skb); } diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index 40b219560bb1..d6da0939216d 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -1087,7 +1087,7 @@ void hci_si_event(struct hci_dev *hdev, int type, int dlen, void *data) memcpy(ev->data, data, dlen); bt_cb(skb)->incoming = 1; - do_gettimeofday(&skb->stamp); + __net_timestamp(skb); bt_cb(skb)->pkt_type = HCI_EVENT_PKT; skb->dev = (void *) hdev; diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c index eed9090d77f1..32ef7975a139 100644 --- a/net/bluetooth/hci_sock.c +++ b/net/bluetooth/hci_sock.c @@ -332,8 +332,12 @@ static inline void hci_sock_cmsg(struct sock *sk, struct msghdr *msg, struct sk_ put_cmsg(msg, SOL_HCI, HCI_CMSG_DIR, sizeof(incoming), &incoming); } - if (mask & HCI_CMSG_TSTAMP) - put_cmsg(msg, SOL_HCI, HCI_CMSG_TSTAMP, sizeof(skb->stamp), &skb->stamp); + if (mask & 
HCI_CMSG_TSTAMP) { + struct timeval tv; + + skb_get_timestamp(skb, &tv); + put_cmsg(msg, SOL_HCI, HCI_CMSG_TSTAMP, sizeof(tv), &tv); + } } static int hci_sock_recvmsg(struct kiocb *iocb, struct socket *sock, diff --git a/net/bridge/netfilter/ebt_ulog.c b/net/bridge/netfilter/ebt_ulog.c index 561d75c8ed5a..acb888d32587 100644 --- a/net/bridge/netfilter/ebt_ulog.c +++ b/net/bridge/netfilter/ebt_ulog.c @@ -162,7 +162,7 @@ static void ebt_ulog(const struct sk_buff *skb, unsigned int hooknr, pm->version = EBT_ULOG_VERSION; do_gettimeofday(&pm->stamp); if (ub->qlen == 1) - ub->skb->stamp = pm->stamp; + skb_set_timestamp(ub->skb, &pm->stamp); pm->data_len = copy_len; pm->mark = skb->nfmark; pm->hook = hooknr; diff --git a/net/core/dev.c b/net/core/dev.c index 9d153eb1e8cf..a3ed53cc4af8 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1009,13 +1009,22 @@ void net_disable_timestamp(void) atomic_dec(&netstamp_needed); } -static inline void net_timestamp(struct timeval *stamp) +void __net_timestamp(struct sk_buff *skb) +{ + struct timeval tv; + + do_gettimeofday(&tv); + skb_set_timestamp(skb, &tv); +} +EXPORT_SYMBOL(__net_timestamp); + +static inline void net_timestamp(struct sk_buff *skb) { if (atomic_read(&netstamp_needed)) - do_gettimeofday(stamp); + __net_timestamp(skb); else { - stamp->tv_sec = 0; - stamp->tv_usec = 0; + skb->tstamp.off_sec = 0; + skb->tstamp.off_usec = 0; } } @@ -1027,7 +1036,8 @@ static inline void net_timestamp(struct timeval *stamp) void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev) { struct packet_type *ptype; - net_timestamp(&skb->stamp); + + net_timestamp(skb); rcu_read_lock(); list_for_each_entry_rcu(ptype, &ptype_all, list) { @@ -1379,8 +1389,8 @@ int netif_rx(struct sk_buff *skb) if (netpoll_rx(skb)) return NET_RX_DROP; - if (!skb->stamp.tv_sec) - net_timestamp(&skb->stamp); + if (!skb->tstamp.off_sec) + net_timestamp(skb); /* * The code is rearranged so that the path is the most @@ -1566,8 +1576,8 @@ int 
netif_receive_skb(struct sk_buff *skb) if (skb->dev->poll && netpoll_rx(skb)) return NET_RX_DROP; - if (!skb->stamp.tv_sec) - net_timestamp(&skb->stamp); + if (!skb->tstamp.off_sec) + net_timestamp(skb); if (!skb->input_dev) skb->input_dev = skb->dev; diff --git a/net/core/neighbour.c b/net/core/neighbour.c index 1beb782ac41b..72ee00f7b30c 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -1217,7 +1217,7 @@ static void neigh_proxy_process(unsigned long arg) while (skb != (struct sk_buff *)&tbl->proxy_queue) { struct sk_buff *back = skb; - long tdif = back->stamp.tv_usec - now; + long tdif = NEIGH_CB(back)->sched_next - now; skb = skb->next; if (tdif <= 0) { @@ -1248,8 +1248,9 @@ void pneigh_enqueue(struct neigh_table *tbl, struct neigh_parms *p, kfree_skb(skb); return; } - skb->stamp.tv_sec = LOCALLY_ENQUEUED; - skb->stamp.tv_usec = sched_next; + + NEIGH_CB(skb)->sched_next = sched_next; + NEIGH_CB(skb)->flags |= LOCALLY_ENQUEUED; spin_lock(&tbl->proxy_queue.lock); if (del_timer(&tbl->proxy_timer)) { diff --git a/net/core/skbuff.c b/net/core/skbuff.c index ef498cb9f786..39a161dbc16d 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -70,6 +70,8 @@ static kmem_cache_t *skbuff_head_cache; +struct timeval __read_mostly skb_tv_base; + /* * Keep out-of-line to prevent kernel bloat. 
* __builtin_return_address is not used because it is not always @@ -331,7 +333,7 @@ struct sk_buff *skb_clone(struct sk_buff *skb, unsigned int __nocast gfp_mask) n->next = n->prev = NULL; n->sk = NULL; - C(stamp); + C(tstamp); C(dev); C(h); C(nh); @@ -408,7 +410,7 @@ static void copy_skb_header(struct sk_buff *new, const struct sk_buff *old) memcpy(new->cb, old->cb, sizeof(old->cb)); new->local_df = old->local_df; new->pkt_type = old->pkt_type; - new->stamp = old->stamp; + new->tstamp = old->tstamp; new->destructor = NULL; #ifdef CONFIG_NETFILTER new->nfmark = old->nfmark; @@ -1645,6 +1647,7 @@ void __init skb_init(void) NULL, NULL); if (!skbuff_head_cache) panic("cannot create skbuff cache"); + do_gettimeofday(&skb_tv_base); } EXPORT_SYMBOL(___pskb_trim); @@ -1678,3 +1681,4 @@ EXPORT_SYMBOL(skb_prepare_seq_read); EXPORT_SYMBOL(skb_seq_read); EXPORT_SYMBOL(skb_abort_seq_read); EXPORT_SYMBOL(skb_find_text); +EXPORT_SYMBOL(skb_tv_base); diff --git a/net/econet/af_econet.c b/net/econet/af_econet.c index 8f0639905558..4a62093eb343 100644 --- a/net/econet/af_econet.c +++ b/net/econet/af_econet.c @@ -159,7 +159,7 @@ static int econet_recvmsg(struct kiocb *iocb, struct socket *sock, err = memcpy_toiovec(msg->msg_iov, skb->data, copied); if (err) goto out_free; - sk->sk_stamp = skb->stamp; + skb_get_timestamp(skb, &sk->sk_stamp); if (msg->msg_name) memcpy(msg->msg_name, skb->cb, msg->msg_namelen); diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c index 6eb9c549d643..8bf312bdea13 100644 --- a/net/ipv4/arp.c +++ b/net/ipv4/arp.c @@ -865,7 +865,7 @@ static int arp_process(struct sk_buff *skb) if (n) neigh_release(n); - if (skb->stamp.tv_sec == LOCALLY_ENQUEUED || + if (NEIGH_CB(skb)->flags & LOCALLY_ENQUEUED || skb->pkt_type == PACKET_HOST || in_dev->arp_parms->proxy_delay == 0) { arp_send(ARPOP_REPLY,ETH_P_ARP,sip,dev,tip,sha,dev->dev_addr,sha); @@ -948,6 +948,8 @@ int arp_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, if ((skb = skb_share_check(skb, 
GFP_ATOMIC)) == NULL) goto out_of_mem; + memset(NEIGH_CB(skb), 0, sizeof(struct neighbour_cb)); + return NF_HOOK(NF_ARP, NF_ARP_IN, skb, dev, NULL, arp_process); freeskb: diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c index 1ac64c0c5b37..9e6e683cc34d 100644 --- a/net/ipv4/ip_fragment.c +++ b/net/ipv4/ip_fragment.c @@ -533,7 +533,7 @@ static void ip_frag_queue(struct ipq *qp, struct sk_buff *skb) if (skb->dev) qp->iif = skb->dev->ifindex; skb->dev = NULL; - qp->stamp = skb->stamp; + skb_get_timestamp(skb, &qp->stamp); qp->meat += skb->len; atomic_add(skb->truesize, &ip_frag_mem); if (offset == 0) @@ -615,7 +615,7 @@ static struct sk_buff *ip_frag_reasm(struct ipq *qp, struct net_device *dev) head->next = NULL; head->dev = dev; - head->stamp = qp->stamp; + skb_set_timestamp(head, &qp->stamp); iph = head->nh.iph; iph->frag_off = 0; diff --git a/net/ipv4/netfilter/ip_queue.c b/net/ipv4/netfilter/ip_queue.c index 1c49833e00a9..7f2bcc7198fa 100644 --- a/net/ipv4/netfilter/ip_queue.c +++ b/net/ipv4/netfilter/ip_queue.c @@ -240,8 +240,8 @@ ipq_build_packet_message(struct ipq_queue_entry *entry, int *errp) pmsg->packet_id = (unsigned long )entry; pmsg->data_len = data_len; - pmsg->timestamp_sec = entry->skb->stamp.tv_sec; - pmsg->timestamp_usec = entry->skb->stamp.tv_usec; + pmsg->timestamp_sec = skb_tv_base.tv_sec + entry->skb->tstamp.off_sec; + pmsg->timestamp_usec = skb_tv_base.tv_usec + entry->skb->tstamp.off_usec; pmsg->mark = entry->skb->nfmark; pmsg->hook = entry->info->hook; pmsg->hw_protocol = entry->skb->protocol; diff --git a/net/ipv4/netfilter/ipt_ULOG.c b/net/ipv4/netfilter/ipt_ULOG.c index b86f06ec9762..1d8ac4595e17 100644 --- a/net/ipv4/netfilter/ipt_ULOG.c +++ b/net/ipv4/netfilter/ipt_ULOG.c @@ -220,13 +220,13 @@ static void ipt_ulog_packet(unsigned int hooknum, pm = NLMSG_DATA(nlh); /* We might not have a timestamp, get one */ - if (skb->stamp.tv_sec == 0) - do_gettimeofday((struct timeval *)&skb->stamp); + if (skb->tstamp.off_sec == 0) + 
__net_timestamp((struct sk_buff *)skb); /* copy hook, prefix, timestamp, payload, etc. */ pm->data_len = copy_len; - pm->timestamp_sec = skb->stamp.tv_sec; - pm->timestamp_usec = skb->stamp.tv_usec; + pm->timestamp_sec = skb_tv_base.tv_sec + skb->tstamp.off_sec; + pm->timestamp_usec = skb_tv_base.tv_usec + skb->tstamp.off_usec; pm->mark = skb->nfmark; pm->hook = hooknum; if (prefix != NULL) diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index fdd9547fb783..ebb8654e3dee 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -2097,9 +2097,13 @@ static int tcp_clean_rtx_queue(struct sock *sk, __s32 *seq_rtt_p, s32 *seq_usrtt seq_rtt = -1; } else if (seq_rtt < 0) seq_rtt = now - scb->when; - if (seq_usrtt) - *seq_usrtt = (usnow.tv_sec - skb->stamp.tv_sec) * 1000000 - + (usnow.tv_usec - skb->stamp.tv_usec); + if (seq_usrtt) { + struct timeval tv; + + skb_get_timestamp(skb, &tv); + *seq_usrtt = (usnow.tv_sec - tv.tv_sec) * 1000000 + + (usnow.tv_usec - tv.tv_usec); + } if (sacked & TCPCB_SACKED_ACKED) tp->sacked_out -= tcp_skb_pcount(skb); diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 267b0fcbfc9c..8d92ab562aed 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -282,7 +282,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb) /* If congestion control is doing timestamping */ if (icsk->icsk_ca_ops->rtt_sample) - do_gettimeofday(&skb->stamp); + __net_timestamp(skb); sysctl_flags = 0; if (tcb->flags & TCPCB_FLAG_SYN) { @@ -483,7 +483,7 @@ static int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len, unsigned * skbs, which it never sent before. 
--ANK */ TCP_SKB_CB(buff)->when = TCP_SKB_CB(skb)->when; - buff->stamp = skb->stamp; + buff->tstamp = skb->tstamp; if (TCP_SKB_CB(skb)->sacked & TCPCB_LOST) { tp->lost_out -= tcp_skb_pcount(skb); diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index 7ae72d4c9bd2..a7eae30f4554 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -812,7 +812,7 @@ static void ndisc_recv_ns(struct sk_buff *skb) if (ipv6_chk_acast_addr(dev, &msg->target) || (idev->cnf.forwarding && pneigh_lookup(&nd_tbl, &msg->target, dev, 0))) { - if (skb->stamp.tv_sec != LOCALLY_ENQUEUED && + if (!(NEIGH_CB(skb)->flags & LOCALLY_ENQUEUED) && skb->pkt_type != PACKET_HOST && inc != 0 && idev->nd_parms->proxy_delay != 0) { @@ -1487,6 +1487,8 @@ int ndisc_rcv(struct sk_buff *skb) return 0; } + memset(NEIGH_CB(skb), 0, sizeof(struct neighbour_cb)); + switch (msg->icmph.icmp6_type) { case NDISC_NEIGHBOUR_SOLICITATION: ndisc_recv_ns(skb); diff --git a/net/ipv6/netfilter/ip6_queue.c b/net/ipv6/netfilter/ip6_queue.c index 7ecb91e24a34..446764545b10 100644 --- a/net/ipv6/netfilter/ip6_queue.c +++ b/net/ipv6/netfilter/ip6_queue.c @@ -238,8 +238,8 @@ ipq_build_packet_message(struct ipq_queue_entry *entry, int *errp) pmsg->packet_id = (unsigned long )entry; pmsg->data_len = data_len; - pmsg->timestamp_sec = entry->skb->stamp.tv_sec; - pmsg->timestamp_usec = entry->skb->stamp.tv_usec; + pmsg->timestamp_sec = skb_tv_base.tv_sec + entry->skb->tstamp.off_sec; + pmsg->timestamp_usec = skb_tv_base.tv_usec + entry->skb->tstamp.off_usec; pmsg->mark = entry->skb->nfmark; pmsg->hook = entry->info->hook; pmsg->hw_protocol = entry->skb->protocol; diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c index 59e7c6317872..9d9e04344c77 100644 --- a/net/ipv6/reassembly.c +++ b/net/ipv6/reassembly.c @@ -562,7 +562,7 @@ static void ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb, if (skb->dev) fq->iif = skb->dev->ifindex; skb->dev = NULL; - fq->stamp = skb->stamp; + skb_get_timestamp(skb, &fq->stamp); fq->meat += 
skb->len; atomic_add(skb->truesize, &ip6_frag_mem); @@ -664,7 +664,7 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff **skb_in, head->next = NULL; head->dev = dev; - head->stamp = fq->stamp; + skb_set_timestamp(head, &fq->stamp); head->nh.ipv6h->payload_len = htons(payload_len); *skb_in = head; diff --git a/net/ipx/af_ipx.c b/net/ipx/af_ipx.c index 39d5939ccd6c..c54f8acc97eb 100644 --- a/net/ipx/af_ipx.c +++ b/net/ipx/af_ipx.c @@ -1796,8 +1796,8 @@ static int ipx_recvmsg(struct kiocb *iocb, struct socket *sock, copied); if (rc) goto out_free; - if (skb->stamp.tv_sec) - sk->sk_stamp = skb->stamp; + if (skb->tstamp.off_sec) + skb_get_timestamp(skb, &sk->sk_stamp) msg->msg_namelen = sizeof(*sipx); diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c index 464c9fa2934b..ff5601ceedcb 100644 --- a/net/netfilter/nfnetlink_log.c +++ b/net/netfilter/nfnetlink_log.c @@ -491,11 +491,11 @@ __build_packet_message(struct nfulnl_instance *inst, NFA_PUT(inst->skb, NFULA_HWADDR, sizeof(phw), &phw); } - if (skb->stamp.tv_sec) { + if (skb->tstamp.off_sec) { struct nfulnl_msg_packet_timestamp ts; - ts.sec = cpu_to_be64(skb->stamp.tv_sec); - ts.usec = cpu_to_be64(skb->stamp.tv_usec); + ts.sec = cpu_to_be64(skb_tv_base.tv_sec + skb->tstamp.off_sec); + ts.usec = cpu_to_be64(skb_tv_base.tv_usec + skb->tstamp.off_usec); NFA_PUT(inst->skb, NFULA_TIMESTAMP, sizeof(ts), &ts); } diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c index 741686ff71d8..e3a5285329af 100644 --- a/net/netfilter/nfnetlink_queue.c +++ b/net/netfilter/nfnetlink_queue.c @@ -494,11 +494,11 @@ nfqnl_build_packet_message(struct nfqnl_instance *queue, NFA_PUT(skb, NFQA_HWADDR, sizeof(phw), &phw); } - if (entry->skb->stamp.tv_sec) { + if (entry->skb->tstamp.off_sec) { struct nfqnl_msg_packet_timestamp ts; - ts.sec = htonll(entry->skb->stamp.tv_sec); - ts.usec = htonll(entry->skb->stamp.tv_usec); + ts.sec = htonll(skb_tv_base.tv_sec + 
entry->skb->tstamp.off_sec); + ts.usec = htonll(skb_tv_base.tv_usec + entry->skb->tstamp.off_usec); NFA_PUT(skb, NFQA_TIMESTAMP, sizeof(ts), &ts); } diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index deb5f6f7f858..ba997095f08f 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -635,12 +635,12 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, struct packe h->tp_snaplen = snaplen; h->tp_mac = macoff; h->tp_net = netoff; - if (skb->stamp.tv_sec == 0) { - do_gettimeofday(&skb->stamp); + if (skb->tstamp.off_sec == 0) { + __net_timestamp(skb); sock_enable_timestamp(sk); } - h->tp_sec = skb->stamp.tv_sec; - h->tp_usec = skb->stamp.tv_usec; + h->tp_sec = skb_tv_base.tv_sec + skb->tstamp.off_sec; + h->tp_usec = skb_tv_base.tv_usec + skb->tstamp.off_usec; sll = (struct sockaddr_ll*)((u8*)h + TPACKET_ALIGN(sizeof(*h))); sll->sll_halen = 0; diff --git a/net/sctp/input.c b/net/sctp/input.c index 742be9171b7d..28f32243397f 100644 --- a/net/sctp/input.c +++ b/net/sctp/input.c @@ -236,8 +236,8 @@ int sctp_rcv(struct sk_buff *skb) } /* SCTP seems to always need a timestamp right now (FIXME) */ - if (skb->stamp.tv_sec == 0) { - do_gettimeofday(&skb->stamp); + if (skb->tstamp.off_sec == 0) { + __net_timestamp(skb); sock_enable_timestamp(sk); } diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c index 00d32b7c8266..3868a8d70cc0 100644 --- a/net/sctp/sm_make_chunk.c +++ b/net/sctp/sm_make_chunk.c @@ -1362,6 +1362,7 @@ struct sctp_association *sctp_unpack_cookie( char *key; sctp_scope_t scope; struct sk_buff *skb = chunk->skb; + struct timeval tv; headersize = sizeof(sctp_chunkhdr_t) + SCTP_SECRET_SIZE; bodysize = ntohs(chunk->chunk_hdr->length) - headersize; @@ -1434,7 +1435,8 @@ no_hmac: * an association, there is no need to check cookie's expiration * for init collision case of lost COOKIE ACK. 
*/ - if (!asoc && tv_lt(bear_cookie->expiration, skb->stamp)) { + skb_get_timestamp(skb, &tv); + if (!asoc && tv_lt(bear_cookie->expiration, tv)) { __u16 len; /* * Section 3.3.10.3 Stale Cookie Error (3) @@ -1447,10 +1449,9 @@ no_hmac: len = ntohs(chunk->chunk_hdr->length); *errp = sctp_make_op_error_space(asoc, chunk, len); if (*errp) { - suseconds_t usecs = (skb->stamp.tv_sec - + suseconds_t usecs = (tv.tv_sec - bear_cookie->expiration.tv_sec) * 1000000L + - skb->stamp.tv_usec - - bear_cookie->expiration.tv_usec; + tv.tv_usec - bear_cookie->expiration.tv_usec; usecs = htonl(usecs); sctp_init_cause(*errp, SCTP_ERROR_STALE_COOKIE, diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index 199d3747bd42..05fe2e735538 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -584,13 +584,16 @@ svc_udp_recvfrom(struct svc_rqst *rqstp) /* possibly an icmp error */ dprintk("svc: recvfrom returned error %d\n", -err); } - if (skb->stamp.tv_sec == 0) { - skb->stamp.tv_sec = xtime.tv_sec; - skb->stamp.tv_usec = xtime.tv_nsec / NSEC_PER_USEC; + if (skb->tstamp.off_sec == 0) { + struct timeval tv; + + tv.tv_sec = xtime.tv_sec; + tv.tv_usec = xtime.tv_nsec / NSEC_PER_USEC; + skb_set_timestamp(skb, &tv); /* Don't enable netstamp, sunrpc doesn't need that much accuracy */ } - svsk->sk_sk->sk_stamp = skb->stamp; + skb_get_timestamp(skb, &svsk->sk_sk->sk_stamp); set_bit(SK_DATA, &svsk->sk_flags); /* there may be more data... */ /* From fb13ab2849074244a51ae5147483610529a29ced Mon Sep 17 00:00:00 2001 From: Domen Puncer Date: Sun, 14 Aug 2005 17:32:05 -0700 Subject: [PATCH 393/584] [NETFILTER]: Remove two unused files Signed-off-by: Domen Puncer Signed-off-by: Patrick McHardy Signed-off-by: David S.
Miller --- include/linux/netfilter_ipv4/ip_logging.h | 20 -------------------- include/linux/netfilter_ipv6/ip6_logging.h | 20 -------------------- 2 files changed, 40 deletions(-) delete mode 100644 include/linux/netfilter_ipv4/ip_logging.h delete mode 100644 include/linux/netfilter_ipv6/ip6_logging.h diff --git a/include/linux/netfilter_ipv4/ip_logging.h b/include/linux/netfilter_ipv4/ip_logging.h deleted file mode 100644 index 0c5c52cb6589..000000000000 --- a/include/linux/netfilter_ipv4/ip_logging.h +++ /dev/null @@ -1,20 +0,0 @@ -/* IPv4 macros for the internal logging interface. */ -#ifndef __IP_LOGGING_H -#define __IP_LOGGING_H - -#ifdef __KERNEL__ -#include -#include - -#define nf_log_ip_packet(pskb,hooknum,in,out,fmt,args...) \ - nf_log_packet(AF_INET,pskb,hooknum,in,out,fmt,##args) - -#define nf_log_ip(pfh,len,fmt,args...) \ - nf_log(AF_INET,pfh,len,fmt,##args) - -#define nf_ip_log_register(logging) nf_log_register(AF_INET,logging) -#define nf_ip_log_unregister(logging) nf_log_unregister(AF_INET,logging) - -#endif /*__KERNEL__*/ - -#endif /*__IP_LOGGING_H*/ diff --git a/include/linux/netfilter_ipv6/ip6_logging.h b/include/linux/netfilter_ipv6/ip6_logging.h deleted file mode 100644 index a0b2ee3043aa..000000000000 --- a/include/linux/netfilter_ipv6/ip6_logging.h +++ /dev/null @@ -1,20 +0,0 @@ -/* IPv6 macros for the nternal logging interface. */ -#ifndef __IP6_LOGGING_H -#define __IP6_LOGGING_H - -#ifdef __KERNEL__ -#include -#include - -#define nf_log_ip6_packet(pskb,hooknum,in,out,fmt,args...) \ - nf_log_packet(AF_INET6,pskb,hooknum,in,out,fmt,##args) - -#define nf_log_ip6(pfh,len,fmt,args...) 
\ - nf_log(AF_INET6,pfh,len,fmt,##args) - -#define nf_ip6_log_register(logging) nf_log_register(AF_INET6,logging) -#define nf_ip6_log_unregister(logging) nf_log_unregister(AF_INET6,logging) - -#endif /*__KERNEL__*/ - -#endif /*__IP6_LOGGING_H*/ From 9baa5c67ff4ce57b6b9f68c90714a1bb876fccd7 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Sun, 14 Aug 2005 17:32:50 -0700 Subject: [PATCH 394/584] [NETFILTER]: Don't exclude local packets from MASQUERADING Increases consistency in source-address selection. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- net/ipv4/netfilter/ipt_MASQUERADE.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/net/ipv4/netfilter/ipt_MASQUERADE.c b/net/ipv4/netfilter/ipt_MASQUERADE.c index 91e74502c3d3..2f3e181c8e97 100644 --- a/net/ipv4/netfilter/ipt_MASQUERADE.c +++ b/net/ipv4/netfilter/ipt_MASQUERADE.c @@ -86,11 +86,6 @@ masquerade_target(struct sk_buff **pskb, IP_NF_ASSERT(hooknum == NF_IP_POST_ROUTING); - /* FIXME: For the moment, don't do local packets, breaks - testsuite for 2.3.49 --RR */ - if ((*pskb)->sk) - return NF_ACCEPT; - ct = ip_conntrack_get(*pskb, &ctinfo); IP_NF_ASSERT(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED || ctinfo == IP_CT_RELATED + IP_CT_IS_REPLY)); From 000efe1d86620244b8e017429e57fab4170ab05a Mon Sep 17 00:00:00 2001 From: Gary Wayne Smith Date: Sun, 14 Aug 2005 17:33:24 -0700 Subject: [PATCH 395/584] [NETFILTER]: Make NETMAP target usable in OUTPUT Signed-off-by: Gary Wayne Smith Signed-off-by: Patrick McHardy Signed-off-by: David S. 
Miller --- net/ipv4/netfilter/ipt_NETMAP.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/net/ipv4/netfilter/ipt_NETMAP.c b/net/ipv4/netfilter/ipt_NETMAP.c index 06254b29d034..e6e7b6095363 100644 --- a/net/ipv4/netfilter/ipt_NETMAP.c +++ b/net/ipv4/netfilter/ipt_NETMAP.c @@ -46,7 +46,8 @@ check(const char *tablename, DEBUGP(MODULENAME":check: size %u.\n", targinfosize); return 0; } - if (hook_mask & ~((1 << NF_IP_PRE_ROUTING) | (1 << NF_IP_POST_ROUTING))) { + if (hook_mask & ~((1 << NF_IP_PRE_ROUTING) | (1 << NF_IP_POST_ROUTING) | + (1 << NF_IP_LOCAL_OUT))) { DEBUGP(MODULENAME":check: bad hooks %x.\n", hook_mask); return 0; } @@ -76,12 +77,13 @@ target(struct sk_buff **pskb, struct ip_nat_range newrange; IP_NF_ASSERT(hooknum == NF_IP_PRE_ROUTING - || hooknum == NF_IP_POST_ROUTING); + || hooknum == NF_IP_POST_ROUTING + || hooknum == NF_IP_LOCAL_OUT); ct = ip_conntrack_get(*pskb, &ctinfo); netmask = ~(mr->range[0].min_ip ^ mr->range[0].max_ip); - if (hooknum == NF_IP_PRE_ROUTING) + if (hooknum == NF_IP_PRE_ROUTING || hooknum == NF_IP_LOCAL_OUT) new_ip = (*pskb)->nh.iph->daddr & ~netmask; else new_ip = (*pskb)->nh.iph->saddr & ~netmask; From 34b4a4a624bafe089107966a6c56d2a1aca026d4 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sun, 14 Aug 2005 17:33:59 -0700 Subject: [PATCH 396/584] [NETFILTER]: Remove tasklist_lock abuse in ipt{,6}owner Rip out cmd/sid/pid matching since its unfixable broken and stands in the way of locking changes to tasklist_lock. Signed-off-by: Christoph Hellwig Signed-off-by: Patrick McHardy Signed-off-by: David S. 
Miller --- net/ipv4/netfilter/ipt_owner.c | 132 ++------------------------------ net/ipv6/netfilter/ip6t_owner.c | 90 ++-------------------- 2 files changed, 14 insertions(+), 208 deletions(-) diff --git a/net/ipv4/netfilter/ipt_owner.c b/net/ipv4/netfilter/ipt_owner.c index 3b9065e06381..c1889f88262b 100644 --- a/net/ipv4/netfilter/ipt_owner.c +++ b/net/ipv4/netfilter/ipt_owner.c @@ -20,106 +20,6 @@ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Marc Boucher "); MODULE_DESCRIPTION("iptables owner match"); -static int -match_comm(const struct sk_buff *skb, const char *comm) -{ - struct task_struct *g, *p; - struct files_struct *files; - int i; - - read_lock(&tasklist_lock); - do_each_thread(g, p) { - if(strncmp(p->comm, comm, sizeof(p->comm))) - continue; - - task_lock(p); - files = p->files; - if(files) { - spin_lock(&files->file_lock); - for (i=0; i < files->max_fds; i++) { - if (fcheck_files(files, i) == - skb->sk->sk_socket->file) { - spin_unlock(&files->file_lock); - task_unlock(p); - read_unlock(&tasklist_lock); - return 1; - } - } - spin_unlock(&files->file_lock); - } - task_unlock(p); - } while_each_thread(g, p); - read_unlock(&tasklist_lock); - return 0; -} - -static int -match_pid(const struct sk_buff *skb, pid_t pid) -{ - struct task_struct *p; - struct files_struct *files; - int i; - - read_lock(&tasklist_lock); - p = find_task_by_pid(pid); - if (!p) - goto out; - task_lock(p); - files = p->files; - if(files) { - spin_lock(&files->file_lock); - for (i=0; i < files->max_fds; i++) { - if (fcheck_files(files, i) == - skb->sk->sk_socket->file) { - spin_unlock(&files->file_lock); - task_unlock(p); - read_unlock(&tasklist_lock); - return 1; - } - } - spin_unlock(&files->file_lock); - } - task_unlock(p); -out: - read_unlock(&tasklist_lock); - return 0; -} - -static int -match_sid(const struct sk_buff *skb, pid_t sid) -{ - struct task_struct *g, *p; - struct file *file = skb->sk->sk_socket->file; - int i, found=0; - - read_lock(&tasklist_lock); - do_each_thread(g, p) { 
- struct files_struct *files; - if (p->signal->session != sid) - continue; - - task_lock(p); - files = p->files; - if (files) { - spin_lock(&files->file_lock); - for (i=0; i < files->max_fds; i++) { - if (fcheck_files(files, i) == file) { - found = 1; - break; - } - } - spin_unlock(&files->file_lock); - } - task_unlock(p); - if (found) - goto out; - } while_each_thread(g, p); -out: - read_unlock(&tasklist_lock); - - return found; -} - static int match(const struct sk_buff *skb, const struct net_device *in, @@ -145,24 +45,6 @@ match(const struct sk_buff *skb, return 0; } - if(info->match & IPT_OWNER_PID) { - if (!match_pid(skb, info->pid) ^ - !!(info->invert & IPT_OWNER_PID)) - return 0; - } - - if(info->match & IPT_OWNER_SID) { - if (!match_sid(skb, info->sid) ^ - !!(info->invert & IPT_OWNER_SID)) - return 0; - } - - if(info->match & IPT_OWNER_COMM) { - if (!match_comm(skb, info->comm) ^ - !!(info->invert & IPT_OWNER_COMM)) - return 0; - } - return 1; } @@ -173,6 +55,8 @@ checkentry(const char *tablename, unsigned int matchsize, unsigned int hook_mask) { + const struct ipt_owner_info *info = matchinfo; + if (hook_mask & ~((1 << NF_IP_LOCAL_OUT) | (1 << NF_IP_POST_ROUTING))) { printk("ipt_owner: only valid for LOCAL_OUT or POST_ROUTING.\n"); @@ -184,15 +68,13 @@ checkentry(const char *tablename, IPT_ALIGN(sizeof(struct ipt_owner_info))); return 0; } -#ifdef CONFIG_SMP - /* files->file_lock can not be used in a BH */ - if (((struct ipt_owner_info *)matchinfo)->match - & (IPT_OWNER_PID|IPT_OWNER_SID|IPT_OWNER_COMM)) { - printk("ipt_owner: pid, sid and command matching is broken " - "on SMP.\n"); + + if (info->match & (IPT_OWNER_PID|IPT_OWNER_SID|IPT_OWNER_COMM)) { + printk("ipt_owner: pid, sid and command matching " + "not supported anymore\n"); return 0; } -#endif + return 1; } diff --git a/net/ipv6/netfilter/ip6t_owner.c b/net/ipv6/netfilter/ip6t_owner.c index ab0e32d3de46..9b91decbfddb 100644 --- a/net/ipv6/netfilter/ip6t_owner.c +++ 
b/net/ipv6/netfilter/ip6t_owner.c @@ -20,71 +20,6 @@ MODULE_AUTHOR("Marc Boucher "); MODULE_DESCRIPTION("IP6 tables owner matching module"); MODULE_LICENSE("GPL"); -static int -match_pid(const struct sk_buff *skb, pid_t pid) -{ - struct task_struct *p; - struct files_struct *files; - int i; - - read_lock(&tasklist_lock); - p = find_task_by_pid(pid); - if (!p) - goto out; - task_lock(p); - files = p->files; - if(files) { - spin_lock(&files->file_lock); - for (i=0; i < files->max_fds; i++) { - if (fcheck_files(files, i) == skb->sk->sk_socket->file) { - spin_unlock(&files->file_lock); - task_unlock(p); - read_unlock(&tasklist_lock); - return 1; - } - } - spin_unlock(&files->file_lock); - } - task_unlock(p); -out: - read_unlock(&tasklist_lock); - return 0; -} - -static int -match_sid(const struct sk_buff *skb, pid_t sid) -{ - struct task_struct *g, *p; - struct file *file = skb->sk->sk_socket->file; - int i, found=0; - - read_lock(&tasklist_lock); - do_each_thread(g, p) { - struct files_struct *files; - if (p->signal->session != sid) - continue; - - task_lock(p); - files = p->files; - if (files) { - spin_lock(&files->file_lock); - for (i=0; i < files->max_fds; i++) { - if (fcheck_files(files, i) == file) { - found = 1; - break; - } - } - spin_unlock(&files->file_lock); - } - task_unlock(p); - if (found) - goto out; - } while_each_thread(g, p); -out: - read_unlock(&tasklist_lock); - - return found; -} static int match(const struct sk_buff *skb, @@ -112,18 +47,6 @@ match(const struct sk_buff *skb, return 0; } - if(info->match & IP6T_OWNER_PID) { - if (!match_pid(skb, info->pid) ^ - !!(info->invert & IP6T_OWNER_PID)) - return 0; - } - - if(info->match & IP6T_OWNER_SID) { - if (!match_sid(skb, info->sid) ^ - !!(info->invert & IP6T_OWNER_SID)) - return 0; - } - return 1; } @@ -134,6 +57,8 @@ checkentry(const char *tablename, unsigned int matchsize, unsigned int hook_mask) { + const struct ip6t_owner_info *info = matchinfo; + if (hook_mask & ~((1 << NF_IP6_LOCAL_OUT) | (1 << 
NF_IP6_POST_ROUTING))) { printk("ip6t_owner: only valid for LOCAL_OUT or POST_ROUTING.\n"); @@ -142,14 +67,13 @@ checkentry(const char *tablename, if (matchsize != IP6T_ALIGN(sizeof(struct ip6t_owner_info))) return 0; -#ifdef CONFIG_SMP - /* files->file_lock can not be used in a BH */ - if (((struct ip6t_owner_info *)matchinfo)->match - & (IP6T_OWNER_PID|IP6T_OWNER_SID)) { - printk("ip6t_owner: pid and sid matching is broken on SMP.\n"); + + if (info->match & (IP6T_OWNER_PID|IP6T_OWNER_SID)) { + printk("ip6t_owner: pid and sid matching " + "not supported anymore\n"); return 0; } -#endif + return 1; } From c173437669967301facff151bfeb7bae67354e4c Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Sat, 13 Aug 2005 20:34:23 -0300 Subject: [PATCH 397/584] [PACKET_HISTORY]: Add dccphtx_rtt and rename the win_count fields As requested by Ian. Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Ian McDonald Signed-off-by: David S. Miller --- net/dccp/ccids/ccid3.c | 22 +++++++++++----------- net/dccp/packet_history.h | 13 +++++++------ 2 files changed, 18 insertions(+), 17 deletions(-) diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c index 80f12c990c00..edf9740d8d82 100644 --- a/net/dccp/ccids/ccid3.c +++ b/net/dccp/ccids/ccid3.c @@ -1004,7 +1004,7 @@ static int ccid3_hc_tx_send_packet(struct sock *sk, /* Can we send?
if so add options and add to packet history */ if (rc == 0) - new_packet->dccphtx_win_count = + new_packet->dccphtx_ccval = DCCP_SKB_CB(skb)->dccpd_ccval = hctx->ccid3hctx_last_win_count; out: @@ -1060,7 +1060,7 @@ static void ccid3_hc_tx_packet_sent(struct sock *sk, int more, int len) min_t(unsigned long, quarter_rtt, 5)) % 16; ccid3_pr_debug("%s, sk=%p, window changed from %u to %u!\n", dccp_role(sk), sk, - packet->dccphtx_win_count, + packet->dccphtx_ccval, hctx->ccid3hctx_last_win_count); } /* COMPLIANCE_END */ @@ -1068,9 +1068,10 @@ static void ccid3_hc_tx_packet_sent(struct sock *sk, int more, int len) ccid3_pr_debug("%s, sk=%p, packet sent (%llu,%u)\n", dccp_role(sk), sk, packet->dccphtx_seqno, - packet->dccphtx_win_count); + packet->dccphtx_ccval); #endif hctx->ccid3hctx_idle = 0; + packet->dccphtx_rtt = hctx->ccid3hctx_rtt; packet->dccphtx_sent = 1; } else ccid3_pr_debug("%s, sk=%p, seqno=%llu NOT inserted!\n", @@ -1489,11 +1490,10 @@ trim_history: step = 2; /* OK, find next data packet */ num_later = 1; - win_count = entry->dccphrx_win_count; + win_count = entry->dccphrx_ccval; break; case 2: - tmp = (win_count - - entry->dccphrx_win_count); + tmp = win_count - entry->dccphrx_ccval; if (tmp < 0) tmp += TFRC_WIN_COUNT_LIMIT; if (tmp > TFRC_WIN_COUNT_PER_RTT + 1) { @@ -1553,7 +1553,7 @@ static void ccid3_hc_rx_send_feedback(struct sock *sk) } do_gettimeofday(&(hcrx->ccid3hcrx_tstamp_last_feedback)); - hcrx->ccid3hcrx_last_counter = packet->dccphrx_win_count; + hcrx->ccid3hcrx_last_counter = packet->dccphrx_ccval; hcrx->ccid3hcrx_seqno_last_counter = packet->dccphrx_seqno; hcrx->ccid3hcrx_bytes_recv = 0; @@ -1645,11 +1645,11 @@ static u32 ccid3_hc_rx_calc_first_li(struct sock *sk) switch (step) { case 0: tstamp = entry->dccphrx_tstamp; - win_count = entry->dccphrx_win_count; + win_count = entry->dccphrx_ccval; step = 1; break; case 1: - interval = win_count - entry->dccphrx_win_count; + interval = win_count - entry->dccphrx_ccval; if (interval < 0) interval 
+= TFRC_WIN_COUNT_LIMIT; if (interval > 4) @@ -1816,7 +1816,7 @@ static void ccid3_hc_rx_detect_loss(struct sock *sk) } if (seq_loss != DCCP_MAX_SEQNO + 1) - win_loss = a_loss->dccphrx_win_count; + win_loss = a_loss->dccphrx_ccval; out_update_li: ccid3_hc_rx_update_li(sk, seq_loss, win_loss); @@ -1918,7 +1918,7 @@ static void ccid3_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb) return; } - win_count = packet->dccphrx_win_count; + win_count = packet->dccphrx_ccval; ins = ccid3_hc_rx_add_hist(sk, packet); diff --git a/net/dccp/packet_history.h b/net/dccp/packet_history.h index 565dc96506e9..0056525a656c 100644 --- a/net/dccp/packet_history.h +++ b/net/dccp/packet_history.h @@ -47,15 +47,16 @@ struct dccp_tx_hist_entry { struct list_head dccphtx_node; u64 dccphtx_seqno:48, - dccphtx_win_count:8, + dccphtx_ccval:4, dccphtx_sent:1; + u32 dccphtx_rtt; struct timeval dccphtx_tstamp; }; struct dccp_rx_hist_entry { struct list_head dccphrx_node; u64 dccphrx_seqno:48, - dccphrx_win_count:4, + dccphrx_ccval:4, dccphrx_type:4; u32 dccphrx_ndp; /* In fact it is from 8 to 24 bits */ struct timeval dccphrx_tstamp; @@ -136,10 +137,10 @@ static inline struct dccp_rx_hist_entry * if (entry != NULL) { const struct dccp_hdr *dh = dccp_hdr(skb); - entry->dccphrx_seqno = DCCP_SKB_CB(skb)->dccpd_seq; - entry->dccphrx_win_count = dh->dccph_ccval; - entry->dccphrx_type = dh->dccph_type; - entry->dccphrx_ndp = ndp; + entry->dccphrx_seqno = DCCP_SKB_CB(skb)->dccpd_seq; + entry->dccphrx_ccval = dh->dccph_ccval; + entry->dccphrx_type = dh->dccph_type; + entry->dccphrx_ndp = ndp; do_gettimeofday(&(entry->dccphrx_tstamp)); } From 7690af3fff7633e40b1b9950eb8489129251d074 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Sat, 13 Aug 2005 20:34:54 -0300 Subject: [PATCH 398/584] [DCCP]: Just reflow the source code to fit in 80 columns Andrew Morton should be happy now 8) Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. 
Miller --- net/dccp/ccid.h | 18 ++-- net/dccp/dccp.h | 56 ++++++---- net/dccp/input.c | 100 +++++++++++------- net/dccp/ipv4.c | 114 +++++++++++++------- net/dccp/minisocks.c | 37 ++++--- net/dccp/options.c | 213 +++++++++++++++++++++++--------------- net/dccp/output.c | 44 +++++--- net/dccp/packet_history.c | 11 +- net/dccp/packet_history.h | 6 +- net/dccp/proto.c | 51 +++++---- net/dccp/timer.c | 36 ++++--- 11 files changed, 429 insertions(+), 257 deletions(-) diff --git a/net/dccp/ccid.h b/net/dccp/ccid.h index 469f9a14b463..95eb47d85517 100644 --- a/net/dccp/ccid.h +++ b/net/dccp/ccid.h @@ -30,21 +30,26 @@ struct ccid { int (*ccid_hc_tx_init)(struct sock *sk); void (*ccid_hc_rx_exit)(struct sock *sk); void (*ccid_hc_tx_exit)(struct sock *sk); - void (*ccid_hc_rx_packet_recv)(struct sock *sk, struct sk_buff *skb); + void (*ccid_hc_rx_packet_recv)(struct sock *sk, + struct sk_buff *skb); int (*ccid_hc_rx_parse_options)(struct sock *sk, unsigned char option, unsigned char len, u16 idx, unsigned char* value); - void (*ccid_hc_rx_insert_options)(struct sock *sk, struct sk_buff *skb); - void (*ccid_hc_tx_insert_options)(struct sock *sk, struct sk_buff *skb); - void (*ccid_hc_tx_packet_recv)(struct sock *sk, struct sk_buff *skb); + void (*ccid_hc_rx_insert_options)(struct sock *sk, + struct sk_buff *skb); + void (*ccid_hc_tx_insert_options)(struct sock *sk, + struct sk_buff *skb); + void (*ccid_hc_tx_packet_recv)(struct sock *sk, + struct sk_buff *skb); int (*ccid_hc_tx_parse_options)(struct sock *sk, unsigned char option, unsigned char len, u16 idx, unsigned char* value); int (*ccid_hc_tx_send_packet)(struct sock *sk, struct sk_buff *skb, int len); - void (*ccid_hc_tx_packet_sent)(struct sock *sk, int more, int len); + void (*ccid_hc_tx_packet_sent)(struct sock *sk, int more, + int len); }; extern int ccid_register(struct ccid *ccid); @@ -123,7 +128,8 @@ static inline int ccid_hc_tx_parse_options(struct ccid *ccid, struct sock *sk, { int rc = 0; if 
(ccid->ccid_hc_tx_parse_options != NULL) - rc = ccid->ccid_hc_tx_parse_options(sk, option, len, idx, value); + rc = ccid->ccid_hc_tx_parse_options(sk, option, len, idx, + value); return rc; } diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h index 8a0d7af649e4..62e735f1807d 100644 --- a/net/dccp/dccp.h +++ b/net/dccp/dccp.h @@ -25,7 +25,8 @@ extern int dccp_debug; do { if (dccp_debug) \ printk(KERN_DEBUG "%s: " format, __FUNCTION__ , ##a); \ } while (0) -#define dccp_pr_debug_cat(format, a...) do { if (dccp_debug) printk(format, ##a); } while (0) +#define dccp_pr_debug_cat(format, a...) do { if (dccp_debug) \ + printk(format, ##a); } while (0) #else #define dccp_pr_debug(format, a...) #define dccp_pr_debug_cat(format, a...) @@ -72,7 +73,8 @@ static inline const int after48(const u64 seq1, const u64 seq2) } /* is seq2 <= seq1 <= seq3 ? */ -static inline const int between48(const u64 seq1, const u64 seq2, const u64 seq3) +static inline const int between48(const u64 seq1, const u64 seq2, + const u64 seq3) { return (seq3 << 16) - (seq2 << 16) >= (seq1 << 16) - (seq2 << 16); } @@ -107,12 +109,14 @@ struct dccp_mib { } __SNMP_MIB_ALIGN__; DECLARE_SNMP_STAT(struct dccp_mib, dccp_statistics); -#define DCCP_INC_STATS(field) SNMP_INC_STATS(dccp_statistics, field) -#define DCCP_INC_STATS_BH(field) SNMP_INC_STATS_BH(dccp_statistics, field) -#define DCCP_INC_STATS_USER(field) SNMP_INC_STATS_USER(dccp_statistics, field) -#define DCCP_DEC_STATS(field) SNMP_DEC_STATS(dccp_statistics, field) -#define DCCP_ADD_STATS_BH(field, val) SNMP_ADD_STATS_BH(dccp_statistics, field, val) -#define DCCP_ADD_STATS_USER(field, val) SNMP_ADD_STATS_USER(dccp_statistics, field, val) +#define DCCP_INC_STATS(field) SNMP_INC_STATS(dccp_statistics, field) +#define DCCP_INC_STATS_BH(field) SNMP_INC_STATS_BH(dccp_statistics, field) +#define DCCP_INC_STATS_USER(field) SNMP_INC_STATS_USER(dccp_statistics, field) +#define DCCP_DEC_STATS(field) SNMP_DEC_STATS(dccp_statistics, field) +#define 
DCCP_ADD_STATS_BH(field, val) \ + SNMP_ADD_STATS_BH(dccp_statistics, field, val) +#define DCCP_ADD_STATS_USER(field, val) \ + SNMP_ADD_STATS_USER(dccp_statistics, field, val) extern int dccp_transmit_skb(struct sock *sk, struct sk_buff *skb); extern int dccp_retransmit_skb(struct sock *sk, struct sk_buff *skb); @@ -234,8 +238,8 @@ extern int dccp_disconnect(struct sock *sk, int flags); extern int dccp_getsockopt(struct sock *sk, int level, int optname, char *optval, int *optlen); extern int dccp_ioctl(struct sock *sk, int cmd, unsigned long arg); -extern int dccp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, - size_t size); +extern int dccp_sendmsg(struct kiocb *iocb, struct sock *sk, + struct msghdr *msg, size_t size); extern int dccp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, size_t len, int nonblock, int flags, int *addr_len); @@ -246,7 +250,8 @@ extern void dccp_shutdown(struct sock *sk, int how); extern int dccp_v4_checksum(const struct sk_buff *skb, const u32 saddr, const u32 daddr); -extern int dccp_v4_send_reset(struct sock *sk, enum dccp_reset_codes code); +extern int dccp_v4_send_reset(struct sock *sk, + enum dccp_reset_codes code); extern void dccp_send_close(struct sock *sk); struct dccp_skb_cb { @@ -303,7 +308,8 @@ static inline void dccp_inc_seqno(u64 *seqno) static inline void dccp_hdr_set_seq(struct dccp_hdr *dh, const u64 gss) { - struct dccp_hdr_ext *dhx = (struct dccp_hdr_ext *)((void *)dh + sizeof(*dh)); + struct dccp_hdr_ext *dhx = (struct dccp_hdr_ext *)((void *)dh + + sizeof(*dh)); #if defined(__LITTLE_ENDIAN_BITFIELD) dh->dccph_seq = htonl((gss >> 32)) >> 8; @@ -315,7 +321,8 @@ static inline void dccp_hdr_set_seq(struct dccp_hdr *dh, const u64 gss) dhx->dccph_seq_low = htonl(gss & 0xffffffff); } -static inline void dccp_hdr_set_ack(struct dccp_hdr_ack_bits *dhack, const u64 gsr) +static inline void dccp_hdr_set_ack(struct dccp_hdr_ack_bits *dhack, + const u64 gsr) { #if 
defined(__LITTLE_ENDIAN_BITFIELD) dhack->dccph_ack_nr_high = htonl((gsr >> 32)) >> 8; @@ -332,11 +339,14 @@ static inline void dccp_update_gsr(struct sock *sk, u64 seq) struct dccp_sock *dp = dccp_sk(sk); u64 tmp_gsr; - dccp_set_seqno(&tmp_gsr, dp->dccps_gsr + 1 - (dp->dccps_options.dccpo_sequence_window / 4)); + dccp_set_seqno(&tmp_gsr, + (dp->dccps_gsr + 1 - + (dp->dccps_options.dccpo_sequence_window / 4))); dp->dccps_gsr = seq; dccp_set_seqno(&dp->dccps_swl, max48(tmp_gsr, dp->dccps_isr)); dccp_set_seqno(&dp->dccps_swh, - dp->dccps_gsr + (3 * dp->dccps_options.dccpo_sequence_window) / 4); + (dp->dccps_gsr + + (3 * dp->dccps_options.dccpo_sequence_window) / 4)); } static inline void dccp_update_gss(struct sock *sk, u64 seq) @@ -344,7 +354,9 @@ static inline void dccp_update_gss(struct sock *sk, u64 seq) struct dccp_sock *dp = dccp_sk(sk); u64 tmp_gss; - dccp_set_seqno(&tmp_gss, dp->dccps_gss - dp->dccps_options.dccpo_sequence_window + 1); + dccp_set_seqno(&tmp_gss, + (dp->dccps_gss - + dp->dccps_options.dccpo_sequence_window + 1)); dp->dccps_awl = max48(tmp_gss, dp->dccps_iss); dp->dccps_awh = dp->dccps_gss = seq; } @@ -373,16 +385,20 @@ extern struct socket *dccp_ctl_socket; * * @dccpap_buf_head - circular buffer head * @dccpap_buf_tail - circular buffer tail - * @dccpap_buf_ackno - ack # of the most recent packet acknoldgeable in the buffer (i.e. %dccpap_buf_head) - * @dccpap_buf_nonce - the one-bit sum of the ECN Nonces on all packets acked by the buffer with State 0 + * @dccpap_buf_ackno - ack # of the most recent packet acknowledgeable in the + * buffer (i.e. %dccpap_buf_head) + * @dccpap_buf_nonce - the one-bit sum of the ECN Nonces on all packets acked + * by the buffer with State 0 * * Additionally, the HC-Receiver must keep some information about the * Ack Vectors it has recently sent. 
For each packet sent carrying an * Ack Vector, it remembers four variables: * - * @dccpap_ack_seqno - the Sequence Number used for the packet (HC-Receiver seqno) + * @dccpap_ack_seqno - the Sequence Number used for the packet + * (HC-Receiver seqno) * @dccpap_ack_ptr - the value of buf_head at the time of acknowledgement. - * @dccpap_ack_ackno - the Acknowledgement Number used for the packet (HC-Sender seqno) + * @dccpap_ack_ackno - the Acknowledgement Number used for the packet + * (HC-Sender seqno) * @dccpap_ack_nonce - the one-bit sum of the ECN Nonces for all State 0. * * @dccpap_buf_len - circular buffer length diff --git a/net/dccp/input.c b/net/dccp/input.c index bdaecde0bde1..4b8638f153a5 100644 --- a/net/dccp/input.c +++ b/net/dccp/input.c @@ -93,7 +93,8 @@ static int dccp_check_seqno(struct sock *sk, struct sk_buff *skb) */ if (dh->dccph_type == DCCP_PKT_SYNC || dh->dccph_type == DCCP_PKT_SYNCACK) { - if (between48(DCCP_SKB_CB(skb)->dccpd_ack_seq, dp->dccps_awl, dp->dccps_awh) && + if (between48(DCCP_SKB_CB(skb)->dccpd_ack_seq, + dp->dccps_awl, dp->dccps_awh) && !before48(DCCP_SKB_CB(skb)->dccpd_seq, dp->dccps_swl)) dccp_update_gsr(sk, DCCP_SKB_CB(skb)->dccpd_seq); else @@ -122,11 +123,13 @@ static int dccp_check_seqno(struct sock *sk, struct sk_buff *skb) if (between48(DCCP_SKB_CB(skb)->dccpd_seq, lswl, dp->dccps_swh) && (DCCP_SKB_CB(skb)->dccpd_ack_seq == DCCP_PKT_WITHOUT_ACK_SEQ || - between48(DCCP_SKB_CB(skb)->dccpd_ack_seq, lawl, dp->dccps_awh))) { + between48(DCCP_SKB_CB(skb)->dccpd_ack_seq, + lawl, dp->dccps_awh))) { dccp_update_gsr(sk, DCCP_SKB_CB(skb)->dccpd_seq); if (dh->dccph_type != DCCP_PKT_SYNC && - DCCP_SKB_CB(skb)->dccpd_ack_seq != DCCP_PKT_WITHOUT_ACK_SEQ) + (DCCP_SKB_CB(skb)->dccpd_ack_seq != + DCCP_PKT_WITHOUT_ACK_SEQ)) dp->dccps_gar = DCCP_SKB_CB(skb)->dccpd_ack_seq; } else { dccp_pr_debug("Step 6 failed, sending SYNC...\n"); @@ -161,10 +164,13 @@ int dccp_rcv_established(struct sock *sk, struct sk_buff *skb, if 
(dccp_ackpkts_add(dp->dccps_hc_rx_ackpkts, DCCP_SKB_CB(skb)->dccpd_seq, DCCP_ACKPKTS_STATE_RECEIVED)) { - LIMIT_NETDEBUG(KERN_INFO "DCCP: acknowledgeable packets buffer full!\n"); + LIMIT_NETDEBUG(KERN_INFO "DCCP: acknowledgeable " + "packets buffer full!\n"); ap->dccpap_ack_seqno = DCCP_MAX_SEQNO + 1; inet_csk_schedule_ack(sk); - inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK, TCP_DELACK_MIN, TCP_RTO_MAX); + inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK, + TCP_DELACK_MIN, + DCCP_RTO_MAX); goto discard; } @@ -175,7 +181,8 @@ int dccp_rcv_established(struct sock *sk, struct sk_buff *skb, */ if (!inet_csk_ack_scheduled(sk)) { inet_csk_schedule_ack(sk); - inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK, 5 * HZ, TCP_RTO_MAX); + inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK, 5 * HZ, + DCCP_RTO_MAX); } } @@ -186,8 +193,8 @@ int dccp_rcv_established(struct sock *sk, struct sk_buff *skb, case DCCP_PKT_DATAACK: case DCCP_PKT_DATA: /* - * FIXME: check if sk_receive_queue is full, schedule DATA_DROPPED option - * if it is. + * FIXME: check if sk_receive_queue is full, schedule DATA_DROPPED + * option if it is. 
*/ __skb_pull(skb, dh->dccph_doff * 4); __skb_queue_tail(&sk->sk_receive_queue, skb); @@ -272,11 +279,13 @@ static int dccp_rcv_request_sent_state_process(struct sock *sk, __kfree_skb(sk->sk_send_head); sk->sk_send_head = NULL; - if (!between48(DCCP_SKB_CB(skb)->dccpd_ack_seq, dp->dccps_awl, dp->dccps_awh)) { - dccp_pr_debug("invalid ackno: S.AWL=%llu, P.ackno=%llu, S.AWH=%llu \n", - (unsigned long long) dp->dccps_awl, - (unsigned long long) DCCP_SKB_CB(skb)->dccpd_ack_seq, - (unsigned long long) dp->dccps_awh); + if (!between48(DCCP_SKB_CB(skb)->dccpd_ack_seq, + dp->dccps_awl, dp->dccps_awh)) { + dccp_pr_debug("invalid ackno: S.AWL=%llu, " + "P.ackno=%llu, S.AWH=%llu \n", + (unsigned long long)dp->dccps_awl, + (unsigned long long)DCCP_SKB_CB(skb)->dccpd_ack_seq, + (unsigned long long)dp->dccps_awh); goto out_invalid_packet; } @@ -296,16 +305,17 @@ static int dccp_rcv_request_sent_state_process(struct sock *sk, /* * Step 10: Process REQUEST state (second part) * If S.state == REQUEST, - * / * If we get here, P is a valid Response from the server (see - * Step 4), and we should move to PARTOPEN state. PARTOPEN - * means send an Ack, don't send Data packets, retransmit - * Acks periodically, and always include any Init Cookie from - * the Response * / + * / * If we get here, P is a valid Response from the + * server (see Step 4), and we should move to + * PARTOPEN state. PARTOPEN means send an Ack, + * don't send Data packets, retransmit Acks + * periodically, and always include any Init Cookie + * from the Response * / * S.state := PARTOPEN * Set PARTOPEN timer * Continue with S.state == PARTOPEN - * / * Step 12 will send the Ack completing the three-way - * handshake * / + * / * Step 12 will send the Ack completing the + * three-way handshake * / */ dccp_set_state(sk, DCCP_PARTOPEN); @@ -341,7 +351,8 @@ static int dccp_rcv_request_sent_state_process(struct sock *sk, out_invalid_packet: return 1; /* dccp_v4_do_rcv will send a reset, but... 
- FIXME: the reset code should be DCCP_RESET_CODE_PACKET_ERROR */ + FIXME: the reset code should be + DCCP_RESET_CODE_PACKET_ERROR */ } static int dccp_rcv_respond_partopen_state_process(struct sock *sk, @@ -358,11 +369,12 @@ static int dccp_rcv_respond_partopen_state_process(struct sock *sk, case DCCP_PKT_DATAACK: case DCCP_PKT_ACK: /* - * FIXME: we should be reseting the PARTOPEN (DELACK) timer here, - * but only if we haven't used the DELACK timer for something else, - * like sending a delayed ack for a TIMESTAMP echo, etc, for now - * were not clearing it, sending an extra ACK when there is nothing - * else to do in DELACK is not a big deal after all. + * FIXME: we should be reseting the PARTOPEN (DELACK) timer + * here but only if we haven't used the DELACK timer for + * something else, like sending a delayed ack for a TIMESTAMP + * echo, etc, for now were not clearing it, sending an extra + * ACK when there is nothing else to do in DELACK is not a big + * deal after all. */ /* Stop the PARTOPEN timer */ @@ -374,7 +386,8 @@ static int dccp_rcv_respond_partopen_state_process(struct sock *sk, if (dh->dccph_type == DCCP_PKT_DATAACK) { dccp_rcv_established(sk, skb, dh, len); - queued = 1; /* packet was queued (by dccp_rcv_established) */ + queued = 1; /* packet was queued + (by dccp_rcv_established) */ } break; } @@ -399,7 +412,8 @@ int dccp_rcv_state_process(struct sock *sk, struct sk_buff *skb, if (dccp_parse_options(sk, skb)) goto discard; - if (DCCP_SKB_CB(skb)->dccpd_ack_seq != DCCP_PKT_WITHOUT_ACK_SEQ) + if (DCCP_SKB_CB(skb)->dccpd_ack_seq != + DCCP_PKT_WITHOUT_ACK_SEQ) dccp_event_ack_recv(sk, skb); ccid_hc_rx_packet_recv(dp->dccps_hc_rx_ccid, sk, skb); @@ -415,14 +429,17 @@ int dccp_rcv_state_process(struct sock *sk, struct sk_buff *skb, DCCP_ACKPKTS_STATE_RECEIVED)) goto discard; /* - * FIXME: this activation is probably wrong, have to study more - * TCP delack machinery and how it fits into DCCP draft, but - * for now it kinda "works" 8) + * FIXME: this 
activation is probably wrong, have to + * study more TCP delack machinery and how it fits into + * DCCP draft, but for now it kinda "works" 8) */ - if (dp->dccps_hc_rx_ackpkts->dccpap_ack_seqno == DCCP_MAX_SEQNO + 1 && + if ((dp->dccps_hc_rx_ackpkts->dccpap_ack_seqno == + DCCP_MAX_SEQNO + 1) && !inet_csk_ack_scheduled(sk)) { inet_csk_schedule_ack(sk); - inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK, TCP_DELACK_MIN, TCP_RTO_MAX); + inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK, + TCP_DELACK_MIN, + DCCP_RTO_MAX); } } } @@ -436,7 +453,10 @@ int dccp_rcv_state_process(struct sock *sk, struct sk_buff *skb, * Drop packet and return */ if (dh->dccph_type == DCCP_PKT_RESET) { - /* Queue the equivalent of TCP fin so that dccp_recvmsg exits the loop */ + /* + * Queue the equivalent of TCP fin so that dccp_recvmsg + * exits the loop + */ dccp_fin(sk, skb); dccp_time_wait(sk, DCCP_TIME_WAIT, 0); return 0; @@ -450,10 +470,12 @@ int dccp_rcv_state_process(struct sock *sk, struct sk_buff *skb, * Drop packet and return */ } else if ((dp->dccps_role != DCCP_ROLE_CLIENT && - (dh->dccph_type == DCCP_PKT_RESPONSE || dh->dccph_type == DCCP_PKT_CLOSEREQ)) || + (dh->dccph_type == DCCP_PKT_RESPONSE || + dh->dccph_type == DCCP_PKT_CLOSEREQ)) || (dp->dccps_role == DCCP_ROLE_CLIENT && dh->dccph_type == DCCP_PKT_REQUEST) || - (sk->sk_state == DCCP_RESPOND && dh->dccph_type == DCCP_PKT_DATA)) { + (sk->sk_state == DCCP_RESPOND && + dh->dccph_type == DCCP_PKT_DATA)) { dccp_send_sync(sk, DCCP_SKB_CB(skb)->dccpd_seq); goto discard; } @@ -491,11 +513,13 @@ int dccp_rcv_state_process(struct sock *sk, struct sk_buff *skb, case DCCP_RESPOND: case DCCP_PARTOPEN: - queued = dccp_rcv_respond_partopen_state_process(sk, skb, dh, len); + queued = dccp_rcv_respond_partopen_state_process(sk, skb, + dh, len); break; } - if (dh->dccph_type == DCCP_PKT_ACK || dh->dccph_type == DCCP_PKT_DATAACK) { + if (dh->dccph_type == DCCP_PKT_ACK || + dh->dccph_type == DCCP_PKT_DATAACK) { switch (old_state) { case 
DCCP_PARTOPEN: sk->sk_state_change(sk); diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index d3770aed3b15..42d9c878d4c3 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -29,7 +29,7 @@ struct inet_hashinfo __cacheline_aligned dccp_hashinfo = { .lhash_lock = RW_LOCK_UNLOCKED, .lhash_users = ATOMIC_INIT(0), - .lhash_wait = __WAIT_QUEUE_HEAD_INITIALIZER(dccp_hashinfo.lhash_wait), + .lhash_wait = __WAIT_QUEUE_HEAD_INITIALIZER(dccp_hashinfo.lhash_wait), .portalloc_lock = SPIN_LOCK_UNLOCKED, .port_rover = 1024 - 1, }; @@ -61,7 +61,8 @@ static int __dccp_v4_check_established(struct sock *sk, const __u16 lport, const int dif = sk->sk_bound_dev_if; INET_ADDR_COOKIE(acookie, saddr, daddr) const __u32 ports = INET_COMBINED_PORTS(inet->dport, lport); - const int hash = inet_ehashfn(daddr, lport, saddr, inet->dport, dccp_hashinfo.ehash_size); + const int hash = inet_ehashfn(daddr, lport, saddr, inet->dport, + dccp_hashinfo.ehash_size); struct inet_ehash_bucket *head = &dccp_hashinfo.ehash[hash]; const struct sock *sk2; const struct hlist_node *node; @@ -133,11 +134,12 @@ static int dccp_v4_hash_connect(struct sock *sk) local_bh_disable(); /* TODO. Actually it is not so bad idea to remove - * dccp_hashinfo.portalloc_lock before next submission to Linus. + * dccp_hashinfo.portalloc_lock before next submission to + * Linus. * As soon as we touch this place at all it is time to think. * - * Now it protects single _advisory_ variable dccp_hashinfo.port_rover, - * hence it is mostly useless. + * Now it protects single _advisory_ variable + * dccp_hashinfo.port_rover, hence it is mostly useless. 
* Code will work nicely if we just delete it, but * I am afraid in contented case it will work not better or * even worse: another cpu just will hit the same bucket @@ -152,7 +154,8 @@ static int dccp_v4_hash_connect(struct sock *sk) rover++; if ((rover < low) || (rover > high)) rover = low; - head = &dccp_hashinfo.bhash[inet_bhashfn(rover, dccp_hashinfo.bhash_size)]; + head = &dccp_hashinfo.bhash[inet_bhashfn(rover, + dccp_hashinfo.bhash_size)]; spin_lock(&head->lock); /* Does not bother with rcv_saddr checks, @@ -172,7 +175,8 @@ static int dccp_v4_hash_connect(struct sock *sk) } } - tb = inet_bind_bucket_create(dccp_hashinfo.bind_bucket_cachep, head, rover); + tb = inet_bind_bucket_create(dccp_hashinfo.bind_bucket_cachep, + head, rover); if (tb == NULL) { spin_unlock(&head->lock); break; @@ -211,7 +215,8 @@ ok: goto out; } - head = &dccp_hashinfo.bhash[inet_bhashfn(snum, dccp_hashinfo.bhash_size)]; + head = &dccp_hashinfo.bhash[inet_bhashfn(snum, + dccp_hashinfo.bhash_size)]; tb = inet_csk(sk)->icsk_bind_hash; spin_lock_bh(&head->lock); if (sk_head(&tb->owners) == sk && sk->sk_bind_node.next == NULL) { @@ -313,7 +318,9 @@ static int dccp_v4_connect(struct sock *sk, struct sockaddr *uaddr, out: return err; failure: - /* This unhashes the socket and releases the local port, if necessary. */ + /* + * This unhashes the socket and releases the local port, if necessary. + */ dccp_set_state(sk, DCCP_CLOSED); ip_rt_put(rt); sk->sk_route_caps = 0; @@ -365,8 +372,9 @@ static inline void dccp_do_pmtu_discovery(struct sock *sk, /* * From: draft-ietf-dccp-spec-11.txt * - * DCCP-Sync packets are the best choice for upward probing, - * since DCCP-Sync probes do not risk application data loss. + * DCCP-Sync packets are the best choice for upward + * probing, since DCCP-Sync probes do not risk application + * data loss. 
*/ dccp_send_sync(sk, dp->dccps_gsr); } /* else let the usual retransmit timer handle it */ @@ -405,11 +413,13 @@ static void dccp_v4_ctl_send_ack(struct sk_buff *rxskb) dh->dccph_x = 1; dccp_hdr_set_seq(dh, DCCP_SKB_CB(rxskb)->dccpd_ack_seq); - dccp_hdr_set_ack(dccp_hdr_ack_bits(skb), DCCP_SKB_CB(rxskb)->dccpd_seq); + dccp_hdr_set_ack(dccp_hdr_ack_bits(skb), + DCCP_SKB_CB(rxskb)->dccpd_seq); bh_lock_sock(dccp_ctl_socket->sk); err = ip_build_and_send_pkt(skb, dccp_ctl_socket->sk, - rxskb->nh.iph->daddr, rxskb->nh.iph->saddr, NULL); + rxskb->nh.iph->daddr, + rxskb->nh.iph->saddr, NULL); bh_unlock_sock(dccp_ctl_socket->sk); if (err == NET_XMIT_CN || err == 0) { @@ -418,7 +428,8 @@ static void dccp_v4_ctl_send_ack(struct sk_buff *rxskb) } } -static void dccp_v4_reqsk_send_ack(struct sk_buff *skb, struct request_sock *req) +static void dccp_v4_reqsk_send_ack(struct sk_buff *skb, + struct request_sock *req) { dccp_v4_ctl_send_ack(skb); } @@ -465,7 +476,8 @@ out: void dccp_v4_err(struct sk_buff *skb, u32 info) { const struct iphdr *iph = (struct iphdr *)skb->data; - const struct dccp_hdr *dh = (struct dccp_hdr *)(skb->data + (iph->ihl << 2)); + const struct dccp_hdr *dh = (struct dccp_hdr *)(skb->data + + (iph->ihl << 2)); struct dccp_sock *dp; struct inet_sock *inet; const int type = skb->h.icmph->type; @@ -605,7 +617,8 @@ out: sock_put(sk); } -extern struct sk_buff *dccp_make_reset(struct sock *sk, struct dst_entry *dst, enum dccp_reset_codes code); +extern struct sk_buff *dccp_make_reset(struct sock *sk, struct dst_entry *dst, + enum dccp_reset_codes code); int dccp_v4_send_reset(struct sock *sk, enum dccp_reset_codes code) { @@ -689,7 +702,8 @@ int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb) ireq->loc_addr = daddr; ireq->rmt_addr = saddr; /* FIXME: Merge Aristeu's option parsing code when ready */ - req->rcv_wnd = 100; /* Fake, option parsing will get the right value */ + req->rcv_wnd = 100; /* Fake, option parsing will get the + right value */ 
ireq->opt = NULL; /* @@ -804,7 +818,8 @@ static struct sock *dccp_v4_hnd_req(struct sock *sk, struct sk_buff *skb) return sk; } -int dccp_v4_checksum(const struct sk_buff *skb, const u32 saddr, const u32 daddr) +int dccp_v4_checksum(const struct sk_buff *skb, const u32 saddr, + const u32 daddr) { const struct dccp_hdr* dh = dccp_hdr(skb); int checksum_len; @@ -814,11 +829,13 @@ int dccp_v4_checksum(const struct sk_buff *skb, const u32 saddr, const u32 daddr checksum_len = skb->len; else { checksum_len = (dh->dccph_cscov + dh->dccph_x) * sizeof(u32); - checksum_len = checksum_len < skb->len ? checksum_len : skb->len; + checksum_len = checksum_len < skb->len ? checksum_len : + skb->len; } tmp = csum_partial((unsigned char *)dh, checksum_len, 0); - return csum_tcpudp_magic(saddr, daddr, checksum_len, IPPROTO_DCCP, tmp); + return csum_tcpudp_magic(saddr, daddr, checksum_len, + IPPROTO_DCCP, tmp); } static int dccp_v4_verify_checksum(struct sk_buff *skb, @@ -832,10 +849,12 @@ static int dccp_v4_verify_checksum(struct sk_buff *skb, checksum_len = skb->len; else { checksum_len = (dh->dccph_cscov + dh->dccph_x) * sizeof(u32); - checksum_len = checksum_len < skb->len ? checksum_len : skb->len; + checksum_len = checksum_len < skb->len ? checksum_len : + skb->len; } tmp = csum_partial((unsigned char *)dh, checksum_len, 0); - return csum_tcpudp_magic(saddr, daddr, checksum_len, IPPROTO_DCCP, tmp) == 0 ? 0 : -1; + return csum_tcpudp_magic(saddr, daddr, checksum_len, + IPPROTO_DCCP, tmp) == 0 ? 
0 : -1; } static struct dst_entry* dccp_v4_route_skb(struct sock *sk, @@ -850,7 +869,9 @@ static struct dst_entry* dccp_v4_route_skb(struct sock *sk, .proto = sk->sk_protocol, .uli_u = { .ports = { .sport = dccp_hdr(skb)->dccph_dport, - .dport = dccp_hdr(skb)->dccph_sport } } }; + .dport = dccp_hdr(skb)->dccph_sport } + } + }; if (ip_route_output_flow(&rt, &fl, sk, 0)) { IP_INC_STATS_BH(IPSTATS_MIB_OUTNOROUTES); @@ -899,17 +920,20 @@ void dccp_v4_ctl_send_reset(struct sk_buff *rxskb) dh->dccph_dport = rxdh->dccph_sport; dh->dccph_doff = dccp_hdr_reset_len / 4; dh->dccph_x = 1; - dccp_hdr_reset(skb)->dccph_reset_code = DCCP_SKB_CB(rxskb)->dccpd_reset_code; + dccp_hdr_reset(skb)->dccph_reset_code = + DCCP_SKB_CB(rxskb)->dccpd_reset_code; dccp_hdr_set_seq(dh, DCCP_SKB_CB(rxskb)->dccpd_ack_seq); - dccp_hdr_set_ack(dccp_hdr_ack_bits(skb), DCCP_SKB_CB(rxskb)->dccpd_seq); + dccp_hdr_set_ack(dccp_hdr_ack_bits(skb), + DCCP_SKB_CB(rxskb)->dccpd_seq); dh->dccph_checksum = dccp_v4_checksum(skb, rxskb->nh.iph->saddr, rxskb->nh.iph->daddr); bh_lock_sock(dccp_ctl_socket->sk); err = ip_build_and_send_pkt(skb, dccp_ctl_socket->sk, - rxskb->nh.iph->daddr, rxskb->nh.iph->saddr, NULL); + rxskb->nh.iph->daddr, + rxskb->nh.iph->saddr, NULL); bh_unlock_sock(dccp_ctl_socket->sk); if (err == NET_XMIT_CN || err == 0) { @@ -933,7 +957,8 @@ int dccp_v4_do_rcv(struct sock *sk, struct sk_buff *skb) /* * Step 3: Process LISTEN state * If S.state == LISTEN, - * If P.type == Request or P contains a valid Init Cookie option, + * If P.type == Request or P contains a valid Init Cookie + * option, * * Must scan the packet's options to check for an Init * Cookie. Only the Init Cookie is processed here, * however; other options are processed in Step 8. 
This @@ -950,7 +975,8 @@ int dccp_v4_do_rcv(struct sock *sk, struct sk_buff *skb) * Generate Reset(No Connection) unless P.type == Reset * Drop packet and return * - * NOTE: the check for the packet types is done in dccp_rcv_state_process + * NOTE: the check for the packet types is done in + * dccp_rcv_state_process */ if (sk->sk_state == DCCP_LISTEN) { struct sock *nsk = dccp_v4_hnd_req(sk, skb); @@ -1007,7 +1033,8 @@ static inline int dccp_invalid_packet(struct sk_buff *skb) } if (!pskb_may_pull(skb, dh->dccph_doff * sizeof(u32))) { - dccp_pr_debug("P.Data Offset(%u) too small 2\n", dh->dccph_doff); + dccp_pr_debug("P.Data Offset(%u) too small 2\n", + dh->dccph_doff); return 1; } @@ -1021,8 +1048,8 @@ static inline int dccp_invalid_packet(struct sk_buff *skb) dh->dccph_type != DCCP_PKT_DATA && dh->dccph_type != DCCP_PKT_ACK && dh->dccph_type != DCCP_PKT_DATAACK) { - dccp_pr_debug("P.type (%s) not Data, Ack nor DataAck and P.X == 0\n", - dccp_packet_name(dh->dccph_type)); + dccp_pr_debug("P.type (%s) not Data, Ack nor DataAck and " + "P.X == 0\n", dccp_packet_name(dh->dccph_type)); return 1; } @@ -1055,10 +1082,11 @@ int dccp_v4_rcv(struct sk_buff *skb) * dccp_ackpkts_add, you'll get something like this on a session that * sends 10 DATA/DATAACK packets: * - * dccp_ackpkts_print: 281473596467422 |0,0|3,0|0,0|3,0|0,0|3,0|0,0|3,0|0,1| + * ackpkts_print: 281473596467422 |0,0|3,0|0,0|3,0|0,0|3,0|0,0|3,0|0,1| * * 0, 0 means: DCCP_ACKPKTS_STATE_RECEIVED, RLE == just this packet - * 0, 1 means: DCCP_ACKPKTS_STATE_RECEIVED, RLE == two adjacent packets with the same state + * 0, 1 means: DCCP_ACKPKTS_STATE_RECEIVED, RLE == two adjacent packets + * with the same state * 3, 0 means: DCCP_ACKPKTS_STATE_NOT_RECEIVED, RLE == just this packet * * So... 
@@ -1072,10 +1100,12 @@ int dccp_v4_rcv(struct sk_buff *skb) * 281473596467416 was received * 281473596467415 was not received * 281473596467414 was received - * 281473596467413 was received (this one was the 3way handshake RESPONSE) + * 281473596467413 was received (this one was the 3way handshake + * RESPONSE) * */ - if (dh->dccph_type == DCCP_PKT_DATA || dh->dccph_type == DCCP_PKT_DATAACK) { + if (dh->dccph_type == DCCP_PKT_DATA || + dh->dccph_type == DCCP_PKT_DATAACK) { static int discard = 0; if (discard) { @@ -1170,7 +1200,8 @@ no_dccp_socket: * Drop packet and return */ if (dh->dccph_type != DCCP_PKT_RESET) { - DCCP_SKB_CB(skb)->dccpd_reset_code = DCCP_RESET_CODE_NO_CONNECTION; + DCCP_SKB_CB(skb)->dccpd_reset_code = + DCCP_RESET_CODE_NO_CONNECTION; dccp_v4_ctl_send_reset(skb); } @@ -1196,8 +1227,9 @@ static int dccp_v4_init_sock(struct sock *sk) dccp_options_init(&dp->dccps_options); if (dp->dccps_options.dccpo_send_ack_vector) { - dp->dccps_hc_rx_ackpkts = dccp_ackpkts_alloc(DCCP_MAX_ACK_VECTOR_LEN, - GFP_KERNEL); + dp->dccps_hc_rx_ackpkts = + dccp_ackpkts_alloc(DCCP_MAX_ACK_VECTOR_LEN, + GFP_KERNEL); if (dp->dccps_hc_rx_ackpkts == NULL) return -ENOMEM; @@ -1211,8 +1243,10 @@ static int dccp_v4_init_sock(struct sock *sk) * setsockopt(CCIDs-I-want/accept). -acme */ if (likely(!dccp_ctl_socket_init)) { - dp->dccps_hc_rx_ccid = ccid_init(dp->dccps_options.dccpo_ccid, sk); - dp->dccps_hc_tx_ccid = ccid_init(dp->dccps_options.dccpo_ccid, sk); + dp->dccps_hc_rx_ccid = ccid_init(dp->dccps_options.dccpo_ccid, + sk); + dp->dccps_hc_tx_ccid = ccid_init(dp->dccps_options.dccpo_ccid, + sk); if (dp->dccps_hc_rx_ccid == NULL || dp->dccps_hc_tx_ccid == NULL) { ccid_exit(dp->dccps_hc_rx_ccid, sk); diff --git a/net/dccp/minisocks.c b/net/dccp/minisocks.c index a6a0b270fb6c..b8e67207e97e 100644 --- a/net/dccp/minisocks.c +++ b/net/dccp/minisocks.c @@ -69,8 +69,8 @@ void dccp_time_wait(struct sock *sk, int state, int timeo) * socket up. 
We've got bigger problems than * non-graceful socket closings. */ - if (net_ratelimit()) - printk(KERN_INFO "DCCP: time wait bucket table overflow\n"); + LIMIT_NETDEBUG(KERN_INFO "DCCP: time wait bucket " + "table overflow\n"); } dccp_done(sk); @@ -98,19 +98,23 @@ struct sock *dccp_create_openreq_child(struct sock *sk, newicsk->icsk_rto = DCCP_TIMEOUT_INIT; if (newdp->dccps_options.dccpo_send_ack_vector) { - newdp->dccps_hc_rx_ackpkts = dccp_ackpkts_alloc(DCCP_MAX_ACK_VECTOR_LEN, - GFP_ATOMIC); + newdp->dccps_hc_rx_ackpkts = + dccp_ackpkts_alloc(DCCP_MAX_ACK_VECTOR_LEN, + GFP_ATOMIC); /* - * XXX: We're using the same CCIDs set on the parent, i.e. sk_clone - * copied the master sock and left the CCID pointers for this child, - * that is why we do the __ccid_get calls. + * XXX: We're using the same CCIDs set on the parent, + * i.e. sk_clone copied the master sock and left the + * CCID pointers for this child, that is why we do the + * __ccid_get calls. */ if (unlikely(newdp->dccps_hc_rx_ackpkts == NULL)) goto out_free; } - if (unlikely(ccid_hc_rx_init(newdp->dccps_hc_rx_ccid, newsk) != 0 || - ccid_hc_tx_init(newdp->dccps_hc_tx_ccid, newsk) != 0)) { + if (unlikely(ccid_hc_rx_init(newdp->dccps_hc_rx_ccid, + newsk) != 0 || + ccid_hc_tx_init(newdp->dccps_hc_tx_ccid, + newsk) != 0)) { dccp_ackpkts_free(newdp->dccps_hc_rx_ackpkts); ccid_hc_rx_exit(newdp->dccps_hc_rx_ccid, newsk); ccid_hc_tx_exit(newdp->dccps_hc_tx_ccid, newsk); @@ -129,7 +133,8 @@ out_free: * Step 3: Process LISTEN state * * Choose S.ISS (initial seqno) or set from Init Cookie - * Set S.ISR, S.GSR, S.SWL, S.SWH from packet or Init Cookie + * Set S.ISR, S.GSR, S.SWL, S.SWH from packet or Init + * Cookie */ /* See dccp_v4_conn_request */ @@ -160,13 +165,15 @@ struct sock *dccp_check_req(struct sock *sk, struct sk_buff *skb, /* Check for retransmitted REQUEST */ if (dccp_hdr(skb)->dccph_type == DCCP_PKT_REQUEST) { - if (after48(DCCP_SKB_CB(skb)->dccpd_seq, dccp_rsk(req)->dreq_isr)) { + if 
(after48(DCCP_SKB_CB(skb)->dccpd_seq, + dccp_rsk(req)->dreq_isr)) { struct dccp_request_sock *dreq = dccp_rsk(req); dccp_pr_debug("Retransmitted REQUEST\n"); /* Send another RESPONSE packet */ dccp_set_seqno(&dreq->dreq_iss, dreq->dreq_iss + 1); - dccp_set_seqno(&dreq->dreq_isr, DCCP_SKB_CB(skb)->dccpd_seq); + dccp_set_seqno(&dreq->dreq_isr, + DCCP_SKB_CB(skb)->dccpd_seq); req->rsk_ops->rtx_syn_ack(sk, req, NULL); } /* Network Duplicate, discard packet */ @@ -181,7 +188,8 @@ struct sock *dccp_check_req(struct sock *sk, struct sk_buff *skb, /* Invalid ACK */ if (DCCP_SKB_CB(skb)->dccpd_ack_seq != dccp_rsk(req)->dreq_iss) { - dccp_pr_debug("Invalid ACK number: ack_seq=%llu, dreq_iss=%llu\n", + dccp_pr_debug("Invalid ACK number: ack_seq=%llu, " + "dreq_iss=%llu\n", (unsigned long long) DCCP_SKB_CB(skb)->dccpd_ack_seq, (unsigned long long) @@ -223,7 +231,8 @@ int dccp_child_process(struct sock *parent, struct sock *child, const int state = child->sk_state; if (!sock_owned_by_user(child)) { - ret = dccp_rcv_state_process(child, skb, dccp_hdr(skb), skb->len); + ret = dccp_rcv_state_process(child, skb, dccp_hdr(skb), + skb->len); /* Wakeup parent, send SIGIO */ if (state == DCCP_RESPOND && child->sk_state != state) diff --git a/net/dccp/options.c b/net/dccp/options.c index 5bf997683a16..68d6614edcf1 100644 --- a/net/dccp/options.c +++ b/net/dccp/options.c @@ -59,14 +59,15 @@ int dccp_parse_options(struct sock *sk, struct sk_buff *skb) { struct dccp_sock *dp = dccp_sk(sk); #ifdef DCCP_DEBUG - const char *debug_prefix = dp->dccps_role == DCCP_ROLE_CLIENT ? "CLIENT rx opt: " : - "server rx opt: "; + const char *debug_prefix = dp->dccps_role == DCCP_ROLE_CLIENT ? 
+ "CLIENT rx opt: " : "server rx opt: "; #endif const struct dccp_hdr *dh = dccp_hdr(skb); const u8 pkt_type = DCCP_SKB_CB(skb)->dccpd_type; unsigned char *options = (unsigned char *)dh + dccp_hdr_len(skb); unsigned char *opt_ptr = options; - const unsigned char *opt_end = (unsigned char *)dh + (dh->dccph_doff * 4); + const unsigned char *opt_end = (unsigned char *)dh + + (dh->dccph_doff * 4); struct dccp_options_received *opt_recv = &dp->dccps_options_received; unsigned char opt, len; unsigned char *value; @@ -106,7 +107,8 @@ int dccp_parse_options(struct sock *sk, struct sk_buff *skb) goto out_invalid_option; opt_recv->dccpor_ndp = dccp_decode_value_var(value, len); - dccp_pr_debug("%sNDP count=%d\n", debug_prefix, opt_recv->dccpor_ndp); + dccp_pr_debug("%sNDP count=%d\n", debug_prefix, + opt_recv->dccpor_ndp); break; case DCCPO_ACK_VECTOR_0: if (len > DCCP_MAX_ACK_VECTOR_LEN) @@ -124,8 +126,9 @@ int dccp_parse_options(struct sock *sk, struct sk_buff *skb) DCCP_SKB_CB(skb)->dccpd_ack_seq); dccp_ackvector_print(DCCP_SKB_CB(skb)->dccpd_ack_seq, value, len); - dccp_ackpkts_check_rcv_ackvector(dp->dccps_hc_rx_ackpkts, sk, - DCCP_SKB_CB(skb)->dccpd_ack_seq, + dccp_ackpkts_check_rcv_ackvector(dp->dccps_hc_rx_ackpkts, + sk, + DCCP_SKB_CB(skb)->dccpd_ack_seq, len, value); break; case DCCPO_TIMESTAMP: @@ -148,15 +151,21 @@ int dccp_parse_options(struct sock *sk, struct sk_buff *skb) opt_recv->dccpor_timestamp_echo = ntohl(*(u32 *)value); - dccp_pr_debug("%sTIMESTAMP_ECHO=%u, len=%d, ackno=%llu, diff=%u\n", - debug_prefix, opt_recv->dccpor_timestamp_echo, + dccp_pr_debug("%sTIMESTAMP_ECHO=%u, len=%d, ackno=%llu, " + "diff=%u\n", + debug_prefix, + opt_recv->dccpor_timestamp_echo, len + 2, (unsigned long long) DCCP_SKB_CB(skb)->dccpd_ack_seq, - tcp_time_stamp - opt_recv->dccpor_timestamp_echo); + (tcp_time_stamp - + opt_recv->dccpor_timestamp_echo)); - opt_recv->dccpor_elapsed_time = dccp_decode_value_var(value + 4, len - 4); - dccp_pr_debug("%sTIMESTAMP_ECHO 
ELAPSED_TIME=%d\n", debug_prefix, + opt_recv->dccpor_elapsed_time = + dccp_decode_value_var(value + 4, + len - 4); + dccp_pr_debug("%sTIMESTAMP_ECHO ELAPSED_TIME=%d\n", + debug_prefix, opt_recv->dccpor_elapsed_time); break; case DCCPO_ELAPSED_TIME: @@ -165,33 +174,41 @@ int dccp_parse_options(struct sock *sk, struct sk_buff *skb) if (pkt_type == DCCP_PKT_DATA) continue; - opt_recv->dccpor_elapsed_time = dccp_decode_value_var(value, len); + opt_recv->dccpor_elapsed_time = + dccp_decode_value_var(value, len); dccp_pr_debug("%sELAPSED_TIME=%d\n", debug_prefix, opt_recv->dccpor_elapsed_time); break; /* * From draft-ietf-dccp-spec-11.txt: * - * Option numbers 128 through 191 are for options sent from the HC- - * Sender to the HC-Receiver; option numbers 192 through 255 are for - * options sent from the HC-Receiver to the HC-Sender. + * Option numbers 128 through 191 are for + * options sent from the HC-Sender to the + * HC-Receiver; option numbers 192 through 255 + * are for options sent from the HC-Receiver to + * the HC-Sender. */ case 128 ... 191: { const u16 idx = value - options; - if (ccid_hc_rx_parse_options(dp->dccps_hc_rx_ccid, sk, opt, len, idx, value) != 0) + if (ccid_hc_rx_parse_options(dp->dccps_hc_rx_ccid, sk, + opt, len, idx, + value) != 0) goto out_invalid_option; } break; case 192 ... 
255: { const u16 idx = value - options; - if (ccid_hc_tx_parse_options(dp->dccps_hc_tx_ccid, sk, opt, len, idx, value) != 0) + if (ccid_hc_tx_parse_options(dp->dccps_hc_tx_ccid, sk, + opt, len, idx, + value) != 0) goto out_invalid_option; } break; default: - pr_info("DCCP(%p): option %d(len=%d) not implemented, ignoring\n", + pr_info("DCCP(%p): option %d(len=%d) not " + "implemented, ignoring\n", sk, opt, len); break; } @@ -231,7 +248,8 @@ void dccp_insert_option(struct sock *sk, struct sk_buff *skb, unsigned char *to; if (DCCP_SKB_CB(skb)->dccpd_opt_len + len + 2 > DCCP_MAX_OPT_LEN) { - LIMIT_NETDEBUG(KERN_INFO "DCCP: packet too small to insert %d option!\n", option); + LIMIT_NETDEBUG(KERN_INFO "DCCP: packet too small to insert " + "%d option!\n", option); return; } @@ -287,8 +305,8 @@ void dccp_insert_option_elapsed_time(struct sock *sk, { #ifdef DCCP_DEBUG struct dccp_sock *dp = dccp_sk(sk); - const char *debug_prefix = dp->dccps_role == DCCP_ROLE_CLIENT ? "CLIENT TX opt: " : - "server TX opt: "; + const char *debug_prefix = dp->dccps_role == DCCP_ROLE_CLIENT ? + "CLIENT TX opt: " : "server TX opt: "; #endif const int elapsed_time_len = dccp_elapsed_time_len(elapsed_time); const int len = 2 + elapsed_time_len; @@ -299,7 +317,8 @@ void dccp_insert_option_elapsed_time(struct sock *sk, return; if (DCCP_SKB_CB(skb)->dccpd_opt_len + len > DCCP_MAX_OPT_LEN) { - LIMIT_NETDEBUG(KERN_INFO "DCCP: packet too small to insert elapsed time!\n"); + LIMIT_NETDEBUG(KERN_INFO "DCCP: packet too small to " + "insert elapsed time!\n"); return; } @@ -323,8 +342,8 @@ static void dccp_insert_option_ack_vector(struct sock *sk, struct sk_buff *skb) { struct dccp_sock *dp = dccp_sk(sk); #ifdef DCCP_DEBUG - const char *debug_prefix = dp->dccps_role == DCCP_ROLE_CLIENT ? "CLIENT TX opt: " : - "server TX opt: "; + const char *debug_prefix = dp->dccps_role == DCCP_ROLE_CLIENT ? 
+ "CLIENT TX opt: " : "server TX opt: "; #endif struct dccp_ackpkts *ap = dp->dccps_hc_rx_ackpkts; int len = ap->dccpap_buf_vector_len + 2; @@ -335,7 +354,8 @@ static void dccp_insert_option_ack_vector(struct sock *sk, struct sk_buff *skb) dccp_insert_option_elapsed_time(sk, skb, elapsed_time); if (DCCP_SKB_CB(skb)->dccpd_opt_len + len > DCCP_MAX_OPT_LEN) { - LIMIT_NETDEBUG(KERN_INFO "DCCP: packet too small to insert ACK Vector!\n"); + LIMIT_NETDEBUG(KERN_INFO "DCCP: packet too small to " + "insert ACK Vector!\n"); return; } @@ -360,7 +380,8 @@ static void dccp_insert_option_ack_vector(struct sock *sk, struct sk_buff *skb) /* Check if buf_head wraps */ if (ap->dccpap_buf_head + len > ap->dccpap_buf_len) { - const unsigned int tailsize = ap->dccpap_buf_len - ap->dccpap_buf_head; + const unsigned int tailsize = (ap->dccpap_buf_len - + ap->dccpap_buf_head); memcpy(to, from, tailsize); to += tailsize; @@ -375,8 +396,8 @@ static void dccp_insert_option_ack_vector(struct sock *sk, struct sk_buff *skb) * For each acknowledgement it sends, the HC-Receiver will add an * acknowledgement record. ack_seqno will equal the HC-Receiver * sequence number it used for the ack packet; ack_ptr will equal - * buf_head; ack_ackno will equal buf_ackno; and ack_nonce will equal - * buf_nonce. + * buf_head; ack_ackno will equal buf_ackno; and ack_nonce will + * equal buf_nonce. * * This implemention uses just one ack record for now. 
*/ @@ -386,33 +407,38 @@ static void dccp_insert_option_ack_vector(struct sock *sk, struct sk_buff *skb) ap->dccpap_ack_nonce = ap->dccpap_buf_nonce; ap->dccpap_ack_vector_len = ap->dccpap_buf_vector_len; - dccp_pr_debug("%sACK Vector 0, len=%d, ack_seqno=%llu, ack_ackno=%llu\n", + dccp_pr_debug("%sACK Vector 0, len=%d, ack_seqno=%llu, " + "ack_ackno=%llu\n", debug_prefix, ap->dccpap_ack_vector_len, (unsigned long long) ap->dccpap_ack_seqno, (unsigned long long) ap->dccpap_ack_ackno); } -static inline void dccp_insert_option_timestamp(struct sock *sk, struct sk_buff *skb) +static inline void dccp_insert_option_timestamp(struct sock *sk, + struct sk_buff *skb) { const u32 now = htonl(tcp_time_stamp); dccp_insert_option(sk, skb, DCCPO_TIMESTAMP, &now, sizeof(now)); } -static void dccp_insert_option_timestamp_echo(struct sock *sk, struct sk_buff *skb) +static void dccp_insert_option_timestamp_echo(struct sock *sk, + struct sk_buff *skb) { struct dccp_sock *dp = dccp_sk(sk); #ifdef DCCP_DEBUG - const char *debug_prefix = dp->dccps_role == DCCP_ROLE_CLIENT ? "CLIENT TX opt: " : - "server TX opt: "; + const char *debug_prefix = dp->dccps_role == DCCP_ROLE_CLIENT ? 
+ "CLIENT TX opt: " : "server TX opt: "; #endif u32 tstamp_echo; - const u32 elapsed_time = jiffies_to_usecs(jiffies - dp->dccps_timestamp_time) / 10; + const u32 elapsed_time = jiffies_to_usecs(jiffies - + dp->dccps_timestamp_time) / 10; const int elapsed_time_len = dccp_elapsed_time_len(elapsed_time); const int len = 6 + elapsed_time_len; unsigned char *to; if (DCCP_SKB_CB(skb)->dccpd_opt_len + len > DCCP_MAX_OPT_LEN) { - LIMIT_NETDEBUG(KERN_INFO "DCCP: packet too small to insert timestamp echo!\n"); + LIMIT_NETDEBUG(KERN_INFO "DCCP: packet too small to insert " + "timestamp echo!\n"); return; } @@ -447,7 +473,8 @@ void dccp_insert_options(struct sock *sk, struct sk_buff *skb) if (!dccp_packet_without_ack(skb)) { if (dp->dccps_options.dccpo_send_ack_vector && - dp->dccps_hc_rx_ackpkts->dccpap_buf_ackno != DCCP_MAX_SEQNO + 1) + (dp->dccps_hc_rx_ackpkts->dccpap_buf_ackno != + DCCP_MAX_SEQNO + 1)) dccp_insert_option_ack_vector(sk, skb); dccp_insert_option_timestamp(sk, skb); @@ -480,12 +507,16 @@ struct dccp_ackpkts *dccp_ackpkts_alloc(unsigned int len, int priority) #ifdef DCCP_DEBUG memset(ap->dccpap_buf, 0xFF, len); #endif - ap->dccpap_buf_len = len; - ap->dccpap_buf_head = ap->dccpap_buf_tail = ap->dccpap_buf_len - 1; - ap->dccpap_buf_ackno = ap->dccpap_ack_ackno = ap->dccpap_ack_seqno = DCCP_MAX_SEQNO + 1; - ap->dccpap_buf_nonce = ap->dccpap_buf_nonce = 0; - ap->dccpap_ack_ptr = 0; - ap->dccpap_time = 0; + ap->dccpap_buf_len = len; + ap->dccpap_buf_head = + ap->dccpap_buf_tail = + ap->dccpap_buf_len - 1; + ap->dccpap_buf_ackno = + ap->dccpap_ack_ackno = + ap->dccpap_ack_seqno = DCCP_MAX_SEQNO + 1; + ap->dccpap_buf_nonce = ap->dccpap_buf_nonce = 0; + ap->dccpap_ack_ptr = 0; + ap->dccpap_time = 0; ap->dccpap_buf_vector_len = ap->dccpap_ack_vector_len = 0; } @@ -567,15 +598,16 @@ int dccp_ackpkts_add(struct dccp_ackpkts *ap, u64 ackno, u8 state) * * From Appendix A: * - * Of course, the circular buffer may overflow, either when the HC- - * Sender is sending data 
at a very high rate, when the HC-Receiver's - * acknowledgements are not reaching the HC-Sender, or when the HC- - * Sender is forgetting to acknowledge those acks (so the HC-Receiver - * is unable to clean up old state). In this case, the HC-Receiver - * should either compress the buffer (by increasing run lengths when - * possible), transfer its state to a larger buffer, or, as a last - * resort, drop all received packets, without processing them - * whatsoever, until its buffer shrinks again. + * Of course, the circular buffer may overflow, either when the + * HC-Sender is sending data at a very high rate, when the + * HC-Receiver's acknowledgements are not reaching the HC-Sender, + * or when the HC-Sender is forgetting to acknowledge those acks + * (so the HC-Receiver is unable to clean up old state). In this + * case, the HC-Receiver should either compress the buffer (by + * increasing run lengths when possible), transfer its state to + * a larger buffer, or, as a last resort, drop all received + * packets, without processing them whatsoever, until its buffer + * shrinks again. */ /* See if this is the first ackno being inserted */ @@ -583,15 +615,17 @@ int dccp_ackpkts_add(struct dccp_ackpkts *ap, u64 ackno, u8 state) ap->dccpap_buf[ap->dccpap_buf_head] = state; ap->dccpap_buf_vector_len = 1; } else if (after48(ackno, ap->dccpap_buf_ackno)) { - const u64 delta = dccp_delta_seqno(ap->dccpap_buf_ackno, ackno); + const u64 delta = dccp_delta_seqno(ap->dccpap_buf_ackno, + ackno); /* - * Look if the state of this packet is the same as the previous ackno - * and if so if we can bump the head len. + * Look if the state of this packet is the same as the + * previous ackno and if so if we can bump the head len. 
*/ if (delta == 1 && dccp_ackpkts_state(ap, ap->dccpap_buf_head) == state && - dccp_ackpkts_len(ap, ap->dccpap_buf_head) < DCCP_ACKPKTS_LEN_MASK) + (dccp_ackpkts_len(ap, ap->dccpap_buf_head) < + DCCP_ACKPKTS_LEN_MASK)) ap->dccpap_buf[ap->dccpap_buf_head]++; else if (dccp_ackpkts_set_buf_head_state(ap, delta, state)) return -ENOBUFS; @@ -599,9 +633,10 @@ int dccp_ackpkts_add(struct dccp_ackpkts *ap, u64 ackno, u8 state) /* * A.1.2. Old Packets * - * When a packet with Sequence Number S arrives, and S <= buf_ackno, - * the HC-Receiver will scan the table for the byte corresponding to S. - * (Indexing structures could reduce the complexity of this scan.) + * When a packet with Sequence Number S arrives, and + * S <= buf_ackno, the HC-Receiver will scan the table + * for the byte corresponding to S. (Indexing structures + * could reduce the complexity of this scan.) */ u64 delta = dccp_delta_seqno(ackno, ap->dccpap_buf_ackno); unsigned int index = ap->dccpap_buf_head; @@ -610,11 +645,12 @@ int dccp_ackpkts_add(struct dccp_ackpkts *ap, u64 ackno, u8 state) const u8 len = dccp_ackpkts_len(ap, index); const u8 state = dccp_ackpkts_state(ap, index); /* - * valid packets not yet in dccpap_buf have a reserved entry, with - * a len equal to 0 + * valid packets not yet in dccpap_buf have a reserved + * entry, with a len equal to 0. */ if (state == DCCP_ACKPKTS_STATE_NOT_RECEIVED && - len == 0 && delta == 0) { /* Found our reserved seat! */ + len == 0 && delta == 0) { /* Found our + reserved seat! 
*/ dccp_pr_debug("Found %llu reserved seat!\n", (unsigned long long) ackno); ap->dccpap_buf[index] = state; @@ -639,13 +675,14 @@ out: out_duplicate: /* Duplicate packet */ - dccp_pr_debug("Received a dup or already considered lost packet: %llu\n", - (unsigned long long) ackno); + dccp_pr_debug("Received a dup or already considered lost " + "packet: %llu\n", (unsigned long long) ackno); return -EILSEQ; } #ifdef DCCP_DEBUG -void dccp_ackvector_print(const u64 ackno, const unsigned char *vector, int len) +void dccp_ackvector_print(const u64 ackno, const unsigned char *vector, + int len) { if (!dccp_debug) return; @@ -678,8 +715,9 @@ static void dccp_ackpkts_trow_away_ack_record(struct dccp_ackpkts *ap) * As we're keeping track of the ack vector size * (dccpap_buf_vector_len) and the sent ack vector size * (dccpap_ack_vector_len) we don't need dccpap_buf_tail at all, but - * keep this code here as in the future we'll implement a vector of ack - * records, as suggested in draft-ietf-dccp-spec-11.txt Appendix A. -acme + * keep this code here as in the future we'll implement a vector of + * ack records, as suggested in draft-ietf-dccp-spec-11.txt + * Appendix A. -acme */ #if 0 ap->dccpap_buf_tail = ap->dccpap_ack_ptr + 1; @@ -699,10 +737,11 @@ void dccp_ackpkts_check_rcv_ackno(struct dccp_ackpkts *ap, struct sock *sk, if (ackno == ap->dccpap_ack_seqno) { #ifdef DCCP_DEBUG struct dccp_sock *dp = dccp_sk(sk); - const char *debug_prefix = dp->dccps_role == DCCP_ROLE_CLIENT ? "CLIENT rx ack: " : - "server rx ack: "; + const char *debug_prefix = dp->dccps_role == DCCP_ROLE_CLIENT ? 
+ "CLIENT rx ack: " : "server rx ack: "; #endif - dccp_pr_debug("%sACK packet 0, len=%d, ack_seqno=%llu, ack_ackno=%llu, ACKED!\n", + dccp_pr_debug("%sACK packet 0, len=%d, ack_seqno=%llu, " + "ack_ackno=%llu, ACKED!\n", debug_prefix, 1, (unsigned long long) ap->dccpap_ack_seqno, (unsigned long long) ap->dccpap_ack_ackno); @@ -722,20 +761,21 @@ static void dccp_ackpkts_check_rcv_ackvector(struct dccp_ackpkts *ap, if (ap->dccpap_ack_seqno == DCCP_MAX_SEQNO + 1) return; /* - * We're in the receiver half connection, so if the received an ACK vector - * ackno (e.g. 50) before dccpap_ack_seqno (e.g. 52), we're not interested. + * We're in the receiver half connection, so if the received an ACK + * vector ackno (e.g. 50) before dccpap_ack_seqno (e.g. 52), we're + * not interested. * * Extra explanation with example: * * if we received an ACK vector with ackno 50, it can only be acking * 50, 49, 48, etc, not 52 (the seqno for the ACK vector we sent). */ - // dccp_pr_debug("is %llu < %llu? ", ackno, ap->dccpap_ack_seqno); + /* dccp_pr_debug("is %llu < %llu? ", ackno, ap->dccpap_ack_seqno); */ if (before48(ackno, ap->dccpap_ack_seqno)) { - // dccp_pr_debug_cat("yes\n"); + /* dccp_pr_debug_cat("yes\n"); */ return; } - // dccp_pr_debug_cat("no\n"); + /* dccp_pr_debug_cat("no\n"); */ i = len; while (i--) { @@ -744,18 +784,25 @@ static void dccp_ackpkts_check_rcv_ackvector(struct dccp_ackpkts *ap, dccp_set_seqno(&ackno_end_rl, ackno - rl); - // dccp_pr_debug("is %llu <= %llu <= %llu? ", ackno_end_rl, ap->dccpap_ack_seqno, ackno); + /* + * dccp_pr_debug("is %llu <= %llu <= %llu? 
", ackno_end_rl, + * ap->dccpap_ack_seqno, ackno); + */ if (between48(ap->dccpap_ack_seqno, ackno_end_rl, ackno)) { - const u8 state = (*vector & DCCP_ACKPKTS_STATE_MASK) >> 6; - // dccp_pr_debug_cat("yes\n"); + const u8 state = (*vector & + DCCP_ACKPKTS_STATE_MASK) >> 6; + /* dccp_pr_debug_cat("yes\n"); */ if (state != DCCP_ACKPKTS_STATE_NOT_RECEIVED) { #ifdef DCCP_DEBUG struct dccp_sock *dp = dccp_sk(sk); - const char *debug_prefix = dp->dccps_role == DCCP_ROLE_CLIENT ? "CLIENT rx ack: " : - "server rx ack: "; + const char *debug_prefix = + dp->dccps_role == DCCP_ROLE_CLIENT ? + "CLIENT rx ack: " : "server rx ack: "; #endif - dccp_pr_debug("%sACK vector 0, len=%d, ack_seqno=%llu, ack_ackno=%llu, ACKED!\n", + dccp_pr_debug("%sACK vector 0, len=%d, " + "ack_seqno=%llu, ack_ackno=%llu, " + "ACKED!\n", debug_prefix, len, (unsigned long long) ap->dccpap_ack_seqno, @@ -764,13 +811,13 @@ static void dccp_ackpkts_check_rcv_ackvector(struct dccp_ackpkts *ap, dccp_ackpkts_trow_away_ack_record(ap); } /* - * If dccpap_ack_seqno was not received, no problem we'll - * send another ACK vector. + * If dccpap_ack_seqno was not received, no problem + * we'll send another ACK vector. */ ap->dccpap_ack_seqno = DCCP_MAX_SEQNO + 1; break; } - // dccp_pr_debug_cat("no\n"); + /* dccp_pr_debug_cat("no\n"); */ dccp_set_seqno(&ackno, ackno_end_rl - 1); ++vector; diff --git a/net/dccp/output.c b/net/dccp/output.c index 50292c0605fb..dcc061bed924 100644 --- a/net/dccp/output.c +++ b/net/dccp/output.c @@ -40,13 +40,13 @@ int dccp_transmit_skb(struct sock *sk, struct sk_buff *skb) /* XXX For now we're using only 48 bits sequence numbers */ const int dccp_header_size = sizeof(*dh) + sizeof(struct dccp_hdr_ext) + - dccp_packet_hdr_len(dcb->dccpd_type); + dccp_packet_hdr_len(dcb->dccpd_type); int err, set_ack = 1; u64 ackno = dp->dccps_gsr; /* - * FIXME: study DCCP_PKT_SYNC[ACK] to see what is the right thing - * to do here... 
+ * FIXME: study DCCP_PKT_SYNC[ACK] to see what is the right + * thing to do here... */ dccp_inc_seqno(&dp->dccps_gss); @@ -65,7 +65,9 @@ int dccp_transmit_skb(struct sock *sk, struct sk_buff *skb) skb->h.raw = skb_push(skb, dccp_header_size); dh = dccp_hdr(skb); - /* Data packets are not cloned as they are never retransmitted */ + /* + * Data packets are not cloned as they are never retransmitted + */ if (skb_cloned(skb)) skb_set_owner_w(skb, sk); @@ -86,10 +88,12 @@ int dccp_transmit_skb(struct sock *sk, struct sk_buff *skb) switch (dcb->dccpd_type) { case DCCP_PKT_REQUEST: - dccp_hdr_request(skb)->dccph_req_service = dcb->dccpd_service; + dccp_hdr_request(skb)->dccph_req_service = + dcb->dccpd_service; break; case DCCP_PKT_RESET: - dccp_hdr_reset(skb)->dccph_reset_code = dcb->dccpd_reset_code; + dccp_hdr_reset(skb)->dccph_reset_code = + dcb->dccpd_reset_code; break; } @@ -123,10 +127,13 @@ unsigned int dccp_sync_mss(struct sock *sk, u32 pmtu) int mss_now; /* - * FIXME: we really should be using the af_specific thing to support IPv6. - * mss_now = pmtu - tp->af_specific->net_header_len - sizeof(struct dccp_hdr) - sizeof(struct dccp_hdr_ext); + * FIXME: we really should be using the af_specific thing to support + * IPv6. 
+ * mss_now = pmtu - tp->af_specific->net_header_len - + * sizeof(struct dccp_hdr) - sizeof(struct dccp_hdr_ext); */ - mss_now = pmtu - sizeof(struct iphdr) - sizeof(struct dccp_hdr) - sizeof(struct dccp_hdr_ext); + mss_now = pmtu - sizeof(struct iphdr) - sizeof(struct dccp_hdr) - + sizeof(struct dccp_hdr_ext); /* Now subtract optional transport overhead */ mss_now -= dp->dccps_ext_header_len; @@ -223,7 +230,8 @@ struct sk_buff *dccp_make_response(struct sock *sk, struct dst_entry *dst, dh->dccph_sport = inet_sk(sk)->sport; dh->dccph_dport = inet_rsk(req)->rmt_port; - dh->dccph_doff = (dccp_header_size + DCCP_SKB_CB(skb)->dccpd_opt_len) / 4; + dh->dccph_doff = (dccp_header_size + + DCCP_SKB_CB(skb)->dccpd_opt_len) / 4; dh->dccph_type = DCCP_PKT_RESPONSE; dh->dccph_x = 1; dccp_hdr_set_seq(dh, dccp_rsk(req)->dreq_iss); @@ -271,7 +279,8 @@ struct sk_buff *dccp_make_reset(struct sock *sk, struct dst_entry *dst, dh->dccph_sport = inet_sk(sk)->sport; dh->dccph_dport = inet_sk(sk)->dport; - dh->dccph_doff = (dccp_header_size + DCCP_SKB_CB(skb)->dccpd_opt_len) / 4; + dh->dccph_doff = (dccp_header_size + + DCCP_SKB_CB(skb)->dccpd_opt_len) / 4; dh->dccph_type = DCCP_PKT_RESET; dh->dccph_x = 1; dccp_hdr_set_seq(dh, dp->dccps_gss); @@ -348,7 +357,9 @@ void dccp_send_ack(struct sock *sk) if (skb == NULL) { inet_csk_schedule_ack(sk); inet_csk(sk)->icsk_ack.ato = TCP_ATO_MIN; - inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK, TCP_DELACK_MAX, TCP_RTO_MAX); + inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK, + TCP_DELACK_MAX, + DCCP_RTO_MAX); return; } @@ -416,8 +427,10 @@ void dccp_send_sync(struct sock *sk, u64 seq) dccp_transmit_skb(sk, skb); } -/* Send a DCCP_PKT_CLOSE/CLOSEREQ. The caller locks the socket for us. This cannot be - * allowed to fail queueing a DCCP_PKT_CLOSE/CLOSEREQ frame under any circumstances. +/* + * Send a DCCP_PKT_CLOSE/CLOSEREQ. The caller locks the socket for us. 
This + * cannot be allowed to fail queueing a DCCP_PKT_CLOSE/CLOSEREQ frame under + * any circumstances. */ void dccp_send_close(struct sock *sk) { @@ -435,7 +448,8 @@ void dccp_send_close(struct sock *sk) /* Reserve space for headers and prepare control bits. */ skb_reserve(skb, sk->sk_prot->max_header); skb->csum = 0; - DCCP_SKB_CB(skb)->dccpd_type = dp->dccps_role == DCCP_ROLE_CLIENT ? DCCP_PKT_CLOSE : DCCP_PKT_CLOSEREQ; + DCCP_SKB_CB(skb)->dccpd_type = dp->dccps_role == DCCP_ROLE_CLIENT ? + DCCP_PKT_CLOSE : DCCP_PKT_CLOSEREQ; skb_set_owner_w(skb, sk); dccp_transmit_skb(sk, skb); diff --git a/net/dccp/packet_history.c b/net/dccp/packet_history.c index 6b414898f0c4..2d9ef5ae0bf6 100644 --- a/net/dccp/packet_history.c +++ b/net/dccp/packet_history.c @@ -55,7 +55,7 @@ struct dccp_rx_hist *dccp_rx_hist_new(const char *name) sprintf(slab_name, dccp_rx_hist_mask, name); hist->dccprxh_slab = kmem_cache_create(slab_name, - sizeof(struct dccp_rx_hist_entry), + sizeof(struct dccp_rx_hist_entry), 0, SLAB_HWCACHE_ALIGN, NULL, NULL); if (hist->dccprxh_slab == NULL) @@ -128,7 +128,7 @@ struct dccp_tx_hist *dccp_tx_hist_new(const char *name) sprintf(slab_name, dccp_tx_hist_mask, name); hist->dccptxh_slab = kmem_cache_create(slab_name, - sizeof(struct dccp_tx_hist_entry), + sizeof(struct dccp_tx_hist_entry), 0, SLAB_HWCACHE_ALIGN, NULL, NULL); if (hist->dccptxh_slab == NULL) @@ -156,8 +156,8 @@ void dccp_tx_hist_delete(struct dccp_tx_hist *hist) EXPORT_SYMBOL_GPL(dccp_tx_hist_delete); -struct dccp_tx_hist_entry *dccp_tx_hist_find_entry(const struct list_head *list, - const u64 seq) +struct dccp_tx_hist_entry * + dccp_tx_hist_find_entry(const struct list_head *list, const u64 seq) { struct dccp_tx_hist_entry *packet = NULL, *entry; @@ -172,7 +172,8 @@ struct dccp_tx_hist_entry *dccp_tx_hist_find_entry(const struct list_head *list, EXPORT_SYMBOL_GPL(dccp_tx_hist_find_entry); -void dccp_tx_hist_purge_older(struct dccp_tx_hist *hist, struct list_head *list, +void 
dccp_tx_hist_purge_older(struct dccp_tx_hist *hist, + struct list_head *list, struct dccp_tx_hist_entry *packet) { struct dccp_tx_hist_entry *next; diff --git a/net/dccp/packet_history.h b/net/dccp/packet_history.h index 0056525a656c..489fff45ccdf 100644 --- a/net/dccp/packet_history.h +++ b/net/dccp/packet_history.h @@ -115,7 +115,8 @@ extern void dccp_tx_hist_purge_older(struct dccp_tx_hist *hist, extern void dccp_tx_hist_purge(struct dccp_tx_hist *hist, struct list_head *list); -static inline struct dccp_tx_hist_entry *dccp_tx_hist_head(struct list_head *list) +static inline struct dccp_tx_hist_entry * + dccp_tx_hist_head(struct list_head *list) { struct dccp_tx_hist_entry *head = NULL; @@ -163,7 +164,8 @@ static inline void dccp_rx_hist_add_entry(struct list_head *list, list_add(&entry->dccphrx_node, list); } -static inline struct dccp_rx_hist_entry *dccp_rx_hist_head(struct list_head *list) +static inline struct dccp_rx_hist_entry * + dccp_rx_hist_head(struct list_head *list) { struct dccp_rx_hist_entry *head = NULL; diff --git a/net/dccp/proto.c b/net/dccp/proto.c index 877c1e0e3c48..46dd489f66cf 100644 --- a/net/dccp/proto.c +++ b/net/dccp/proto.c @@ -255,12 +255,16 @@ int dccp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, /* FIXME */ #if 0 - /* Are we at urgent data? Stop if we have read anything or have SIGURG pending. */ + /* + * Are we at urgent data? Stop if we have read anything or + * have SIGURG pending. + */ if (tp->urg_data && tp->urg_seq == *seq) { if (copied) break; if (signal_pending(current)) { - copied = timeo ? sock_intr_errno(timeo) : -EAGAIN; + copied = timeo ? 
sock_intr_errno(timeo) : + -EAGAIN; break; } } @@ -285,7 +289,8 @@ int dccp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, dccp_pr_debug("found fin ok!\n"); goto found_fin_ok; } - dccp_pr_debug("packet_type=%s\n", dccp_packet_name(dh->dccph_type)); + dccp_pr_debug("packet_type=%s\n", + dccp_packet_name(dh->dccph_type)); BUG_TRAP(flags & MSG_PEEK); skb = skb->next; } while (skb != (struct sk_buff *)&sk->sk_receive_queue); @@ -439,16 +444,16 @@ out: } static const unsigned char dccp_new_state[] = { - /* current state: new state: action: */ - [0] = DCCP_CLOSED, - [DCCP_OPEN] = DCCP_CLOSING | DCCP_ACTION_FIN, - [DCCP_REQUESTING] = DCCP_CLOSED, - [DCCP_PARTOPEN] = DCCP_CLOSING | DCCP_ACTION_FIN, - [DCCP_LISTEN] = DCCP_CLOSED, - [DCCP_RESPOND] = DCCP_CLOSED, - [DCCP_CLOSING] = DCCP_CLOSED, - [DCCP_TIME_WAIT] = DCCP_CLOSED, - [DCCP_CLOSED] = DCCP_CLOSED, + /* current state: new state: action: */ + [0] = DCCP_CLOSED, + [DCCP_OPEN] = DCCP_CLOSING | DCCP_ACTION_FIN, + [DCCP_REQUESTING] = DCCP_CLOSED, + [DCCP_PARTOPEN] = DCCP_CLOSING | DCCP_ACTION_FIN, + [DCCP_LISTEN] = DCCP_CLOSED, + [DCCP_RESPOND] = DCCP_CLOSED, + [DCCP_CLOSING] = DCCP_CLOSED, + [DCCP_TIME_WAIT] = DCCP_CLOSED, + [DCCP_CLOSED] = DCCP_CLOSED, }; static int dccp_close_state(struct sock *sk) @@ -541,7 +546,8 @@ struct proto_ops inet_dccp_ops = { .getname = inet_getname, .poll = sock_no_poll, .ioctl = inet_ioctl, - .listen = inet_dccp_listen, /* FIXME: work on inet_listen to rename it to sock_common_listen */ + /* FIXME: work on inet_listen to rename it to sock_common_listen */ + .listen = inet_dccp_listen, .shutdown = inet_shutdown, .setsockopt = sock_common_setsockopt, .getsockopt = sock_common_getsockopt, @@ -638,10 +644,10 @@ static int __init dccp_init(void) if (rc) goto out; - dccp_hashinfo.bind_bucket_cachep = kmem_cache_create("dccp_bind_bucket", - sizeof(struct inet_bind_bucket), - 0, SLAB_HWCACHE_ALIGN, - NULL, NULL); + dccp_hashinfo.bind_bucket_cachep = + 
kmem_cache_create("dccp_bind_bucket", + sizeof(struct inet_bind_bucket), 0, + SLAB_HWCACHE_ALIGN, NULL, NULL); if (!dccp_hashinfo.bind_bucket_cachep) goto out_proto_unregister; @@ -657,14 +663,16 @@ static int __init dccp_init(void) goal = num_physpages >> (23 - PAGE_SHIFT); if (thash_entries) - goal = (thash_entries * sizeof(struct inet_ehash_bucket)) >> PAGE_SHIFT; + goal = (thash_entries * + sizeof(struct inet_ehash_bucket)) >> PAGE_SHIFT; for (ehash_order = 0; (1UL << ehash_order) < goal; ehash_order++) ; do { dccp_hashinfo.ehash_size = (1UL << ehash_order) * PAGE_SIZE / sizeof(struct inet_ehash_bucket); dccp_hashinfo.ehash_size >>= 1; - while (dccp_hashinfo.ehash_size & (dccp_hashinfo.ehash_size - 1)) + while (dccp_hashinfo.ehash_size & + (dccp_hashinfo.ehash_size - 1)) dccp_hashinfo.ehash_size--; dccp_hashinfo.ehash = (struct inet_ehash_bucket *) __get_free_pages(GFP_ATOMIC, ehash_order); @@ -686,7 +694,8 @@ static int __init dccp_init(void) do { dccp_hashinfo.bhash_size = (1UL << bhash_order) * PAGE_SIZE / sizeof(struct inet_bind_hashbucket); - if ((dccp_hashinfo.bhash_size > (64 * 1024)) && bhash_order > 0) + if ((dccp_hashinfo.bhash_size > (64 * 1024)) && + bhash_order > 0) continue; dccp_hashinfo.bhash = (struct inet_bind_hashbucket *) __get_free_pages(GFP_ATOMIC, bhash_order); diff --git a/net/dccp/timer.c b/net/dccp/timer.c index 9f1f1ab9e2b4..47b1616e6189 100644 --- a/net/dccp/timer.c +++ b/net/dccp/timer.c @@ -45,11 +45,13 @@ static int dccp_write_timeout(struct sock *sk) if (sk->sk_state == DCCP_REQUESTING || sk->sk_state == DCCP_PARTOPEN) { if (icsk->icsk_retransmits != 0) dst_negative_advice(&sk->sk_dst_cache); - retry_until = icsk->icsk_syn_retries ? : /* FIXME! */ 3 /* FIXME! sysctl_tcp_syn_retries */; + retry_until = icsk->icsk_syn_retries ? : + /* FIXME! */ 3 /* FIXME! sysctl_tcp_syn_retries */; } else { - if (icsk->icsk_retransmits >= /* FIXME! sysctl_tcp_retries1 */ 5 /* FIXME! */) { - /* NOTE. 
draft-ietf-tcpimpl-pmtud-01.txt requires pmtu black - hole detection. :-( + if (icsk->icsk_retransmits >= + /* FIXME! sysctl_tcp_retries1 */ 5 /* FIXME! */) { + /* NOTE. draft-ietf-tcpimpl-pmtud-01.txt requires pmtu + black hole detection. :-( It is place to make it. It is not made. I do not want to make it. It is disguisting. It does not work in any @@ -96,14 +98,17 @@ static void dccp_delack_timer(unsigned long data) /* Try again later. */ icsk->icsk_ack.blocked = 1; NET_INC_STATS_BH(LINUX_MIB_DELAYEDACKLOCKED); - sk_reset_timer(sk, &icsk->icsk_delack_timer, jiffies + TCP_DELACK_MIN); + sk_reset_timer(sk, &icsk->icsk_delack_timer, + jiffies + TCP_DELACK_MIN); goto out; } - if (sk->sk_state == DCCP_CLOSED || !(icsk->icsk_ack.pending & ICSK_ACK_TIMER)) + if (sk->sk_state == DCCP_CLOSED || + !(icsk->icsk_ack.pending & ICSK_ACK_TIMER)) goto out; if (time_after(icsk->icsk_ack.timeout, jiffies)) { - sk_reset_timer(sk, &icsk->icsk_delack_timer, icsk->icsk_ack.timeout); + sk_reset_timer(sk, &icsk->icsk_delack_timer, + icsk->icsk_ack.timeout); goto out; } @@ -112,7 +117,8 @@ static void dccp_delack_timer(unsigned long data) if (inet_csk_ack_scheduled(sk)) { if (!icsk->icsk_ack.pingpong) { /* Delayed ACK missed: inflate ATO. */ - icsk->icsk_ack.ato = min(icsk->icsk_ack.ato << 1, icsk->icsk_rto); + icsk->icsk_ack.ato = min(icsk->icsk_ack.ato << 1, + icsk->icsk_rto); } else { /* Delayed ACK missed: leave pingpong mode and * deflate ATO. 
@@ -167,7 +173,7 @@ static void dccp_retransmit_timer(struct sock *sk) inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, min(icsk->icsk_rto, TCP_RESOURCE_PROBE_INTERVAL), - TCP_RTO_MAX); + DCCP_RTO_MAX); goto out; } @@ -175,7 +181,8 @@ static void dccp_retransmit_timer(struct sock *sk) icsk->icsk_retransmits++; icsk->icsk_rto = min(icsk->icsk_rto << 1, DCCP_RTO_MAX); - inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, icsk->icsk_rto, TCP_RTO_MAX); + inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, icsk->icsk_rto, + DCCP_RTO_MAX); if (icsk->icsk_retransmits > 3 /* FIXME: sysctl_dccp_retries1 */) __sk_dst_reset(sk); out:; @@ -190,7 +197,8 @@ static void dccp_write_timer(unsigned long data) bh_lock_sock(sk); if (sock_owned_by_user(sk)) { /* Try again later */ - sk_reset_timer(sk, &icsk->icsk_retransmit_timer, jiffies + (HZ / 20)); + sk_reset_timer(sk, &icsk->icsk_retransmit_timer, + jiffies + (HZ / 20)); goto out; } @@ -198,7 +206,8 @@ static void dccp_write_timer(unsigned long data) goto out; if (time_after(icsk->icsk_timeout, jiffies)) { - sk_reset_timer(sk, &icsk->icsk_retransmit_timer, icsk->icsk_timeout); + sk_reset_timer(sk, &icsk->icsk_retransmit_timer, + icsk->icsk_timeout); goto out; } @@ -220,7 +229,8 @@ out: */ static void dccp_response_timer(struct sock *sk) { - inet_csk_reqsk_queue_prune(sk, TCP_SYNQ_INTERVAL, DCCP_TIMEOUT_INIT, DCCP_RTO_MAX); + inet_csk_reqsk_queue_prune(sk, TCP_SYNQ_INTERVAL, DCCP_TIMEOUT_INIT, + DCCP_RTO_MAX); } static void dccp_keepalive_timer(unsigned long data) From 531669a0a9041d60d13920973ef8aa4f743c14a0 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Sat, 13 Aug 2005 20:35:17 -0300 Subject: [PATCH 399/584] [DCCP]: Rewrite dccp_sendmsg to be more like UDP Based on discussions with Nishida-san. Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. 
Miller --- net/dccp/proto.c | 226 ++++++++++++++--------------------------------- 1 file changed, 65 insertions(+), 161 deletions(-) diff --git a/net/dccp/proto.c b/net/dccp/proto.c index 46dd489f66cf..ed0bf58c8ae0 100644 --- a/net/dccp/proto.c +++ b/net/dccp/proto.c @@ -214,197 +214,101 @@ out_discard: goto out_release; } -EXPORT_SYMBOL(dccp_sendmsg); - int dccp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, size_t len, int nonblock, int flags, int *addr_len) { const struct dccp_hdr *dh; - int copied = 0; - unsigned long used; - int err; - int target; /* Read at least this many bytes */ long timeo; lock_sock(sk); - err = -ENOTCONN; - if (sk->sk_state == DCCP_LISTEN) + if (sk->sk_state == DCCP_LISTEN) { + len = -ENOTCONN; goto out; + } timeo = sock_rcvtimeo(sk, nonblock); - /* Urgent data needs to be handled specially. */ - if (flags & MSG_OOB) - goto recv_urg; - - /* FIXME */ -#if 0 - seq = &tp->copied_seq; - if (flags & MSG_PEEK) { - peek_seq = tp->copied_seq; - seq = &peek_seq; - } -#endif - - target = sock_rcvlowat(sk, flags & MSG_WAITALL, len); - do { - struct sk_buff *skb; - u32 offset; + struct sk_buff *skb = skb_peek(&sk->sk_receive_queue); - /* FIXME */ -#if 0 - /* - * Are we at urgent data? Stop if we have read anything or - * have SIGURG pending. - */ - if (tp->urg_data && tp->urg_seq == *seq) { - if (copied) - break; - if (signal_pending(current)) { - copied = timeo ? sock_intr_errno(timeo) : - -EAGAIN; - break; - } + if (skb == NULL) + goto verify_sock_status; + + dh = dccp_hdr(skb); + + if (dh->dccph_type == DCCP_PKT_DATA || + dh->dccph_type == DCCP_PKT_DATAACK) + goto found_ok_skb; + + if (dh->dccph_type == DCCP_PKT_RESET || + dh->dccph_type == DCCP_PKT_CLOSE) { + dccp_pr_debug("found fin ok!\n"); + len = 0; + goto found_fin_ok; } -#endif - - /* Next get a buffer. 
*/ - - skb = skb_peek(&sk->sk_receive_queue); - do { - if (!skb) - break; - - offset = 0; - dh = dccp_hdr(skb); - - if (dh->dccph_type == DCCP_PKT_DATA || - dh->dccph_type == DCCP_PKT_DATAACK) - goto found_ok_skb; - - if (dh->dccph_type == DCCP_PKT_RESET || - dh->dccph_type == DCCP_PKT_CLOSE) { - dccp_pr_debug("found fin ok!\n"); - goto found_fin_ok; - } - dccp_pr_debug("packet_type=%s\n", - dccp_packet_name(dh->dccph_type)); - BUG_TRAP(flags & MSG_PEEK); - skb = skb->next; - } while (skb != (struct sk_buff *)&sk->sk_receive_queue); - - /* Well, if we have backlog, try to process it now yet. */ - if (copied >= target && !sk->sk_backlog.tail) + dccp_pr_debug("packet_type=%s\n", + dccp_packet_name(dh->dccph_type)); + sk_eat_skb(sk, skb); +verify_sock_status: + if (sock_flag(sk, SOCK_DONE)) { + len = 0; break; - - if (copied) { - if (sk->sk_err || - sk->sk_state == DCCP_CLOSED || - (sk->sk_shutdown & RCV_SHUTDOWN) || - !timeo || - signal_pending(current) || - (flags & MSG_PEEK)) - break; - } else { - if (sock_flag(sk, SOCK_DONE)) - break; - - if (sk->sk_err) { - copied = sock_error(sk); - break; - } - - if (sk->sk_shutdown & RCV_SHUTDOWN) - break; - - if (sk->sk_state == DCCP_CLOSED) { - if (!sock_flag(sk, SOCK_DONE)) { - /* This occurs when user tries to read - * from never connected socket. - */ - copied = -ENOTCONN; - break; - } - break; - } - - if (!timeo) { - copied = -EAGAIN; - break; - } - - if (signal_pending(current)) { - copied = sock_intr_errno(timeo); - break; - } } - /* FIXME: cleanup_rbuf(sk, copied); */ + if (sk->sk_err) { + len = sock_error(sk); + break; + } - if (copied >= target) { - /* Do not sleep, just process backlog. */ - release_sock(sk); - lock_sock(sk); - } else - sk_wait_data(sk, &timeo); + if (sk->sk_shutdown & RCV_SHUTDOWN) { + len = 0; + break; + } + if (sk->sk_state == DCCP_CLOSED) { + if (!sock_flag(sk, SOCK_DONE)) { + /* This occurs when user tries to read + * from never connected socket. 
+ */ + len = -ENOTCONN; + break; + } + len = 0; + break; + } + + if (!timeo) { + len = -EAGAIN; + break; + } + + if (signal_pending(current)) { + len = sock_intr_errno(timeo); + break; + } + + sk_wait_data(sk, &timeo); continue; - found_ok_skb: - /* Ok so how much can we use? */ - used = skb->len - offset; - if (len < used) - used = len; + if (len > skb->len) + len = skb->len; + else if (len < skb->len) + msg->msg_flags |= MSG_TRUNC; - if (!(flags & MSG_TRUNC)) { - err = skb_copy_datagram_iovec(skb, offset, - msg->msg_iov, used); - if (err) { - /* Exception. Bailout! */ - if (!copied) - copied = -EFAULT; - break; - } + if (skb_copy_datagram_iovec(skb, 0, msg->msg_iov, len)) { + /* Exception. Bailout! */ + len = -EFAULT; + break; } - - copied += used; - len -= used; - - /* FIXME: tcp_rcv_space_adjust(sk); */ - -//skip_copy: - if (used + offset < skb->len) - continue; - - if (!(flags & MSG_PEEK)) - sk_eat_skb(sk, skb); - continue; found_fin_ok: if (!(flags & MSG_PEEK)) sk_eat_skb(sk, skb); break; - - } while (len > 0); - - /* According to UNIX98, msg_name/msg_namelen are ignored - * on connected socket. I was just happy when found this 8) --ANK - */ - - /* Clean up data we have read: This will do ACK frames. */ - /* FIXME: cleanup_rbuf(sk, copied); */ - - release_sock(sk); - return copied; - + } while (1); out: release_sock(sk); - return err; - -recv_urg: - /* FIXME: err = tcp_recv_urg(sk, timeo, msg, len, flags, addr_len); */ - goto out; + return len; } static int inet_dccp_listen(struct socket *sock, int backlog) From 725ba8eee3881e619c8e5a0116f1bdb6480ac2d9 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Sat, 13 Aug 2005 20:35:39 -0300 Subject: [PATCH 400/584] [DCCP]: Introduce the DCCP Kernel hacking menu Only available if CONFIG_DEBUG_KERNEL is enabled in the "Kernel Hacking" Menu. Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. 
Miller --- net/dccp/Kconfig | 21 +++++++++++++++++++++ net/dccp/ccids/ccid3.c | 9 +++++++++ net/dccp/dccp.h | 7 +++---- net/dccp/options.c | 18 +++++++++--------- net/dccp/proto.c | 22 ++++++++++++++-------- 5 files changed, 56 insertions(+), 21 deletions(-) diff --git a/net/dccp/Kconfig b/net/dccp/Kconfig index 6760830c490f..3023f702eb87 100644 --- a/net/dccp/Kconfig +++ b/net/dccp/Kconfig @@ -26,4 +26,25 @@ config INET_DCCP_DIAG source "net/dccp/ccids/Kconfig" +menu "DCCP Kernel Hacking" + depends on IP_DCCP=m && DEBUG_KERNEL=y + +config IP_DCCP_DEBUG + bool "DCCP debug messages" + ---help--- + Only use this if you're hacking DCCP. + + Just say N. + +config IP_DCCP_UNLOAD_HACK + depends on IP_DCCP_CCID3=m + bool "DCCP control sock unload hack" + ---help--- + Enable this to be able to unload the dccp module when the it + has only one refcount held, the control sock one. Just execute + "rmmod dccp_ccid3 dccp" + + Just say N. +endmenu + endmenu diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c index edf9740d8d82..09274f32a337 100644 --- a/net/dccp/ccids/ccid3.c +++ b/net/dccp/ccids/ccid3.c @@ -2078,6 +2078,15 @@ module_init(ccid3_module_init); static __exit void ccid3_module_exit(void) { +#ifdef CONFIG_IP_DCCP_UNLOAD_HACK + /* + * Hack to use while developing, so that we get rid of the control + * sock, that is what keeps a refcount on dccp.ko -acme + */ + extern void dccp_ctl_sock_exit(void); + + dccp_ctl_sock_exit(); +#endif ccid_unregister(&ccid3); if (ccid3_tx_hist != NULL) { diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h index 62e735f1807d..270f19439964 100644 --- a/net/dccp/dccp.h +++ b/net/dccp/dccp.h @@ -11,14 +11,13 @@ * published by the Free Software Foundation. */ +#include #include #include #include #include -#define DCCP_DEBUG - -#ifdef DCCP_DEBUG +#ifdef CONFIG_IP_DCCP_DEBUG extern int dccp_debug; #define dccp_pr_debug(format, a...) 
\ @@ -426,7 +425,7 @@ extern int dccp_ackpkts_add(struct dccp_ackpkts *ap, u64 ackno, u8 state); extern void dccp_ackpkts_check_rcv_ackno(struct dccp_ackpkts *ap, struct sock *sk, u64 ackno); -#ifdef DCCP_DEBUG +#ifdef CONFIG_IP_DCCP_DEBUG extern void dccp_ackvector_print(const u64 ackno, const unsigned char *vector, int len); extern void dccp_ackpkts_print(const struct dccp_ackpkts *ap); diff --git a/net/dccp/options.c b/net/dccp/options.c index 68d6614edcf1..fc363aaeedaf 100644 --- a/net/dccp/options.c +++ b/net/dccp/options.c @@ -58,7 +58,7 @@ static u32 dccp_decode_value_var(const unsigned char *bf, const u8 len) int dccp_parse_options(struct sock *sk, struct sk_buff *skb) { struct dccp_sock *dp = dccp_sk(sk); -#ifdef DCCP_DEBUG +#ifdef CONFIG_IP_DCCP_DEBUG const char *debug_prefix = dp->dccps_role == DCCP_ROLE_CLIENT ? "CLIENT rx opt: " : "server rx opt: "; #endif @@ -303,7 +303,7 @@ void dccp_insert_option_elapsed_time(struct sock *sk, struct sk_buff *skb, u32 elapsed_time) { -#ifdef DCCP_DEBUG +#ifdef CONFIG_IP_DCCP_DEBUG struct dccp_sock *dp = dccp_sk(sk); const char *debug_prefix = dp->dccps_role == DCCP_ROLE_CLIENT ? "CLIENT TX opt: " : "server TX opt: "; @@ -341,7 +341,7 @@ EXPORT_SYMBOL(dccp_insert_option_elapsed_time); static void dccp_insert_option_ack_vector(struct sock *sk, struct sk_buff *skb) { struct dccp_sock *dp = dccp_sk(sk); -#ifdef DCCP_DEBUG +#ifdef CONFIG_IP_DCCP_DEBUG const char *debug_prefix = dp->dccps_role == DCCP_ROLE_CLIENT ? "CLIENT TX opt: " : "server TX opt: "; #endif @@ -425,7 +425,7 @@ static void dccp_insert_option_timestamp_echo(struct sock *sk, struct sk_buff *skb) { struct dccp_sock *dp = dccp_sk(sk); -#ifdef DCCP_DEBUG +#ifdef CONFIG_IP_DCCP_DEBUG const char *debug_prefix = dp->dccps_role == DCCP_ROLE_CLIENT ? 
"CLIENT TX opt: " : "server TX opt: "; #endif @@ -504,7 +504,7 @@ struct dccp_ackpkts *dccp_ackpkts_alloc(unsigned int len, int priority) struct dccp_ackpkts *ap = kmalloc(sizeof(*ap) + len, priority); if (ap != NULL) { -#ifdef DCCP_DEBUG +#ifdef CONFIG_IP_DCCP_DEBUG memset(ap->dccpap_buf, 0xFF, len); #endif ap->dccpap_buf_len = len; @@ -526,7 +526,7 @@ struct dccp_ackpkts *dccp_ackpkts_alloc(unsigned int len, int priority) void dccp_ackpkts_free(struct dccp_ackpkts *ap) { if (ap != NULL) { -#ifdef DCCP_DEBUG +#ifdef CONFIG_IP_DCCP_DEBUG memset(ap, 0xFF, sizeof(*ap) + ap->dccpap_buf_len); #endif kfree(ap); @@ -680,7 +680,7 @@ out_duplicate: return -EILSEQ; } -#ifdef DCCP_DEBUG +#ifdef CONFIG_IP_DCCP_DEBUG void dccp_ackvector_print(const u64 ackno, const unsigned char *vector, int len) { @@ -735,7 +735,7 @@ void dccp_ackpkts_check_rcv_ackno(struct dccp_ackpkts *ap, struct sock *sk, return; if (ackno == ap->dccpap_ack_seqno) { -#ifdef DCCP_DEBUG +#ifdef CONFIG_IP_DCCP_DEBUG struct dccp_sock *dp = dccp_sk(sk); const char *debug_prefix = dp->dccps_role == DCCP_ROLE_CLIENT ? "CLIENT rx ack: " : "server rx ack: "; @@ -794,7 +794,7 @@ static void dccp_ackpkts_check_rcv_ackvector(struct dccp_ackpkts *ap, /* dccp_pr_debug_cat("yes\n"); */ if (state != DCCP_ACKPKTS_STATE_NOT_RECEIVED) { -#ifdef DCCP_DEBUG +#ifdef CONFIG_IP_DCCP_DEBUG struct dccp_sock *dp = dccp_sk(sk); const char *debug_prefix = dp->dccps_role == DCCP_ROLE_CLIENT ? 
diff --git a/net/dccp/proto.c b/net/dccp/proto.c index ed0bf58c8ae0..be0669242069 100644 --- a/net/dccp/proto.c +++ b/net/dccp/proto.c @@ -503,12 +503,16 @@ static int __init dccp_ctl_sock_init(void) return rc; } -static void __exit dccp_ctl_sock_exit(void) +#ifdef CONFIG_IP_DCCP_UNLOAD_HACK +void dccp_ctl_sock_exit(void) { if (dccp_ctl_socket != NULL) sock_release(dccp_ctl_socket); } +EXPORT_SYMBOL_GPL(dccp_ctl_sock_exit); +#endif + static int __init init_dccp_v4_mibs(void) { int rc = -ENOMEM; @@ -655,19 +659,21 @@ static const char dccp_del_proto_err_msg[] __exitdata = static void __exit dccp_fini(void) { - dccp_ctl_sock_exit(); - inet_unregister_protosw(&dccp_v4_protosw); if (inet_del_protocol(&dccp_protocol, IPPROTO_DCCP) < 0) printk(dccp_del_proto_err_msg); - /* Free the control endpoint. */ - sock_release(dccp_ctl_socket); - - proto_unregister(&dccp_v4_prot); - + free_percpu(dccp_statistics[0]); + free_percpu(dccp_statistics[1]); + free_pages((unsigned long)dccp_hashinfo.bhash, + get_order(dccp_hashinfo.bhash_size * + sizeof(struct inet_bind_hashbucket))); + free_pages((unsigned long)dccp_hashinfo.ehash, + get_order(dccp_hashinfo.ehash_size * + sizeof(struct inet_ehash_bucket))); kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep); + proto_unregister(&dccp_v4_prot); } module_init(dccp_init); From 8649b0d4166e6e80ffa298e75abd8f2afdd491a6 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Sat, 13 Aug 2005 20:36:01 -0300 Subject: [PATCH 401/584] [DCCP]: Fix RESET handling in dccp_rcv_state_process To avoid holding TIMEWAIT state for sockets in the LISTEN state. Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. 
Miller --- net/dccp/input.c | 60 +++++++++++++++++++++++++++++++++--------------- 1 file changed, 42 insertions(+), 18 deletions(-) diff --git a/net/dccp/input.c b/net/dccp/input.c index 4b8638f153a5..9dadfc362511 100644 --- a/net/dccp/input.c +++ b/net/dccp/input.c @@ -402,7 +402,48 @@ int dccp_rcv_state_process(struct sock *sk, struct sk_buff *skb, const int old_state = sk->sk_state; int queued = 0; - if (sk->sk_state != DCCP_LISTEN && sk->sk_state != DCCP_REQUESTING) { + /* + * Step 3: Process LISTEN state + * (Continuing from dccp_v4_do_rcv and dccp_v6_do_rcv) + * + * If S.state == LISTEN, + * If P.type == Request or P contains a valid Init Cookie + * option, + * * Must scan the packet's options to check for an Init + * Cookie. Only the Init Cookie is processed here, + * however; other options are processed in Step 8. This + * scan need only be performed if the endpoint uses Init + * Cookies * + * * Generate a new socket and switch to that socket * + * Set S := new socket for this port pair + * S.state = RESPOND + * Choose S.ISS (initial seqno) or set from Init Cookie + * Set S.ISR, S.GSR, S.SWL, S.SWH from packet or Init Cookie + * Continue with S.state == RESPOND + * * A Response packet will be generated in Step 11 * + * Otherwise, + * Generate Reset(No Connection) unless P.type == Reset + * Drop packet and return + * + * NOTE: the check for the packet types is done in + * dccp_rcv_state_process + */ + if (sk->sk_state == DCCP_LISTEN) { + if (dh->dccph_type == DCCP_PKT_REQUEST) { + if (dccp_v4_conn_request(sk, skb) < 0) + return 1; + + /* FIXME: do congestion control initialization */ + goto discard; + } + if (dh->dccph_type == DCCP_PKT_RESET) + goto discard; + + /* Caller (dccp_v4_do_rcv) will send Reset(No Connection)*/ + return 1; + } + + if (sk->sk_state != DCCP_REQUESTING) { if (dccp_check_seqno(sk, skb)) goto discard; @@ -484,23 +525,6 @@ int dccp_rcv_state_process(struct sock *sk, struct sk_buff *skb, case DCCP_CLOSED: return 1; - case DCCP_LISTEN: - 
if (dh->dccph_type == DCCP_PKT_ACK || - dh->dccph_type == DCCP_PKT_DATAACK) - return 1; - - if (dh->dccph_type == DCCP_PKT_RESET) - goto discard; - - if (dh->dccph_type == DCCP_PKT_REQUEST) { - if (dccp_v4_conn_request(sk, skb) < 0) - return 1; - - /* FIXME: do congestion control initialization */ - goto discard; - } - goto discard; - case DCCP_REQUESTING: /* FIXME: do congestion control initialization */ From a1d3a35518779df0579dd9de0121354b49c68ddc Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Sat, 13 Aug 2005 22:42:25 -0300 Subject: [PATCH 402/584] [DCCP]: Fix sparse warnings Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. Miller --- net/dccp/ccids/ccid3.c | 13 ++++++++----- net/dccp/dccp.h | 13 +++++++++---- net/dccp/ipv4.c | 7 ++----- net/dccp/options.c | 3 ++- net/dccp/packet_history.h | 12 ++++++------ net/dccp/proto.c | 8 +++++--- 6 files changed, 32 insertions(+), 24 deletions(-) diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c index 09274f32a337..21948d023c72 100644 --- a/net/dccp/ccids/ccid3.c +++ b/net/dccp/ccids/ccid3.c @@ -82,12 +82,13 @@ enum ccid3_options { static int ccid3_debug; -struct dccp_tx_hist *ccid3_tx_hist; -struct dccp_rx_hist *ccid3_rx_hist; +static struct dccp_tx_hist *ccid3_tx_hist; +static struct dccp_rx_hist *ccid3_rx_hist; static kmem_cache_t *ccid3_loss_interval_hist_slab; -static inline struct ccid3_loss_interval_hist_entry *ccid3_loss_interval_hist_entry_new(int prio) +static inline struct ccid3_loss_interval_hist_entry * + ccid3_loss_interval_hist_entry_new(const unsigned int __nocast prio) { return kmem_cache_alloc(ccid3_loss_interval_hist_slab, prio); } @@ -1593,7 +1594,9 @@ static void ccid3_hc_rx_insert_options(struct sock *sk, struct sk_buff *skb) * These are integers as per section 8 of RFC3448. We can then divide by 4 * * when we use it. 
*/ -const int ccid3_hc_rx_w[TFRC_RECV_IVAL_F_LENGTH] = { 4, 4, 4, 4, 3, 2, 1, 1, }; +static const int ccid3_hc_rx_w[TFRC_RECV_IVAL_F_LENGTH] = { + 4, 4, 4, 4, 3, 2, 1, 1, +}; /* * args: fvalue - function value to match @@ -1601,7 +1604,7 @@ const int ccid3_hc_rx_w[TFRC_RECV_IVAL_F_LENGTH] = { 4, 4, 4, 4, 3, 2, 1, 1, }; * * both fvalue and p are multiplied by 1,000,000 to use ints */ -u32 calcx_reverse_lookup(u32 fvalue) { +static u32 calcx_reverse_lookup(u32 fvalue) { int ctr = 0; int small; diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h index 270f19439964..148e8a65a10c 100644 --- a/net/dccp/dccp.h +++ b/net/dccp/dccp.h @@ -231,19 +231,22 @@ extern void dccp_close(struct sock *sk, long timeout); extern struct sk_buff *dccp_make_response(struct sock *sk, struct dst_entry *dst, struct request_sock *req); +extern struct sk_buff *dccp_make_reset(struct sock *sk, + struct dst_entry *dst, + enum dccp_reset_codes code); extern int dccp_connect(struct sock *sk); extern int dccp_disconnect(struct sock *sk, int flags); extern int dccp_getsockopt(struct sock *sk, int level, int optname, - char *optval, int *optlen); + char __user *optval, int __user *optlen); +extern int dccp_setsockopt(struct sock *sk, int level, int optname, + char __user *optval, int optlen); extern int dccp_ioctl(struct sock *sk, int cmd, unsigned long arg); extern int dccp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, size_t size); extern int dccp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, size_t len, int nonblock, int flags, int *addr_len); -extern int dccp_setsockopt(struct sock *sk, int level, int optname, - char *optval, int optlen); extern void dccp_shutdown(struct sock *sk, int how); extern int dccp_v4_checksum(const struct sk_buff *skb, @@ -419,7 +422,9 @@ struct dccp_ackpkts { u8 dccpap_buf[0]; }; -extern struct dccp_ackpkts *dccp_ackpkts_alloc(unsigned int len, int priority); +extern struct dccp_ackpkts * + dccp_ackpkts_alloc(unsigned int len, + const 
unsigned int __nocast priority); extern void dccp_ackpkts_free(struct dccp_ackpkts *ap); extern int dccp_ackpkts_add(struct dccp_ackpkts *ap, u64 ackno, u8 state); extern void dccp_ackpkts_check_rcv_ackno(struct dccp_ackpkts *ap, diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index 42d9c878d4c3..bc3cfc0533cc 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -617,9 +617,6 @@ out: sock_put(sk); } -extern struct sk_buff *dccp_make_reset(struct sock *sk, struct dst_entry *dst, - enum dccp_reset_codes code); - int dccp_v4_send_reset(struct sock *sk, enum dccp_reset_codes code) { struct sk_buff *skb; @@ -881,7 +878,7 @@ static struct dst_entry* dccp_v4_route_skb(struct sock *sk, return &rt->u.dst; } -void dccp_v4_ctl_send_reset(struct sk_buff *rxskb) +static void dccp_v4_ctl_send_reset(struct sk_buff *rxskb) { int err; struct dccp_hdr *rxdh = dccp_hdr(rxskb), *dh; @@ -1268,7 +1265,7 @@ static int dccp_v4_init_sock(struct sock *sk) return 0; } -int dccp_v4_destroy_sock(struct sock *sk) +static int dccp_v4_destroy_sock(struct sock *sk) { struct dccp_sock *dp = dccp_sk(sk); diff --git a/net/dccp/options.c b/net/dccp/options.c index fc363aaeedaf..d87d6be7ab10 100644 --- a/net/dccp/options.c +++ b/net/dccp/options.c @@ -499,7 +499,8 @@ void dccp_insert_options(struct sock *sk, struct sk_buff *skb) } } -struct dccp_ackpkts *dccp_ackpkts_alloc(unsigned int len, int priority) +struct dccp_ackpkts *dccp_ackpkts_alloc(const unsigned int len, + const unsigned int __nocast priority) { struct dccp_ackpkts *ap = kmalloc(sizeof(*ap) + len, priority); diff --git a/net/dccp/packet_history.h b/net/dccp/packet_history.h index 489fff45ccdf..2e5ba343e3dd 100644 --- a/net/dccp/packet_history.h +++ b/net/dccp/packet_history.h @@ -79,8 +79,8 @@ extern struct dccp_rx_hist_entry * dccp_rx_hist_find_data_packet(const struct list_head *list); static inline struct dccp_tx_hist_entry * - dccp_tx_hist_entry_new(struct dccp_tx_hist *hist, - const int prio) + dccp_tx_hist_entry_new(struct 
dccp_tx_hist *hist, + const unsigned int __nocast prio) { struct dccp_tx_hist_entry *entry = kmem_cache_alloc(hist->dccptxh_slab, prio); @@ -127,10 +127,10 @@ static inline struct dccp_tx_hist_entry * } static inline struct dccp_rx_hist_entry * - dccp_rx_hist_entry_new(struct dccp_rx_hist *hist, - const u32 ndp, - const struct sk_buff *skb, - const int prio) + dccp_rx_hist_entry_new(struct dccp_rx_hist *hist, + const u32 ndp, + const struct sk_buff *skb, + const unsigned int __nocast prio) { struct dccp_rx_hist_entry *entry = kmem_cache_alloc(hist->dccprxh_slab, prio); diff --git a/net/dccp/proto.c b/net/dccp/proto.c index be0669242069..0b715ceb38b5 100644 --- a/net/dccp/proto.c +++ b/net/dccp/proto.c @@ -147,7 +147,7 @@ int dccp_ioctl(struct sock *sk, int cmd, unsigned long arg) } int dccp_setsockopt(struct sock *sk, int level, int optname, - char *optval, int optlen) + char __user *optval, int optlen) { dccp_pr_debug("entry\n"); @@ -158,7 +158,7 @@ int dccp_setsockopt(struct sock *sk, int level, int optname, } int dccp_getsockopt(struct sock *sk, int level, int optname, - char *optval, int *optlen) + char __user *optval, int __user *optlen) { dccp_pr_debug("entry\n"); @@ -439,7 +439,7 @@ void dccp_shutdown(struct sock *sk, int how) dccp_pr_debug("entry\n"); } -struct proto_ops inet_dccp_ops = { +static struct proto_ops inet_dccp_ops = { .family = PF_INET, .owner = THIS_MODULE, .release = inet_release, @@ -539,9 +539,11 @@ static int thash_entries; module_param(thash_entries, int, 0444); MODULE_PARM_DESC(thash_entries, "Number of ehash buckets"); +#ifdef CONFIG_IP_DCCP_DEBUG int dccp_debug; module_param(dccp_debug, int, 0444); MODULE_PARM_DESC(dccp_debug, "Enable debug messages"); +#endif static int __init dccp_init(void) { From a10cedd4b905236603c6c4fd77cf338ebbfb1a60 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Sun, 14 Aug 2005 21:05:53 -0300 Subject: [PATCH 403/584] [DCCP]: Fix compiler warnings may be a false warning if there always is something on 
ccid3hcrx_hist: net/dccp/ccids/ccid3.c: In function 'ccid3_hc_rx_packet_recv': net/dccp/ccids/ccid3.c:1634: warning: 'tstamp.tv_usec' may be used uninitialized in this function net/dccp/ccids/ccid3.c:1634: warning: 'tstamp.tv_sec' may be used uninitialized in this function const on inline functions doesn't have any effect: net/dccp/dccp.h:64: warning: type qualifiers ignored on function return type net/dccp/dccp.h:70: warning: type qualifiers ignored on function return type net/dccp/dccp.h:76: warning: type qualifiers ignored on function return type Signed-off-by: Patrick McHardy Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. Miller --- net/dccp/ccids/ccid3.c | 2 +- net/dccp/dccp.h | 11 +++++------ 2 files changed, 6 insertions(+), 7 deletions(-) diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c index 21948d023c72..2dd3e94ba8f4 100644 --- a/net/dccp/ccids/ccid3.c +++ b/net/dccp/ccids/ccid3.c @@ -1634,7 +1634,7 @@ static u32 ccid3_hc_rx_calc_first_li(struct sock *sk) struct ccid3_hc_rx_sock *hcrx = dp->dccps_hc_rx_ccid_private; struct dccp_rx_hist_entry *entry, *next, *tail = NULL; u32 rtt, delta, x_recv, fval, p, tmp2; - struct timeval tstamp, tmp_tv; + struct timeval tstamp = { 0 }, tmp_tv; int interval = 0; int win_count = 0; int step = 0; diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h index 148e8a65a10c..fff794c8dfff 100644 --- a/net/dccp/dccp.h +++ b/net/dccp/dccp.h @@ -60,20 +60,19 @@ extern void dccp_time_wait(struct sock *sk, int state, int timeo); extern struct proto dccp_v4_prot; /* is seq1 < seq2 ? */ -static inline const int before48(const u64 seq1, const u64 seq2) +static inline int before48(const u64 seq1, const u64 seq2) { - return (const s64)((seq1 << 16) - (seq2 << 16)) < 0; + return (s64)((seq1 << 16) - (seq2 << 16)) < 0; } /* is seq1 > seq2 ? 
*/ -static inline const int after48(const u64 seq1, const u64 seq2) +static inline int after48(const u64 seq1, const u64 seq2) { - return (const s64)((seq2 << 16) - (seq1 << 16)) < 0; + return (s64)((seq2 << 16) - (seq1 << 16)) < 0; } /* is seq2 <= seq1 <= seq3 ? */ -static inline const int between48(const u64 seq1, const u64 seq2, - const u64 seq3) +static inline int between48(const u64 seq1, const u64 seq2, const u64 seq3) { return (seq3 << 16) - (seq2 << 16) >= (seq1 << 16) - (seq2 << 16); } From 7de76272b54e3677bcd247d1e1809015236a298d Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Sun, 14 Aug 2005 18:01:08 -0700 Subject: [PATCH 404/584] [IPX]: Fix build error in ipx_recvmsg() Missing semicolon introduced by skb->stamp changeset: d3258b7d8ed96f97032639bc745179f1951b0da5 Signed-off-by: David S. Miller --- net/ipx/af_ipx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipx/af_ipx.c b/net/ipx/af_ipx.c index c54f8acc97eb..180e383f707c 100644 --- a/net/ipx/af_ipx.c +++ b/net/ipx/af_ipx.c @@ -1797,7 +1797,7 @@ static int ipx_recvmsg(struct kiocb *iocb, struct socket *sock, if (rc) goto out_free; if (skb->tstamp.off_sec) - skb_get_timestamp(skb, &sk->sk_stamp) + skb_get_timestamp(skb, &sk->sk_stamp); msg->msg_namelen = sizeof(*sipx); From ad93e266a17c6f606e96304c866eb73665ae34fa Mon Sep 17 00:00:00 2001 From: Evgeniy Polyakov Date: Sun, 14 Aug 2005 19:24:58 -0700 Subject: [PATCH 405/584] [NETLINK]: w1_int.c: fix default netlink group w1 does not need to multicast its state to several groups at once, and upcoming netlink changes will not allow bitmask for groups anyway. Signed-off-by: Evgeniy Polyakov Signed-off-by: Patrick McHardy Signed-off-by: David S. 
Miller --- drivers/w1/w1_int.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/w1/w1_int.c b/drivers/w1/w1_int.c index 8809788dac26..f3f339d057f9 100644 --- a/drivers/w1/w1_int.c +++ b/drivers/w1/w1_int.c @@ -86,7 +86,7 @@ static struct w1_master * w1_alloc_dev(u32 id, int slave_count, int slave_ttl, dev->driver = driver; - dev->groups = 23; + dev->groups = 1; dev->seq = 1; dev->nls = netlink_kernel_create(NETLINK_W1, NULL, THIS_MODULE); if (!dev->nls) { From 43e943c32b9213b5d25407b281c94aaa474fd9a6 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Sun, 14 Aug 2005 19:25:47 -0700 Subject: [PATCH 406/584] [NETLINK]: Fix missing dst_groups initializations in netlink_broadcast users netlink_broadcast users must initialize NETLINK_CB(skb).dst_groups to the destination group mask for netlink_recvmsg. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- lib/kobject_uevent.c | 1 + net/xfrm/xfrm_user.c | 8 ++++++++ security/selinux/netlink.c | 1 + 3 files changed, 10 insertions(+) diff --git a/lib/kobject_uevent.c b/lib/kobject_uevent.c index 88f4d746aa05..bc000619f4f8 100644 --- a/lib/kobject_uevent.c +++ b/lib/kobject_uevent.c @@ -93,6 +93,7 @@ static int send_uevent(const char *signal, const char *obj, } } + NETLINK_CB(skb).dst_groups = 1; return netlink_broadcast(uevent_sock, skb, 0, 1, gfp_mask); } diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index 33ceeea783b1..4d553a1d2169 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -1152,6 +1152,8 @@ static int xfrm_notify_sa_flush(struct km_event *c) nlh->nlmsg_len = skb->tail - b; + NETLINK_CB(skb).dst_groups = XFRMGRP_SA; + return netlink_broadcast(xfrm_nl, skb, 0, XFRMGRP_SA, GFP_ATOMIC); nlmsg_failure: @@ -1226,6 +1228,8 @@ static int xfrm_notify_sa(struct xfrm_state *x, struct km_event *c) nlh->nlmsg_len = skb->tail - b; + NETLINK_CB(skb).dst_groups = XFRMGRP_SA; + return netlink_broadcast(xfrm_nl, skb, 0, XFRMGRP_SA, GFP_ATOMIC); nlmsg_failure: @@ 
-1455,6 +1459,8 @@ static int xfrm_notify_policy(struct xfrm_policy *xp, int dir, struct km_event * nlh->nlmsg_len = skb->tail - b; + NETLINK_CB(skb).dst_groups = XFRMGRP_POLICY; + return netlink_broadcast(xfrm_nl, skb, 0, XFRMGRP_POLICY, GFP_ATOMIC); nlmsg_failure: @@ -1480,6 +1486,8 @@ static int xfrm_notify_policy_flush(struct km_event *c) nlh->nlmsg_len = skb->tail - b; + NETLINK_CB(skb).dst_groups = XFRMGRP_POLICY; + return netlink_broadcast(xfrm_nl, skb, 0, XFRMGRP_POLICY, GFP_ATOMIC); nlmsg_failure: diff --git a/security/selinux/netlink.c b/security/selinux/netlink.c index 341dbe2579be..0f7be6524555 100644 --- a/security/selinux/netlink.c +++ b/security/selinux/netlink.c @@ -80,6 +80,7 @@ static void selnl_notify(int msgtype, void *data) nlh = NLMSG_PUT(skb, 0, 0, msgtype, len); selnl_add_payload(nlh, len, msgtype, data); nlh->nlmsg_len = skb->tail - tmp; + NETLINK_CB(skb).dst_groups = SELNL_GRP_AVC; netlink_broadcast(selnl, skb, 0, SELNL_GRP_AVC, GFP_USER); out: return; From db080529798b497eb5a37b92a25e966be5a7dd5d Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Sun, 14 Aug 2005 19:26:34 -0700 Subject: [PATCH 407/584] [NETLINK]: Remove unused groups member from struct netlink_skb_parms Signed-off-by: Patrick McHardy Signed-off-by: David S. 
Miller --- include/linux/netlink.h | 1 - net/ipv4/fib_frontend.c | 1 - net/netlink/af_netlink.c | 1 - 3 files changed, 3 deletions(-) diff --git a/include/linux/netlink.h b/include/linux/netlink.h index d5e09bcd80f9..eab51f9c9c86 100644 --- a/include/linux/netlink.h +++ b/include/linux/netlink.h @@ -106,7 +106,6 @@ struct netlink_skb_parms { struct ucred creds; /* Skb credentials */ __u32 pid; - __u32 groups; __u32 dst_pid; __u32 dst_groups; kernel_cap_t eff_cap; diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index b5e2f1550c91..75d03e37b9a8 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -558,7 +558,6 @@ static void nl_fib_input(struct sock *sk, int len) nl_fib_lookup(frn, tb); pid = nlh->nlmsg_pid; /*pid of sending process */ - NETLINK_CB(skb).groups = 0; /* not in mcast group */ NETLINK_CB(skb).pid = 0; /* from kernel */ NETLINK_CB(skb).dst_pid = pid; NETLINK_CB(skb).dst_groups = 0; /* unicast */ diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 5d487cd69c8c..7b7b45a19597 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -950,7 +950,6 @@ static int netlink_sendmsg(struct kiocb *kiocb, struct socket *sock, goto out; NETLINK_CB(skb).pid = nlk->pid; - NETLINK_CB(skb).groups = nlk->groups; NETLINK_CB(skb).dst_pid = dst_pid; NETLINK_CB(skb).dst_groups = dst_groups; NETLINK_CB(skb).loginuid = audit_get_loginuid(current->audit_context); From 77247bbb3094246be9d057e7be442cc708f123a8 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Sun, 14 Aug 2005 19:27:13 -0700 Subject: [PATCH 408/584] [NETLINK]: Fix module refcounting problems Use-after-free: the struct proto_ops containing the module pointer is freed when a socket with pid=0 is released, which besides for kernel sockets is true for all unbound sockets. 
Module refcount leak: when the kernel socket is closed before all user sockets have been closed the proto_ops struct for this family is replaced by the generic one and the module refcount can't be dropped. The second problem can't be solved cleanly using module refcounting in the generic socket code, so this patch adds explicit refcounting to netlink_create/netlink_release. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- net/netlink/af_netlink.c | 102 ++++++++++++++------------------------- 1 file changed, 36 insertions(+), 66 deletions(-) diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 7b7b45a19597..c41a88100fea 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -73,8 +73,12 @@ struct netlink_sock { struct netlink_callback *cb; spinlock_t cb_lock; void (*data_ready)(struct sock *sk, int bytes); + struct module *module; + u32 flags; }; +#define NETLINK_KERNEL_SOCKET 0x1 + static inline struct netlink_sock *nlk_sk(struct sock *sk) { return (struct netlink_sock *)sk; @@ -97,7 +101,7 @@ struct netlink_table { struct nl_pid_hash hash; struct hlist_head mc_list; unsigned int nl_nonroot; - struct proto_ops *p_ops; + struct module *module; }; static struct netlink_table *nl_table; @@ -338,6 +342,7 @@ static int netlink_create(struct socket *sock, int protocol) { struct sock *sk; struct netlink_sock *nlk; + struct module *module; sock->state = SS_UNCONNECTED; @@ -347,30 +352,36 @@ static int netlink_create(struct socket *sock, int protocol) if (protocol<0 || protocol >= MAX_LINKS) return -EPROTONOSUPPORT; - netlink_table_grab(); + netlink_lock_table(); if (!nl_table[protocol].hash.entries) { #ifdef CONFIG_KMOD /* We do 'best effort'. If we find a matching module, * it is loaded. If not, we don't return an error to * allow pure userspace<->userspace communication. 
-HW */ - netlink_table_ungrab(); + netlink_unlock_table(); request_module("net-pf-%d-proto-%d", PF_NETLINK, protocol); - netlink_table_grab(); + netlink_lock_table(); #endif } - netlink_table_ungrab(); + module = nl_table[protocol].module; + if (!try_module_get(module)) + module = NULL; + netlink_unlock_table(); - sock->ops = nl_table[protocol].p_ops; + sock->ops = &netlink_ops; sk = sk_alloc(PF_NETLINK, GFP_KERNEL, &netlink_proto, 1); - if (!sk) + if (!sk) { + module_put(module); return -ENOMEM; + } sock_init_data(sock, sk); nlk = nlk_sk(sk); + nlk->module = module; spin_lock_init(&nlk->cb_lock); init_waitqueue_head(&nlk->wait); sk->sk_destruct = netlink_sock_destruct; @@ -415,22 +426,15 @@ static int netlink_release(struct socket *sock) notifier_call_chain(&netlink_chain, NETLINK_URELEASE, &n); } - /* When this is a kernel socket, we need to remove the owner pointer, - * since we don't know whether the module will be dying at any given - * point - HW - */ - if (!nlk->pid) { - struct proto_ops *p_tmp; + if (nlk->module) + module_put(nlk->module); + if (nlk->flags & NETLINK_KERNEL_SOCKET) { netlink_table_grab(); - p_tmp = nl_table[sk->sk_protocol].p_ops; - if (p_tmp != &netlink_ops) { - nl_table[sk->sk_protocol].p_ops = &netlink_ops; - kfree(p_tmp); - } + nl_table[sk->sk_protocol].module = NULL; netlink_table_ungrab(); } - + sock_put(sk); return 0; } @@ -1060,9 +1064,9 @@ static void netlink_data_ready(struct sock *sk, int len) struct sock * netlink_kernel_create(int unit, void (*input)(struct sock *sk, int len), struct module *module) { - struct proto_ops *p_ops; struct socket *sock; struct sock *sk; + struct netlink_sock *nlk; if (!nl_table) return NULL; @@ -1070,64 +1074,32 @@ netlink_kernel_create(int unit, void (*input)(struct sock *sk, int len), struct if (unit<0 || unit>=MAX_LINKS) return NULL; - /* Do a quick check, to make us not go down to netlink_insert() - * if protocol already has kernel socket. 
- */ - sk = netlink_lookup(unit, 0); - if (unlikely(sk)) { - sock_put(sk); - return NULL; - } - if (sock_create_lite(PF_NETLINK, SOCK_DGRAM, unit, &sock)) return NULL; - sk = NULL; - if (module) { - /* Every registering protocol implemented in a module needs - * it's own p_ops, since the socket code cannot deal with - * module refcounting otherwise. -HW - */ - p_ops = kmalloc(sizeof(*p_ops), GFP_KERNEL); - if (!p_ops) - goto out_sock_release; - - memcpy(p_ops, &netlink_ops, sizeof(*p_ops)); - p_ops->owner = module; - } else - p_ops = &netlink_ops; - - netlink_table_grab(); - nl_table[unit].p_ops = p_ops; - netlink_table_ungrab(); - - if (netlink_create(sock, unit) < 0) { - sk = NULL; - goto out_kfree_p_ops; - } + if (netlink_create(sock, unit) < 0) + goto out_sock_release; sk = sock->sk; sk->sk_data_ready = netlink_data_ready; if (input) nlk_sk(sk)->data_ready = input; - if (netlink_insert(sk, 0)) { - sk = NULL; - goto out_kfree_p_ops; - } + if (netlink_insert(sk, 0)) + goto out_sock_release; + + nlk = nlk_sk(sk); + nlk->flags |= NETLINK_KERNEL_SOCKET; + + netlink_table_grab(); + nl_table[unit].module = module; + netlink_table_ungrab(); return sk; -out_kfree_p_ops: - netlink_table_grab(); - if (nl_table[unit].p_ops != &netlink_ops) { - kfree(nl_table[unit].p_ops); - nl_table[unit].p_ops = &netlink_ops; - } - netlink_table_ungrab(); out_sock_release: sock_release(sock); - return sk; + return NULL; } void netlink_set_nonroot(int protocol, unsigned int flags) @@ -1490,8 +1462,6 @@ enomem: for (i = 0; i < MAX_LINKS; i++) { struct nl_pid_hash *hash = &nl_table[i].hash; - nl_table[i].p_ops = &netlink_ops; - hash->table = nl_pid_hash_alloc(1 * sizeof(*hash->table)); if (!hash->table) { while (i-- > 0) From d629b836d151d43332492651dd841d32e57ebe3b Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Sun, 14 Aug 2005 19:27:50 -0700 Subject: [PATCH 409/584] [NETLINK]: Use group numbers instead of bitmasks internally Using the group number allows increasing the number of 
groups without being limited by the size of the bitmask. It introduces one limitation for netlink users: messages can't be broadcasted to multiple groups anymore, however this feature was never used inside the kernel. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/linux/netlink.h | 2 +- net/netlink/af_netlink.c | 35 ++++++++++++++++++++--------------- 2 files changed, 21 insertions(+), 16 deletions(-) diff --git a/include/linux/netlink.h b/include/linux/netlink.h index eab51f9c9c86..c724c9d4984a 100644 --- a/include/linux/netlink.h +++ b/include/linux/netlink.h @@ -107,7 +107,7 @@ struct netlink_skb_parms struct ucred creds; /* Skb credentials */ __u32 pid; __u32 dst_pid; - __u32 dst_groups; + __u32 dst_group; kernel_cap_t eff_cap; __u32 loginuid; /* Login (audit) uid */ }; diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index c41a88100fea..3c56b96b4a4b 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -67,7 +67,7 @@ struct netlink_sock { u32 pid; unsigned int groups; u32 dst_pid; - unsigned int dst_groups; + u32 dst_group; unsigned long state; wait_queue_head_t wait; struct netlink_callback *cb; @@ -116,6 +116,11 @@ static atomic_t nl_table_users = ATOMIC_INIT(0); static struct notifier_block *netlink_chain; +static u32 netlink_group_mask(u32 group) +{ + return group ?
1 << (group - 1) : 0; +} + static struct hlist_head *nl_pid_hashfn(struct nl_pid_hash *hash, u32 pid) { return &hash->table[jhash_1word(pid, hash->rnd) & hash->mask]; @@ -533,7 +538,7 @@ static int netlink_connect(struct socket *sock, struct sockaddr *addr, if (addr->sa_family == AF_UNSPEC) { sk->sk_state = NETLINK_UNCONNECTED; nlk->dst_pid = 0; - nlk->dst_groups = 0; + nlk->dst_group = 0; return 0; } if (addr->sa_family != AF_NETLINK) @@ -549,7 +554,7 @@ static int netlink_connect(struct socket *sock, struct sockaddr *addr, if (err == 0) { sk->sk_state = NETLINK_CONNECTED; nlk->dst_pid = nladdr->nl_pid; - nlk->dst_groups = nladdr->nl_groups; + nlk->dst_group = ffs(nladdr->nl_groups); } return err; @@ -567,10 +572,10 @@ static int netlink_getname(struct socket *sock, struct sockaddr *addr, int *addr if (peer) { nladdr->nl_pid = nlk->dst_pid; - nladdr->nl_groups = nlk->dst_groups; + nladdr->nl_groups = netlink_group_mask(nlk->dst_group); } else { nladdr->nl_pid = nlk->pid; - nladdr->nl_groups = nlk->groups; + nladdr->nl_groups = nlk->groups; } return 0; } @@ -771,7 +776,7 @@ static inline int do_one_broadcast(struct sock *sk, if (p->exclude_sk == sk) goto out; - if (nlk->pid == p->pid || !(nlk->groups & p->group)) + if (nlk->pid == p->pid || !(nlk->groups & netlink_group_mask(p->group))) goto out; if (p->failure) { @@ -867,7 +872,7 @@ static inline int do_one_set_err(struct sock *sk, if (sk == p->exclude_sk) goto out; - if (nlk->pid == p->pid || !(nlk->groups & p->group)) + if (nlk->pid == p->pid || !(nlk->groups & netlink_group_mask(p->group))) goto out; sk->sk_err = p->code; @@ -913,7 +918,7 @@ static int netlink_sendmsg(struct kiocb *kiocb, struct socket *sock, struct netlink_sock *nlk = nlk_sk(sk); struct sockaddr_nl *addr=msg->msg_name; u32 dst_pid; - u32 dst_groups; + u32 dst_group; struct sk_buff *skb; int err; struct scm_cookie scm; @@ -931,12 +936,12 @@ static int netlink_sendmsg(struct kiocb *kiocb, struct socket *sock, if (addr->nl_family != AF_NETLINK) 
return -EINVAL; dst_pid = addr->nl_pid; - dst_groups = addr->nl_groups; - if (dst_groups && !netlink_capable(sock, NL_NONROOT_SEND)) + dst_group = ffs(addr->nl_groups); + if (dst_group && !netlink_capable(sock, NL_NONROOT_SEND)) return -EPERM; } else { dst_pid = nlk->dst_pid; - dst_groups = nlk->dst_groups; + dst_group = nlk->dst_group; } if (!nlk->pid) { @@ -955,7 +960,7 @@ static int netlink_sendmsg(struct kiocb *kiocb, struct socket *sock, NETLINK_CB(skb).pid = nlk->pid; NETLINK_CB(skb).dst_pid = dst_pid; - NETLINK_CB(skb).dst_groups = dst_groups; + NETLINK_CB(skb).dst_group = dst_group; NETLINK_CB(skb).loginuid = audit_get_loginuid(current->audit_context); memcpy(NETLINK_CREDS(skb), &siocb->scm->creds, sizeof(struct ucred)); @@ -977,9 +982,9 @@ static int netlink_sendmsg(struct kiocb *kiocb, struct socket *sock, goto out; } - if (dst_groups) { + if (dst_group) { atomic_inc(&skb->users); - netlink_broadcast(sk, skb, dst_pid, dst_groups, GFP_KERNEL); + netlink_broadcast(sk, skb, dst_pid, dst_group, GFP_KERNEL); } err = netlink_unicast(sk, skb, dst_pid, msg->msg_flags&MSG_DONTWAIT); @@ -1025,7 +1030,7 @@ static int netlink_recvmsg(struct kiocb *kiocb, struct socket *sock, addr->nl_family = AF_NETLINK; addr->nl_pad = 0; addr->nl_pid = NETLINK_CB(skb).pid; - addr->nl_groups = NETLINK_CB(skb).dst_groups; + addr->nl_groups = netlink_group_mask(NETLINK_CB(skb).dst_group); msg->msg_namelen = sizeof(*addr); } From ac6d439d2097b72ea0cbc2322ce1263a38bc1fd0 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Sun, 14 Aug 2005 19:29:52 -0700 Subject: [PATCH 410/584] [NETLINK]: Convert netlink users to use group numbers instead of bitmasks Signed-off-by: Patrick McHardy Signed-off-by: David S. 
Miller --- drivers/w1/w1_netlink.c | 2 +- include/linux/netfilter/nfnetlink.h | 23 ++++++++++++- include/linux/netfilter_decnet.h | 14 ++++++++ include/linux/rtnetlink.h | 42 +++++++++++++++++++++-- include/linux/selinux_netlink.h | 13 ++++++- include/linux/xfrm.h | 18 ++++++++++ lib/kobject_uevent.c | 2 +- net/bridge/netfilter/ebt_ulog.c | 4 +-- net/core/neighbour.c | 8 ++--- net/core/rtnetlink.c | 6 ++-- net/core/wireless.c | 4 +-- net/decnet/dn_dev.c | 8 ++--- net/decnet/dn_table.c | 4 +-- net/decnet/netfilter/dn_rtmsg.c | 6 ++-- net/ipv4/devinet.c | 7 ++-- net/ipv4/fib_frontend.c | 2 +- net/ipv4/fib_semantics.c | 4 +-- net/ipv4/netfilter/ip_conntrack_netlink.c | 12 +++---- net/ipv4/netfilter/ipt_ULOG.c | 8 ++--- net/ipv6/addrconf.c | 24 ++++++------- net/ipv6/route.c | 8 ++--- net/netfilter/nfnetlink.c | 2 +- net/sched/act_api.c | 8 ++--- net/sched/cls_api.c | 2 +- net/sched/sch_api.c | 4 +-- net/xfrm/xfrm_user.c | 35 ++++++++----------- security/selinux/netlink.c | 4 +-- 27 files changed, 183 insertions(+), 91 deletions(-) diff --git a/drivers/w1/w1_netlink.c b/drivers/w1/w1_netlink.c index 2a82fb055c70..e7b774423dd6 100644 --- a/drivers/w1/w1_netlink.c +++ b/drivers/w1/w1_netlink.c @@ -51,7 +51,7 @@ void w1_netlink_send(struct w1_master *dev, struct w1_netlink_msg *msg) memcpy(data, msg, sizeof(struct w1_netlink_msg)); - NETLINK_CB(skb).dst_groups = dev->groups; + NETLINK_CB(skb).dst_group = dev->groups; netlink_broadcast(dev->nls, skb, 0, dev->groups, GFP_ATOMIC); nlmsg_failure: diff --git a/include/linux/netfilter/nfnetlink.h b/include/linux/netfilter/nfnetlink.h index b0feb2374079..1d5b10ae2399 100644 --- a/include/linux/netfilter/nfnetlink.h +++ b/include/linux/netfilter/nfnetlink.h @@ -2,13 +2,34 @@ #define _NFNETLINK_H #include -/* nfnetlink groups: Up to 32 maximum */ +#ifndef __KERNEL__ +/* nfnetlink groups: Up to 32 maximum - backwards compatibility for userspace */ #define NF_NETLINK_CONNTRACK_NEW 0x00000001 #define NF_NETLINK_CONNTRACK_UPDATE 
0x00000002 #define NF_NETLINK_CONNTRACK_DESTROY 0x00000004 #define NF_NETLINK_CONNTRACK_EXP_NEW 0x00000008 #define NF_NETLINK_CONNTRACK_EXP_UPDATE 0x00000010 #define NF_NETLINK_CONNTRACK_EXP_DESTROY 0x00000020 +#endif + +enum nfnetlink_groups { + NFNLGRP_NONE, +#define NFNLGRP_NONE NFNLGRP_NONE + NFNLGRP_CONNTRACK_NEW, +#define NFNLGRP_CONNTRACK_NEW NFNLGRP_CONNTRACK_NEW + NFNLGRP_CONNTRACK_UPDATE, +#define NFNLGRP_CONNTRACK_UPDATE NFNLGRP_CONNTRACK_UPDATE + NFNLGRP_CONNTRACK_DESTROY, +#define NFNLGRP_CONNTRACK_DESTROY NFNLGRP_CONNTRACK_DESTROY + NFNLGRP_CONNTRACK_EXP_NEW, +#define NFNLGRP_CONNTRACK_EXP_NEW NFNLGRP_CONNTRACK_EXP_NEW + NFNLGRP_CONNTRACK_EXP_UPDATE, +#define NFNLGRP_CONNTRACK_EXP_UPDATE NFNLGRP_CONNTRACK_EXP_UPDATE + NFNLGRP_CONNTRACK_EXP_DESTROY, +#define NFNLGRP_CONNTRACK_EXP_DESTROY NFNLGRP_CONNTRACK_EXP_DESTROY + __NFNLGRP_MAX, +}; +#define NFNLGRP_MAX (__NFNLGRP_MAX - 1) /* Generic structure for encapsulation optional netfilter information. * It is reminiscent of sockaddr, but with sa_family replaced diff --git a/include/linux/netfilter_decnet.h b/include/linux/netfilter_decnet.h index 018979484150..6f425369ee29 100644 --- a/include/linux/netfilter_decnet.h +++ b/include/linux/netfilter_decnet.h @@ -56,7 +56,21 @@ struct nf_dn_rtmsg { #define NFDN_RTMSG(r) ((unsigned char *)(r) + NLMSG_ALIGN(sizeof(struct nf_dn_rtmsg))) +#ifndef __KERNEL__ +/* backwards compatibility for userspace */ #define DNRMG_L1_GROUP 0x01 #define DNRMG_L2_GROUP 0x02 +#endif + +enum { + DNRNG_NLGRP_NONE, +#define DNRNG_NLGRP_NONE DNRNG_NLGRP_NONE + DNRNG_NLGRP_L1, +#define DNRNG_NLGRP_L1 DNRNG_NLGRP_L1 + DNRNG_NLGRP_L2, +#define DNRNG_NLGRP_L2 DNRNG_NLGRP_L2 + __DNRNG_NLGRP_MAX +}; +#define DNRNG_NLGRP_MAX (__DNRNG_NLGRP_MAX - 1) #endif /*__LINUX_DECNET_NETFILTER_H*/ diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h index 657c05ab8f9e..c231e9a08f0b 100644 --- a/include/linux/rtnetlink.h +++ b/include/linux/rtnetlink.h @@ -826,9 +826,8 @@ enum #define 
TCA_RTA(r) ((struct rtattr*)(((char*)(r)) + NLMSG_ALIGN(sizeof(struct tcmsg)))) #define TCA_PAYLOAD(n) NLMSG_PAYLOAD(n,sizeof(struct tcmsg)) - -/* RTnetlink multicast groups */ - +#ifndef __KERNEL__ +/* RTnetlink multicast groups - backwards compatibility for userspace */ #define RTMGRP_LINK 1 #define RTMGRP_NOTIFY 2 #define RTMGRP_NEIGH 4 @@ -847,6 +846,43 @@ enum #define RTMGRP_DECnet_ROUTE 0x4000 #define RTMGRP_IPV6_PREFIX 0x20000 +#endif + +/* RTnetlink multicast groups */ +enum rtnetlink_groups { + RTNLGRP_NONE, +#define RTNLGRP_NONE RTNLGRP_NONE + RTNLGRP_LINK, +#define RTNLGRP_LINK RTNLGRP_LINK + RTNLGRP_NOTIFY, +#define RTNLGRP_NOTIFY RTNLGRP_NOTIFY + RTNLGRP_NEIGH, +#define RTNLGRP_NEIGH RTNLGRP_NEIGH + RTNLGRP_TC, +#define RTNLGRP_TC RTNLGRP_TC + RTNLGRP_IPV4_IFADDR, +#define RTNLGRP_IPV4_IFADDR RTNLGRP_IPV4_IFADDR + RTNLGRP_IPV4_MROUTE, +#define RTNLGRP_IPV4_MROUTE RTNLGRP_IPV4_MROUTE + RTNLGRP_IPV4_ROUTE, +#define RTNLGRP_IPV4_ROUTE RTNLGRP_IPV4_ROUTE + RTNLGRP_IPV6_IFADDR, +#define RTNLGRP_IPV6_IFADDR RTNLGRP_IPV6_IFADDR + RTNLGRP_IPV6_MROUTE, +#define RTNLGRP_IPV6_MROUTE RTNLGRP_IPV6_MROUTE + RTNLGRP_IPV6_ROUTE, +#define RTNLGRP_IPV6_ROUTE RTNLGRP_IPV6_ROUTE + RTNLGRP_IPV6_IFINFO, +#define RTNLGRP_IPV6_IFINFO RTNLGRP_IPV6_IFINFO + RTNLGRP_DECnet_IFADDR, +#define RTNLGRP_DECnet_IFADDR RTNLGRP_DECnet_IFADDR + RTNLGRP_DECnet_ROUTE, +#define RTNLGRP_DECnet_ROUTE RTNLGRP_DECnet_ROUTE + RTNLGRP_IPV6_PREFIX, +#define RTNLGRP_IPV6_PREFIX RTNLGRP_IPV6_PREFIX + __RTNLGRP_MAX +}; +#define RTNLGRP_MAX (__RTNLGRP_MAX - 1) /* TC action piece */ struct tcamsg diff --git a/include/linux/selinux_netlink.h b/include/linux/selinux_netlink.h index 957e6ebca4e6..bbf489decd84 100644 --- a/include/linux/selinux_netlink.h +++ b/include/linux/selinux_netlink.h @@ -20,10 +20,21 @@ enum { SELNL_MSG_MAX }; -/* Multicast groups */ +#ifndef __KERNEL__ +/* Multicast groups - backwards compatiblility for userspace */ #define SELNL_GRP_NONE 0x00000000 #define SELNL_GRP_AVC 0x00000001 
/* AVC notifications */ #define SELNL_GRP_ALL 0xffffffff +#endif + +enum selinux_nlgroups { + SELNLGRP_NONE, +#define SELNLGRP_NONE SELNLGRP_NONE + SELNLGRP_AVC, +#define SELNLGRP_AVC SELNLGRP_AVC + __SELNLGRP_MAX +}; +#define SELNLGRP_MAX (__SELNLGRP_MAX - 1) /* Message structures */ struct selnl_msg_setenforce { diff --git a/include/linux/xfrm.h b/include/linux/xfrm.h index f0d423300d84..0fb077d68441 100644 --- a/include/linux/xfrm.h +++ b/include/linux/xfrm.h @@ -258,9 +258,27 @@ struct xfrm_usersa_flush { __u8 proto; }; +#ifndef __KERNEL__ +/* backwards compatibility for userspace */ #define XFRMGRP_ACQUIRE 1 #define XFRMGRP_EXPIRE 2 #define XFRMGRP_SA 4 #define XFRMGRP_POLICY 8 +#endif + +enum xfrm_nlgroups { + XFRMNLGRP_NONE, +#define XFRMNLGRP_NONE XFRMNLGRP_NONE + XFRMNLGRP_ACQUIRE, +#define XFRMNLGRP_ACQUIRE XFRMNLGRP_ACQUIRE + XFRMNLGRP_EXPIRE, +#define XFRMNLGRP_EXPIRE XFRMNLGRP_EXPIRE + XFRMNLGRP_SA, +#define XFRMNLGRP_SA XFRMNLGRP_SA + XFRMNLGRP_POLICY, +#define XFRMNLGRP_POLICY XFRMNLGRP_POLICY + __XFRMNLGRP_MAX +}; +#define XFRMNLGRP_MAX (__XFRMNLGRP_MAX - 1) #endif /* _LINUX_XFRM_H */ diff --git a/lib/kobject_uevent.c b/lib/kobject_uevent.c index bc000619f4f8..1ebd735d6439 100644 --- a/lib/kobject_uevent.c +++ b/lib/kobject_uevent.c @@ -93,7 +93,7 @@ static int send_uevent(const char *signal, const char *obj, } } - NETLINK_CB(skb).dst_groups = 1; + NETLINK_CB(skb).dst_group = 1; return netlink_broadcast(uevent_sock, skb, 0, 1, gfp_mask); } diff --git a/net/bridge/netfilter/ebt_ulog.c b/net/bridge/netfilter/ebt_ulog.c index acb888d32587..6845b5dd6d77 100644 --- a/net/bridge/netfilter/ebt_ulog.c +++ b/net/bridge/netfilter/ebt_ulog.c @@ -78,8 +78,8 @@ static void ulog_send(unsigned int nlgroup) if (ub->qlen > 1) ub->lastnlh->nlmsg_type = NLMSG_DONE; - NETLINK_CB(ub->skb).dst_groups = 1 << nlgroup; - netlink_broadcast(ebtulognl, ub->skb, 0, 1 << nlgroup, GFP_ATOMIC); + NETLINK_CB(ub->skb).dst_group = nlgroup + 1; + netlink_broadcast(ebtulognl, ub->skb, 
0, nlgroup + 1, GFP_ATOMIC); ub->qlen = 0; ub->skb = NULL; diff --git a/net/core/neighbour.c b/net/core/neighbour.c index 72ee00f7b30c..39fc55edf691 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -2343,8 +2343,8 @@ void neigh_app_ns(struct neighbour *n) } nlh = (struct nlmsghdr *)skb->data; nlh->nlmsg_flags = NLM_F_REQUEST; - NETLINK_CB(skb).dst_groups = RTMGRP_NEIGH; - netlink_broadcast(rtnl, skb, 0, RTMGRP_NEIGH, GFP_ATOMIC); + NETLINK_CB(skb).dst_group = RTNLGRP_NEIGH; + netlink_broadcast(rtnl, skb, 0, RTNLGRP_NEIGH, GFP_ATOMIC); } static void neigh_app_notify(struct neighbour *n) @@ -2361,8 +2361,8 @@ static void neigh_app_notify(struct neighbour *n) return; } nlh = (struct nlmsghdr *)skb->data; - NETLINK_CB(skb).dst_groups = RTMGRP_NEIGH; - netlink_broadcast(rtnl, skb, 0, RTMGRP_NEIGH, GFP_ATOMIC); + NETLINK_CB(skb).dst_group = RTNLGRP_NEIGH; + netlink_broadcast(rtnl, skb, 0, RTNLGRP_NEIGH, GFP_ATOMIC); } #endif /* CONFIG_ARPD */ diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 9b3c61f1a37d..5f3f95b5585d 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -148,7 +148,7 @@ int rtnetlink_send(struct sk_buff *skb, u32 pid, unsigned group, int echo) { int err = 0; - NETLINK_CB(skb).dst_groups = group; + NETLINK_CB(skb).dst_group = group; if (echo) atomic_inc(&skb->users); netlink_broadcast(rtnl, skb, pid, group, GFP_KERNEL); @@ -458,8 +458,8 @@ void rtmsg_ifinfo(int type, struct net_device *dev, unsigned change) kfree_skb(skb); return; } - NETLINK_CB(skb).dst_groups = RTMGRP_LINK; - netlink_broadcast(rtnl, skb, 0, RTMGRP_LINK, GFP_KERNEL); + NETLINK_CB(skb).dst_group = RTNLGRP_LINK; + netlink_broadcast(rtnl, skb, 0, RTNLGRP_LINK, GFP_KERNEL); } static int rtnetlink_done(struct netlink_callback *cb) diff --git a/net/core/wireless.c b/net/core/wireless.c index 3ff5639c0b78..19fa6a5389b3 100644 --- a/net/core/wireless.c +++ b/net/core/wireless.c @@ -1144,8 +1144,8 @@ static inline void rtmsg_iwinfo(struct net_device * dev, 
kfree_skb(skb); return; } - NETLINK_CB(skb).dst_groups = RTMGRP_LINK; - netlink_broadcast(rtnl, skb, 0, RTMGRP_LINK, GFP_ATOMIC); + NETLINK_CB(skb).dst_group = RTNLGRP_LINK; + netlink_broadcast(rtnl, skb, 0, RTNLGRP_LINK, GFP_ATOMIC); } #endif /* WE_EVENT_NETLINK */ diff --git a/net/decnet/dn_dev.c b/net/decnet/dn_dev.c index 00233ecbc9cb..5610bb16dbf9 100644 --- a/net/decnet/dn_dev.c +++ b/net/decnet/dn_dev.c @@ -752,16 +752,16 @@ static void rtmsg_ifa(int event, struct dn_ifaddr *ifa) skb = alloc_skb(size, GFP_KERNEL); if (!skb) { - netlink_set_err(rtnl, 0, RTMGRP_DECnet_IFADDR, ENOBUFS); + netlink_set_err(rtnl, 0, RTNLGRP_DECnet_IFADDR, ENOBUFS); return; } if (dn_dev_fill_ifaddr(skb, ifa, 0, 0, event, 0) < 0) { kfree_skb(skb); - netlink_set_err(rtnl, 0, RTMGRP_DECnet_IFADDR, EINVAL); + netlink_set_err(rtnl, 0, RTNLGRP_DECnet_IFADDR, EINVAL); return; } - NETLINK_CB(skb).dst_groups = RTMGRP_DECnet_IFADDR; - netlink_broadcast(rtnl, skb, 0, RTMGRP_DECnet_IFADDR, GFP_KERNEL); + NETLINK_CB(skb).dst_group = RTNLGRP_DECnet_IFADDR; + netlink_broadcast(rtnl, skb, 0, RTNLGRP_DECnet_IFADDR, GFP_KERNEL); } static int dn_dev_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb) diff --git a/net/decnet/dn_table.c b/net/decnet/dn_table.c index 28ba5777a25a..73a88489ff3e 100644 --- a/net/decnet/dn_table.c +++ b/net/decnet/dn_table.c @@ -349,10 +349,10 @@ static void dn_rtmsg_fib(int event, struct dn_fib_node *f, int z, int tb_id, kfree_skb(skb); return; } - NETLINK_CB(skb).dst_groups = RTMGRP_DECnet_ROUTE; + NETLINK_CB(skb).dst_group = RTNLGRP_DECnet_ROUTE; if (nlh->nlmsg_flags & NLM_F_ECHO) atomic_inc(&skb->users); - netlink_broadcast(rtnl, skb, pid, RTMGRP_DECnet_ROUTE, GFP_KERNEL); + netlink_broadcast(rtnl, skb, pid, RTNLGRP_DECnet_ROUTE, GFP_KERNEL); if (nlh->nlmsg_flags & NLM_F_ECHO) netlink_unicast(rtnl, skb, pid, MSG_DONTWAIT); } diff --git a/net/decnet/netfilter/dn_rtmsg.c b/net/decnet/netfilter/dn_rtmsg.c index 3068fddb2da3..353fed6888f9 100644 --- 
a/net/decnet/netfilter/dn_rtmsg.c +++ b/net/decnet/netfilter/dn_rtmsg.c @@ -71,10 +71,10 @@ static void dnrmg_send_peer(struct sk_buff *skb) switch(flags & DN_RT_CNTL_MSK) { case DN_RT_PKT_L1RT: - group = DNRMG_L1_GROUP; + group = DNRMG_L1_NLGRP; break; case DN_RT_PKT_L2RT: - group = DNRMG_L2_GROUP; + group = DNRMG_L2_NLGRP; break; default: return; @@ -83,7 +83,7 @@ static void dnrmg_send_peer(struct sk_buff *skb) skb2 = dnrmg_build_message(skb, &status); if (skb2 == NULL) return; - NETLINK_CB(skb2).dst_groups = group; + NETLINK_CB(skb2).dst_group = group; netlink_broadcast(dnrmg, skb2, 0, group, GFP_ATOMIC); } diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index d8a10e3dd77d..ba2895ae8151 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -1111,13 +1111,12 @@ static void rtmsg_ifa(int event, struct in_ifaddr* ifa) struct sk_buff *skb = alloc_skb(size, GFP_KERNEL); if (!skb) - netlink_set_err(rtnl, 0, RTMGRP_IPV4_IFADDR, ENOBUFS); + netlink_set_err(rtnl, 0, RTNLGRP_IPV4_IFADDR, ENOBUFS); else if (inet_fill_ifaddr(skb, ifa, current->pid, 0, event, 0) < 0) { kfree_skb(skb); - netlink_set_err(rtnl, 0, RTMGRP_IPV4_IFADDR, EINVAL); + netlink_set_err(rtnl, 0, RTNLGRP_IPV4_IFADDR, EINVAL); } else { - NETLINK_CB(skb).dst_groups = RTMGRP_IPV4_IFADDR; - netlink_broadcast(rtnl, skb, 0, RTMGRP_IPV4_IFADDR, GFP_KERNEL); + netlink_broadcast(rtnl, skb, 0, RTNLGRP_IPV4_IFADDR, GFP_KERNEL); } } diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index 75d03e37b9a8..d4e7b578a25d 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -560,7 +560,7 @@ static void nl_fib_input(struct sock *sk, int len) pid = nlh->nlmsg_pid; /*pid of sending process */ NETLINK_CB(skb).pid = 0; /* from kernel */ NETLINK_CB(skb).dst_pid = pid; - NETLINK_CB(skb).dst_groups = 0; /* unicast */ + NETLINK_CB(skb).dst_group = 0; /* unicast */ netlink_unicast(sk, skb, pid, MSG_DONTWAIT); } diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index 
e278cb9d0075..7e4651b3caa8 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -290,10 +290,10 @@ void rtmsg_fib(int event, u32 key, struct fib_alias *fa, kfree_skb(skb); return; } - NETLINK_CB(skb).dst_groups = RTMGRP_IPV4_ROUTE; + NETLINK_CB(skb).dst_group = RTNLGRP_IPV4_ROUTE; if (n->nlmsg_flags&NLM_F_ECHO) atomic_inc(&skb->users); - netlink_broadcast(rtnl, skb, pid, RTMGRP_IPV4_ROUTE, GFP_KERNEL); + netlink_broadcast(rtnl, skb, pid, RTNLGRP_IPV4_ROUTE, GFP_KERNEL); if (n->nlmsg_flags&NLM_F_ECHO) netlink_unicast(rtnl, skb, pid, MSG_DONTWAIT); } diff --git a/net/ipv4/netfilter/ip_conntrack_netlink.c b/net/ipv4/netfilter/ip_conntrack_netlink.c index 1221a9c8bac2..a4e9278db4ed 100644 --- a/net/ipv4/netfilter/ip_conntrack_netlink.c +++ b/net/ipv4/netfilter/ip_conntrack_netlink.c @@ -297,7 +297,7 @@ static int ctnetlink_conntrack_event(struct notifier_block *this, struct sk_buff *skb; unsigned int type; unsigned char *b; - unsigned int flags = 0, groups; + unsigned int flags = 0, group; /* ignore our fake conntrack entry */ if (ct == &ip_conntrack_untracked) @@ -305,7 +305,7 @@ static int ctnetlink_conntrack_event(struct notifier_block *this, if (events & IPCT_DESTROY) { type = IPCTNL_MSG_CT_DELETE; - groups = NF_NETLINK_CONNTRACK_DESTROY; + group = NFNLGRP_CONNTRACK_DESTROY; goto alloc_skb; } if (events & (IPCT_NEW | IPCT_RELATED)) { @@ -313,7 +313,7 @@ static int ctnetlink_conntrack_event(struct notifier_block *this, flags = NLM_F_CREATE|NLM_F_EXCL; /* dump everything */ events = ~0UL; - groups = NF_NETLINK_CONNTRACK_NEW; + group = NFNLGRP_CONNTRACK_NEW; goto alloc_skb; } if (events & (IPCT_STATUS | @@ -322,7 +322,7 @@ static int ctnetlink_conntrack_event(struct notifier_block *this, IPCT_HELPINFO | IPCT_NATINFO)) { type = IPCTNL_MSG_CT_NEW; - groups = NF_NETLINK_CONNTRACK_UPDATE; + group = NFNLGRP_CONNTRACK_UPDATE; goto alloc_skb; } @@ -375,7 +375,7 @@ alloc_skb: goto nfattr_failure; nlh->nlmsg_len = skb->tail - b; - nfnetlink_send(skb, 0, 
groups, 0); + nfnetlink_send(skb, 0, group, 0); return NOTIFY_DONE; nlmsg_failure: @@ -1194,7 +1194,7 @@ static int ctnetlink_expect_event(struct notifier_block *this, nlh->nlmsg_len = skb->tail - b; proto = exp->tuple.dst.protonum; - nfnetlink_send(skb, 0, NF_NETLINK_CONNTRACK_EXP_NEW, 0); + nfnetlink_send(skb, 0, NFNLGRP_CONNTRACK_EXP_NEW, 0); return NOTIFY_DONE; nlmsg_failure: diff --git a/net/ipv4/netfilter/ipt_ULOG.c b/net/ipv4/netfilter/ipt_ULOG.c index 1d8ac4595e17..89816b83455e 100644 --- a/net/ipv4/netfilter/ipt_ULOG.c +++ b/net/ipv4/netfilter/ipt_ULOG.c @@ -116,10 +116,10 @@ static void ulog_send(unsigned int nlgroupnum) if (ub->qlen > 1) ub->lastnlh->nlmsg_type = NLMSG_DONE; - NETLINK_CB(ub->skb).dst_groups = (1 << nlgroupnum); - DEBUGP("ipt_ULOG: throwing %d packets to netlink mask %u\n", - ub->qlen, nlgroupnum); - netlink_broadcast(nflognl, ub->skb, 0, (1 << nlgroupnum), GFP_ATOMIC); + NETLINK_CB(ub->skb).dst_group = nlgroupnum + 1; + DEBUGP("ipt_ULOG: throwing %d packets to netlink group %u\n", + ub->qlen, nlgroupnum + 1); + netlink_broadcast(nflognl, ub->skb, 0, nlgroupnum + 1, GFP_ATOMIC); ub->qlen = 0; ub->skb = NULL; diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index b9c3da349492..493abf94bcfc 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -2858,16 +2858,16 @@ static void inet6_ifa_notify(int event, struct inet6_ifaddr *ifa) skb = alloc_skb(size, GFP_ATOMIC); if (!skb) { - netlink_set_err(rtnl, 0, RTMGRP_IPV6_IFADDR, ENOBUFS); + netlink_set_err(rtnl, 0, RTNLGRP_IPV6_IFADDR, ENOBUFS); return; } if (inet6_fill_ifaddr(skb, ifa, current->pid, 0, event, 0) < 0) { kfree_skb(skb); - netlink_set_err(rtnl, 0, RTMGRP_IPV6_IFADDR, EINVAL); + netlink_set_err(rtnl, 0, RTNLGRP_IPV6_IFADDR, EINVAL); return; } - NETLINK_CB(skb).dst_groups = RTMGRP_IPV6_IFADDR; - netlink_broadcast(rtnl, skb, 0, RTMGRP_IPV6_IFADDR, GFP_ATOMIC); + NETLINK_CB(skb).dst_group = RTNLGRP_IPV6_IFADDR; + netlink_broadcast(rtnl, skb, 0, RTNLGRP_IPV6_IFADDR, 
GFP_ATOMIC); } static void inline ipv6_store_devconf(struct ipv6_devconf *cnf, @@ -2994,16 +2994,16 @@ void inet6_ifinfo_notify(int event, struct inet6_dev *idev) skb = alloc_skb(size, GFP_ATOMIC); if (!skb) { - netlink_set_err(rtnl, 0, RTMGRP_IPV6_IFINFO, ENOBUFS); + netlink_set_err(rtnl, 0, RTNLGRP_IPV6_IFINFO, ENOBUFS); return; } if (inet6_fill_ifinfo(skb, idev, current->pid, 0, event, 0) < 0) { kfree_skb(skb); - netlink_set_err(rtnl, 0, RTMGRP_IPV6_IFINFO, EINVAL); + netlink_set_err(rtnl, 0, RTNLGRP_IPV6_IFINFO, EINVAL); return; } - NETLINK_CB(skb).dst_groups = RTMGRP_IPV6_IFINFO; - netlink_broadcast(rtnl, skb, 0, RTMGRP_IPV6_IFINFO, GFP_ATOMIC); + NETLINK_CB(skb).dst_group = RTNLGRP_IPV6_IFINFO; + netlink_broadcast(rtnl, skb, 0, RTNLGRP_IPV6_IFINFO, GFP_ATOMIC); } static int inet6_fill_prefix(struct sk_buff *skb, struct inet6_dev *idev, @@ -3054,16 +3054,16 @@ static void inet6_prefix_notify(int event, struct inet6_dev *idev, skb = alloc_skb(size, GFP_ATOMIC); if (!skb) { - netlink_set_err(rtnl, 0, RTMGRP_IPV6_PREFIX, ENOBUFS); + netlink_set_err(rtnl, 0, RTNLGRP_IPV6_PREFIX, ENOBUFS); return; } if (inet6_fill_prefix(skb, idev, pinfo, current->pid, 0, event, 0) < 0) { kfree_skb(skb); - netlink_set_err(rtnl, 0, RTMGRP_IPV6_PREFIX, EINVAL); + netlink_set_err(rtnl, 0, RTNLGRP_IPV6_PREFIX, EINVAL); return; } - NETLINK_CB(skb).dst_groups = RTMGRP_IPV6_PREFIX; - netlink_broadcast(rtnl, skb, 0, RTMGRP_IPV6_PREFIX, GFP_ATOMIC); + NETLINK_CB(skb).dst_group = RTNLGRP_IPV6_PREFIX; + netlink_broadcast(rtnl, skb, 0, RTNLGRP_IPV6_PREFIX, GFP_ATOMIC); } static struct rtnetlink_link inet6_rtnetlink_table[RTM_NR_MSGTYPES] = { diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 878789b3122d..6ea494ab4e02 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -1850,16 +1850,16 @@ void inet6_rt_notify(int event, struct rt6_info *rt, struct nlmsghdr *nlh, skb = alloc_skb(size, gfp_any()); if (!skb) { - netlink_set_err(rtnl, 0, RTMGRP_IPV6_ROUTE, ENOBUFS); + 
netlink_set_err(rtnl, 0, RTNLGRP_IPV6_ROUTE, ENOBUFS); return; } if (rt6_fill_node(skb, rt, NULL, NULL, 0, event, pid, seq, 0, 0) < 0) { kfree_skb(skb); - netlink_set_err(rtnl, 0, RTMGRP_IPV6_ROUTE, EINVAL); + netlink_set_err(rtnl, 0, RTNLGRP_IPV6_ROUTE, EINVAL); return; } - NETLINK_CB(skb).dst_groups = RTMGRP_IPV6_ROUTE; - netlink_broadcast(rtnl, skb, 0, RTMGRP_IPV6_ROUTE, gfp_any()); + NETLINK_CB(skb).dst_group = RTNLGRP_IPV6_ROUTE; + netlink_broadcast(rtnl, skb, 0, RTNLGRP_IPV6_ROUTE, gfp_any()); } /* diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c index 84efffdbade3..36a4c5fbb7d7 100644 --- a/net/netfilter/nfnetlink.c +++ b/net/netfilter/nfnetlink.c @@ -198,7 +198,7 @@ int nfnetlink_send(struct sk_buff *skb, u32 pid, unsigned group, int echo) int allocation = in_interrupt() ? GFP_ATOMIC : GFP_KERNEL; int err = 0; - NETLINK_CB(skb).dst_groups = group; + NETLINK_CB(skb).dst_group = group; if (echo) atomic_inc(&skb->users); netlink_broadcast(nfnl, skb, pid, group, allocation); diff --git a/net/sched/act_api.c b/net/sched/act_api.c index c896a0118a32..8aebe8f6d271 100644 --- a/net/sched/act_api.c +++ b/net/sched/act_api.c @@ -593,7 +593,7 @@ static int tca_action_flush(struct rtattr *rta, struct nlmsghdr *n, u32 pid) nlh->nlmsg_flags |= NLM_F_ROOT; module_put(a->ops->owner); kfree(a); - err = rtnetlink_send(skb, pid, RTMGRP_TC, n->nlmsg_flags&NLM_F_ECHO); + err = rtnetlink_send(skb, pid, RTNLGRP_TC, n->nlmsg_flags&NLM_F_ECHO); if (err > 0) return 0; @@ -656,7 +656,7 @@ tca_action_gd(struct rtattr *rta, struct nlmsghdr *n, u32 pid, int event) /* now do the delete */ tcf_action_destroy(head, 0); - ret = rtnetlink_send(skb, pid, RTMGRP_TC, + ret = rtnetlink_send(skb, pid, RTNLGRP_TC, n->nlmsg_flags&NLM_F_ECHO); if (ret > 0) return 0; @@ -698,9 +698,9 @@ static int tcf_add_notify(struct tc_action *a, u32 pid, u32 seq, int event, x->rta_len = skb->tail - (u8*)x; nlh->nlmsg_len = skb->tail - b; - NETLINK_CB(skb).dst_groups = RTMGRP_TC; + 
NETLINK_CB(skb).dst_group = RTNLGRP_TC; - err = rtnetlink_send(skb, pid, RTMGRP_TC, flags&NLM_F_ECHO); + err = rtnetlink_send(skb, pid, RTNLGRP_TC, flags&NLM_F_ECHO); if (err > 0) err = 0; return err; diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index 3b5714ef4d1a..b4d89fbb3782 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -367,7 +367,7 @@ static int tfilter_notify(struct sk_buff *oskb, struct nlmsghdr *n, return -EINVAL; } - return rtnetlink_send(skb, pid, RTMGRP_TC, n->nlmsg_flags&NLM_F_ECHO); + return rtnetlink_send(skb, pid, RTNLGRP_TC, n->nlmsg_flags&NLM_F_ECHO); } struct tcf_dump_args diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index b9a069af4a02..737681cb9a92 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -816,7 +816,7 @@ static int qdisc_notify(struct sk_buff *oskb, struct nlmsghdr *n, } if (skb->len) - return rtnetlink_send(skb, pid, RTMGRP_TC, n->nlmsg_flags&NLM_F_ECHO); + return rtnetlink_send(skb, pid, RTNLGRP_TC, n->nlmsg_flags&NLM_F_ECHO); err_out: kfree_skb(skb); @@ -1040,7 +1040,7 @@ static int tclass_notify(struct sk_buff *oskb, struct nlmsghdr *n, return -EINVAL; } - return rtnetlink_send(skb, pid, RTMGRP_TC, n->nlmsg_flags&NLM_F_ECHO); + return rtnetlink_send(skb, pid, RTNLGRP_TC, n->nlmsg_flags&NLM_F_ECHO); } struct qdisc_dump_args diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index 4d553a1d2169..0579d209af27 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -1125,9 +1125,8 @@ static int xfrm_exp_state_notify(struct xfrm_state *x, struct km_event *c) if (build_expire(skb, x, c->data.hard) < 0) BUG(); - NETLINK_CB(skb).dst_groups = XFRMGRP_EXPIRE; - - return netlink_broadcast(xfrm_nl, skb, 0, XFRMGRP_EXPIRE, GFP_ATOMIC); + NETLINK_CB(skb).dst_group = XFRMNLGRP_EXPIRE; + return netlink_broadcast(xfrm_nl, skb, 0, XFRMNLGRP_EXPIRE, GFP_ATOMIC); } static int xfrm_notify_sa_flush(struct km_event *c) @@ -1152,9 +1151,8 @@ static int xfrm_notify_sa_flush(struct km_event *c) 
nlh->nlmsg_len = skb->tail - b; - NETLINK_CB(skb).dst_groups = XFRMGRP_SA; - - return netlink_broadcast(xfrm_nl, skb, 0, XFRMGRP_SA, GFP_ATOMIC); + NETLINK_CB(skb).dst_group = XFRMNLGRP_SA; + return netlink_broadcast(xfrm_nl, skb, 0, XFRMNLGRP_SA, GFP_ATOMIC); nlmsg_failure: kfree_skb(skb); @@ -1228,9 +1226,8 @@ static int xfrm_notify_sa(struct xfrm_state *x, struct km_event *c) nlh->nlmsg_len = skb->tail - b; - NETLINK_CB(skb).dst_groups = XFRMGRP_SA; - - return netlink_broadcast(xfrm_nl, skb, 0, XFRMGRP_SA, GFP_ATOMIC); + NETLINK_CB(skb).dst_group = XFRMNLGRP_SA; + return netlink_broadcast(xfrm_nl, skb, 0, XFRMNLGRP_SA, GFP_ATOMIC); nlmsg_failure: rtattr_failure: @@ -1308,9 +1305,8 @@ static int xfrm_send_acquire(struct xfrm_state *x, struct xfrm_tmpl *xt, if (build_acquire(skb, x, xt, xp, dir) < 0) BUG(); - NETLINK_CB(skb).dst_groups = XFRMGRP_ACQUIRE; - - return netlink_broadcast(xfrm_nl, skb, 0, XFRMGRP_ACQUIRE, GFP_ATOMIC); + NETLINK_CB(skb).dst_group = XFRMNLGRP_ACQUIRE; + return netlink_broadcast(xfrm_nl, skb, 0, XFRMNLGRP_ACQUIRE, GFP_ATOMIC); } /* User gives us xfrm_user_policy_info followed by an array of 0 @@ -1409,9 +1405,8 @@ static int xfrm_exp_policy_notify(struct xfrm_policy *xp, int dir, struct km_eve if (build_polexpire(skb, xp, dir, c->data.hard) < 0) BUG(); - NETLINK_CB(skb).dst_groups = XFRMGRP_EXPIRE; - - return netlink_broadcast(xfrm_nl, skb, 0, XFRMGRP_EXPIRE, GFP_ATOMIC); + NETLINK_CB(skb).dst_group = XFRMNLGRP_EXPIRE; + return netlink_broadcast(xfrm_nl, skb, 0, XFRMNLGRP_EXPIRE, GFP_ATOMIC); } static int xfrm_notify_policy(struct xfrm_policy *xp, int dir, struct km_event *c) @@ -1459,9 +1454,8 @@ static int xfrm_notify_policy(struct xfrm_policy *xp, int dir, struct km_event * nlh->nlmsg_len = skb->tail - b; - NETLINK_CB(skb).dst_groups = XFRMGRP_POLICY; - - return netlink_broadcast(xfrm_nl, skb, 0, XFRMGRP_POLICY, GFP_ATOMIC); + NETLINK_CB(skb).dst_group = XFRMNLGRP_POLICY; + return netlink_broadcast(xfrm_nl, skb, 0, XFRMNLGRP_POLICY, 
GFP_ATOMIC); nlmsg_failure: rtattr_failure: @@ -1486,9 +1480,8 @@ static int xfrm_notify_policy_flush(struct km_event *c) nlh->nlmsg_len = skb->tail - b; - NETLINK_CB(skb).dst_groups = XFRMGRP_POLICY; - - return netlink_broadcast(xfrm_nl, skb, 0, XFRMGRP_POLICY, GFP_ATOMIC); + NETLINK_CB(skb).dst_group = XFRMNLGRP_POLICY; + return netlink_broadcast(xfrm_nl, skb, 0, XFRMNLGRP_POLICY, GFP_ATOMIC); nlmsg_failure: kfree_skb(skb); diff --git a/security/selinux/netlink.c b/security/selinux/netlink.c index 0f7be6524555..20f481015db4 100644 --- a/security/selinux/netlink.c +++ b/security/selinux/netlink.c @@ -80,8 +80,8 @@ static void selnl_notify(int msgtype, void *data) nlh = NLMSG_PUT(skb, 0, 0, msgtype, len); selnl_add_payload(nlh, len, msgtype, data); nlh->nlmsg_len = skb->tail - tmp; - NETLINK_CB(skb).dst_groups = SELNL_GRP_AVC; - netlink_broadcast(selnl, skb, 0, SELNL_GRP_AVC, GFP_USER); + NETLINK_CB(skb).dst_group = SELNLGRP_AVC; + netlink_broadcast(selnl, skb, 0, SELNLGRP_AVC, GFP_USER); out: return; From ab33a1711cf60bfb562b1ab89ac9f23d1425e8b1 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Sun, 14 Aug 2005 19:31:36 -0700 Subject: [PATCH 411/584] [NETLINK]: Return -EPROTONOSUPPORT in netlink_create() if no kernel socket is registered This is necessary for dynamic number of netlink groups to make sure we know the number of possible groups before bind() is called. With this change pure userspace communication using unused netlink protocols becomes impossible. Signed-off-by: Patrick McHardy Signed-off-by: David S. 
Miller --- net/netlink/af_netlink.c | 72 ++++++++++++++++++++++++---------------- 1 file changed, 44 insertions(+), 28 deletions(-) diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 3c56b96b4a4b..444ed223ee43 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -102,6 +102,7 @@ struct netlink_table { struct hlist_head mc_list; unsigned int nl_nonroot; struct module *module; + int registered; }; static struct netlink_table *nl_table; @@ -343,11 +344,32 @@ static struct proto netlink_proto = { .obj_size = sizeof(struct netlink_sock), }; -static int netlink_create(struct socket *sock, int protocol) +static int __netlink_create(struct socket *sock, int protocol) { struct sock *sk; struct netlink_sock *nlk; - struct module *module; + + sock->ops = &netlink_ops; + + sk = sk_alloc(PF_NETLINK, GFP_KERNEL, &netlink_proto, 1); + if (!sk) + return -ENOMEM; + + sock_init_data(sock, sk); + + nlk = nlk_sk(sk); + spin_lock_init(&nlk->cb_lock); + init_waitqueue_head(&nlk->wait); + + sk->sk_destruct = netlink_sock_destruct; + sk->sk_protocol = protocol; + return 0; +} + +static int netlink_create(struct socket *sock, int protocol) +{ + struct module *module = NULL; + int err = 0; sock->state = SS_UNCONNECTED; @@ -358,41 +380,33 @@ static int netlink_create(struct socket *sock, int protocol) return -EPROTONOSUPPORT; netlink_lock_table(); - if (!nl_table[protocol].hash.entries) { #ifdef CONFIG_KMOD - /* We do 'best effort'. If we find a matching module, - * it is loaded. If not, we don't return an error to - * allow pure userspace<->userspace communication. 
-HW - */ + if (!nl_table[protocol].registered) { netlink_unlock_table(); request_module("net-pf-%d-proto-%d", PF_NETLINK, protocol); netlink_lock_table(); -#endif } - module = nl_table[protocol].module; - if (!try_module_get(module)) - module = NULL; +#endif + if (nl_table[protocol].registered && + try_module_get(nl_table[protocol].module)) + module = nl_table[protocol].module; + else + err = -EPROTONOSUPPORT; netlink_unlock_table(); - sock->ops = &netlink_ops; + if (err) + goto out; - sk = sk_alloc(PF_NETLINK, GFP_KERNEL, &netlink_proto, 1); - if (!sk) { - module_put(module); - return -ENOMEM; - } + if ((err = __netlink_create(sock, protocol) < 0)) + goto out_module; - sock_init_data(sock, sk); + nlk_sk(sock->sk)->module = module; +out: + return err; - nlk = nlk_sk(sk); - - nlk->module = module; - spin_lock_init(&nlk->cb_lock); - init_waitqueue_head(&nlk->wait); - sk->sk_destruct = netlink_sock_destruct; - - sk->sk_protocol = protocol; - return 0; +out_module: + module_put(module); + goto out; } static int netlink_release(struct socket *sock) @@ -437,6 +451,7 @@ static int netlink_release(struct socket *sock) if (nlk->flags & NETLINK_KERNEL_SOCKET) { netlink_table_grab(); nl_table[sk->sk_protocol].module = NULL; + nl_table[sk->sk_protocol].registered = 0; netlink_table_ungrab(); } @@ -1082,7 +1097,7 @@ netlink_kernel_create(int unit, void (*input)(struct sock *sk, int len), struct if (sock_create_lite(PF_NETLINK, SOCK_DGRAM, unit, &sock)) return NULL; - if (netlink_create(sock, unit) < 0) + if (__netlink_create(sock, unit) < 0) goto out_sock_release; sk = sock->sk; @@ -1098,6 +1113,7 @@ netlink_kernel_create(int unit, void (*input)(struct sock *sk, int len), struct netlink_table_grab(); nl_table[unit].module = module; + nl_table[unit].registered = 1; netlink_table_ungrab(); return sk; From f7fa9b10edbb9391bdd4ec8e8b3d621d0664b198 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Mon, 15 Aug 2005 12:29:13 -0700 Subject: [PATCH 412/584] [NETLINK]: Support dynamic 
number of multicast groups per netlink family Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- net/netlink/af_netlink.c | 71 +++++++++++++++++++++++++++++----------- 1 file changed, 52 insertions(+), 19 deletions(-) diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 444ed223ee43..58d4ca42ac32 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -60,21 +60,24 @@ #include #define Nprintk(a...) +#define NLGRPSZ(x) (ALIGN(x, sizeof(unsigned long) * 8) / 8) struct netlink_sock { /* struct sock has to be the first member of netlink_sock */ struct sock sk; u32 pid; - unsigned int groups; u32 dst_pid; u32 dst_group; + u32 flags; + u32 subscriptions; + u32 ngroups; + unsigned long *groups; unsigned long state; wait_queue_head_t wait; struct netlink_callback *cb; spinlock_t cb_lock; void (*data_ready)(struct sock *sk, int bytes); struct module *module; - u32 flags; }; #define NETLINK_KERNEL_SOCKET 0x1 @@ -101,6 +104,7 @@ struct netlink_table { struct nl_pid_hash hash; struct hlist_head mc_list; unsigned int nl_nonroot; + unsigned int groups; struct module *module; int registered; }; @@ -138,6 +142,7 @@ static void netlink_sock_destruct(struct sock *sk) BUG_TRAP(!atomic_read(&sk->sk_rmem_alloc)); BUG_TRAP(!atomic_read(&sk->sk_wmem_alloc)); BUG_TRAP(!nlk_sk(sk)->cb); + BUG_TRAP(!nlk_sk(sk)->groups); } /* This lock without WQ_FLAG_EXCLUSIVE is good on UP and it is _very_ bad on SMP. 
@@ -333,7 +338,7 @@ static void netlink_remove(struct sock *sk) netlink_table_grab(); if (sk_del_node_init(sk)) nl_table[sk->sk_protocol].hash.entries--; - if (nlk_sk(sk)->groups) + if (nlk_sk(sk)->subscriptions) __sk_del_bind_node(sk); netlink_table_ungrab(); } @@ -369,6 +374,8 @@ static int __netlink_create(struct socket *sock, int protocol) static int netlink_create(struct socket *sock, int protocol) { struct module *module = NULL; + struct netlink_sock *nlk; + unsigned int groups; int err = 0; sock->state = SS_UNCONNECTED; @@ -392,15 +399,23 @@ static int netlink_create(struct socket *sock, int protocol) module = nl_table[protocol].module; else err = -EPROTONOSUPPORT; + groups = nl_table[protocol].groups; netlink_unlock_table(); - if (err) - goto out; - - if ((err = __netlink_create(sock, protocol) < 0)) + if (err || (err = __netlink_create(sock, protocol) < 0)) goto out_module; - nlk_sk(sock->sk)->module = module; + nlk = nlk_sk(sock->sk); + + nlk->groups = kmalloc(NLGRPSZ(groups), GFP_KERNEL); + if (nlk->groups == NULL) { + err = -ENOMEM; + goto out_module; + } + memset(nlk->groups, 0, NLGRPSZ(groups)); + nlk->ngroups = groups; + + nlk->module = module; out: return err; @@ -437,7 +452,7 @@ static int netlink_release(struct socket *sock) skb_queue_purge(&sk->sk_write_queue); - if (nlk->pid && !nlk->groups) { + if (nlk->pid && !nlk->subscriptions) { struct netlink_notify n = { .protocol = sk->sk_protocol, .pid = nlk->pid, @@ -455,6 +470,9 @@ static int netlink_release(struct socket *sock) netlink_table_ungrab(); } + kfree(nlk->groups); + nlk->groups = NULL; + sock_put(sk); return 0; } @@ -503,6 +521,18 @@ static inline int netlink_capable(struct socket *sock, unsigned int flag) capable(CAP_NET_ADMIN); } +static void +netlink_update_subscriptions(struct sock *sk, unsigned int subscriptions) +{ + struct netlink_sock *nlk = nlk_sk(sk); + + if (nlk->subscriptions && !subscriptions) + __sk_del_bind_node(sk); + else if (!nlk->subscriptions && subscriptions) + 
sk_add_bind_node(sk, &nl_table[sk->sk_protocol].mc_list); + nlk->subscriptions = subscriptions; +} + static int netlink_bind(struct socket *sock, struct sockaddr *addr, int addr_len) { struct sock *sk = sock->sk; @@ -528,15 +558,14 @@ static int netlink_bind(struct socket *sock, struct sockaddr *addr, int addr_len return err; } - if (!nladdr->nl_groups && !nlk->groups) + if (!nladdr->nl_groups && !(u32)nlk->groups[0]) return 0; netlink_table_grab(); - if (nlk->groups && !nladdr->nl_groups) - __sk_del_bind_node(sk); - else if (!nlk->groups && nladdr->nl_groups) - sk_add_bind_node(sk, &nl_table[sk->sk_protocol].mc_list); - nlk->groups = nladdr->nl_groups; + netlink_update_subscriptions(sk, nlk->subscriptions + + hweight32(nladdr->nl_groups) - + hweight32(nlk->groups[0])); + nlk->groups[0] = (nlk->groups[0] & ~0xffffffffUL) | nladdr->nl_groups; netlink_table_ungrab(); return 0; @@ -590,7 +619,7 @@ static int netlink_getname(struct socket *sock, struct sockaddr *addr, int *addr nladdr->nl_groups = netlink_group_mask(nlk->dst_group); } else { nladdr->nl_pid = nlk->pid; - nladdr->nl_groups = nlk->groups; + nladdr->nl_groups = nlk->groups[0]; } return 0; } @@ -791,7 +820,8 @@ static inline int do_one_broadcast(struct sock *sk, if (p->exclude_sk == sk) goto out; - if (nlk->pid == p->pid || !(nlk->groups & netlink_group_mask(p->group))) + if (nlk->pid == p->pid || p->group - 1 >= nlk->ngroups || + !test_bit(p->group - 1, nlk->groups)) goto out; if (p->failure) { @@ -887,7 +917,8 @@ static inline int do_one_set_err(struct sock *sk, if (sk == p->exclude_sk) goto out; - if (nlk->pid == p->pid || !(nlk->groups & netlink_group_mask(p->group))) + if (nlk->pid == p->pid || p->group - 1 >= nlk->ngroups || + !test_bit(p->group - 1, nlk->groups)) goto out; sk->sk_err = p->code; @@ -1112,6 +1143,7 @@ netlink_kernel_create(int unit, void (*input)(struct sock *sk, int len), struct nlk->flags |= NETLINK_KERNEL_SOCKET; netlink_table_grab(); + nl_table[unit].groups = 32; 
nl_table[unit].module = module; nl_table[unit].registered = 1; netlink_table_ungrab(); @@ -1358,7 +1390,8 @@ static int netlink_seq_show(struct seq_file *seq, void *v) s, s->sk_protocol, nlk->pid, - nlk->groups, + nlk->flags & NETLINK_KERNEL_SOCKET ? + 0 : (unsigned int)nlk->groups[0], atomic_read(&s->sk_rmem_alloc), atomic_read(&s->sk_wmem_alloc), nlk->cb, From 9a4595bc7e67962f13232ee55a64e063062c3a99 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Mon, 15 Aug 2005 12:32:15 -0700 Subject: [PATCH 413/584] [NETLINK]: Add set/getsockopt options to support more than 32 groups NETLINK_ADD_MEMBERSHIP/NETLINK_DROP_MEMBERSHIP are used to join/leave groups, NETLINK_PKTINFO is used to enable nl_pktinfo control messages for received packets to get the extended destination group number. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/linux/netlink.h | 9 ++++ include/linux/socket.h | 1 + net/netlink/af_netlink.c | 95 +++++++++++++++++++++++++++++++++++++++- 3 files changed, 103 insertions(+), 2 deletions(-) diff --git a/include/linux/netlink.h b/include/linux/netlink.h index c724c9d4984a..36a40449f9f1 100644 --- a/include/linux/netlink.h +++ b/include/linux/netlink.h @@ -90,6 +90,15 @@ struct nlmsgerr struct nlmsghdr msg; }; +#define NETLINK_ADD_MEMBERSHIP 1 +#define NETLINK_DROP_MEMBERSHIP 2 +#define NETLINK_PKTINFO 3 + +struct nl_pktinfo +{ + __u32 group; +}; + #define NET_MAJOR 36 /* Major 36 is reserved for networking */ enum { diff --git a/include/linux/socket.h b/include/linux/socket.h index ddf22559f484..acc55aac8a43 100644 --- a/include/linux/socket.h +++ b/include/linux/socket.h @@ -272,6 +272,7 @@ struct ucred { #define SOL_NETBEUI 267 #define SOL_LLC 268 #define SOL_DCCP 269 +#define SOL_NETLINK 270 /* IPX options */ #define IPX_TYPE 1 diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 58d4ca42ac32..47e791738014 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -81,6 +81,7 @@ struct 
netlink_sock { }; #define NETLINK_KERNEL_SOCKET 0x1 +#define NETLINK_RECV_PKTINFO 0x2 static inline struct netlink_sock *nlk_sk(struct sock *sk) { @@ -946,6 +947,94 @@ void netlink_set_err(struct sock *ssk, u32 pid, u32 group, int code) read_unlock(&nl_table_lock); } +static int netlink_setsockopt(struct socket *sock, int level, int optname, + char __user *optval, int optlen) +{ + struct sock *sk = sock->sk; + struct netlink_sock *nlk = nlk_sk(sk); + int val = 0, err; + + if (level != SOL_NETLINK) + return -ENOPROTOOPT; + + if (optlen >= sizeof(int) && + get_user(val, (int __user *)optval)) + return -EFAULT; + + switch (optname) { + case NETLINK_PKTINFO: + if (val) + nlk->flags |= NETLINK_RECV_PKTINFO; + else + nlk->flags &= ~NETLINK_RECV_PKTINFO; + err = 0; + break; + case NETLINK_ADD_MEMBERSHIP: + case NETLINK_DROP_MEMBERSHIP: { + unsigned int subscriptions; + int old, new = optname == NETLINK_ADD_MEMBERSHIP ? 1 : 0; + + if (!netlink_capable(sock, NL_NONROOT_RECV)) + return -EPERM; + if (!val || val - 1 >= nlk->ngroups) + return -EINVAL; + netlink_table_grab(); + old = test_bit(val - 1, nlk->groups); + subscriptions = nlk->subscriptions - old + new; + if (new) + __set_bit(val - 1, nlk->groups); + else + __clear_bit(val - 1, nlk->groups); + netlink_update_subscriptions(sk, subscriptions); + netlink_table_ungrab(); + err = 0; + break; + } + default: + err = -ENOPROTOOPT; + } + return err; +} + +static int netlink_getsockopt(struct socket *sock, int level, int optname, + char __user *optval, int __user *optlen) +{ + struct sock *sk = sock->sk; + struct netlink_sock *nlk = nlk_sk(sk); + int len, val, err; + + if (level != SOL_NETLINK) + return -ENOPROTOOPT; + + if (get_user(len, optlen)) + return -EFAULT; + if (len < 0) + return -EINVAL; + + switch (optname) { + case NETLINK_PKTINFO: + if (len < sizeof(int)) + return -EINVAL; + len = sizeof(int); + val = nlk->flags & NETLINK_RECV_PKTINFO ? 
1 : 0; + put_user(len, optlen); + put_user(val, optval); + err = 0; + break; + default: + err = -ENOPROTOOPT; + } + return err; +} + +static void netlink_cmsg_recv_pktinfo(struct msghdr *msg, struct sk_buff *skb) +{ + struct nl_pktinfo info; + + info.group = NETLINK_CB(skb).dst_group; + put_cmsg(msg, SOL_NETLINK, NETLINK_PKTINFO, sizeof(info), &info); +} + static inline void netlink_rcv_wake(struct sock *sk) { struct netlink_sock *nlk = nlk_sk(sk); @@ -1091,6 +1180,8 @@ static int netlink_recvmsg(struct kiocb *kiocb, struct socket *sock, netlink_dump(sk); scm_recv(sock, msg, siocb->scm, flags); + if (nlk->flags & NETLINK_RECV_PKTINFO) + netlink_cmsg_recv_pktinfo(msg, skb); out: netlink_rcv_wake(sk); @@ -1465,8 +1556,8 @@ static struct proto_ops netlink_ops = { .ioctl = sock_no_ioctl, .listen = sock_no_listen, .shutdown = sock_no_shutdown, - .setsockopt = sock_no_setsockopt, - .getsockopt = sock_no_getsockopt, + .setsockopt = netlink_setsockopt, + .getsockopt = netlink_getsockopt, .sendmsg = netlink_sendmsg, .recvmsg = netlink_recvmsg, .mmap = sock_no_mmap, From 066286071d3542243baa68166acb779187c848b3 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Mon, 15 Aug 2005 12:33:26 -0700 Subject: [PATCH 414/584] [NETLINK]: Add "groups" argument to netlink_kernel_create Signed-off-by: Patrick McHardy Signed-off-by: David S. 
Miller --- drivers/w1/w1_int.c | 2 +- include/linux/netlink.h | 2 +- kernel/audit.c | 2 +- lib/kobject_uevent.c | 2 +- net/bridge/netfilter/ebt_ulog.c | 3 ++- net/core/rtnetlink.c | 3 ++- net/decnet/netfilter/dn_rtmsg.c | 4 ++-- net/ipv4/fib_frontend.c | 2 +- net/ipv4/inet_diag.c | 2 +- net/ipv4/netfilter/ip_queue.c | 2 +- net/ipv4/netfilter/ipt_ULOG.c | 3 ++- net/ipv6/netfilter/ip6_queue.c | 3 ++- net/netfilter/nfnetlink.c | 4 ++-- net/netlink/af_netlink.c | 6 ++++-- net/xfrm/xfrm_user.c | 4 ++-- security/selinux/netlink.c | 3 ++- 16 files changed, 27 insertions(+), 20 deletions(-) diff --git a/drivers/w1/w1_int.c b/drivers/w1/w1_int.c index f3f339d057f9..498ad505fa5f 100644 --- a/drivers/w1/w1_int.c +++ b/drivers/w1/w1_int.c @@ -88,7 +88,7 @@ static struct w1_master * w1_alloc_dev(u32 id, int slave_count, int slave_ttl, dev->groups = 1; dev->seq = 1; - dev->nls = netlink_kernel_create(NETLINK_W1, NULL, THIS_MODULE); + dev->nls = netlink_kernel_create(NETLINK_W1, 1, NULL, THIS_MODULE); if (!dev->nls) { printk(KERN_ERR "Failed to create new netlink socket(%u) for w1 master %s.\n", NETLINK_NFLOG, dev->dev.bus_id); diff --git a/include/linux/netlink.h b/include/linux/netlink.h index 36a40449f9f1..7d1d9683b246 100644 --- a/include/linux/netlink.h +++ b/include/linux/netlink.h @@ -125,7 +125,7 @@ struct netlink_skb_parms #define NETLINK_CREDS(skb) (&NETLINK_CB((skb)).creds) -extern struct sock *netlink_kernel_create(int unit, void (*input)(struct sock *sk, int len), struct module *module); +extern struct sock *netlink_kernel_create(int unit, unsigned int groups, void (*input)(struct sock *sk, int len), struct module *module); extern void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err); extern int netlink_unicast(struct sock *ssk, struct sk_buff *skb, __u32 pid, int nonblock); extern int netlink_broadcast(struct sock *ssk, struct sk_buff *skb, __u32 pid, diff --git a/kernel/audit.c b/kernel/audit.c index ed4019563d56..7f0699790d46 100644 --- 
a/kernel/audit.c +++ b/kernel/audit.c @@ -514,7 +514,7 @@ static int __init audit_init(void) { printk(KERN_INFO "audit: initializing netlink socket (%s)\n", audit_default ? "enabled" : "disabled"); - audit_sock = netlink_kernel_create(NETLINK_AUDIT, audit_receive, + audit_sock = netlink_kernel_create(NETLINK_AUDIT, 0, audit_receive, THIS_MODULE); if (!audit_sock) audit_panic("cannot initialize netlink socket"); diff --git a/lib/kobject_uevent.c b/lib/kobject_uevent.c index 1ebd735d6439..04ca4429ddfa 100644 --- a/lib/kobject_uevent.c +++ b/lib/kobject_uevent.c @@ -154,7 +154,7 @@ EXPORT_SYMBOL_GPL(kobject_uevent_atomic); static int __init kobject_uevent_init(void) { - uevent_sock = netlink_kernel_create(NETLINK_KOBJECT_UEVENT, NULL, + uevent_sock = netlink_kernel_create(NETLINK_KOBJECT_UEVENT, 1, NULL, THIS_MODULE); if (!uevent_sock) { diff --git a/net/bridge/netfilter/ebt_ulog.c b/net/bridge/netfilter/ebt_ulog.c index 6845b5dd6d77..aae26ae2e61f 100644 --- a/net/bridge/netfilter/ebt_ulog.c +++ b/net/bridge/netfilter/ebt_ulog.c @@ -258,7 +258,8 @@ static int __init init(void) spin_lock_init(&ulog_buffers[i].lock); } - ebtulognl = netlink_kernel_create(NETLINK_NFLOG, NULL, THIS_MODULE); + ebtulognl = netlink_kernel_create(NETLINK_NFLOG, EBT_ULOG_MAXNLGROUPS, + NULL, THIS_MODULE); if (!ebtulognl) ret = -ENOMEM; else if ((ret = ebt_register_watcher(&ulog))) diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 5f3f95b5585d..9bed7569ce3f 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -708,7 +708,8 @@ void __init rtnetlink_init(void) if (!rta_buf) panic("rtnetlink_init: cannot allocate rta_buf\n"); - rtnl = netlink_kernel_create(NETLINK_ROUTE, rtnetlink_rcv, THIS_MODULE); + rtnl = netlink_kernel_create(NETLINK_ROUTE, RTNLGRP_MAX, rtnetlink_rcv, + THIS_MODULE); if (rtnl == NULL) panic("rtnetlink_init: cannot initialize rtnetlink\n"); netlink_set_nonroot(NETLINK_ROUTE, NL_NONROOT_RECV); diff --git a/net/decnet/netfilter/dn_rtmsg.c 
b/net/decnet/netfilter/dn_rtmsg.c index 353fed6888f9..afb33a25ea55 100644 --- a/net/decnet/netfilter/dn_rtmsg.c +++ b/net/decnet/netfilter/dn_rtmsg.c @@ -138,8 +138,8 @@ static int __init init(void) { int rv = 0; - dnrmg = netlink_kernel_create(NETLINK_DNRTMSG, dnrmg_receive_user_sk, - THIS_MODULE); + dnrmg = netlink_kernel_create(NETLINK_DNRTMSG, DNRNG_NLGRP_MAX, + dnrmg_receive_user_sk, THIS_MODULE); if (dnrmg == NULL) { printk(KERN_ERR "dn_rtmsg: Cannot create netlink socket"); return -ENOMEM; diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index d4e7b578a25d..4e1379f71269 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -566,7 +566,7 @@ static void nl_fib_input(struct sock *sk, int len) static void nl_fib_lookup_init(void) { - netlink_kernel_create(NETLINK_FIB_LOOKUP, nl_fib_input, THIS_MODULE); + netlink_kernel_create(NETLINK_FIB_LOOKUP, 0, nl_fib_input, THIS_MODULE); } static void fib_disable_ip(struct net_device *dev, int force) diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c index 1880ad8575d8..71f3c7350c6e 100644 --- a/net/ipv4/inet_diag.c +++ b/net/ipv4/inet_diag.c @@ -845,7 +845,7 @@ static int __init inet_diag_init(void) goto out; memset(inet_diag_table, 0, inet_diag_table_size); - idiagnl = netlink_kernel_create(NETLINK_INET_DIAG, inet_diag_rcv, + idiagnl = netlink_kernel_create(NETLINK_INET_DIAG, 0, inet_diag_rcv, THIS_MODULE); if (idiagnl == NULL) goto out_free_table; diff --git a/net/ipv4/netfilter/ip_queue.c b/net/ipv4/netfilter/ip_queue.c index 7f2bcc7198fa..d54f14d926f6 100644 --- a/net/ipv4/netfilter/ip_queue.c +++ b/net/ipv4/netfilter/ip_queue.c @@ -671,7 +671,7 @@ init_or_cleanup(int init) goto cleanup; netlink_register_notifier(&ipq_nl_notifier); - ipqnl = netlink_kernel_create(NETLINK_FIREWALL, ipq_rcv_sk, + ipqnl = netlink_kernel_create(NETLINK_FIREWALL, 0, ipq_rcv_sk, THIS_MODULE); if (ipqnl == NULL) { printk(KERN_ERR "ip_queue: failed to create netlink socket\n"); diff --git 
a/net/ipv4/netfilter/ipt_ULOG.c b/net/ipv4/netfilter/ipt_ULOG.c index 89816b83455e..e2c14f3cb2fc 100644 --- a/net/ipv4/netfilter/ipt_ULOG.c +++ b/net/ipv4/netfilter/ipt_ULOG.c @@ -388,7 +388,8 @@ static int __init init(void) ulog_buffers[i].timer.data = i; } - nflognl = netlink_kernel_create(NETLINK_NFLOG, NULL, THIS_MODULE); + nflognl = netlink_kernel_create(NETLINK_NFLOG, ULOG_MAXNLGROUPS, NULL, + THIS_MODULE); if (!nflognl) return -ENOMEM; diff --git a/net/ipv6/netfilter/ip6_queue.c b/net/ipv6/netfilter/ip6_queue.c index 446764545b10..aa11cf366efa 100644 --- a/net/ipv6/netfilter/ip6_queue.c +++ b/net/ipv6/netfilter/ip6_queue.c @@ -667,7 +667,8 @@ init_or_cleanup(int init) goto cleanup; netlink_register_notifier(&ipq_nl_notifier); - ipqnl = netlink_kernel_create(NETLINK_IP6_FW, ipq_rcv_sk, THIS_MODULE); + ipqnl = netlink_kernel_create(NETLINK_IP6_FW, 0, ipq_rcv_sk, + THIS_MODULE); if (ipqnl == NULL) { printk(KERN_ERR "ip6_queue: failed to create netlink socket\n"); goto cleanup_netlink_notifier; diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c index 36a4c5fbb7d7..e089f17bb803 100644 --- a/net/netfilter/nfnetlink.c +++ b/net/netfilter/nfnetlink.c @@ -355,8 +355,8 @@ int __init nfnetlink_init(void) { printk("Netfilter messages via NETLINK v%s.\n", nfversion); - nfnl = netlink_kernel_create(NETLINK_NETFILTER, nfnetlink_rcv, - THIS_MODULE); + nfnl = netlink_kernel_create(NETLINK_NETFILTER, NFNLGRP_MAX, + nfnetlink_rcv, THIS_MODULE); if (!nfnl) { printk(KERN_ERR "cannot initialize nfnetlink!\n"); return -1; diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 47e791738014..e259f46e26f7 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -1204,7 +1204,9 @@ static void netlink_data_ready(struct sock *sk, int len) */ struct sock * -netlink_kernel_create(int unit, void (*input)(struct sock *sk, int len), struct module *module) +netlink_kernel_create(int unit, unsigned int groups, + void (*input)(struct sock *sk, int 
len), + struct module *module) { struct socket *sock; struct sock *sk; @@ -1234,7 +1236,7 @@ netlink_kernel_create(int unit, void (*input)(struct sock *sk, int len), struct nlk->flags |= NETLINK_KERNEL_SOCKET; netlink_table_grab(); - nl_table[unit].groups = 32; + nl_table[unit].groups = groups < 32 ? 32 : groups; nl_table[unit].module = module; nl_table[unit].registered = 1; netlink_table_ungrab(); diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index 0579d209af27..c35336a0f71b 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -1520,8 +1520,8 @@ static int __init xfrm_user_init(void) { printk(KERN_INFO "Initializing IPsec netlink socket\n"); - xfrm_nl = netlink_kernel_create(NETLINK_XFRM, xfrm_netlink_rcv, - THIS_MODULE); + xfrm_nl = netlink_kernel_create(NETLINK_XFRM, XFRMNLGRP_MAX, + xfrm_netlink_rcv, THIS_MODULE); if (xfrm_nl == NULL) return -ENOMEM; diff --git a/security/selinux/netlink.c b/security/selinux/netlink.c index 20f481015db4..e203883406dd 100644 --- a/security/selinux/netlink.c +++ b/security/selinux/netlink.c @@ -104,7 +104,8 @@ void selnl_notify_policyload(u32 seqno) static int __init selnl_init(void) { - selnl = netlink_kernel_create(NETLINK_SELINUX, NULL, THIS_MODULE); + selnl = netlink_kernel_create(NETLINK_SELINUX, SELNLGRP_MAX, NULL, + THIS_MODULE); if (selnl == NULL) panic("SELinux: Cannot create netlink socket."); netlink_set_nonroot(NETLINK_SELINUX, NL_NONROOT_RECV); From 216efaaaa006d2f3ecbb5bbc2b6673423813254e Mon Sep 17 00:00:00 2001 From: James Morris Date: Mon, 15 Aug 2005 20:34:48 -0700 Subject: [PATCH 415/584] [SELINUX]: Update for tcp_diag rename to inet_diag. Also, support dccp sockets. Signed-off-by: James Morris Signed-off-by: David S. 
Miller --- security/selinux/hooks.c | 2 +- security/selinux/nlmsgtab.c | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index 2253f388234f..8641f8894b4c 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -659,7 +659,7 @@ static inline u16 socket_type_to_security_class(int family, int type, int protoc return SECCLASS_NETLINK_ROUTE_SOCKET; case NETLINK_FIREWALL: return SECCLASS_NETLINK_FIREWALL_SOCKET; - case NETLINK_TCPDIAG: + case NETLINK_INET_DIAG: return SECCLASS_NETLINK_TCPDIAG_SOCKET; case NETLINK_NFLOG: return SECCLASS_NETLINK_NFLOG_SOCKET; diff --git a/security/selinux/nlmsgtab.c b/security/selinux/nlmsgtab.c index 92b057becb4b..69b9329b2054 100644 --- a/security/selinux/nlmsgtab.c +++ b/security/selinux/nlmsgtab.c @@ -16,7 +16,7 @@ #include #include #include -#include +#include #include #include @@ -76,6 +76,7 @@ static struct nlmsg_perm nlmsg_firewall_perms[] = static struct nlmsg_perm nlmsg_tcpdiag_perms[] = { { TCPDIAG_GETSOCK, NETLINK_TCPDIAG_SOCKET__NLMSG_READ }, + { DCCPDIAG_GETSOCK, NETLINK_TCPDIAG_SOCKET__NLMSG_READ }, }; static struct nlmsg_perm nlmsg_xfrm_perms[] = From 63a1222b1fd79c52491c14534b086bffbfaed8bf Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Mon, 15 Aug 2005 20:35:44 -0700 Subject: [PATCH 416/584] [DECNET]: Fix build after netlink changes. Signed-off-by: Andrew Morton Signed-off-by: David S. 
Miller --- net/decnet/netfilter/dn_rtmsg.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/net/decnet/netfilter/dn_rtmsg.c b/net/decnet/netfilter/dn_rtmsg.c index afb33a25ea55..1ab94c6e22ed 100644 --- a/net/decnet/netfilter/dn_rtmsg.c +++ b/net/decnet/netfilter/dn_rtmsg.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include @@ -71,10 +72,10 @@ static void dnrmg_send_peer(struct sk_buff *skb) switch(flags & DN_RT_CNTL_MSK) { case DN_RT_PKT_L1RT: - group = DNRMG_L1_NLGRP; + group = DNRNG_NLGRP_L1; break; case DN_RT_PKT_L2RT: - group = DNRMG_L2_NLGRP; + group = DNRNG_NLGRP_L2; break; default: return; From 9deff7f2365958c5c5aa8cb5a0dd651c4dd83f8f Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Mon, 15 Aug 2005 21:13:25 -0700 Subject: [PATCH 417/584] [RXRPC]: Fix build failure introduced by skb->stamp changes. Signed-off-by: Andrew Morton Signed-off-by: David S. Miller --- net/rxrpc/transport.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/rxrpc/transport.c b/net/rxrpc/transport.c index 9bce7794130a..122c086ee2db 100644 --- a/net/rxrpc/transport.c +++ b/net/rxrpc/transport.c @@ -330,7 +330,7 @@ static int rxrpc_incoming_msg(struct rxrpc_transport *trans, msg->trans = trans; msg->state = RXRPC_MSG_RECEIVED; - msg->stamp = pkt->stamp; + skb_get_timestamp(pkt, &msg->stamp); if (msg->stamp.tv_sec == 0) { do_gettimeofday(&msg->stamp); if (pkt->sk) From 20380731bc2897f2952ae055420972ded4cd786e Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 16 Aug 2005 02:18:02 -0300 Subject: [PATCH 418/584] [NET]: Fix sparse warnings Of this type, mostly: CHECK net/ipv6/netfilter.c net/ipv6/netfilter.c:96:12: warning: symbol 'ipv6_netfilter_init' was not declared. Should it be static? net/ipv6/netfilter.c:101:6: warning: symbol 'ipv6_netfilter_fini' was not declared. Should it be static? Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. 
Miller --- include/linux/if_ether.h | 2 ++ include/linux/if_frad.h | 6 +++-- include/linux/if_tr.h | 4 +++ include/linux/igmp.h | 3 +++ include/linux/net.h | 7 ++++++ include/linux/netdevice.h | 10 ++++++++ include/linux/netfilter_ipv6.h | 4 +-- include/linux/security.h | 6 +++-- include/linux/skbuff.h | 2 ++ include/linux/socket.h | 7 ++++++ include/net/addrconf.h | 6 +++++ include/net/af_unix.h | 15 ++++++++++++ include/net/icmp.h | 7 ++++++ include/net/ip.h | 23 ++++++++++++++++++ include/net/ip_fib.h | 5 ++++ include/net/ipv6.h | 37 +++++++++++++++++++++++++--- include/net/p8022.h | 2 ++ include/net/raw.h | 7 +++++- include/net/route.h | 2 ++ include/net/sock.h | 12 +++++++++ include/net/tcp.h | 12 +++++++++ include/net/udp.h | 5 ++++ init/main.c | 2 +- kernel/sysctl.c | 4 +-- net/802/p8023.c | 1 + net/802/sysctl_net_802.c | 5 ++-- net/core/dev.c | 6 ----- net/core/sysctl_net_core.c | 9 ++----- net/core/utils.c | 2 ++ net/core/wireless.c | 4 --- net/ethernet/eth.c | 2 -- net/ethernet/sysctl_net_ether.c | 1 + net/ipv4/af_inet.c | 14 ----------- net/ipv4/datagram.c | 1 + net/ipv4/inetpeer.c | 1 + net/ipv4/ip_sockglue.c | 2 -- net/ipv4/proc.c | 3 --- net/ipv4/syncookies.c | 2 -- net/ipv4/sysctl_net_ipv4.c | 43 ++++++--------------------------- net/ipv4/tcp_input.c | 2 +- net/ipv4/tcp_ipv4.c | 2 +- net/ipv6/addrconf.c | 4 +-- net/ipv6/af_inet6.c | 24 ------------------ net/ipv6/ipv6_sockglue.c | 8 ------ net/ipv6/route.c | 6 ++--- net/ipv6/sit.c | 2 +- net/ipv6/sysctl_net_ipv6.c | 3 --- net/ipv6/tcp_ipv6.c | 4 --- net/ipv6/udp.c | 2 -- net/ipx/af_ipx.c | 2 -- net/socket.c | 11 ++++----- net/sysctl_net.c | 8 +++--- net/unix/af_unix.c | 8 ------ net/unix/sysctl_net_unix.c | 2 +- 54 files changed, 210 insertions(+), 164 deletions(-) diff --git a/include/linux/if_ether.h b/include/linux/if_ether.h index b5b58e9c054c..fc2d4c8225aa 100644 --- a/include/linux/if_ether.h +++ b/include/linux/if_ether.h @@ -110,6 +110,8 @@ static inline struct ethhdr *eth_hdr(const struct 
sk_buff *skb) { return (struct ethhdr *)skb->mac.raw; } + +extern struct ctl_table ether_table[]; #endif #endif /* _LINUX_IF_ETHER_H */ diff --git a/include/linux/if_frad.h b/include/linux/if_frad.h index 3c94b1736570..511999c7eeda 100644 --- a/include/linux/if_frad.h +++ b/include/linux/if_frad.h @@ -191,10 +191,12 @@ struct frad_local int buffer; /* current buffer for S508 firmware */ }; -extern void dlci_ioctl_set(int (*hook)(unsigned int, void __user *)); - #endif /* __KERNEL__ */ #endif /* CONFIG_DLCI || CONFIG_DLCI_MODULE */ +#ifdef __KERNEL__ +extern void dlci_ioctl_set(int (*hook)(unsigned int, void __user *)); +#endif + #endif diff --git a/include/linux/if_tr.h b/include/linux/if_tr.h index 3fba9e2f5427..5502f597cf0e 100644 --- a/include/linux/if_tr.h +++ b/include/linux/if_tr.h @@ -43,12 +43,16 @@ struct trh_hdr { }; #ifdef __KERNEL__ +#include #include static inline struct trh_hdr *tr_hdr(const struct sk_buff *skb) { return (struct trh_hdr *)skb->mac.raw; } +#ifdef CONFIG_SYSCTL +extern struct ctl_table tr_table[]; +#endif #endif /* This is an Token-Ring LLC structure */ diff --git a/include/linux/igmp.h b/include/linux/igmp.h index 0c31ef0b5bad..28f4f3b36950 100644 --- a/include/linux/igmp.h +++ b/include/linux/igmp.h @@ -129,6 +129,9 @@ struct igmpv3_query { #include #include +extern int sysctl_igmp_max_memberships; +extern int sysctl_igmp_max_msf; + struct ip_sf_socklist { unsigned int sl_max; diff --git a/include/linux/net.h b/include/linux/net.h index 5f8b632ff653..4e981585a89a 100644 --- a/include/linux/net.h +++ b/include/linux/net.h @@ -286,5 +286,12 @@ static struct proto_ops name##_ops = { \ #define MODULE_ALIAS_NET_PF_PROTO(pf, proto) \ MODULE_ALIAS("net-pf-" __stringify(pf) "-proto-" __stringify(proto)) +#ifdef CONFIG_SYSCTL +#include +extern ctl_table net_table[]; +extern int net_msg_cost; +extern int net_msg_burst; +#endif + #endif /* __KERNEL__ */ #endif /* _LINUX_NET_H */ diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h 
index d8e52edfd526..1fcaa88b8625 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -244,6 +244,7 @@ struct netdev_boot_setup { }; #define NETDEV_BOOT_SETUP_MAX 8 +extern int __init netdev_boot_setup(char *str); /* * The DEVICE structure. @@ -673,6 +674,7 @@ extern void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev); extern void dev_init(void); extern int netdev_nit; +extern int netdev_budget; /* Called by rtnetlink.c:rtnl_unlock() */ extern void netdev_run_todo(void); @@ -908,6 +910,14 @@ extern int skb_checksum_help(struct sk_buff *skb, int inward); extern void net_enable_timestamp(void); extern void net_disable_timestamp(void); +#ifdef CONFIG_PROC_FS +extern void *dev_seq_start(struct seq_file *seq, loff_t *pos); +extern void *dev_seq_next(struct seq_file *seq, void *v, loff_t *pos); +extern void dev_seq_stop(struct seq_file *seq, void *v); +#endif + +extern void linkwatch_run_queue(void); + #endif /* __KERNEL__ */ #endif /* _LINUX_DEV_H */ diff --git a/include/linux/netfilter_ipv6.h b/include/linux/netfilter_ipv6.h index 5d204ee7a312..edcc2c6eb5c7 100644 --- a/include/linux/netfilter_ipv6.h +++ b/include/linux/netfilter_ipv6.h @@ -71,7 +71,7 @@ enum nf_ip6_hook_priorities { NF_IP6_PRI_LAST = INT_MAX, }; -int ipv6_netfilter_init(void); -void ipv6_netfilter_fini(void); +extern int ipv6_netfilter_init(void); +extern void ipv6_netfilter_fini(void); #endif /*__LINUX_IP6_NETFILTER_H*/ diff --git a/include/linux/security.h b/include/linux/security.h index b42095a68b1c..7aab6ab7c57f 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -2727,7 +2727,8 @@ static inline int security_socket_getpeersec(struct socket *sock, char __user *o return security_ops->socket_getpeersec(sock, optval, optlen, len); } -static inline int security_sk_alloc(struct sock *sk, int family, int priority) +static inline int security_sk_alloc(struct sock *sk, int family, + unsigned int __nocast priority) { return 
security_ops->sk_alloc_security(sk, family, priority); } @@ -2844,7 +2845,8 @@ static inline int security_socket_getpeersec(struct socket *sock, char __user *o return -ENOPROTOOPT; } -static inline int security_sk_alloc(struct sock *sk, int family, int priority) +static inline int security_sk_alloc(struct sock *sk, int family, + unsigned int __nocast priority) { return 0; } diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 32635c401d4d..db10335e4192 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -1203,6 +1203,8 @@ extern void skb_copy_and_csum_dev(const struct sk_buff *skb, u8 *to); extern void skb_split(struct sk_buff *skb, struct sk_buff *skb1, const u32 len); +extern void skb_release_data(struct sk_buff *skb); + static inline void *skb_header_pointer(const struct sk_buff *skb, int offset, int len, void *buffer) { diff --git a/include/linux/socket.h b/include/linux/socket.h index acc55aac8a43..1739c2d5b95b 100644 --- a/include/linux/socket.h +++ b/include/linux/socket.h @@ -26,6 +26,13 @@ struct __kernel_sockaddr_storage { #include /* pid_t */ #include /* __user */ +extern int sysctl_somaxconn; +extern void sock_init(void); +#ifdef CONFIG_PROC_FS +struct seq_file; +extern void socket_seq_show(struct seq_file *seq); +#endif + typedef unsigned short sa_family_t; /* diff --git a/include/net/addrconf.h b/include/net/addrconf.h index a0ed93672176..750e2508dd90 100644 --- a/include/net/addrconf.h +++ b/include/net/addrconf.h @@ -45,6 +45,7 @@ struct prefix_info { #ifdef __KERNEL__ +#include #include #include #include @@ -238,5 +239,10 @@ static inline int ipv6_addr_is_ll_all_routers(const struct in6_addr *addr) addr->s6_addr32[3] == htonl(0x00000002)); } +#ifdef CONFIG_PROC_FS +extern int if6_proc_init(void); +extern void if6_proc_exit(void); +#endif + #endif #endif diff --git a/include/net/af_unix.h b/include/net/af_unix.h index b60b3846b9d1..b5d785ab4a0e 100644 --- a/include/net/af_unix.h +++ b/include/net/af_unix.h @@ -1,5 +1,11 
@@ #ifndef __LINUX_NET_AFUNIX_H #define __LINUX_NET_AFUNIX_H + +#include +#include +#include +#include + extern void unix_inflight(struct file *fp); extern void unix_notinflight(struct file *fp); extern void unix_gc(void); @@ -74,5 +80,14 @@ struct unix_sock { wait_queue_head_t peer_wait; }; #define unix_sk(__sk) ((struct unix_sock *)__sk) + +#ifdef CONFIG_SYSCTL +extern int sysctl_unix_max_dgram_qlen; +extern void unix_sysctl_register(void); +extern void unix_sysctl_unregister(void); +#else +static inline void unix_sysctl_register(void) {} +static inline void unix_sysctl_unregister(void) {} +#endif #endif #endif diff --git a/include/net/icmp.h b/include/net/icmp.h index e5ef0d15fb45..6cdebeee5f96 100644 --- a/include/net/icmp.h +++ b/include/net/icmp.h @@ -57,4 +57,11 @@ static inline struct raw_sock *raw_sk(const struct sock *sk) return (struct raw_sock *)sk; } +extern int sysctl_icmp_echo_ignore_all; +extern int sysctl_icmp_echo_ignore_broadcasts; +extern int sysctl_icmp_ignore_bogus_error_responses; +extern int sysctl_icmp_errors_use_inbound_ifaddr; +extern int sysctl_icmp_ratelimit; +extern int sysctl_icmp_ratemask; + #endif /* _ICMP_H */ diff --git a/include/net/ip.h b/include/net/ip.h index c16fb6ac3446..7623e414a5fb 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -163,6 +163,24 @@ extern int sysctl_local_port_range[2]; extern int sysctl_ip_default_ttl; extern int sysctl_ip_nonlocal_bind; +/* From ip_fragment.c */ +extern int sysctl_ipfrag_high_thresh; +extern int sysctl_ipfrag_low_thresh; +extern int sysctl_ipfrag_time; +extern int sysctl_ipfrag_secret_interval; + +/* From inetpeer.c */ +extern int inet_peer_threshold; +extern int inet_peer_minttl; +extern int inet_peer_maxttl; +extern int inet_peer_gc_mintime; +extern int inet_peer_gc_maxtime; + +/* From ip_output.c */ +extern int sysctl_ip_dynaddr; + +extern void ipfrag_init(void); + #ifdef CONFIG_INET /* The function in 2.2 was invalid, producing wrong result for * check=0xFEFF. 
It was noticed by Arthur Skawina _year_ ago. --ANK(000625) */ @@ -348,5 +366,10 @@ int ipv4_doint_and_flush_strategy(ctl_table *table, int __user *name, int nlen, void __user *oldval, size_t __user *oldlenp, void __user *newval, size_t newlen, void **context); +#ifdef CONFIG_PROC_FS +extern int ip_misc_proc_init(void); +#endif + +extern struct ctl_table ipv4_table[]; #endif /* _IP_H */ diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h index a4208a336ac0..14de4ebd1211 100644 --- a/include/net/ip_fib.h +++ b/include/net/ip_fib.h @@ -295,4 +295,9 @@ static inline void fib_res_put(struct fib_result *res) #endif } +#ifdef CONFIG_PROC_FS +extern int fib_proc_init(void); +extern void fib_proc_exit(void); +#endif + #endif /* _NET_FIB_H */ diff --git a/include/net/ipv6.h b/include/net/ipv6.h index c5a02ddc594a..3203eaff4bd4 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -104,6 +104,7 @@ struct frag_hdr { #ifdef __KERNEL__ +#include #include /* sysctls */ @@ -464,8 +465,38 @@ extern int sysctl_ip6frag_low_thresh; extern int sysctl_ip6frag_time; extern int sysctl_ip6frag_secret_interval; +extern struct proto_ops inet6_stream_ops; +extern struct proto_ops inet6_dgram_ops; + +extern int ip6_mc_source(int add, int omode, struct sock *sk, + struct group_source_req *pgsr); +extern int ip6_mc_msfilter(struct sock *sk, struct group_filter *gsf); +extern int ip6_mc_msfget(struct sock *sk, struct group_filter *gsf, + struct group_filter __user *optval, + int __user *optlen); + +#ifdef CONFIG_PROC_FS +extern int ac6_proc_init(void); +extern void ac6_proc_exit(void); +extern int raw6_proc_init(void); +extern void raw6_proc_exit(void); +extern int tcp6_proc_init(void); +extern void tcp6_proc_exit(void); +extern int udp6_proc_init(void); +extern void udp6_proc_exit(void); +extern int ipv6_misc_proc_init(void); +extern void ipv6_misc_proc_exit(void); + +extern struct rt6_statistics rt6_stats; +#endif + +#ifdef CONFIG_SYSCTL +extern ctl_table ipv6_route_table[]; +extern 
ctl_table ipv6_icmp_table[]; + +extern void ipv6_sysctl_register(void); +extern void ipv6_sysctl_unregister(void); +#endif + #endif /* __KERNEL__ */ #endif /* _NET_IPV6_H */ - - - diff --git a/include/net/p8022.h b/include/net/p8022.h index 223f8fa9ffca..42e9fac51b31 100644 --- a/include/net/p8022.h +++ b/include/net/p8022.h @@ -8,4 +8,6 @@ extern struct datalink_proto * struct net_device *orig_dev)); extern void unregister_8022_client(struct datalink_proto *proto); +extern struct datalink_proto *make_8023_client(void); +extern void destroy_8023_client(struct datalink_proto *dl); #endif diff --git a/include/net/raw.h b/include/net/raw.h index 1c4bc3e6809f..f47917469b12 100644 --- a/include/net/raw.h +++ b/include/net/raw.h @@ -17,10 +17,10 @@ #ifndef _RAW_H #define _RAW_H +#include extern struct proto raw_prot; - extern void raw_err(struct sock *, struct sk_buff *, u32 info); extern int raw_rcv(struct sock *, struct sk_buff *); @@ -39,4 +39,9 @@ extern struct sock *__raw_v4_lookup(struct sock *sk, unsigned short num, extern int raw_v4_input(struct sk_buff *skb, struct iphdr *iph, int hash); +#ifdef CONFIG_PROC_FS +extern int raw_proc_init(void); +extern void raw_proc_exit(void); +#endif + #endif /* _RAW_H */ diff --git a/include/net/route.h b/include/net/route.h index 63c94558236d..dbe79ca67d31 100644 --- a/include/net/route.h +++ b/include/net/route.h @@ -195,4 +195,6 @@ static inline struct inet_peer *rt_get_peer(struct rtable *rt) return rt->peer; } +extern ctl_table ipv4_route_table[]; + #endif /* _ROUTE_H */ diff --git a/include/net/sock.h b/include/net/sock.h index d59428877078..14183883e8e6 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -1370,4 +1370,16 @@ static inline int siocdevprivate_ioctl(unsigned int fd, unsigned int cmd, unsign } #endif +extern void sk_init(void); + +#ifdef CONFIG_SYSCTL +extern struct ctl_table core_table[]; +extern int sysctl_optmem_max; +#endif + +#ifdef CONFIG_PROC_FS +extern __u32 sysctl_wmem_default; +extern __u32 
sysctl_rmem_default; +#endif + #endif /* _SOCK_H */ diff --git a/include/net/tcp.h b/include/net/tcp.h index d958260af23c..d6bcf1317a6a 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1183,4 +1183,16 @@ struct tcp_iter_state { extern int tcp_proc_register(struct tcp_seq_afinfo *afinfo); extern void tcp_proc_unregister(struct tcp_seq_afinfo *afinfo); +extern struct request_sock_ops tcp_request_sock_ops; + +extern int tcp_v4_destroy_sock(struct sock *sk); + +#ifdef CONFIG_PROC_FS +extern int tcp4_proc_init(void); +extern void tcp4_proc_exit(void); +#endif + +extern void tcp_v4_init(struct net_proto_family *ops); +extern void tcp_init(void); + #endif /* _TCP_H */ diff --git a/include/net/udp.h b/include/net/udp.h index ac229b761dbc..107b9d791a1f 100644 --- a/include/net/udp.h +++ b/include/net/udp.h @@ -94,6 +94,11 @@ struct udp_iter_state { struct seq_operations seq_ops; }; +#ifdef CONFIG_PROC_FS extern int udp_proc_register(struct udp_seq_afinfo *afinfo); extern void udp_proc_unregister(struct udp_seq_afinfo *afinfo); + +extern int udp4_proc_init(void); +extern void udp4_proc_exit(void); +#endif #endif /* _UDP_H */ diff --git a/init/main.c b/init/main.c index c9c311cf1771..ff410063e4e1 100644 --- a/init/main.c +++ b/init/main.c @@ -47,6 +47,7 @@ #include #include #include +#include #include #include @@ -80,7 +81,6 @@ static int init(void *); extern void init_IRQ(void); -extern void sock_init(void); extern void fork_init(unsigned long); extern void mca_init(void); extern void sbus_init(void); diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 3e0bbee549ea..8e56e2495542 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -31,6 +31,7 @@ #include #include #include +#include #include #include #include @@ -136,9 +137,6 @@ static struct ctl_table_header root_table_header = static ctl_table kern_table[]; static ctl_table vm_table[]; -#ifdef CONFIG_NET -extern ctl_table net_table[]; -#endif static ctl_table proc_table[]; static ctl_table fs_table[]; static 
ctl_table debug_table[]; diff --git a/net/802/p8023.c b/net/802/p8023.c index a0b61b40225f..6368d3dce444 100644 --- a/net/802/p8023.c +++ b/net/802/p8023.c @@ -20,6 +20,7 @@ #include #include +#include /* * Place an 802.3 header on a packet. The driver will do the mac diff --git a/net/802/sysctl_net_802.c b/net/802/sysctl_net_802.c index 36079630c49f..700129556c13 100644 --- a/net/802/sysctl_net_802.c +++ b/net/802/sysctl_net_802.c @@ -10,9 +10,10 @@ * 2 of the License, or (at your option) any later version. */ -#include -#include #include +#include +#include +#include #ifdef CONFIG_TR extern int sysctl_tr_rif_timeout; diff --git a/net/core/dev.c b/net/core/dev.c index a3ed53cc4af8..c01511e3d0c1 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -267,10 +267,6 @@ void dev_add_pack(struct packet_type *pt) spin_unlock_bh(&ptype_lock); } -extern void linkwatch_run_queue(void); - - - /** * __dev_remove_pack - remove packet handler * @pt: packet type declaration @@ -1133,8 +1129,6 @@ static inline int illegal_highdma(struct net_device *dev, struct sk_buff *skb) #define illegal_highdma(dev, skb) (0) #endif -extern void skb_release_data(struct sk_buff *); - /* Keep head the same: replace data */ int __skb_linearize(struct sk_buff *skb, unsigned int __nocast gfp_mask) { diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c index 8f817ad9f546..2f278c8e4743 100644 --- a/net/core/sysctl_net_core.c +++ b/net/core/sysctl_net_core.c @@ -9,23 +9,18 @@ #include #include #include +#include +#include #ifdef CONFIG_SYSCTL extern int netdev_max_backlog; -extern int netdev_budget; extern int weight_p; -extern int net_msg_cost; -extern int net_msg_burst; extern __u32 sysctl_wmem_max; extern __u32 sysctl_rmem_max; -extern __u32 sysctl_wmem_default; -extern __u32 sysctl_rmem_default; extern int sysctl_core_destroy_delay; -extern int sysctl_optmem_max; -extern int sysctl_somaxconn; #ifdef CONFIG_NET_DIVERT extern char sysctl_divert_version[]; diff --git a/net/core/utils.c 
b/net/core/utils.c index 88eb8b68e26b..7b5970fc9e40 100644 --- a/net/core/utils.c +++ b/net/core/utils.c @@ -16,7 +16,9 @@ #include #include #include +#include #include +#include #include #include #include diff --git a/net/core/wireless.c b/net/core/wireless.c index 19fa6a5389b3..5caae2399f3a 100644 --- a/net/core/wireless.c +++ b/net/core/wireless.c @@ -571,10 +571,6 @@ static int wireless_seq_show(struct seq_file *seq, void *v) return 0; } -extern void *dev_seq_start(struct seq_file *seq, loff_t *pos); -extern void *dev_seq_next(struct seq_file *seq, void *v, loff_t *pos); -extern void dev_seq_stop(struct seq_file *seq, void *v); - static struct seq_operations wireless_seq_ops = { .start = dev_seq_start, .next = dev_seq_next, diff --git a/net/ethernet/eth.c b/net/ethernet/eth.c index f444a2f2675f..87a052a9a84f 100644 --- a/net/ethernet/eth.c +++ b/net/ethernet/eth.c @@ -62,8 +62,6 @@ #include #include -extern int __init netdev_boot_setup(char *str); - __setup("ether=", netdev_boot_setup); /* diff --git a/net/ethernet/sysctl_net_ether.c b/net/ethernet/sysctl_net_ether.c index b81a6d532342..66b39fc342d2 100644 --- a/net/ethernet/sysctl_net_ether.c +++ b/net/ethernet/sysctl_net_ether.c @@ -7,6 +7,7 @@ #include #include +#include ctl_table ether_table[] = { {0} diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 20f52b5f5dea..5810f9d14914 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -859,10 +859,6 @@ static struct net_proto_family inet_family_ops = { .owner = THIS_MODULE, }; - -extern void tcp_init(void); -extern void tcp_v4_init(struct net_proto_family *); - /* Upon startup we insert all the elements in inetsw_array[] into * the linked list inetsw. 
*/ @@ -1132,7 +1128,6 @@ static int __init init_ipv4_mibs(void) } static int ipv4_proc_init(void); -extern void ipfrag_init(void); /* * IP protocol layer initialiser @@ -1253,19 +1248,10 @@ module_init(inet_init); /* ------------------------------------------------------------------------ */ #ifdef CONFIG_PROC_FS -extern int fib_proc_init(void); -extern void fib_proc_exit(void); #ifdef CONFIG_IP_FIB_TRIE extern int fib_stat_proc_init(void); extern void fib_stat_proc_exit(void); #endif -extern int ip_misc_proc_init(void); -extern int raw_proc_init(void); -extern void raw_proc_exit(void); -extern int tcp4_proc_init(void); -extern void tcp4_proc_exit(void); -extern int udp4_proc_init(void); -extern void udp4_proc_exit(void); static int __init ipv4_proc_init(void) { diff --git a/net/ipv4/datagram.c b/net/ipv4/datagram.c index 3fd49f4282ac..c1b42b5257f8 100644 --- a/net/ipv4/datagram.c +++ b/net/ipv4/datagram.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #include #include diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c index 3c513ceaca76..4410b9dc03e9 100644 --- a/net/ipv4/inetpeer.c +++ b/net/ipv4/inetpeer.c @@ -20,6 +20,7 @@ #include #include #include +#include #include /* diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index ddb1aedbdc6d..aca088b3707a 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c @@ -614,7 +614,6 @@ int ip_setsockopt(struct sock *sk, int level, int optname, char __user *optval, } case IP_MSFILTER: { - extern int sysctl_optmem_max; extern int sysctl_igmp_max_msf; struct ip_msfilter *msf; @@ -769,7 +768,6 @@ int ip_setsockopt(struct sock *sk, int level, int optname, char __user *optval, } case MCAST_MSFILTER: { - extern int sysctl_optmem_max; extern int sysctl_igmp_max_msf; struct sockaddr_in *psin; struct ip_msfilter *msf = NULL; diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c index 3eadbb271871..f7943ba1f43c 100644 --- a/net/ipv4/proc.c +++ b/net/ipv4/proc.c @@ -59,9 +59,6 @@ static int 
fold_prot_inuse(struct proto *proto) */ static int sockstat_seq_show(struct seq_file *seq, void *v) { - /* From net/socket.c */ - extern void socket_seq_show(struct seq_file *seq); - socket_seq_show(seq); seq_printf(seq, "TCP: inuse %d orphan %d tw %d alloc %d mem %d\n", fold_prot_inuse(&tcp_prot), atomic_read(&tcp_orphan_count), diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c index 8692cb9d4bdb..a34e60ea48a1 100644 --- a/net/ipv4/syncookies.c +++ b/net/ipv4/syncookies.c @@ -169,8 +169,6 @@ static inline int cookie_check(struct sk_buff *skb, __u32 cookie) return mssind < NUM_MSS ? msstab[mssind] + 1 : 0; } -extern struct request_sock_ops tcp_request_sock_ops; - static inline struct sock *get_cookie_sock(struct sock *sk, struct sk_buff *skb, struct request_sock *req, struct dst_entry *dst) diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index ce47a345ecc5..652685623519 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -11,7 +11,9 @@ #include #include #include +#include #include +#include #include #include #include @@ -19,36 +21,6 @@ /* From af_inet.c */ extern int sysctl_ip_nonlocal_bind; -/* From icmp.c */ -extern int sysctl_icmp_echo_ignore_all; -extern int sysctl_icmp_echo_ignore_broadcasts; -extern int sysctl_icmp_ignore_bogus_error_responses; -extern int sysctl_icmp_errors_use_inbound_ifaddr; - -/* From ip_fragment.c */ -extern int sysctl_ipfrag_low_thresh; -extern int sysctl_ipfrag_high_thresh; -extern int sysctl_ipfrag_time; -extern int sysctl_ipfrag_secret_interval; - -/* From ip_output.c */ -extern int sysctl_ip_dynaddr; - -/* From icmp.c */ -extern int sysctl_icmp_ratelimit; -extern int sysctl_icmp_ratemask; - -/* From igmp.c */ -extern int sysctl_igmp_max_memberships; -extern int sysctl_igmp_max_msf; - -/* From inetpeer.c */ -extern int inet_peer_threshold; -extern int inet_peer_minttl; -extern int inet_peer_maxttl; -extern int inet_peer_gc_mintime; -extern int inet_peer_gc_maxtime; - #ifdef 
CONFIG_SYSCTL static int tcp_retr1_max = 255; static int ip_local_port_range_min[] = { 1, 1 }; @@ -57,8 +29,6 @@ static int ip_local_port_range_max[] = { 65535, 65535 }; struct ipv4_config ipv4_config; -extern ctl_table ipv4_route_table[]; - #ifdef CONFIG_SYSCTL static @@ -136,10 +106,11 @@ static int proc_tcp_congestion_control(ctl_table *ctl, int write, struct file * return ret; } -int sysctl_tcp_congestion_control(ctl_table *table, int __user *name, int nlen, - void __user *oldval, size_t __user *oldlenp, - void __user *newval, size_t newlen, - void **context) +static int sysctl_tcp_congestion_control(ctl_table *table, int __user *name, + int nlen, void __user *oldval, + size_t __user *oldlenp, + void __user *newval, size_t newlen, + void **context) { char val[TCP_CA_NAME_MAX]; ctl_table tbl = { diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index ebb8654e3dee..1afb080bdf0c 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -4229,7 +4229,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb, */ if (tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr && !tp->srtt) - tcp_ack_saw_tstamp(sk, 0, 0); + tcp_ack_saw_tstamp(sk, NULL, 0); if (tp->rx_opt.tstamp_ok) tp->advmss -= TCPOLEN_TSTAMP_ALIGNED; diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 97bbf595230d..13dfb391cdf1 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -66,6 +66,7 @@ #include #include #include +#include #include #include #include @@ -76,7 +77,6 @@ #include #include -extern int sysctl_ip_dynaddr; int sysctl_tcp_tw_reuse; int sysctl_tcp_low_latency; diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 493abf94bcfc..937ad32db77c 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -1126,7 +1126,7 @@ void addrconf_leave_solict(struct inet6_dev *idev, struct in6_addr *addr) __ipv6_dev_mc_dec(idev, &maddr); } -void addrconf_join_anycast(struct inet6_ifaddr *ifp) +static void addrconf_join_anycast(struct inet6_ifaddr *ifp) { struct 
in6_addr addr; ipv6_addr_prefix(&addr, &ifp->addr, ifp->prefix_len); @@ -1135,7 +1135,7 @@ void addrconf_join_anycast(struct inet6_ifaddr *ifp) ipv6_dev_ac_inc(ifp->idev->dev, &addr); } -void addrconf_leave_anycast(struct inet6_ifaddr *ifp) +static void addrconf_leave_anycast(struct inet6_ifaddr *ifp) { struct in6_addr addr; ipv6_addr_prefix(&addr, &ifp->addr, ifp->prefix_len); diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index 7df2ccb380d9..4f8795af2edb 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -67,23 +67,6 @@ MODULE_AUTHOR("Cast of dozens"); MODULE_DESCRIPTION("IPv6 protocol stack for Linux"); MODULE_LICENSE("GPL"); -/* IPv6 procfs goodies... */ - -#ifdef CONFIG_PROC_FS -extern int raw6_proc_init(void); -extern void raw6_proc_exit(void); -extern int tcp6_proc_init(void); -extern void tcp6_proc_exit(void); -extern int udp6_proc_init(void); -extern void udp6_proc_exit(void); -extern int ipv6_misc_proc_init(void); -extern void ipv6_misc_proc_exit(void); -extern int ac6_proc_init(void); -extern void ac6_proc_exit(void); -extern int if6_proc_init(void); -extern void if6_proc_exit(void); -#endif - int sysctl_ipv6_bindv6only; /* The inetsw table contains everything that inet_create needs to @@ -505,11 +488,6 @@ static struct net_proto_family inet6_family_ops = { .owner = THIS_MODULE, }; -#ifdef CONFIG_SYSCTL -extern void ipv6_sysctl_register(void); -extern void ipv6_sysctl_unregister(void); -#endif - /* Same as inet6_dgram_ops, sans udp_poll. 
*/ static struct proto_ops inet6_sockraw_ops = { .family = PF_INET6, @@ -676,8 +654,6 @@ static void cleanup_ipv6_mibs(void) snmp6_mib_free((void **)udp_stats_in6); } -extern int ipv6_misc_proc_init(void); - static int __init inet6_init(void) { struct sk_buff *dummy_skb; diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index 76fe23925d77..7516b8829a9d 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c @@ -109,13 +109,6 @@ int ip6_ra_control(struct sock *sk, int sel, void (*destructor)(struct sock *)) return 0; } -extern int ip6_mc_source(int add, int omode, struct sock *sk, - struct group_source_req *pgsr); -extern int ip6_mc_msfilter(struct sock *sk, struct group_filter *gsf); -extern int ip6_mc_msfget(struct sock *sk, struct group_filter *gsf, - struct group_filter __user *optval, int __user *optlen); - - int ipv6_setsockopt(struct sock *sk, int level, int optname, char __user *optval, int optlen) { @@ -446,7 +439,6 @@ done: } case MCAST_MSFILTER: { - extern int sysctl_optmem_max; extern int sysctl_mld_max_msf; struct group_filter *gsf; diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 6ea494ab4e02..5d5bbb49ec78 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -1372,7 +1372,7 @@ int ipv6_route_ioctl(unsigned int cmd, void __user *arg) * Drop the packet on the floor */ -int ip6_pkt_discard(struct sk_buff *skb) +static int ip6_pkt_discard(struct sk_buff *skb) { IP6_INC_STATS(IPSTATS_MIB_OUTNOROUTES); icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_NOROUTE, 0, skb->dev); @@ -1380,7 +1380,7 @@ int ip6_pkt_discard(struct sk_buff *skb) return 0; } -int ip6_pkt_discard_out(struct sk_buff *skb) +static int ip6_pkt_discard_out(struct sk_buff *skb) { skb->dev = skb->dst->dev; return ip6_pkt_discard(skb); @@ -1960,8 +1960,6 @@ static int rt6_proc_info(char *buffer, char **start, off_t offset, int length) return arg.len; } -extern struct rt6_statistics rt6_stats; - static int rt6_stats_seq_show(struct seq_file *seq, void *v) { 
seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n", diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index e553e5b80d6e..c3123c9e1a8e 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -770,7 +770,7 @@ static int ipip6_tunnel_init(struct net_device *dev) return 0; } -int __init ipip6_fb_tunnel_init(struct net_device *dev) +static int __init ipip6_fb_tunnel_init(struct net_device *dev) { struct ip_tunnel *tunnel = dev->priv; struct iphdr *iph = &tunnel->parms.iph; diff --git a/net/ipv6/sysctl_net_ipv6.c b/net/ipv6/sysctl_net_ipv6.c index 3a18e0e6ffed..8eff9fa1e983 100644 --- a/net/ipv6/sysctl_net_ipv6.c +++ b/net/ipv6/sysctl_net_ipv6.c @@ -14,9 +14,6 @@ #include #include -extern ctl_table ipv6_route_table[]; -extern ctl_table ipv6_icmp_table[]; - #ifdef CONFIG_SYSCTL static ctl_table ipv6_table[] = { diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index fb291b81cf63..794734f1d230 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1910,8 +1910,6 @@ static int tcp_v6_init_sock(struct sock *sk) static int tcp_v6_destroy_sock(struct sock *sk) { - extern int tcp_v4_destroy_sock(struct sock *sk); - tcp_v4_destroy_sock(sk); return inet6_destroy_sock(sk); } @@ -2123,8 +2121,6 @@ static struct inet6_protocol tcpv6_protocol = { .flags = INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL, }; -extern struct proto_ops inet6_stream_ops; - static struct inet_protosw tcpv6_protosw = { .type = SOCK_STREAM, .protocol = IPPROTO_TCP, diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index c348307e5773..67d9a04b6902 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -1054,8 +1054,6 @@ struct proto udpv6_prot = { .obj_size = sizeof(struct udp6_sock), }; -extern struct proto_ops inet6_dgram_ops; - static struct inet_protosw udpv6_protosw = { .type = SOCK_DGRAM, .protocol = IPPROTO_UDP, diff --git a/net/ipx/af_ipx.c b/net/ipx/af_ipx.c index 180e383f707c..34b3bb868409 100644 --- a/net/ipx/af_ipx.c +++ b/net/ipx/af_ipx.c @@ -1940,9 +1940,7 @@ static struct notifier_block ipx_dev_notifier 
= { }; extern struct datalink_proto *make_EII_client(void); -extern struct datalink_proto *make_8023_client(void); extern void destroy_EII_client(struct datalink_proto *); -extern void destroy_8023_client(struct datalink_proto *); static unsigned char ipx_8022_type = 0xE0; static unsigned char ipx_snap_id[5] = { 0x0, 0x0, 0x0, 0x81, 0x37 }; diff --git a/net/socket.c b/net/socket.c index 5f76ab8a1594..ce69b7862f59 100644 --- a/net/socket.c +++ b/net/socket.c @@ -70,6 +70,8 @@ #include #include #include +#include +#include #include #include #include @@ -724,8 +726,8 @@ static ssize_t sock_aio_write(struct kiocb *iocb, const char __user *ubuf, return __sock_sendmsg(iocb, sock, &x->async_msg, size); } -ssize_t sock_sendpage(struct file *file, struct page *page, - int offset, size_t size, loff_t *ppos, int more) +static ssize_t sock_sendpage(struct file *file, struct page *page, + int offset, size_t size, loff_t *ppos, int more) { struct socket *sock; int flags; @@ -948,7 +950,7 @@ static int sock_mmap(struct file * file, struct vm_area_struct * vma) return sock->ops->mmap(file, sock, vma); } -int sock_close(struct inode *inode, struct file *filp) +static int sock_close(struct inode *inode, struct file *filp) { /* * It was possible the inode is NULL we were @@ -2027,9 +2029,6 @@ int sock_unregister(int family) return 0; } - -extern void sk_init(void); - void __init sock_init(void) { /* diff --git a/net/sysctl_net.c b/net/sysctl_net.c index 3f6e31069c54..c5241fcbb966 100644 --- a/net/sysctl_net.c +++ b/net/sysctl_net.c @@ -17,17 +17,15 @@ #include #ifdef CONFIG_INET -extern struct ctl_table ipv4_table[]; +#include #endif -extern struct ctl_table core_table[]; - #ifdef CONFIG_NET -extern struct ctl_table ether_table[]; +#include #endif #ifdef CONFIG_TR -extern struct ctl_table tr_table[]; +#include #endif struct ctl_table net_table[] = { diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index bc4c44552c1f..41feca3bef86 100644 --- a/net/unix/af_unix.c +++ 
b/net/unix/af_unix.c @@ -2026,14 +2026,6 @@ static struct net_proto_family unix_family_ops = { .owner = THIS_MODULE, }; -#ifdef CONFIG_SYSCTL -extern void unix_sysctl_register(void); -extern void unix_sysctl_unregister(void); -#else -static inline void unix_sysctl_register(void) {} -static inline void unix_sysctl_unregister(void) {} -#endif - static int __init af_unix_init(void) { int rc = -1; diff --git a/net/unix/sysctl_net_unix.c b/net/unix/sysctl_net_unix.c index c974dac4580a..690ffa5d5bfb 100644 --- a/net/unix/sysctl_net_unix.c +++ b/net/unix/sysctl_net_unix.c @@ -12,7 +12,7 @@ #include #include -extern int sysctl_unix_max_dgram_qlen; +#include static ctl_table unix_table[] = { { From 6ed8a48582c08432e84e5610564c1d25fe00dd7f Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 16 Aug 2005 19:02:15 -0300 Subject: [PATCH 419/584] [NETLINK]: Fix sparse warnings Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. Miller --- include/linux/netlink.h | 2 +- net/netlink/af_netlink.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/include/linux/netlink.h b/include/linux/netlink.h index 7d1d9683b246..167518668936 100644 --- a/include/linux/netlink.h +++ b/include/linux/netlink.h @@ -129,7 +129,7 @@ extern struct sock *netlink_kernel_create(int unit, unsigned int groups, void (* extern void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err); extern int netlink_unicast(struct sock *ssk, struct sk_buff *skb, __u32 pid, int nonblock); extern int netlink_broadcast(struct sock *ssk, struct sk_buff *skb, __u32 pid, - __u32 group, int allocation); + __u32 group, unsigned int __nocast allocation); extern void netlink_set_err(struct sock *ssk, __u32 pid, __u32 group, int code); extern int netlink_register_notifier(struct notifier_block *nb); extern int netlink_unregister_notifier(struct notifier_block *nb); diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index e259f46e26f7..62435ffc6184 100644 --- 
a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -861,7 +861,7 @@ out: } int netlink_broadcast(struct sock *ssk, struct sk_buff *skb, u32 pid, - u32 group, int allocation) + u32 group, unsigned int __nocast allocation) { struct netlink_broadcast_data info; struct hlist_node *node; From 4c6ea29d82e0d1b9b37e6b879e0a7fd6c409333d Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 16 Aug 2005 19:46:48 -0300 Subject: [PATCH 420/584] [IP]: Introduce ip_options_get_from_user This variant is needed to satisfy sparse __user annotations. Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. Miller --- include/net/ip.h | 5 ++++- net/ipv4/ip_options.c | 49 +++++++++++++++++++++++++++++------------- net/ipv4/ip_sockglue.c | 4 ++-- 3 files changed, 40 insertions(+), 18 deletions(-) diff --git a/include/net/ip.h b/include/net/ip.h index 7623e414a5fb..e4563bbee6ea 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -335,7 +335,10 @@ extern void ip_options_build(struct sk_buff *skb, struct ip_options *opt, u32 da extern int ip_options_echo(struct ip_options *dopt, struct sk_buff *skb); extern void ip_options_fragment(struct sk_buff *skb); extern int ip_options_compile(struct ip_options *opt, struct sk_buff *skb); -extern int ip_options_get(struct ip_options **optp, unsigned char *data, int optlen, int user); +extern int ip_options_get(struct ip_options **optp, + unsigned char *data, int optlen); +extern int ip_options_get_from_user(struct ip_options **optp, + unsigned char __user *data, int optlen); extern void ip_options_undo(struct ip_options * opt); extern void ip_forward_options(struct sk_buff *skb); extern int ip_options_rcv_srr(struct sk_buff *skb); diff --git a/net/ipv4/ip_options.c b/net/ipv4/ip_options.c index 7e02ba584079..bce4e875193b 100644 --- a/net/ipv4/ip_options.c +++ b/net/ipv4/ip_options.c @@ -489,23 +489,18 @@ void ip_options_undo(struct ip_options * opt) } } -int ip_options_get(struct ip_options **optp, unsigned char 
*data, int optlen, int user) +static struct ip_options *ip_options_get_alloc(const int optlen) { - struct ip_options *opt; + struct ip_options *opt = kmalloc(sizeof(*opt) + ((optlen + 3) & ~3), + GFP_KERNEL); + if (opt) + memset(opt, 0, sizeof(*opt)); + return opt; +} - opt = kmalloc(sizeof(struct ip_options)+((optlen+3)&~3), GFP_KERNEL); - if (!opt) - return -ENOMEM; - memset(opt, 0, sizeof(struct ip_options)); - if (optlen) { - if (user) { - if (copy_from_user(opt->__data, data, optlen)) { - kfree(opt); - return -EFAULT; - } - } else - memcpy(opt->__data, data, optlen); - } +static int ip_options_get_finish(struct ip_options **optp, + struct ip_options *opt, int optlen) +{ while (optlen & 3) opt->__data[optlen++] = IPOPT_END; opt->optlen = optlen; @@ -521,6 +516,30 @@ int ip_options_get(struct ip_options **optp, unsigned char *data, int optlen, in return 0; } +int ip_options_get_from_user(struct ip_options **optp, unsigned char __user *data, int optlen) +{ + struct ip_options *opt = ip_options_get_alloc(optlen); + + if (!opt) + return -ENOMEM; + if (optlen && copy_from_user(opt->__data, data, optlen)) { + kfree(opt); + return -EFAULT; + } + return ip_options_get_finish(optp, opt, optlen); +} + +int ip_options_get(struct ip_options **optp, unsigned char *data, int optlen) +{ + struct ip_options *opt = ip_options_get_alloc(optlen); + + if (!opt) + return -ENOMEM; + if (optlen) + memcpy(opt->__data, data, optlen); + return ip_options_get_finish(optp, opt, optlen); +} + void ip_forward_options(struct sk_buff *skb) { struct ip_options * opt = &(IPCB(skb)->opt); diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index aca088b3707a..2f0b47da5b37 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c @@ -153,7 +153,7 @@ int ip_cmsg_send(struct msghdr *msg, struct ipcm_cookie *ipc) switch (cmsg->cmsg_type) { case IP_RETOPTS: err = cmsg->cmsg_len - CMSG_ALIGN(sizeof(struct cmsghdr)); - err = ip_options_get(&ipc->opt, CMSG_DATA(cmsg), err < 40 ? 
err : 40, 0); + err = ip_options_get(&ipc->opt, CMSG_DATA(cmsg), err < 40 ? err : 40); if (err) return err; break; @@ -425,7 +425,7 @@ int ip_setsockopt(struct sock *sk, int level, int optname, char __user *optval, struct ip_options * opt = NULL; if (optlen > 40 || optlen < 0) goto e_inval; - err = ip_options_get(&opt, optval, optlen, 1); + err = ip_options_get_from_user(&opt, optval, optlen); if (err) break; if (sk->sk_type == SOCK_STREAM) { From bf73d1c5d726ac717755efc7e15d2a86dd383448 Mon Sep 17 00:00:00 2001 From: Adrian Bunk Date: Tue, 16 Aug 2005 20:45:45 -0700 Subject: [PATCH 421/584] [IRDA]: Possible cleanups. This patch contains the following possible cleanups: - make the following needlessly global function static: - irnet/irnet_ppp.c: irnet_init - remove the following unneeded EXPORT_SYMBOL's: - irlmp.c: sysctl_discovery_timeout - irlmp.c: irlmp_reasons - irlmp.c: irlmp_dup - irqueue.c: hashbin_find_next Signed-off-by: Adrian Bunk Signed-off-by: Andrew Morton Signed-off-by: David S. 
Miller --- net/irda/irlmp.c | 3 --- net/irda/irnet/irnet.h | 3 --- net/irda/irnet/irnet_ppp.c | 2 +- net/irda/irqueue.c | 1 - 4 files changed, 1 insertion(+), 8 deletions(-) diff --git a/net/irda/irlmp.c b/net/irda/irlmp.c index 7a4a4d7fbe66..c19e9ce05a3a 100644 --- a/net/irda/irlmp.c +++ b/net/irda/irlmp.c @@ -53,7 +53,6 @@ struct irlmp_cb *irlmp = NULL; /* These can be altered by the sysctl interface */ int sysctl_discovery = 0; int sysctl_discovery_timeout = 3; /* 3 seconds by default */ -EXPORT_SYMBOL(sysctl_discovery_timeout); int sysctl_discovery_slots = 6; /* 6 slots by default */ int sysctl_lap_keepalive_time = LM_IDLE_TIMEOUT * 1000 / HZ; char sysctl_devname[65]; @@ -67,7 +66,6 @@ const char *irlmp_reasons[] = { "LM_INIT_DISCONNECT", "ERROR, NOT USED", }; -EXPORT_SYMBOL(irlmp_reasons); /* * Function irlmp_init (void) @@ -675,7 +673,6 @@ struct lsap_cb *irlmp_dup(struct lsap_cb *orig, void *instance) return new; } -EXPORT_SYMBOL(irlmp_dup); /* * Function irlmp_disconnect_request (handle, userdata) diff --git a/net/irda/irnet/irnet.h b/net/irda/irnet/irnet.h index 9004f7349a76..b391cb3893d4 100644 --- a/net/irda/irnet/irnet.h +++ b/net/irda/irnet/irnet.h @@ -517,9 +517,6 @@ extern int irda_irnet_init(void); /* Initialise IrDA part of IrNET */ extern void irda_irnet_cleanup(void); /* Teardown IrDA part of IrNET */ -/* ---------------------------- MODULE ---------------------------- */ -extern int - irnet_init(void); /* Initialise IrNET module */ /**************************** VARIABLES ****************************/ diff --git a/net/irda/irnet/irnet_ppp.c b/net/irda/irnet/irnet_ppp.c index f8f984bb9922..e53bf9e0053e 100644 --- a/net/irda/irnet/irnet_ppp.c +++ b/net/irda/irnet/irnet_ppp.c @@ -1107,7 +1107,7 @@ ppp_irnet_cleanup(void) /* * Module main entry point */ -int __init +static int __init irnet_init(void) { int err; diff --git a/net/irda/irqueue.c b/net/irda/irqueue.c index b0dd3ea35999..1ba8c7106639 100644 --- a/net/irda/irqueue.c +++ 
b/net/irda/irqueue.c @@ -822,7 +822,6 @@ void* hashbin_find_next( hashbin_t* hashbin, long hashv, const char* name, return entry; } -EXPORT_SYMBOL(hashbin_find_next); /* * Function hashbin_get_first (hashbin) From ba602a816132dcc66e875dddf2c62512a9f6f8cb Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 16 Aug 2005 20:50:16 -0700 Subject: [PATCH 422/584] [IPVS]: Rename tcp_{init,exit}() --> ip_vs_tcp_{init,exit}() Conflicts with global namespace functions with the same name. Signed-off-by: David S. Miller --- net/ipv4/ipvs/ip_vs_proto_tcp.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/net/ipv4/ipvs/ip_vs_proto_tcp.c b/net/ipv4/ipvs/ip_vs_proto_tcp.c index e65de675da74..c19408973c09 100644 --- a/net/ipv4/ipvs/ip_vs_proto_tcp.c +++ b/net/ipv4/ipvs/ip_vs_proto_tcp.c @@ -604,14 +604,14 @@ void ip_vs_tcp_conn_listen(struct ip_vs_conn *cp) } -static void tcp_init(struct ip_vs_protocol *pp) +static void ip_vs_tcp_init(struct ip_vs_protocol *pp) { IP_VS_INIT_HASH_TABLE(tcp_apps); pp->timeout_table = tcp_timeouts; } -static void tcp_exit(struct ip_vs_protocol *pp) +static void ip_vs_tcp_exit(struct ip_vs_protocol *pp) { } @@ -621,8 +621,8 @@ struct ip_vs_protocol ip_vs_protocol_tcp = { .protocol = IPPROTO_TCP, .dont_defrag = 0, .appcnt = ATOMIC_INIT(0), - .init = tcp_init, - .exit = tcp_exit, + .init = ip_vs_tcp_init, + .exit = ip_vs_tcp_exit, .register_app = tcp_register_app, .unregister_app = tcp_unregister_app, .conn_schedule = tcp_conn_schedule, From e92ae93a8aa66aea12935420cb22d4df1c18d023 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 17 Aug 2005 03:10:59 -0300 Subject: [PATCH 423/584] [DCCP]: Send SYNCACK packets in response to SYNC packets Also fix step 6 when receiving SYNC or SYNCACK packets, i.e. we were not using the updated swl. Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. 
Miller --- net/dccp/dccp.h | 3 ++- net/dccp/input.c | 33 +++++++++++++++++++++++++-------- net/dccp/ipv4.c | 18 ++++++++++-------- net/dccp/output.c | 5 +++-- 4 files changed, 40 insertions(+), 19 deletions(-) diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h index fff794c8dfff..4efdce47000b 100644 --- a/net/dccp/dccp.h +++ b/net/dccp/dccp.h @@ -122,7 +122,8 @@ extern int dccp_retransmit_skb(struct sock *sk, struct sk_buff *skb); extern int dccp_send_response(struct sock *sk); extern void dccp_send_ack(struct sock *sk); extern void dccp_send_delayed_ack(struct sock *sk); -extern void dccp_send_sync(struct sock *sk, u64 seq); +extern void dccp_send_sync(struct sock *sk, const u64 seq, + const enum dccp_pkt_type pkt_type); extern int dccp_write_xmit(struct sock *sk, struct sk_buff *skb, const int len); diff --git a/net/dccp/input.c b/net/dccp/input.c index 9dadfc362511..68b6e72551ef 100644 --- a/net/dccp/input.c +++ b/net/dccp/input.c @@ -50,7 +50,7 @@ static void dccp_rcv_closereq(struct sock *sk, struct sk_buff *skb) * Drop packet and return */ if (dccp_sk(sk)->dccps_role != DCCP_ROLE_CLIENT) { - dccp_send_sync(sk, DCCP_SKB_CB(skb)->dccpd_seq); + dccp_send_sync(sk, DCCP_SKB_CB(skb)->dccpd_seq, DCCP_PKT_SYNC); return; } @@ -76,8 +76,7 @@ static int dccp_check_seqno(struct sock *sk, struct sk_buff *skb) { const struct dccp_hdr *dh = dccp_hdr(skb); struct dccp_sock *dp = dccp_sk(sk); - u64 lswl = dp->dccps_swl; - u64 lawl = dp->dccps_awl; + u64 lswl, lawl; /* * Step 5: Prepare sequence numbers for Sync @@ -99,6 +98,8 @@ static int dccp_check_seqno(struct sock *sk, struct sk_buff *skb) dccp_update_gsr(sk, DCCP_SKB_CB(skb)->dccpd_seq); else return -1; + } + /* * Step 6: Check sequence numbers * Let LSWL = S.SWL and LAWL = S.AWL @@ -113,7 +114,10 @@ static int dccp_check_seqno(struct sock *sk, struct sk_buff *skb) * Send Sync packet acknowledging P.seqno * Drop packet and return */ - } else if (dh->dccph_type == DCCP_PKT_CLOSEREQ || + lswl = dp->dccps_swl; + lawl = 
dp->dccps_awl; + + if (dh->dccph_type == DCCP_PKT_CLOSEREQ || dh->dccph_type == DCCP_PKT_CLOSE || dh->dccph_type == DCCP_PKT_RESET) { lswl = dp->dccps_gsr; @@ -132,8 +136,8 @@ static int dccp_check_seqno(struct sock *sk, struct sk_buff *skb) DCCP_PKT_WITHOUT_ACK_SEQ)) dp->dccps_gar = DCCP_SKB_CB(skb)->dccpd_ack_seq; } else { - dccp_pr_debug("Step 6 failed, sending SYNC...\n"); - dccp_send_sync(sk, DCCP_SKB_CB(skb)->dccpd_seq); + LIMIT_NETDEBUG("Step 6 failed, sending SYNC...\n"); + dccp_send_sync(sk, DCCP_SKB_CB(skb)->dccpd_seq, DCCP_PKT_SYNC); return -1; } @@ -242,9 +246,21 @@ int dccp_rcv_established(struct sock *sk, struct sk_buff *skb, check_seq: if (!before48(DCCP_SKB_CB(skb)->dccpd_seq, dp->dccps_osr)) { send_sync: - dccp_send_sync(sk, DCCP_SKB_CB(skb)->dccpd_seq); + dccp_send_sync(sk, DCCP_SKB_CB(skb)->dccpd_seq, + DCCP_PKT_SYNC); } break; + case DCCP_PKT_SYNC: + dccp_send_sync(sk, DCCP_SKB_CB(skb)->dccpd_seq, + DCCP_PKT_SYNCACK); + /* + * From the draft: + * + * As with DCCP-Ack packets, DCCP-Sync and DCCP-SyncAck packets + * MAY have non-zero-length application data areas, whose + * contents receivers MUST ignore. + */ + goto discard; } DCCP_INC_STATS_BH(DCCP_MIB_INERRS); @@ -517,7 +533,8 @@ int dccp_rcv_state_process(struct sock *sk, struct sk_buff *skb, dh->dccph_type == DCCP_PKT_REQUEST) || (sk->sk_state == DCCP_RESPOND && dh->dccph_type == DCCP_PKT_DATA)) { - dccp_send_sync(sk, DCCP_SKB_CB(skb)->dccpd_seq); + dccp_send_sync(sk, DCCP_SKB_CB(skb)->dccpd_seq, + DCCP_PKT_SYNC); goto discard; } diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index bc3cfc0533cc..335e00e9631d 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -376,7 +376,7 @@ static inline void dccp_do_pmtu_discovery(struct sock *sk, * probing, since DCCP-Sync probes do not risk application * data loss.
*/ - dccp_send_sync(sk, dp->dccps_gsr); + dccp_send_sync(sk, dp->dccps_gsr, DCCP_PKT_SYNC); } /* else let the usual retransmit timer handle it */ } @@ -1008,7 +1008,7 @@ static inline int dccp_invalid_packet(struct sk_buff *skb) return 1; if (!pskb_may_pull(skb, sizeof(struct dccp_hdr))) { - dccp_pr_debug("pskb_may_pull failed\n"); + printk(KERN_WARNING "DCCP: pskb_may_pull failed\n"); return 1; } @@ -1016,7 +1016,7 @@ static inline int dccp_invalid_packet(struct sk_buff *skb) /* If the packet type is not understood, drop packet and return */ if (dh->dccph_type >= DCCP_PKT_INVALID) { - dccp_pr_debug("invalid packet type\n"); + printk(KERN_WARNING "DCCP: invalid packet type\n"); return 1; } @@ -1025,12 +1025,13 @@ static inline int dccp_invalid_packet(struct sk_buff *skb) * packet, drop packet and return */ if (dh->dccph_doff < dccp_hdr_len(skb) / sizeof(u32)) { - dccp_pr_debug("Offset(%u) too small 1\n", dh->dccph_doff); + printk(KERN_WARNING "DCCP: Offset(%u) too small 1\n", + dh->dccph_doff); return 1; } if (!pskb_may_pull(skb, dh->dccph_doff * sizeof(u32))) { - dccp_pr_debug("P.Data Offset(%u) too small 2\n", + printk(KERN_WARNING "DCCP: P.Data Offset(%u) too small 2\n", dh->dccph_doff); return 1; } @@ -1045,15 +1046,16 @@ static inline int dccp_invalid_packet(struct sk_buff *skb) dh->dccph_type != DCCP_PKT_DATA && dh->dccph_type != DCCP_PKT_ACK && dh->dccph_type != DCCP_PKT_DATAACK) { - dccp_pr_debug("P.type (%s) not Data, Ack nor DataAck and " - "P.X == 0\n", dccp_packet_name(dh->dccph_type)); + printk(KERN_WARNING "DCCP: P.type (%s) not Data, Ack nor " + "DataAck and P.X == 0\n", + dccp_packet_name(dh->dccph_type)); return 1; } /* If the header checksum is incorrect, drop packet and return */ if (dccp_v4_verify_checksum(skb, skb->nh.iph->saddr, skb->nh.iph->daddr) < 0) { - dccp_pr_debug("header checksum is incorrect\n"); + printk(KERN_WARNING "DCCP: header checksum is incorrect\n"); return 1; } diff --git a/net/dccp/output.c b/net/dccp/output.c index 
dcc061bed924..384fd0920983 100644 --- a/net/dccp/output.c +++ b/net/dccp/output.c @@ -404,7 +404,8 @@ void dccp_send_delayed_ack(struct sock *sk) sk_reset_timer(sk, &icsk->icsk_delack_timer, timeout); } -void dccp_send_sync(struct sock *sk, u64 seq) +void dccp_send_sync(struct sock *sk, const u64 seq, + const enum dccp_pkt_type pkt_type) { /* * We are not putting this on the write queue, so @@ -420,7 +421,7 @@ void dccp_send_sync(struct sock *sk, u64 seq) /* Reserve space for headers and prepare control bits. */ skb_reserve(skb, MAX_DCCP_HEADER); skb->csum = 0; - DCCP_SKB_CB(skb)->dccpd_type = DCCP_PKT_SYNC; + DCCP_SKB_CB(skb)->dccpd_type = pkt_type; DCCP_SKB_CB(skb)->dccpd_seq = seq; skb_set_owner_w(skb, sk); From d179cd12928443f3ec29cfbc3567439644bd0afc Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Wed, 17 Aug 2005 14:57:30 -0700 Subject: [PATCH 424/584] [NET]: Implement SKB fast cloning. Protocols that make extensive use of SKB cloning, for example TCP, eat at least 2 allocations per packet sent as a result. To cut the kmalloc() count in half, we implement a pre-allocation scheme wherein we allocate 2 sk_buff objects in advance, then use a simple reference count to free up the memory at the correct time. Based upon an initial patch by Thomas Graf and suggestions from Herbert Xu. Signed-off-by: David S. 
Miller --- include/linux/skbuff.h | 26 ++++++++++++-- include/net/sock.h | 2 +- net/core/skbuff.c | 80 +++++++++++++++++++++++++++++++++++++----- net/ipv4/tcp_output.c | 4 +-- 4 files changed, 97 insertions(+), 15 deletions(-) diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index db10335e4192..42edce6abe23 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -162,6 +162,13 @@ struct skb_timeval { u32 off_usec; }; + +enum { + SKB_FCLONE_UNAVAILABLE, + SKB_FCLONE_ORIG, + SKB_FCLONE_CLONE, +}; + /** * struct sk_buff - socket buffer * @next: Next buffer in list @@ -255,7 +262,8 @@ struct sk_buff { ip_summed:2, nohdr:1, nfctinfo:3; - __u8 pkt_type; + __u8 pkt_type:3, + fclone:2; __be16 protocol; void (*destructor)(struct sk_buff *skb); @@ -295,8 +303,20 @@ struct sk_buff { #include extern void __kfree_skb(struct sk_buff *skb); -extern struct sk_buff *alloc_skb(unsigned int size, - unsigned int __nocast priority); +extern struct sk_buff *__alloc_skb(unsigned int size, + unsigned int __nocast priority, int fclone); +static inline struct sk_buff *alloc_skb(unsigned int size, + unsigned int __nocast priority) +{ + return __alloc_skb(size, priority, 0); +} + +static inline struct sk_buff *alloc_skb_fclone(unsigned int size, + unsigned int __nocast priority) +{ + return __alloc_skb(size, priority, 1); +} + extern struct sk_buff *alloc_skb_from_cache(kmem_cache_t *cp, unsigned int size, unsigned int __nocast priority); diff --git a/include/net/sock.h b/include/net/sock.h index 14183883e8e6..d57aece9492c 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -1200,7 +1200,7 @@ static inline struct sk_buff *sk_stream_alloc_pskb(struct sock *sk, int hdr_len; hdr_len = SKB_DATA_ALIGN(sk->sk_prot->max_header); - skb = alloc_skb(size + hdr_len, gfp); + skb = alloc_skb_fclone(size + hdr_len, gfp); if (skb) { skb->truesize += mem; if (sk->sk_forward_alloc >= (int)skb->truesize || diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 
39a161dbc16d..b853a9b29eb6 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -69,6 +69,7 @@ #include static kmem_cache_t *skbuff_head_cache; +static kmem_cache_t *skbuff_fclone_cache; struct timeval __read_mostly skb_tv_base; @@ -120,7 +121,7 @@ void skb_under_panic(struct sk_buff *skb, int sz, void *here) */ /** - * alloc_skb - allocate a network buffer + * __alloc_skb - allocate a network buffer * @size: size to allocate * @gfp_mask: allocation mask * @@ -131,14 +132,20 @@ void skb_under_panic(struct sk_buff *skb, int sz, void *here) * Buffers may only be allocated from interrupts using a @gfp_mask of * %GFP_ATOMIC. */ -struct sk_buff *alloc_skb(unsigned int size, unsigned int __nocast gfp_mask) +struct sk_buff *__alloc_skb(unsigned int size, unsigned int __nocast gfp_mask, + int fclone) { struct sk_buff *skb; u8 *data; /* Get the HEAD */ - skb = kmem_cache_alloc(skbuff_head_cache, - gfp_mask & ~__GFP_DMA); + if (fclone) + skb = kmem_cache_alloc(skbuff_fclone_cache, + gfp_mask & ~__GFP_DMA); + else + skb = kmem_cache_alloc(skbuff_head_cache, + gfp_mask & ~__GFP_DMA); + if (!skb) goto out; @@ -155,7 +162,15 @@ struct sk_buff *alloc_skb(unsigned int size, unsigned int __nocast gfp_mask) skb->data = data; skb->tail = data; skb->end = data + size; + if (fclone) { + struct sk_buff *child = skb + 1; + atomic_t *fclone_ref = (atomic_t *) (child + 1); + skb->fclone = SKB_FCLONE_ORIG; + atomic_set(fclone_ref, 1); + + child->fclone = SKB_FCLONE_UNAVAILABLE; + } atomic_set(&(skb_shinfo(skb)->dataref), 1); skb_shinfo(skb)->nr_frags = 0; skb_shinfo(skb)->tso_size = 0; @@ -268,8 +283,34 @@ void skb_release_data(struct sk_buff *skb) */ void kfree_skbmem(struct sk_buff *skb) { + struct sk_buff *other; + atomic_t *fclone_ref; + skb_release_data(skb); - kmem_cache_free(skbuff_head_cache, skb); + switch (skb->fclone) { + case SKB_FCLONE_UNAVAILABLE: + kmem_cache_free(skbuff_head_cache, skb); + break; + + case SKB_FCLONE_ORIG: + fclone_ref = (atomic_t *) (skb + 2); + if 
(atomic_dec_and_test(fclone_ref)) + kmem_cache_free(skbuff_fclone_cache, skb); + break; + + case SKB_FCLONE_CLONE: + fclone_ref = (atomic_t *) (skb + 1); + other = skb - 1; + + /* The clone portion is available for + * fast-cloning again. + */ + skb->fclone = SKB_FCLONE_UNAVAILABLE; + + if (atomic_dec_and_test(fclone_ref)) + kmem_cache_free(skbuff_fclone_cache, other); + break; + }; } /** @@ -324,10 +365,20 @@ void __kfree_skb(struct sk_buff *skb) struct sk_buff *skb_clone(struct sk_buff *skb, unsigned int __nocast gfp_mask) { - struct sk_buff *n = kmem_cache_alloc(skbuff_head_cache, gfp_mask); + struct sk_buff *n; - if (!n) - return NULL; + n = skb + 1; + if (skb->fclone == SKB_FCLONE_ORIG && + n->fclone == SKB_FCLONE_UNAVAILABLE) { + atomic_t *fclone_ref = (atomic_t *) (n + 1); + n->fclone = SKB_FCLONE_CLONE; + atomic_inc(fclone_ref); + } else { + n = kmem_cache_alloc(skbuff_head_cache, gfp_mask); + if (!n) + return NULL; + n->fclone = SKB_FCLONE_UNAVAILABLE; + } #define C(x) n->x = skb->x @@ -409,6 +460,7 @@ static void copy_skb_header(struct sk_buff *new, const struct sk_buff *old) new->mac.raw = old->mac.raw + offset; memcpy(new->cb, old->cb, sizeof(old->cb)); new->local_df = old->local_df; + new->fclone = SKB_FCLONE_UNAVAILABLE; new->pkt_type = old->pkt_type; new->tstamp = old->tstamp; new->destructor = NULL; @@ -1647,13 +1699,23 @@ void __init skb_init(void) NULL, NULL); if (!skbuff_head_cache) panic("cannot create skbuff cache"); + + skbuff_fclone_cache = kmem_cache_create("skbuff_fclone_cache", + (2*sizeof(struct sk_buff)) + + sizeof(atomic_t), + 0, + SLAB_HWCACHE_ALIGN, + NULL, NULL); + if (!skbuff_fclone_cache) + panic("cannot create skbuff cache"); + do_gettimeofday(&skb_tv_base); } EXPORT_SYMBOL(___pskb_trim); EXPORT_SYMBOL(__kfree_skb); EXPORT_SYMBOL(__pskb_pull_tail); -EXPORT_SYMBOL(alloc_skb); +EXPORT_SYMBOL(__alloc_skb); EXPORT_SYMBOL(pskb_copy); EXPORT_SYMBOL(pskb_expand_head); EXPORT_SYMBOL(skb_checksum); diff --git a/net/ipv4/tcp_output.c 
b/net/ipv4/tcp_output.c index 8d92ab562aed..75b68116682a 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -1582,7 +1582,7 @@ void tcp_send_fin(struct sock *sk) } else { /* Socket is locked, keep trying until memory is available. */ for (;;) { - skb = alloc_skb(MAX_TCP_HEADER, GFP_KERNEL); + skb = alloc_skb_fclone(MAX_TCP_HEADER, GFP_KERNEL); if (skb) break; yield(); @@ -1804,7 +1804,7 @@ int tcp_connect(struct sock *sk) tcp_connect_init(sk); - buff = alloc_skb(MAX_TCP_HEADER + 15, sk->sk_allocation); + buff = alloc_skb_fclone(MAX_TCP_HEADER + 15, sk->sk_allocation); if (unlikely(buff == NULL)) return -ENOBUFS; From b1c9fe7b818acbd36dc908c5c1ad4cab34c67b39 Mon Sep 17 00:00:00 2001 From: Ian McDonald Date: Thu, 18 Aug 2005 20:45:29 -0300 Subject: [PATCH 425/584] [DCCP]: Fix elapsed time option as per section 13.2 of spec v11 The elapsed time can be two bytes or four bytes only. Signed-off-by: Ian McDonald Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. Miller --- net/dccp/options.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/net/dccp/options.c b/net/dccp/options.c index d87d6be7ab10..85a86bd61f44 100644 --- a/net/dccp/options.c +++ b/net/dccp/options.c @@ -293,10 +293,7 @@ static void dccp_insert_option_ndp(struct sock *sk, struct sk_buff *skb) static inline int dccp_elapsed_time_len(const u32 elapsed_time) { - return elapsed_time == 0 ? 0 : - elapsed_time <= 0xFF ? 1 : - elapsed_time <= 0xFFFF ? 2 : - elapsed_time <= 0xFFFFFF ? 3 : 4; + return elapsed_time == 0 ? 0 : elapsed_time <= 0xFFFF ? 2 : 4; } void dccp_insert_option_elapsed_time(struct sock *sk, From 5480855bfbc125f34d9b752689bb9a64da7e1fc6 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 18 Aug 2005 20:47:02 -0300 Subject: [PATCH 426/584] [DCCP]: Set dccp_ctl_socket to NULL in dccp_ctl_sock_exit Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. 
Miller --- net/dccp/proto.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/dccp/proto.c b/net/dccp/proto.c index 0b715ceb38b5..8b613c3017c5 100644 --- a/net/dccp/proto.c +++ b/net/dccp/proto.c @@ -506,8 +506,10 @@ static int __init dccp_ctl_sock_init(void) #ifdef CONFIG_IP_DCCP_UNLOAD_HACK void dccp_ctl_sock_exit(void) { - if (dccp_ctl_socket != NULL) + if (dccp_ctl_socket != NULL) { sock_release(dccp_ctl_socket); + dccp_ctl_socket = NULL; + } } EXPORT_SYMBOL_GPL(dccp_ctl_sock_exit); From c59eab4637dbc3f832503be4ccb9213b0f323d92 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 18 Aug 2005 21:12:02 -0300 Subject: [PATCH 427/584] [DCCP]: Use LIMIT_NETDEBUG in some debugging printks Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. Miller --- net/dccp/input.c | 11 ++++++----- net/dccp/ipv4.c | 23 +++++++++++++---------- 2 files changed, 19 insertions(+), 15 deletions(-) diff --git a/net/dccp/input.c b/net/dccp/input.c index 68b6e72551ef..3c4cbff82e95 100644 --- a/net/dccp/input.c +++ b/net/dccp/input.c @@ -118,8 +118,8 @@ static int dccp_check_seqno(struct sock *sk, struct sk_buff *skb) lawl = dp->dccps_awl; if (dh->dccph_type == DCCP_PKT_CLOSEREQ || - dh->dccph_type == DCCP_PKT_CLOSE || - dh->dccph_type == DCCP_PKT_RESET) { + dh->dccph_type == DCCP_PKT_CLOSE || + dh->dccph_type == DCCP_PKT_RESET) { lswl = dp->dccps_gsr; dccp_inc_seqno(&lswl); lawl = dp->dccps_gar; @@ -136,7 +136,8 @@ static int dccp_check_seqno(struct sock *sk, struct sk_buff *skb) DCCP_PKT_WITHOUT_ACK_SEQ)) dp->dccps_gar = DCCP_SKB_CB(skb)->dccpd_ack_seq; } else { - LIMIT_NETDEBUG("Step 6 failed, sending SYNC...\n"); + LIMIT_NETDEBUG(KERN_WARNING "DCCP: Step 6 failed, " + "sending SYNC...\n"); dccp_send_sync(sk, DCCP_SKB_CB(skb)->dccpd_seq, DCCP_PKT_SYNC); return -1; } @@ -168,8 +169,8 @@ int dccp_rcv_established(struct sock *sk, struct sk_buff *skb, if (dccp_ackpkts_add(dp->dccps_hc_rx_ackpkts, DCCP_SKB_CB(skb)->dccpd_seq, 
DCCP_ACKPKTS_STATE_RECEIVED)) { - LIMIT_NETDEBUG(KERN_INFO "DCCP: acknowledgeable " - "packets buffer full!\n"); + LIMIT_NETDEBUG(KERN_WARNING "DCCP: acknowledgeable " + "packets buffer full!\n"); ap->dccpap_ack_seqno = DCCP_MAX_SEQNO + 1; inet_csk_schedule_ack(sk); inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK, diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index 335e00e9631d..cc5d60d9afa4 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -1008,7 +1008,7 @@ static inline int dccp_invalid_packet(struct sk_buff *skb) return 1; if (!pskb_may_pull(skb, sizeof(struct dccp_hdr))) { - printk(KERN_WARNING "DCCP: pskb_may_pull failed\n"); + LIMIT_NETDEBUG(KERN_WARNING "DCCP: pskb_may_pull failed\n"); return 1; } @@ -1016,7 +1016,7 @@ static inline int dccp_invalid_packet(struct sk_buff *skb) /* If the packet type is not understood, drop packet and return */ if (dh->dccph_type >= DCCP_PKT_INVALID) { - printk(KERN_WARNING "DCCP: invalid packet type\n"); + LIMIT_NETDEBUG(KERN_WARNING "DCCP: invalid packet type\n"); return 1; } @@ -1025,14 +1025,16 @@ static inline int dccp_invalid_packet(struct sk_buff *skb) * packet, drop packet and return */ if (dh->dccph_doff < dccp_hdr_len(skb) / sizeof(u32)) { - printk(KERN_WARNING "DCCP: Offset(%u) too small 1\n", - dh->dccph_doff); + LIMIT_NETDEBUG(KERN_WARNING "DCCP: P.Data Offset(%u) " + "too small 1\n", + dh->dccph_doff); return 1; } if (!pskb_may_pull(skb, dh->dccph_doff * sizeof(u32))) { - printk(KERN_WARNING "DCCP: P.Data Offset(%u) too small 2\n", - dh->dccph_doff); + LIMIT_NETDEBUG(KERN_WARNING "DCCP: P.Data Offset(%u) " + "too small 2\n", + dh->dccph_doff); return 1; } @@ -1046,16 +1048,17 @@ static inline int dccp_invalid_packet(struct sk_buff *skb) dh->dccph_type != DCCP_PKT_DATA && dh->dccph_type != DCCP_PKT_ACK && dh->dccph_type != DCCP_PKT_DATAACK) { - printk(KERN_WARNING "DCCP: P.type (%s) not Data, Ack nor " - "DataAck and P.X == 0\n", - dccp_packet_name(dh->dccph_type)); + LIMIT_NETDEBUG(KERN_WARNING "DCCP: P.type 
(%s) not Data, Ack " + "nor DataAck and P.X == 0\n", + dccp_packet_name(dh->dccph_type)); return 1; } /* If the header checksum is incorrect, drop packet and return */ if (dccp_v4_verify_checksum(skb, skb->nh.iph->saddr, skb->nh.iph->daddr) < 0) { - printk(KERN_WARNING "DCCP: header checksum is incorrect\n"); + LIMIT_NETDEBUG(KERN_WARNING "DCCP: header checksum is " + "incorrect\n"); return 1; } From bf0ff9e578ba7dd8331005f00ad7310122011f43 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Fri, 19 Aug 2005 16:37:30 -0700 Subject: [PATCH 428/584] [IPVS]: ipv4_table --> ipvs_ipv4_table Fix conflict with symbol of same name in global namespace. Signed-off-by: David S. Miller --- net/ipv4/ipvs/ip_vs_ctl.c | 4 ++-- net/ipv4/ipvs/ip_vs_lblc.c | 4 ++-- net/ipv4/ipvs/ip_vs_lblcr.c | 4 ++-- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/net/ipv4/ipvs/ip_vs_ctl.c b/net/ipv4/ipvs/ip_vs_ctl.c index 7d99ede2ef79..2d66848e7aa0 100644 --- a/net/ipv4/ipvs/ip_vs_ctl.c +++ b/net/ipv4/ipvs/ip_vs_ctl.c @@ -1598,7 +1598,7 @@ static ctl_table vs_table[] = { { .ctl_name = 0 } }; -static ctl_table ipv4_table[] = { +static ctl_table ipvs_ipv4_table[] = { { .ctl_name = NET_IPV4, .procname = "ipv4", @@ -1613,7 +1613,7 @@ static ctl_table vs_root_table[] = { .ctl_name = CTL_NET, .procname = "net", .mode = 0555, - .child = ipv4_table, + .child = ipvs_ipv4_table, }, { .ctl_name = 0 } }; diff --git a/net/ipv4/ipvs/ip_vs_lblc.c b/net/ipv4/ipvs/ip_vs_lblc.c index c035838b780a..561cda326fa8 100644 --- a/net/ipv4/ipvs/ip_vs_lblc.c +++ b/net/ipv4/ipvs/ip_vs_lblc.c @@ -131,7 +131,7 @@ static ctl_table vs_table[] = { { .ctl_name = 0 } }; -static ctl_table ipv4_table[] = { +static ctl_table ipvs_ipv4_table[] = { { .ctl_name = NET_IPV4, .procname = "ipv4", @@ -146,7 +146,7 @@ static ctl_table lblc_root_table[] = { .ctl_name = CTL_NET, .procname = "net", .mode = 0555, - .child = ipv4_table + .child = ipvs_ipv4_table }, { .ctl_name = 0 } }; diff --git a/net/ipv4/ipvs/ip_vs_lblcr.c 
b/net/ipv4/ipvs/ip_vs_lblcr.c index 22b5dd55d271..ce456dbf09a5 100644 --- a/net/ipv4/ipvs/ip_vs_lblcr.c +++ b/net/ipv4/ipvs/ip_vs_lblcr.c @@ -320,7 +320,7 @@ static ctl_table vs_table[] = { { .ctl_name = 0 } }; -static ctl_table ipv4_table[] = { +static ctl_table ipvs_ipv4_table[] = { { .ctl_name = NET_IPV4, .procname = "ipv4", @@ -335,7 +335,7 @@ static ctl_table lblcr_root_table[] = { .ctl_name = CTL_NET, .procname = "net", .mode = 0555, - .child = ipv4_table + .child = ipvs_ipv4_table }, { .ctl_name = 0 } }; From 1bc0986957b63a2fbbc46ab95d3d1d72830bda83 Mon Sep 17 00:00:00 2001 From: Ian McDonald Date: Sat, 20 Aug 2005 00:23:43 -0300 Subject: [PATCH 429/584] [DCCP]: Fix the timestamp options This changes timestamp, timestamp echo, and elapsed time to use units of 10 usecs as per DCCP spec. This has been tested to verify that times are correct. Also fixed up length and used hton/ntoh more. Still to add in later patches: - actually use elapsed time to adjust RTT (commented out as was prior to this patch) - send options at times more closely following the spec (content is now correct) Signed-off-by: Ian McDonald Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. 
Miller --- include/linux/dccp.h | 2 +- net/dccp/ccids/ccid3.c | 12 +----- net/dccp/dccp.h | 19 +++++++++- net/dccp/options.c | 85 ++++++++++++++++++++++++++++-------------- 4 files changed, 78 insertions(+), 40 deletions(-) diff --git a/include/linux/dccp.h b/include/linux/dccp.h index 3dccdd5108b5..9e3a1370b906 100644 --- a/include/linux/dccp.h +++ b/include/linux/dccp.h @@ -415,7 +415,7 @@ struct dccp_sock { __u64 dccps_gsr; __u64 dccps_gar; unsigned long dccps_service; - unsigned long dccps_timestamp_time; + struct timeval dccps_timestamp_time; __u32 dccps_timestamp_echo; __u32 dccps_avg_packet_size; unsigned long dccps_ndp_count; diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c index 2dd3e94ba8f4..694149061b8b 100644 --- a/net/dccp/ccids/ccid3.c +++ b/net/dccp/ccids/ccid3.c @@ -2,12 +2,12 @@ * net/dccp/ccids/ccid3.c * * Copyright (c) 2005 The University of Waikato, Hamilton, New Zealand. + * Copyright (c) 2005 Ian McDonald * * An implementation of the DCCP protocol * * This code has been developed by the University of Waikato WAND * research group. 
For further information please see http://www.wand.net.nz/ - * or e-mail Ian McDonald - iam4@cs.waikato.ac.nz * * This code also uses code from Lulea University, rereleased as GPL by its * authors: @@ -174,14 +174,6 @@ static inline void timeval_fix(struct timeval *tv) } } -/* returns the difference in usecs between timeval passed in and current time */ -static inline u32 now_delta(struct timeval tv) { - struct timeval now; - - do_gettimeofday(&now); - return ((now.tv_sec-tv.tv_sec)*1000000+now.tv_usec-tv.tv_usec); -} - #define CALCX_ARRSIZE 500 #define CALCX_SPLIT 50000 @@ -1110,7 +1102,7 @@ static void ccid3_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb) struct ccid3_options_received *opt_recv; struct dccp_tx_hist_entry *packet; unsigned long next_tmout; - u16 t_elapsed; + u32 t_elapsed; u32 pinv; u32 x_recv; u32 r_sample; diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h index 4efdce47000b..aab72b8d0703 100644 --- a/net/dccp/dccp.h +++ b/net/dccp/dccp.h @@ -4,7 +4,8 @@ * net/dccp/dccp.h * * An implementation of the DCCP protocol - * Arnaldo Carvalho de Melo + * Copyright (c) 2005 Arnaldo Carvalho de Melo + * Copyright (c) 2005 Ian McDonald * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License version 2 as @@ -404,6 +405,7 @@ extern struct socket *dccp_ctl_socket; * @dccpap_ack_nonce - the one-bit sum of the ECN Nonces for all State 0. 
* * @dccpap_buf_len - circular buffer length + * @dccpap_time - the time in usecs * @dccpap_buf - circular buffer of acknowledgeable packets */ struct dccp_ackpkts { @@ -416,7 +418,7 @@ struct dccp_ackpkts { unsigned int dccpap_buf_vector_len; unsigned int dccpap_ack_vector_len; unsigned int dccpap_buf_len; - unsigned long dccpap_time; + struct timeval dccpap_time; u8 dccpap_buf_nonce; u8 dccpap_ack_nonce; u8 dccpap_buf[0]; @@ -430,6 +432,19 @@ extern int dccp_ackpkts_add(struct dccp_ackpkts *ap, u64 ackno, u8 state); extern void dccp_ackpkts_check_rcv_ackno(struct dccp_ackpkts *ap, struct sock *sk, u64 ackno); +/* + * Returns the difference in usecs between timeval + * passed in and current time + */ +static inline u32 now_delta(struct timeval tv) +{ + struct timeval now; + + do_gettimeofday(&now); + return (now.tv_sec - tv.tv_sec) * USEC_PER_SEC + + (now.tv_usec - tv.tv_usec); +} + #ifdef CONFIG_IP_DCCP_DEBUG extern void dccp_ackvector_print(const u64 ackno, const unsigned char *vector, int len); diff --git a/net/dccp/options.c b/net/dccp/options.c index 85a86bd61f44..7ecffdf85756 100644 --- a/net/dccp/options.c +++ b/net/dccp/options.c @@ -2,8 +2,9 @@ * net/dccp/options.c * * An implementation of the DCCP protocol - * Aristeu Sergio Rozanski Filho - * Arnaldo Carvalho de Melo + * Copyright (c) 2005 Aristeu Sergio Rozanski Filho + * Copyright (c) 2005 Arnaldo Carvalho de Melo + * Copyright (c) 2005 Ian McDonald * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License @@ -138,7 +139,7 @@ int dccp_parse_options(struct sock *sk, struct sk_buff *skb) opt_recv->dccpor_timestamp = ntohl(*(u32 *)value); dp->dccps_timestamp_echo = opt_recv->dccpor_timestamp; - dp->dccps_timestamp_time = jiffies; + do_gettimeofday(&dp->dccps_timestamp_time); dccp_pr_debug("%sTIMESTAMP=%u, ackno=%llu\n", debug_prefix, opt_recv->dccpor_timestamp, @@ -146,36 +147,45 @@ int dccp_parse_options(struct sock *sk, struct 
sk_buff *skb) DCCP_SKB_CB(skb)->dccpd_ack_seq); break; case DCCPO_TIMESTAMP_ECHO: - if (len < 4 || len > 8) + if (len != 4 && len != 6 && len != 8) goto out_invalid_option; opt_recv->dccpor_timestamp_echo = ntohl(*(u32 *)value); - dccp_pr_debug("%sTIMESTAMP_ECHO=%u, len=%d, ackno=%llu, " - "diff=%u\n", + dccp_pr_debug("%sTIMESTAMP_ECHO=%u, len=%d, ackno=%llu, ", debug_prefix, opt_recv->dccpor_timestamp_echo, len + 2, (unsigned long long) - DCCP_SKB_CB(skb)->dccpd_ack_seq, - (tcp_time_stamp - - opt_recv->dccpor_timestamp_echo)); + DCCP_SKB_CB(skb)->dccpd_ack_seq); - opt_recv->dccpor_elapsed_time = - dccp_decode_value_var(value + 4, - len - 4); - dccp_pr_debug("%sTIMESTAMP_ECHO ELAPSED_TIME=%d\n", + if (len > 4) { + if (len == 6) + opt_recv->dccpor_elapsed_time = + ntohs(*(u16 *)(value + 4)); + else + opt_recv->dccpor_elapsed_time = + ntohl(*(u32 *)(value + 4)); + + dccp_pr_debug("%sTIMESTAMP_ECHO ELAPSED_TIME=%d\n", debug_prefix, opt_recv->dccpor_elapsed_time); + } break; case DCCPO_ELAPSED_TIME: - if (len > 4) + if (len != 2 && len != 4) goto out_invalid_option; if (pkt_type == DCCP_PKT_DATA) continue; - opt_recv->dccpor_elapsed_time = - dccp_decode_value_var(value, len); + + if (len == 2) + opt_recv->dccpor_elapsed_time = + ntohs(*(u16 *)value); + else + opt_recv->dccpor_elapsed_time = + ntohl(*(u32 *)value); + dccp_pr_debug("%sELAPSED_TIME=%d\n", debug_prefix, opt_recv->dccpor_elapsed_time); break; @@ -309,8 +319,7 @@ void dccp_insert_option_elapsed_time(struct sock *sk, const int len = 2 + elapsed_time_len; unsigned char *to; - /* If elapsed_time == 0... 
*/ - if (elapsed_time_len == 2) + if (elapsed_time_len == 0) return; if (DCCP_SKB_CB(skb)->dccpd_opt_len + len > DCCP_MAX_OPT_LEN) { @@ -325,7 +334,13 @@ void dccp_insert_option_elapsed_time(struct sock *sk, *to++ = DCCPO_ELAPSED_TIME; *to++ = len; - dccp_encode_value_var(elapsed_time, to, elapsed_time_len); + if (elapsed_time_len == 2) { + const u16 var16 = htons((u16)elapsed_time); + memcpy(to, &var16, 2); + } else { + const u32 var32 = htonl(elapsed_time); + memcpy(to, &var32, 4); + } dccp_pr_debug("%sELAPSED_TIME=%u, len=%d, seqno=%llu\n", debug_prefix, elapsed_time, @@ -344,7 +359,7 @@ static void dccp_insert_option_ack_vector(struct sock *sk, struct sk_buff *skb) #endif struct dccp_ackpkts *ap = dp->dccps_hc_rx_ackpkts; int len = ap->dccpap_buf_vector_len + 2; - const u32 elapsed_time = jiffies_to_usecs(jiffies - ap->dccpap_time) / 10; + const u32 elapsed_time = now_delta(ap->dccpap_time) / 10; unsigned char *to, *from; if (elapsed_time != 0) @@ -414,7 +429,15 @@ static void dccp_insert_option_ack_vector(struct sock *sk, struct sk_buff *skb) static inline void dccp_insert_option_timestamp(struct sock *sk, struct sk_buff *skb) { - const u32 now = htonl(tcp_time_stamp); + struct timeval tv; + u32 now; + + do_gettimeofday(&tv); + now = (tv.tv_sec * USEC_PER_SEC + tv.tv_usec) / 10; + /* yes this will overflow but that is the point as we want a + * 10 usec 32 bit timer which mean it wraps every 11.9 hours */ + + now = htonl(now); dccp_insert_option(sk, skb, DCCPO_TIMESTAMP, &now, sizeof(now)); } @@ -427,8 +450,7 @@ static void dccp_insert_option_timestamp_echo(struct sock *sk, "CLIENT TX opt: " : "server TX opt: "; #endif u32 tstamp_echo; - const u32 elapsed_time = jiffies_to_usecs(jiffies - - dp->dccps_timestamp_time) / 10; + const u32 elapsed_time = now_delta(dp->dccps_timestamp_time) / 10; const int elapsed_time_len = dccp_elapsed_time_len(elapsed_time); const int len = 6 + elapsed_time_len; unsigned char *to; @@ -448,7 +470,14 @@ static void 
dccp_insert_option_timestamp_echo(struct sock *sk, tstamp_echo = htonl(dp->dccps_timestamp_echo); memcpy(to, &tstamp_echo, 4); to += 4; - dccp_encode_value_var(elapsed_time, to, elapsed_time_len); + + if (elapsed_time_len == 2) { + const u16 var16 = htons((u16)elapsed_time); + memcpy(to, &var16, 2); + } else if (elapsed_time_len == 4) { + const u32 var32 = htonl(elapsed_time); + memcpy(to, &var32, 4); + } dccp_pr_debug("%sTIMESTAMP_ECHO=%u, len=%d, seqno=%llu\n", debug_prefix, dp->dccps_timestamp_echo, @@ -456,7 +485,8 @@ static void dccp_insert_option_timestamp_echo(struct sock *sk, (unsigned long long) DCCP_SKB_CB(skb)->dccpd_seq); dp->dccps_timestamp_echo = 0; - dp->dccps_timestamp_time = 0; + dp->dccps_timestamp_time.tv_sec = 0; + dp->dccps_timestamp_time.tv_usec = 0; } void dccp_insert_options(struct sock *sk, struct sk_buff *skb) @@ -514,7 +544,8 @@ struct dccp_ackpkts *dccp_ackpkts_alloc(const unsigned int len, ap->dccpap_ack_seqno = DCCP_MAX_SEQNO + 1; ap->dccpap_buf_nonce = ap->dccpap_buf_nonce = 0; ap->dccpap_ack_ptr = 0; - ap->dccpap_time = 0; + ap->dccpap_time.tv_sec = 0; + ap->dccpap_time.tv_usec = 0; ap->dccpap_buf_vector_len = ap->dccpap_ack_vector_len = 0; } @@ -665,7 +696,7 @@ int dccp_ackpkts_add(struct dccp_ackpkts *ap, u64 ackno, u8 state) } ap->dccpap_buf_ackno = ackno; - ap->dccpap_time = jiffies; + do_gettimeofday(&ap->dccpap_time); out: dccp_pr_debug(""); dccp_ackpkts_print(ap); From 8cd25c1fcfbf6460983e99091d278187421c1a1d Mon Sep 17 00:00:00 2001 From: Adrian Bunk Date: Sat, 20 Aug 2005 17:14:11 -0700 Subject: [PATCH 430/584] [NET]: fix PROC_FS=n compile Signed-off-by: Adrian Bunk Signed-off-by: David S. 
Miller --- include/net/sock.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/include/net/sock.h b/include/net/sock.h index d57aece9492c..312cb25cbd18 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -1377,9 +1377,7 @@ extern struct ctl_table core_table[]; extern int sysctl_optmem_max; #endif -#ifdef CONFIG_PROC_FS extern __u32 sysctl_wmem_default; extern __u32 sysctl_rmem_default; -#endif #endif /* _SOCK_H */ From a6f9a70578b981321b63786ac8015f17cca4fcbd Mon Sep 17 00:00:00 2001 From: Jon Wetzel Date: Sat, 20 Aug 2005 17:15:54 -0700 Subject: [PATCH 431/584] [NET]: Add support for getting the permanent hardware address. This patch adds a new field to net device to hold the permanent hardware address, and adds a new generic ethtool_op function to get that address. Signed-off-by: Jon Wetzel Signed-off-by: John W. Linville Signed-off-by: David S. Miller --- include/linux/ethtool.h | 13 ++++++++++- include/linux/netdevice.h | 1 + net/core/ethtool.c | 49 +++++++++++++++++++++++++++++++++++++++ 3 files changed, 62 insertions(+), 1 deletion(-) diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index d7021c391b2b..ed1440ea4c91 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -250,6 +250,12 @@ struct ethtool_stats { u64 data[0]; }; +struct ethtool_perm_addr { + u32 cmd; /* ETHTOOL_GPERMADDR */ + u32 size; + u8 data[0]; +}; + struct net_device; /* Some generic methods drivers may use in their ethtool_ops */ @@ -261,6 +267,8 @@ u32 ethtool_op_get_sg(struct net_device *dev); int ethtool_op_set_sg(struct net_device *dev, u32 data); u32 ethtool_op_get_tso(struct net_device *dev); int ethtool_op_set_tso(struct net_device *dev, u32 data); +int ethtool_op_get_perm_addr(struct net_device *dev, + struct ethtool_perm_addr *addr, u8 *data); /** * &ethtool_ops - Alter and report network device settings @@ -294,7 +302,8 @@ int ethtool_op_set_tso(struct net_device *dev, u32 data); * get_strings: Return a set of strings that describe the 
requested objects * phys_id: Identify the device * get_stats: Return statistics about the device - * + * get_perm_addr: Gets the permanent hardware address + * * Description: * * get_settings: @@ -352,6 +361,7 @@ struct ethtool_ops { int (*phys_id)(struct net_device *, u32); int (*get_stats_count)(struct net_device *); void (*get_ethtool_stats)(struct net_device *, struct ethtool_stats *, u64 *); + int (*get_perm_addr)(struct net_device *, struct ethtool_perm_addr *, u8 *); int (*begin)(struct net_device *); void (*complete)(struct net_device *); }; @@ -389,6 +399,7 @@ struct ethtool_ops { #define ETHTOOL_GSTATS 0x0000001d /* get NIC-specific statistics */ #define ETHTOOL_GTSO 0x0000001e /* Get TSO enable (ethtool_value) */ #define ETHTOOL_STSO 0x0000001f /* Set TSO enable (ethtool_value) */ +#define ETHTOOL_GPERMADDR 0x00000020 /* Get permanent hardware address */ /* compatibility with older code */ #define SPARC_ETH_GSET ETHTOOL_GSET diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 1fcaa88b8625..7c717907896d 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -337,6 +337,7 @@ struct net_device /* Interface address info. 
*/ unsigned char broadcast[MAX_ADDR_LEN]; /* hw bcast add */ unsigned char dev_addr[MAX_ADDR_LEN]; /* hw address */ + unsigned char perm_addr[MAX_ADDR_LEN]; /* permanent hw address */ unsigned char addr_len; /* hardware address length */ unsigned short dev_id; /* for shared network cards */ diff --git a/net/core/ethtool.c b/net/core/ethtool.c index a3eeb88e1c81..289c1b5a8e4a 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -81,6 +81,18 @@ int ethtool_op_set_tso(struct net_device *dev, u32 data) return 0; } +int ethtool_op_get_perm_addr(struct net_device *dev, struct ethtool_perm_addr *addr, u8 *data) +{ + unsigned char len = dev->addr_len; + if ( addr->size < len ) + return -ETOOSMALL; + + addr->size = len; + memcpy(data, dev->perm_addr, len); + return 0; +} + + /* Handlers for each ethtool command */ static int ethtool_get_settings(struct net_device *dev, void __user *useraddr) @@ -683,6 +695,39 @@ static int ethtool_get_stats(struct net_device *dev, void __user *useraddr) return ret; } +static int ethtool_get_perm_addr(struct net_device *dev, void *useraddr) +{ + struct ethtool_perm_addr epaddr; + u8 *data; + int ret; + + if (!dev->ethtool_ops->get_perm_addr) + return -EOPNOTSUPP; + + if (copy_from_user(&epaddr,useraddr,sizeof(epaddr))) + return -EFAULT; + + data = kmalloc(epaddr.size, GFP_USER); + if (!data) + return -ENOMEM; + + ret = dev->ethtool_ops->get_perm_addr(dev,&epaddr,data); + if (ret) + return ret; + + ret = -EFAULT; + if (copy_to_user(useraddr, &epaddr, sizeof(epaddr))) + goto out; + useraddr += sizeof(epaddr); + if (copy_to_user(useraddr, data, epaddr.size)) + goto out; + ret = 0; + + out: + kfree(data); + return ret; +} + /* The main entry point in this file. 
Called from net/core/dev.c */ int dev_ethtool(struct ifreq *ifr) @@ -806,6 +851,9 @@ int dev_ethtool(struct ifreq *ifr) case ETHTOOL_GSTATS: rc = ethtool_get_stats(dev, useraddr); break; + case ETHTOOL_GPERMADDR: + rc = ethtool_get_perm_addr(dev, useraddr); + break; default: rc = -EOPNOTSUPP; } @@ -826,6 +874,7 @@ int dev_ethtool(struct ifreq *ifr) EXPORT_SYMBOL(dev_ethtool); EXPORT_SYMBOL(ethtool_op_get_link); +EXPORT_SYMBOL_GPL(ethtool_op_get_perm_addr); EXPORT_SYMBOL(ethtool_op_get_sg); EXPORT_SYMBOL(ethtool_op_get_tso); EXPORT_SYMBOL(ethtool_op_get_tx_csum); From 2c656491e9ce77e12337073973794c4be467a489 Mon Sep 17 00:00:00 2001 From: Thomas Graf Date: Sat, 20 Aug 2005 17:24:25 -0700 Subject: [PATCH 432/584] [NET]: Fix ipl=>ihl typo in ip_fast_csum Signed-off-by: Thomas Graf Signed-off-by: David S. Miller --- include/asm-i386/checksum.h | 2 +- include/asm-m32r/checksum.h | 2 +- include/asm-x86_64/checksum.h | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/include/asm-i386/checksum.h b/include/asm-i386/checksum.h index f949e44c2a35..67d3630c4e89 100644 --- a/include/asm-i386/checksum.h +++ b/include/asm-i386/checksum.h @@ -83,7 +83,7 @@ static inline unsigned short ip_fast_csum(unsigned char * iph, "adcl $0, %0 ;\n" "notl %0 ;\n" "2: ;\n" - /* Since the input registers which are loaded with iph and ipl + /* Since the input registers which are loaded with iph and ihl are modified, we must also specify them as outputs, or gcc will assume they contain their original values. 
*/ : "=r" (sum), "=r" (iph), "=r" (ihl) diff --git a/include/asm-m32r/checksum.h b/include/asm-m32r/checksum.h index 99f37dbf2558..877ebf46e9ff 100644 --- a/include/asm-m32r/checksum.h +++ b/include/asm-m32r/checksum.h @@ -105,7 +105,7 @@ static inline unsigned short ip_fast_csum(unsigned char * iph, " addx %0, %3 \n" " .fillinsn\n" "2: \n" - /* Since the input registers which are loaded with iph and ipl + /* Since the input registers which are loaded with iph and ihl are modified, we must also specify them as outputs, or gcc will assume they contain their original values. */ : "=&r" (sum), "=r" (iph), "=r" (ihl), "=&r" (tmpreg0), "=&r" (tmpreg1) diff --git a/include/asm-x86_64/checksum.h b/include/asm-x86_64/checksum.h index d01356f01448..989469e8e0b7 100644 --- a/include/asm-x86_64/checksum.h +++ b/include/asm-x86_64/checksum.h @@ -64,7 +64,7 @@ static inline unsigned short ip_fast_csum(unsigned char *iph, unsigned int ihl) " adcl $0, %0\n" " notl %0\n" "2:" - /* Since the input registers which are loaded with iph and ipl + /* Since the input registers which are loaded with iph and ihl are modified, we must also specify them as outputs, or gcc will assume they contain their original values. */ : "=r" (sum), "=r" (iph), "=r" (ihl) From 58615242417638794a5ba299c49e3fbd6f47c2a3 Mon Sep 17 00:00:00 2001 From: Thomas Graf Date: Sat, 20 Aug 2005 17:25:29 -0700 Subject: [PATCH 433/584] [IPV4]: Consistency and whitespace cleanup of ip_rcv() Signed-off-by: Thomas Graf Signed-off-by: David S. Miller --- net/ipv4/ip_input.c | 27 +++++++++++++-------------- 1 file changed, 13 insertions(+), 14 deletions(-) diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c index 81e18023dc19..322b082ede1e 100644 --- a/net/ipv4/ip_input.c +++ b/net/ipv4/ip_input.c @@ -361,6 +361,7 @@ drop: int ip_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev) { struct iphdr *iph; + u32 len; /* When the interface is in promisc. 
mode, drop all the crap * that it receives, do not try to analyse it. @@ -392,7 +393,7 @@ int ip_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, */ if (iph->ihl < 5 || iph->version != 4) - goto inhdr_error; + goto inhdr_error; if (!pskb_may_pull(skb, iph->ihl*4)) goto inhdr_error; @@ -400,21 +401,19 @@ int ip_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, iph = skb->nh.iph; if (ip_fast_csum((u8 *)iph, iph->ihl) != 0) - goto inhdr_error; + goto inhdr_error; - { - __u32 len = ntohs(iph->tot_len); - if (skb->len < len || len < (iph->ihl<<2)) - goto inhdr_error; + len = ntohs(iph->tot_len); + if (skb->len < len || len < (iph->ihl*4)) + goto inhdr_error; - /* Our transport medium may have padded the buffer out. Now we know it - * is IP we can trim to the true length of the frame. - * Note this now means skb->len holds ntohs(iph->tot_len). - */ - if (pskb_trim_rcsum(skb, len)) { - IP_INC_STATS_BH(IPSTATS_MIB_INDISCARDS); - goto drop; - } + /* Our transport medium may have padded the buffer out. Now we know it + * is IP we can trim to the true length of the frame. + * Note this now means skb->len holds ntohs(iph->tot_len). + */ + if (pskb_trim_rcsum(skb, len)) { + IP_INC_STATS_BH(IPSTATS_MIB_INDISCARDS); + goto drop; } return NF_HOOK(PF_INET, NF_IP_PRE_ROUTING, skb, dev, NULL, From e9c604227391308b185aa6b14c7f93b0a0c2e51b Mon Sep 17 00:00:00 2001 From: Thomas Graf Date: Sat, 20 Aug 2005 17:25:52 -0700 Subject: [PATCH 434/584] [IPV4]: Avoid common branch misprediction while checking csum in ip_rcv() Signed-off-by: Thomas Graf Signed-off-by: David S. 
Miller --- net/ipv4/ip_input.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c index 322b082ede1e..6a06e15694dc 100644 --- a/net/ipv4/ip_input.c +++ b/net/ipv4/ip_input.c @@ -400,7 +400,7 @@ int ip_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, iph = skb->nh.iph; - if (ip_fast_csum((u8 *)iph, iph->ihl) != 0) + if (unlikely(ip_fast_csum((u8 *)iph, iph->ihl))) goto inhdr_error; len = ntohs(iph->tot_len); From d245407e758b14c464c609b632873f85709360c7 Mon Sep 17 00:00:00 2001 From: Thomas Graf Date: Sat, 20 Aug 2005 17:26:12 -0700 Subject: [PATCH 435/584] [IPV4]: Move ip options parsing out of ip_rcv_finish() Signed-off-by: Thomas Graf Signed-off-by: David S. Miller --- net/ipv4/ip_input.c | 93 ++++++++++++++++++++++++++------------------- 1 file changed, 54 insertions(+), 39 deletions(-) diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c index 6a06e15694dc..48e4ddc1e337 100644 --- a/net/ipv4/ip_input.c +++ b/net/ipv4/ip_input.c @@ -279,6 +279,58 @@ int ip_local_deliver(struct sk_buff *skb) ip_local_deliver_finish); } +static inline int ip_rcv_options(struct sk_buff *skb) +{ + struct ip_options *opt; + struct iphdr *iph; + struct net_device *dev = skb->dev; + + /* It looks as overkill, because not all + IP options require packet mangling. + But it is the easiest for now, especially taking + into account that combination of IP options + and running sniffer is extremely rare condition. 
+ --ANK (980813) + */ + if (skb_cow(skb, skb_headroom(skb))) { + IP_INC_STATS_BH(IPSTATS_MIB_INDISCARDS); + goto drop; + } + + iph = skb->nh.iph; + + if (ip_options_compile(NULL, skb)) { + IP_INC_STATS_BH(IPSTATS_MIB_INHDRERRORS); + goto drop; + } + + opt = &(IPCB(skb)->opt); + if (unlikely(opt->srr)) { + struct in_device *in_dev = in_dev_get(dev); + if (in_dev) { + if (!IN_DEV_SOURCE_ROUTE(in_dev)) { + if (IN_DEV_LOG_MARTIANS(in_dev) && + net_ratelimit()) + printk(KERN_INFO "source route option " + "%u.%u.%u.%u -> %u.%u.%u.%u\n", + NIPQUAD(iph->saddr), + NIPQUAD(iph->daddr)); + in_dev_put(in_dev); + goto drop; + } + + in_dev_put(in_dev); + } + + if (ip_options_rcv_srr(skb)) + goto drop; + } + + return 0; +drop: + return -1; +} + static inline int ip_rcv_finish(struct sk_buff *skb) { struct net_device *dev = skb->dev; @@ -308,48 +360,11 @@ static inline int ip_rcv_finish(struct sk_buff *skb) } #endif - if (iph->ihl > 5) { - struct ip_options *opt; - - /* It looks as overkill, because not all - IP options require packet mangling. - But it is the easiest for now, especially taking - into account that combination of IP options - and running sniffer is extremely rare condition. 
- --ANK (980813) - */ - - if (skb_cow(skb, skb_headroom(skb))) { - IP_INC_STATS_BH(IPSTATS_MIB_INDISCARDS); - goto drop; - } - iph = skb->nh.iph; - - if (ip_options_compile(NULL, skb)) - goto inhdr_error; - - opt = &(IPCB(skb)->opt); - if (opt->srr) { - struct in_device *in_dev = in_dev_get(dev); - if (in_dev) { - if (!IN_DEV_SOURCE_ROUTE(in_dev)) { - if (IN_DEV_LOG_MARTIANS(in_dev) && net_ratelimit()) - printk(KERN_INFO "source route option %u.%u.%u.%u -> %u.%u.%u.%u\n", - NIPQUAD(iph->saddr), NIPQUAD(iph->daddr)); - in_dev_put(in_dev); - goto drop; - } - in_dev_put(in_dev); - } - if (ip_options_rcv_srr(skb)) - goto drop; - } - } + if (iph->ihl > 5 && ip_rcv_options(skb)) + goto drop; return dst_input(skb); -inhdr_error: - IP_INC_STATS_BH(IPSTATS_MIB_INHDRERRORS); drop: kfree_skb(skb); return NET_RX_DROP; From 3e192beaf5ef260a31e84a12c0a04eff2eec02ab Mon Sep 17 00:00:00 2001 From: Thomas Graf Date: Sat, 20 Aug 2005 17:26:30 -0700 Subject: [PATCH 436/584] [IPV4]: Avoid common branch mispredictions in ip_rcv_finish() Signed-off-by: Thomas Graf Signed-off-by: David S. Miller --- net/ipv4/ip_input.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c index 48e4ddc1e337..7e78095baef1 100644 --- a/net/ipv4/ip_input.c +++ b/net/ipv4/ip_input.c @@ -333,16 +333,16 @@ drop: static inline int ip_rcv_finish(struct sk_buff *skb) { - struct net_device *dev = skb->dev; struct iphdr *iph = skb->nh.iph; - int err; /* * Initialise the virtual path cache for the packet. It describes * how the packet travels inside Linux networking. 
*/ - if (skb->dst == NULL) { - if ((err = ip_route_input(skb, iph->daddr, iph->saddr, iph->tos, dev))) { + if (likely(skb->dst == NULL)) { + int err = ip_route_input(skb, iph->daddr, iph->saddr, iph->tos, + skb->dev); + if (unlikely(err)) { if (err == -EHOSTUNREACH) IP_INC_STATS_BH(IPSTATS_MIB_INADDRERRORS); goto drop; @@ -350,7 +350,7 @@ static inline int ip_rcv_finish(struct sk_buff *skb) } #ifdef CONFIG_NET_CLS_ROUTE - if (skb->dst->tclassid) { + if (unlikely(skb->dst->tclassid)) { struct ip_rt_acct *st = ip_rt_acct + 256*smp_processor_id(); u32 idx = skb->dst->tclassid; st[idx&0xFF].o_packets++; From 9070683bdac59a3b26e2ce6dd0d05fbfcb3fc7d8 Mon Sep 17 00:00:00 2001 From: Thomas Graf Date: Sat, 20 Aug 2005 17:27:09 -0700 Subject: [PATCH 437/584] [IPV4]: Remove some dead code from ip_forward() Signed-off-by: Thomas Graf Signed-off-by: David S. Miller --- net/ipv4/ip_forward.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/net/ipv4/ip_forward.c b/net/ipv4/ip_forward.c index 77094aac6c28..0923add122b4 100644 --- a/net/ipv4/ip_forward.c +++ b/net/ipv4/ip_forward.c @@ -76,16 +76,12 @@ int ip_forward(struct sk_buff *skb) * that reaches zero, we must reply an ICMP control message telling * that the packet's lifetime expired. */ - - iph = skb->nh.iph; - - if (iph->ttl <= 1) + if (skb->nh.iph->ttl <= 1) goto too_many_hops; if (!xfrm4_route_forward(skb)) goto drop; - iph = skb->nh.iph; rt = (struct rtable*)skb->dst; if (opt->is_strictroute && rt->rt_dst != rt->rt_gateway) From 33d043d65bbd3d97efca96c9bbada443cac3c4da Mon Sep 17 00:00:00 2001 From: Thomas Graf Date: Sat, 20 Aug 2005 17:27:34 -0700 Subject: [PATCH 438/584] [IPV4]: ip_finish_output() can be inlined Signed-off-by: Thomas Graf Signed-off-by: David S. 
Miller --- net/ipv4/ip_output.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 19f24f778dc8..3f1a263e1249 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -200,7 +200,7 @@ static inline int ip_finish_output2(struct sk_buff *skb) return -EINVAL; } -static int ip_finish_output(struct sk_buff *skb) +static inline int ip_finish_output(struct sk_buff *skb) { struct net_device *dev = skb->dst->dev; From c68e64cfb5ac675b002215b5659146b73d2e9d5d Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Sun, 21 Aug 2005 05:07:37 -0300 Subject: [PATCH 439/584] [CCID3]: Reintroduce ccid3hctx_t_rto CCID3 keeps this variable in usecs, inet_connection_socks in jiffies, so to avoid Mars orbiter losses lets reintroduce ccid3hctx_t_rto 8) Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. Miller --- net/dccp/ccids/ccid3.c | 10 +++++----- net/dccp/ccids/ccid3.h | 1 + 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c index 694149061b8b..ffd5b449f5f1 100644 --- a/net/dccp/ccids/ccid3.c +++ b/net/dccp/ccids/ccid3.c @@ -905,7 +905,7 @@ static void ccid3_hc_tx_no_feedback_timer(unsigned long data) hctx->ccid3hctx_x = 10; } /* Schedule no feedback timer to expire in max(4 * R, 2 * s / X) */ - next_tmout = max_t(u32, inet_csk(sk)->icsk_rto, + next_tmout = max_t(u32, hctx->ccid3hctx_t_rto, 2 * (hctx->ccid3hctx_s * 100000) / (hctx->ccid3hctx_x / 10)); break; default: @@ -1180,8 +1180,8 @@ static void ccid3_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb) r_sample); /* Update timeout interval */ - inet_csk(sk)->icsk_rto = max_t(u32, 4 * hctx->ccid3hctx_rtt, - USEC_PER_SEC); + hctx->ccid3hctx_t_rto = max_t(u32, 4 * hctx->ccid3hctx_rtt, + USEC_PER_SEC); /* Update receive rate */ hctx->ccid3hctx_x_recv = x_recv; /* x_recv in bytes per second */ @@ -1227,7 +1227,7 @@ static void ccid3_hc_tx_packet_recv(struct sock *sk, struct 
sk_buff *skb) /* to prevent divide by zero below */ /* Schedule no feedback timer to expire in max(4 * R, 2 * s / X) */ - next_tmout = max(inet_csk(sk)->icsk_rto, + next_tmout = max(hctx->ccid3hctx_t_rto, (2 * (hctx->ccid3hctx_s * 100000) / (hctx->ccid3hctx_x / 10))); /* maths with 100000 and 10 is to prevent overflow with 32 bit */ @@ -1340,7 +1340,7 @@ static int ccid3_hc_tx_init(struct sock *sk) hctx->ccid3hctx_x = hctx->ccid3hctx_s; /* set transmission rate to 1 packet per second */ hctx->ccid3hctx_rtt = 4; /* See ccid3_hc_tx_packet_sent win_count calculatation */ - inet_csk(sk)->icsk_rto = USEC_PER_SEC; + hctx->ccid3hctx_t_rto = USEC_PER_SEC; hctx->ccid3hctx_state = TFRC_SSTATE_NO_SENT; INIT_LIST_HEAD(&hctx->ccid3hctx_hist); init_timer(&hctx->ccid3hctx_no_feedback_timer); diff --git a/net/dccp/ccids/ccid3.h b/net/dccp/ccids/ccid3.h index d2705fb74195..5ef72cda7cd7 100644 --- a/net/dccp/ccids/ccid3.h +++ b/net/dccp/ccids/ccid3.h @@ -80,6 +80,7 @@ struct ccid3_hc_tx_sock { struct timer_list ccid3hctx_no_feedback_timer; struct timeval ccid3hctx_t_ld; struct timeval ccid3hctx_t_nom; + u32 ccid3hctx_t_rto; u32 ccid3hctx_t_ipi; u32 ccid3hctx_delta; struct list_head ccid3hctx_hist; From 2807d4ffb0dccb8f932c3e1701b6b6163153d333 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Sun, 21 Aug 2005 05:33:48 -0300 Subject: [PATCH 440/584] [DCCP]: Fix seqno setting in dccp_v4_ctl_send_reset Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. Miller --- net/dccp/ipv4.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index cc5d60d9afa4..02ebf1f39f38 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -887,6 +887,7 @@ static void dccp_v4_ctl_send_reset(struct sk_buff *rxskb) sizeof(struct dccp_hdr_reset); struct sk_buff *skb; struct dst_entry *dst; + u64 seqno; /* Never send a reset in response to a reset. 
*/ if (rxdh->dccph_type == DCCP_PKT_RESET) @@ -920,7 +921,12 @@ static void dccp_v4_ctl_send_reset(struct sk_buff *rxskb) dccp_hdr_reset(skb)->dccph_reset_code = DCCP_SKB_CB(rxskb)->dccpd_reset_code; - dccp_hdr_set_seq(dh, DCCP_SKB_CB(rxskb)->dccpd_ack_seq); + /* See "8.3.1. Abnormal Termination" in draft-ietf-dccp-spec-11 */ + seqno = 0; + if (DCCP_SKB_CB(rxskb)->dccpd_ack_seq != DCCP_PKT_WITHOUT_ACK_SEQ) + dccp_set_seqno(&seqno, DCCP_SKB_CB(rxskb)->dccpd_ack_seq + 1); + + dccp_hdr_set_seq(dh, seqno); dccp_hdr_set_ack(dccp_hdr_ack_bits(skb), DCCP_SKB_CB(rxskb)->dccpd_seq); From a3054d48b9b9d6290eccc9fc09c286ef450d9b1d Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Sun, 21 Aug 2005 05:35:18 -0300 Subject: [PATCH 441/584] [DCCP]: Give more info on Step 6 failure debug printk Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. Miller --- net/dccp/input.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/net/dccp/input.c b/net/dccp/input.c index 3c4cbff82e95..ce8396b126d2 100644 --- a/net/dccp/input.c +++ b/net/dccp/input.c @@ -136,8 +136,15 @@ static int dccp_check_seqno(struct sock *sk, struct sk_buff *skb) DCCP_PKT_WITHOUT_ACK_SEQ)) dp->dccps_gar = DCCP_SKB_CB(skb)->dccpd_ack_seq; } else { - LIMIT_NETDEBUG(KERN_WARNING "DCCP: Step 6 failed, " - "sending SYNC...\n"); + LIMIT_NETDEBUG(KERN_WARNING "DCCP: Step 6 failed for %s packet, " + "(LSWL(%llu) <= P.seqno(%llu) <= S.SWH(%llu)) and " + "(P.ackno %s or LAWL(%llu) <= P.ackno(%llu) <= S.AWH(%llu), " + "sending SYNC...\n", + dccp_packet_name(dh->dccph_type), + lswl, DCCP_SKB_CB(skb)->dccpd_seq, dp->dccps_swh, + (DCCP_SKB_CB(skb)->dccpd_ack_seq == + DCCP_PKT_WITHOUT_ACK_SEQ) ? 
"doesn't exist" : "exists", + lawl, DCCP_SKB_CB(skb)->dccpd_ack_seq, dp->dccps_awh); dccp_send_sync(sk, DCCP_SKB_CB(skb)->dccpd_seq, DCCP_PKT_SYNC); return -1; } From 03ace394ac9bcad38043a381ae5f4860b9c9fa1c Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Sun, 21 Aug 2005 05:36:45 -0300 Subject: [PATCH 442/584] [DCCP]: Fix the ACK and SEQ window variables settings This is from a first audit, more eyeballs are more than welcome. Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. Miller --- net/dccp/dccp.h | 12 ++++-------- net/dccp/input.c | 14 +++++++++++++- net/dccp/ipv4.c | 10 ++++++++++ net/dccp/minisocks.c | 13 +++++++++++++ net/dccp/timer.c | 2 +- 5 files changed, 41 insertions(+), 10 deletions(-) diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h index aab72b8d0703..33968a9422f2 100644 --- a/net/dccp/dccp.h +++ b/net/dccp/dccp.h @@ -340,13 +340,11 @@ static inline void dccp_hdr_set_ack(struct dccp_hdr_ack_bits *dhack, static inline void dccp_update_gsr(struct sock *sk, u64 seq) { struct dccp_sock *dp = dccp_sk(sk); - u64 tmp_gsr; - dccp_set_seqno(&tmp_gsr, + dp->dccps_gsr = seq; + dccp_set_seqno(&dp->dccps_swl, (dp->dccps_gsr + 1 - (dp->dccps_options.dccpo_sequence_window / 4))); - dp->dccps_gsr = seq; - dccp_set_seqno(&dp->dccps_swl, max48(tmp_gsr, dp->dccps_isr)); dccp_set_seqno(&dp->dccps_swh, (dp->dccps_gsr + (3 * dp->dccps_options.dccpo_sequence_window) / 4)); @@ -355,13 +353,11 @@ static inline void dccp_update_gsr(struct sock *sk, u64 seq) static inline void dccp_update_gss(struct sock *sk, u64 seq) { struct dccp_sock *dp = dccp_sk(sk); - u64 tmp_gss; - dccp_set_seqno(&tmp_gss, + dp->dccps_awh = dp->dccps_gss = seq; + dccp_set_seqno(&dp->dccps_awl, (dp->dccps_gss - dp->dccps_options.dccpo_sequence_window + 1)); - dp->dccps_awl = max48(tmp_gss, dp->dccps_iss); - dp->dccps_awh = dp->dccps_gss = seq; } extern void dccp_insert_options(struct sock *sk, struct sk_buff *skb); diff --git a/net/dccp/input.c b/net/dccp/input.c index 
ce8396b126d2..5847cf454e26 100644 --- a/net/dccp/input.c +++ b/net/dccp/input.c @@ -314,7 +314,19 @@ static int dccp_rcv_request_sent_state_process(struct sock *sk, } dp->dccps_isr = DCCP_SKB_CB(skb)->dccpd_seq; - dccp_update_gsr(sk, DCCP_SKB_CB(skb)->dccpd_seq); + dccp_update_gsr(sk, dp->dccps_isr); + /* + * SWL and AWL are initially adjusted so that they are not less than + * the initial Sequence Numbers received and sent, respectively: + * SWL := max(GSR + 1 - floor(W/4), ISR), + * AWL := max(GSS - W' + 1, ISS). + * These adjustments MUST be applied only at the beginning of the + * connection. + * + * AWL was adjusted in dccp_v4_connect -acme + */ + dccp_set_seqno(&dp->dccps_swl, + max48(dp->dccps_swl, dp->dccps_isr)); if (ccid_hc_rx_init(dp->dccps_hc_rx_ccid, sk) != 0 || ccid_hc_tx_init(dp->dccps_hc_tx_ccid, sk) != 0) { diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index 02ebf1f39f38..647e669a1266 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -309,6 +309,16 @@ static int dccp_v4_connect(struct sock *sk, struct sockaddr *uaddr, usin->sin_port); dccp_update_gss(sk, dp->dccps_iss); + /* + * SWL and AWL are initially adjusted so that they are not less than + * the initial Sequence Numbers received and sent, respectively: + * SWL := max(GSR + 1 - floor(W/4), ISR), + * AWL := max(GSS - W' + 1, ISS). + * These adjustments MUST be applied only at the beginning of the + * connection. 
+ */ + dccp_set_seqno(&dp->dccps_awl, max48(dp->dccps_awl, dp->dccps_iss)); + inet->id = dp->dccps_iss ^ jiffies; err = dccp_connect(sk); diff --git a/net/dccp/minisocks.c b/net/dccp/minisocks.c index b8e67207e97e..ce5dff4ac22e 100644 --- a/net/dccp/minisocks.c +++ b/net/dccp/minisocks.c @@ -146,6 +146,19 @@ out_free: newdp->dccps_iss = dreq->dreq_iss; dccp_update_gss(newsk, dreq->dreq_iss); + /* + * SWL and AWL are initially adjusted so that they are not less than + * the initial Sequence Numbers received and sent, respectively: + * SWL := max(GSR + 1 - floor(W/4), ISR), + * AWL := max(GSS - W' + 1, ISS). + * These adjustments MUST be applied only at the beginning of the + * connection. + */ + dccp_set_seqno(&newdp->dccps_swl, + max48(newdp->dccps_swl, newdp->dccps_isr)); + dccp_set_seqno(&newdp->dccps_awl, + max48(newdp->dccps_awl, newdp->dccps_iss)); + dccp_init_xmit_timers(newsk); DCCP_INC_STATS_BH(DCCP_MIB_PASSIVEOPENS); diff --git a/net/dccp/timer.c b/net/dccp/timer.c index 47b1616e6189..aa34b576e228 100644 --- a/net/dccp/timer.c +++ b/net/dccp/timer.c @@ -144,7 +144,7 @@ static void dccp_retransmit_timer(struct sock *sk) /* * sk->sk_send_head has to have one skb with * DCCP_SKB_CB(skb)->dccpd_type set to one of the retransmittable DCCP - * packet types (REQUEST, RESPONSE, the ACK in the 3way hanshake + * packet types (REQUEST, RESPONSE, the ACK in the 3way handshake * (PARTOPEN timer), etc). */ BUG_TRAP(sk->sk_send_head != NULL); From 24117727b753426d85ba09671c24854834f81b2c Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Sun, 21 Aug 2005 05:40:16 -0300 Subject: [PATCH 443/584] [DCCP]: Fix ackno setting in SYNC/SYNCACK packets Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. 
Miller --- net/dccp/output.c | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/net/dccp/output.c b/net/dccp/output.c index 384fd0920983..708fc3c0a969 100644 --- a/net/dccp/output.c +++ b/net/dccp/output.c @@ -44,15 +44,8 @@ int dccp_transmit_skb(struct sock *sk, struct sk_buff *skb) int err, set_ack = 1; u64 ackno = dp->dccps_gsr; - /* - * FIXME: study DCCP_PKT_SYNC[ACK] to see what is the right - * thing to do here... - */ dccp_inc_seqno(&dp->dccps_gss); - dcb->dccpd_seq = dp->dccps_gss; - dccp_insert_options(sk, skb); - switch (dcb->dccpd_type) { case DCCP_PKT_DATA: set_ack = 0; @@ -62,6 +55,9 @@ int dccp_transmit_skb(struct sock *sk, struct sk_buff *skb) ackno = dcb->dccpd_seq; break; } + + dcb->dccpd_seq = dp->dccps_gss; + dccp_insert_options(sk, skb); skb->h.raw = skb_push(skb, dccp_header_size); dh = dccp_hdr(skb); From 7567662ba896ee0c33d6215f32e2011488a6d1bf Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Sun, 21 Aug 2005 23:30:34 -0700 Subject: [PATCH 444/584] [NETFILTER]: Add string match Signed-off-by: Pablo Neira Ayuso Signed-off-by: Patrick McHardy Signed-off-by: David S. 
Miller --- include/linux/netfilter_ipv4/ipt_string.h | 18 +++++ net/ipv4/netfilter/Kconfig | 12 +++ net/ipv4/netfilter/Makefile | 1 + net/ipv4/netfilter/ipt_string.c | 91 +++++++++++++++++++++++ 4 files changed, 122 insertions(+) create mode 100644 include/linux/netfilter_ipv4/ipt_string.h create mode 100644 net/ipv4/netfilter/ipt_string.c diff --git a/include/linux/netfilter_ipv4/ipt_string.h b/include/linux/netfilter_ipv4/ipt_string.h new file mode 100644 index 000000000000..a265f6e44eab --- /dev/null +++ b/include/linux/netfilter_ipv4/ipt_string.h @@ -0,0 +1,18 @@ +#ifndef _IPT_STRING_H +#define _IPT_STRING_H + +#define IPT_STRING_MAX_PATTERN_SIZE 128 +#define IPT_STRING_MAX_ALGO_NAME_SIZE 16 + +struct ipt_string_info +{ + u_int16_t from_offset; + u_int16_t to_offset; + char algo[IPT_STRING_MAX_ALGO_NAME_SIZE]; + char pattern[IPT_STRING_MAX_PATTERN_SIZE]; + u_int8_t patlen; + u_int8_t invert; + struct ts_config __attribute__((aligned(8))) *config; +}; + +#endif /*_IPT_STRING_H*/ diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig index 3f7e6e49cbdd..f2bea6ecb226 100644 --- a/net/ipv4/netfilter/Kconfig +++ b/net/ipv4/netfilter/Kconfig @@ -410,6 +410,18 @@ config IP_NF_MATCH_HASHLIMIT destination IP' or `500pps from any given source IP' with a single IPtables rule. +config IP_NF_MATCH_STRING + tristate 'string match support' + depends on IP_NF_IPTABLES + select TEXTSEARCH + select TEXTSEARCH_KMP + select TEXTSEARCH_FSM + help + This option adds a `string' match, which allows you to look for + pattern matchings in packets. + + To compile it as a module, choose M here. If unsure, say N. 
+ # `filter', generic and specific targets config IP_NF_FILTER tristate "Packet filtering" diff --git a/net/ipv4/netfilter/Makefile b/net/ipv4/netfilter/Makefile index 7c8ae858aa43..89cae69ee208 100644 --- a/net/ipv4/netfilter/Makefile +++ b/net/ipv4/netfilter/Makefile @@ -65,6 +65,7 @@ obj-$(CONFIG_IP_NF_MATCH_REALM) += ipt_realm.o obj-$(CONFIG_IP_NF_MATCH_ADDRTYPE) += ipt_addrtype.o obj-$(CONFIG_IP_NF_MATCH_PHYSDEV) += ipt_physdev.o obj-$(CONFIG_IP_NF_MATCH_COMMENT) += ipt_comment.o +obj-$(CONFIG_IP_NF_MATCH_STRING) += ipt_string.o # targets obj-$(CONFIG_IP_NF_TARGET_REJECT) += ipt_REJECT.o diff --git a/net/ipv4/netfilter/ipt_string.c b/net/ipv4/netfilter/ipt_string.c new file mode 100644 index 000000000000..b5def204d798 --- /dev/null +++ b/net/ipv4/netfilter/ipt_string.c @@ -0,0 +1,91 @@ +/* String matching match for iptables + * + * (C) 2005 Pablo Neira Ayuso + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ */ + +#include +#include +#include +#include +#include +#include +#include + +MODULE_AUTHOR("Pablo Neira Ayuso "); +MODULE_DESCRIPTION("IP tables string match module"); +MODULE_LICENSE("GPL"); + +static int match(const struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + const void *matchinfo, + int offset, + int *hotdrop) +{ + struct ts_state state; + struct ipt_string_info *conf = (struct ipt_string_info *) matchinfo; + + memset(&state, 0, sizeof(struct ts_state)); + + return (skb_find_text((struct sk_buff *)skb, conf->from_offset, + conf->to_offset, conf->config, &state) + != UINT_MAX) && !conf->invert; +} + +#define STRING_TEXT_PRIV(m) ((struct ipt_string_info *) m) + +static int checkentry(const char *tablename, + const struct ipt_ip *ip, + void *matchinfo, + unsigned int matchsize, + unsigned int hook_mask) +{ + struct ipt_string_info *conf = matchinfo; + struct ts_config *ts_conf; + + if (matchsize != IPT_ALIGN(sizeof(struct ipt_string_info))) + return 0; + + /* Damn, can't handle this case properly with iptables... 
*/ + if (conf->from_offset > conf->to_offset) + return 0; + + ts_conf = textsearch_prepare(conf->algo, conf->pattern, conf->patlen, + GFP_KERNEL, TS_AUTOLOAD); + if (IS_ERR(ts_conf)) + return 0; + + conf->config = ts_conf; + + return 1; +} + +static void destroy(void *matchinfo, unsigned int matchsize) +{ + textsearch_destroy(STRING_TEXT_PRIV(matchinfo)->config); +} + +static struct ipt_match string_match = { + .name = "string", + .match = match, + .checkentry = checkentry, + .destroy = destroy, + .me = THIS_MODULE +}; + +static int __init init(void) +{ + return ipt_register_match(&string_match); +} + +static void __exit fini(void) +{ + ipt_unregister_match(&string_match); +} + +module_init(init); +module_exit(fini); From 764d8a9f240729534a1d8a0ffd39e722cf5cc5af Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Sun, 21 Aug 2005 23:31:06 -0700 Subject: [PATCH 445/584] [NETFILTER]: Add IPv6 REJECT target Originally written by Yasuyuki Kozakai , taken from netfilter patch-o-matic and fixed up to work with current kernels. Signed-off-by: Patrick McHardy Signed-off-by: David S. 
Miller --- include/linux/netfilter_ipv6/ip6t_REJECT.h | 18 ++ net/ipv6/netfilter/Kconfig | 10 + net/ipv6/netfilter/Makefile | 1 + net/ipv6/netfilter/ip6t_REJECT.c | 284 +++++++++++++++++++++ 4 files changed, 313 insertions(+) create mode 100644 include/linux/netfilter_ipv6/ip6t_REJECT.h create mode 100644 net/ipv6/netfilter/ip6t_REJECT.c diff --git a/include/linux/netfilter_ipv6/ip6t_REJECT.h b/include/linux/netfilter_ipv6/ip6t_REJECT.h new file mode 100644 index 000000000000..6be6504162bb --- /dev/null +++ b/include/linux/netfilter_ipv6/ip6t_REJECT.h @@ -0,0 +1,18 @@ +#ifndef _IP6T_REJECT_H +#define _IP6T_REJECT_H + +enum ip6t_reject_with { + IP6T_ICMP6_NO_ROUTE, + IP6T_ICMP6_ADM_PROHIBITED, + IP6T_ICMP6_NOT_NEIGHBOUR, + IP6T_ICMP6_ADDR_UNREACH, + IP6T_ICMP6_PORT_UNREACH, + IP6T_ICMP6_ECHOREPLY, + IP6T_TCP_RESET +}; + +struct ip6t_reject_info { + u_int32_t with; /* reject type */ +}; + +#endif /*_IP6T_REJECT_H*/ diff --git a/net/ipv6/netfilter/Kconfig b/net/ipv6/netfilter/Kconfig index cd1551983c63..8a10c2d0d154 100644 --- a/net/ipv6/netfilter/Kconfig +++ b/net/ipv6/netfilter/Kconfig @@ -199,6 +199,16 @@ config IP6_NF_TARGET_LOG To compile it as a module, choose M here. If unsure, say N. +config IP6_NF_TARGET_REJECT + tristate "REJECT target support" + depends on IP6_NF_FILTER + help + The REJECT target allows a filtering rule to specify that an ICMPv6 + error should be issued in response to an incoming packet, rather + than silently being dropped. + + To compile it as a module, choose M here. If unsure, say N. 
+ # if [ "$CONFIG_IP6_NF_FILTER" != "n" ]; then # dep_tristate ' REJECT target support' CONFIG_IP6_NF_TARGET_REJECT $CONFIG_IP6_NF_FILTER # if [ "$CONFIG_EXPERIMENTAL" = "y" ]; then diff --git a/net/ipv6/netfilter/Makefile b/net/ipv6/netfilter/Makefile index 847651dbcd2a..70f6ba610102 100644 --- a/net/ipv6/netfilter/Makefile +++ b/net/ipv6/netfilter/Makefile @@ -24,4 +24,5 @@ obj-$(CONFIG_IP6_NF_QUEUE) += ip6_queue.o obj-$(CONFIG_IP6_NF_TARGET_LOG) += ip6t_LOG.o obj-$(CONFIG_IP6_NF_RAW) += ip6table_raw.o obj-$(CONFIG_IP6_NF_MATCH_HL) += ip6t_hl.o +obj-$(CONFIG_IP6_NF_TARGET_REJECT) += ip6t_REJECT.o obj-$(CONFIG_NETFILTER_NETLINK_QUEUE) += ip6t_NFQUEUE.o diff --git a/net/ipv6/netfilter/ip6t_REJECT.c b/net/ipv6/netfilter/ip6t_REJECT.c new file mode 100644 index 000000000000..14316c3ebde4 --- /dev/null +++ b/net/ipv6/netfilter/ip6t_REJECT.c @@ -0,0 +1,284 @@ +/* + * IP6 tables REJECT target module + * Linux INET6 implementation + * + * Copyright (C)2003 USAGI/WIDE Project + * + * Authors: + * Yasuyuki Kozakai + * + * Based on net/ipv4/netfilter/ipt_REJECT.c + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +MODULE_AUTHOR("Yasuyuki KOZAKAI "); +MODULE_DESCRIPTION("IP6 tables REJECT target module"); +MODULE_LICENSE("GPL"); + +#if 0 +#define DEBUGP printk +#else +#define DEBUGP(format, args...) 
+#endif + +/* Send RST reply */ +static void send_reset(struct sk_buff *oldskb) +{ + struct sk_buff *nskb; + struct tcphdr otcph, *tcph; + unsigned int otcplen, hh_len; + int tcphoff, needs_ack; + struct ipv6hdr *oip6h = oldskb->nh.ipv6h, *ip6h; + struct dst_entry *dst = NULL; + u8 proto; + struct flowi fl; + + if ((!(ipv6_addr_type(&oip6h->saddr) & IPV6_ADDR_UNICAST)) || + (!(ipv6_addr_type(&oip6h->daddr) & IPV6_ADDR_UNICAST))) { + DEBUGP("ip6t_REJECT: addr is not unicast.\n"); + return; + } + + proto = oip6h->nexthdr; + tcphoff = ipv6_skip_exthdr(oldskb, ((u8*)(oip6h+1) - oldskb->data), &proto); + + if ((tcphoff < 0) || (tcphoff > oldskb->len)) { + DEBUGP("ip6t_REJECT: Can't get TCP header.\n"); + return; + } + + otcplen = oldskb->len - tcphoff; + + /* IP header checks: fragment, too short. */ + if ((proto != IPPROTO_TCP) || (otcplen < sizeof(struct tcphdr))) { + DEBUGP("ip6t_REJECT: proto(%d) != IPPROTO_TCP, or too short. otcplen = %d\n", + proto, otcplen); + return; + } + + if (skb_copy_bits(oldskb, tcphoff, &otcph, sizeof(struct tcphdr))) + BUG(); + + /* No RST for RST. */ + if (otcph.rst) { + DEBUGP("ip6t_REJECT: RST is set\n"); + return; + } + + /* Check checksum. 
*/ + if (csum_ipv6_magic(&oip6h->saddr, &oip6h->daddr, otcplen, IPPROTO_TCP, + skb_checksum(oldskb, tcphoff, otcplen, 0))) { + DEBUGP("ip6t_REJECT: TCP checksum is invalid\n"); + return; + } + + memset(&fl, 0, sizeof(fl)); + fl.proto = IPPROTO_TCP; + ipv6_addr_copy(&fl.fl6_src, &oip6h->daddr); + ipv6_addr_copy(&fl.fl6_dst, &oip6h->saddr); + fl.fl_ip_sport = otcph.dest; + fl.fl_ip_dport = otcph.source; + dst = ip6_route_output(NULL, &fl); + if (dst == NULL) + return; + if (dst->error || + xfrm_lookup(&dst, &fl, NULL, 0)) { + dst_release(dst); + return; + } + + hh_len = (dst->dev->hard_header_len + 15)&~15; + nskb = alloc_skb(hh_len + 15 + dst->header_len + sizeof(struct ipv6hdr) + + sizeof(struct tcphdr) + dst->trailer_len, + GFP_ATOMIC); + + if (!nskb) { + if (net_ratelimit()) + printk("ip6t_REJECT: Can't alloc skb\n"); + dst_release(dst); + return; + } + + nskb->dst = dst; + + skb_reserve(nskb, hh_len + dst->header_len); + + ip6h = nskb->nh.ipv6h = (struct ipv6hdr *) + skb_put(nskb, sizeof(struct ipv6hdr)); + ip6h->version = 6; + ip6h->hop_limit = dst_metric(dst, RTAX_HOPLIMIT); + ip6h->nexthdr = IPPROTO_TCP; + ip6h->payload_len = htons(sizeof(struct tcphdr)); + ipv6_addr_copy(&ip6h->saddr, &oip6h->daddr); + ipv6_addr_copy(&ip6h->daddr, &oip6h->saddr); + + tcph = (struct tcphdr *)skb_put(nskb, sizeof(struct tcphdr)); + /* Truncate to length (no data) */ + tcph->doff = sizeof(struct tcphdr)/4; + tcph->source = otcph.dest; + tcph->dest = otcph.source; + + if (otcph.ack) { + needs_ack = 0; + tcph->seq = otcph.ack_seq; + tcph->ack_seq = 0; + } else { + needs_ack = 1; + tcph->ack_seq = htonl(ntohl(otcph.seq) + otcph.syn + otcph.fin + + otcplen - (otcph.doff<<2)); + tcph->seq = 0; + } + + /* Reset flags */ + ((u_int8_t *)tcph)[13] = 0; + tcph->rst = 1; + tcph->ack = needs_ack; + tcph->window = 0; + tcph->urg_ptr = 0; + tcph->check = 0; + + /* Adjust TCP checksum */ + tcph->check = csum_ipv6_magic(&nskb->nh.ipv6h->saddr, + &nskb->nh.ipv6h->daddr, + sizeof(struct tcphdr), 
IPPROTO_TCP, + csum_partial((char *)tcph, + sizeof(struct tcphdr), 0)); + + NF_HOOK(PF_INET6, NF_IP6_LOCAL_OUT, nskb, NULL, nskb->dst->dev, + dst_output); +} + +static inline void +send_unreach(struct sk_buff *skb_in, unsigned char code, unsigned int hooknum) +{ + if (hooknum == NF_IP6_LOCAL_OUT && skb_in->dev == NULL) + skb_in->dev = &loopback_dev; + + icmpv6_send(skb_in, ICMPV6_DEST_UNREACH, code, 0, NULL); +} + +static unsigned int reject6_target(struct sk_buff **pskb, + const struct net_device *in, + const struct net_device *out, + unsigned int hooknum, + const void *targinfo, + void *userinfo) +{ + const struct ip6t_reject_info *reject = targinfo; + + DEBUGP(KERN_DEBUG "%s: medium point\n", __FUNCTION__); + /* WARNING: This code causes reentry within ip6tables. + This means that the ip6tables jump stack is now crap. We + must return an absolute verdict. --RR */ + switch (reject->with) { + case IP6T_ICMP6_NO_ROUTE: + send_unreach(*pskb, ICMPV6_NOROUTE, hooknum); + break; + case IP6T_ICMP6_ADM_PROHIBITED: + send_unreach(*pskb, ICMPV6_ADM_PROHIBITED, hooknum); + break; + case IP6T_ICMP6_NOT_NEIGHBOUR: + send_unreach(*pskb, ICMPV6_NOT_NEIGHBOUR, hooknum); + break; + case IP6T_ICMP6_ADDR_UNREACH: + send_unreach(*pskb, ICMPV6_ADDR_UNREACH, hooknum); + break; + case IP6T_ICMP6_PORT_UNREACH: + send_unreach(*pskb, ICMPV6_PORT_UNREACH, hooknum); + break; + case IP6T_ICMP6_ECHOREPLY: + /* Do nothing */ + break; + case IP6T_TCP_RESET: + send_reset(*pskb); + break; + default: + if (net_ratelimit()) + printk(KERN_WARNING "ip6t_REJECT: case %u not handled yet\n", reject->with); + break; + } + + return NF_DROP; +} + +static int check(const char *tablename, + const struct ip6t_entry *e, + void *targinfo, + unsigned int targinfosize, + unsigned int hook_mask) +{ + const struct ip6t_reject_info *rejinfo = targinfo; + + if (targinfosize != IP6T_ALIGN(sizeof(struct ip6t_reject_info))) { + DEBUGP("ip6t_REJECT: targinfosize %u != 0\n", targinfosize); + return 0; + } + + /* Only 
allow these for packet filtering. */ + if (strcmp(tablename, "filter") != 0) { + DEBUGP("ip6t_REJECT: bad table `%s'.\n", tablename); + return 0; + } + + if ((hook_mask & ~((1 << NF_IP6_LOCAL_IN) + | (1 << NF_IP6_FORWARD) + | (1 << NF_IP6_LOCAL_OUT))) != 0) { + DEBUGP("ip6t_REJECT: bad hook mask %X\n", hook_mask); + return 0; + } + + if (rejinfo->with == IP6T_ICMP6_ECHOREPLY) { + printk("ip6t_REJECT: ECHOREPLY is not supported.\n"); + return 0; + } else if (rejinfo->with == IP6T_TCP_RESET) { + /* Must specify that it's a TCP packet */ + if (e->ipv6.proto != IPPROTO_TCP + || (e->ipv6.invflags & IP6T_INV_PROTO)) { + DEBUGP("ip6t_REJECT: TCP_RESET illegal for non-tcp\n"); + return 0; + } + } + + return 1; +} + +static struct ip6t_target ip6t_reject_reg = { + .name = "REJECT", + .target = reject6_target, + .checkentry = check, + .me = THIS_MODULE +}; + +static int __init init(void) +{ + if (ip6t_register_target(&ip6t_reject_reg)) + return -EINVAL; + return 0; +} + +static void __exit fini(void) +{ + ip6t_unregister_target(&ip6t_reject_reg); +} + +module_init(init); +module_exit(fini); From 05465343bf74e00c8c2c5a310740157de3149f27 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Sun, 21 Aug 2005 23:31:43 -0700 Subject: [PATCH 446/584] [NETFILTER]: Add goto target Originally written by Henrik Nordstrom , taken from netfilter patch-o-matic and added ip6_tables support. Signed-off-by: Patrick McHardy Signed-off-by: David S. 
Miller --- include/linux/netfilter_ipv4/ip_tables.h | 3 ++- include/linux/netfilter_ipv6/ip6_tables.h | 3 ++- net/ipv4/netfilter/ip_tables.c | 4 ++-- net/ipv6/netfilter/ip6_tables.c | 4 ++-- 4 files changed, 8 insertions(+), 6 deletions(-) diff --git a/include/linux/netfilter_ipv4/ip_tables.h b/include/linux/netfilter_ipv4/ip_tables.h index 12ce47808e7d..d19d65cf4530 100644 --- a/include/linux/netfilter_ipv4/ip_tables.h +++ b/include/linux/netfilter_ipv4/ip_tables.h @@ -109,7 +109,8 @@ struct ipt_counters /* Values for "flag" field in struct ipt_ip (general ip structure). */ #define IPT_F_FRAG 0x01 /* Set if rule is a fragment rule */ -#define IPT_F_MASK 0x01 /* All possible flag bits mask. */ +#define IPT_F_GOTO 0x02 /* Set if jump is a goto */ +#define IPT_F_MASK 0x03 /* All possible flag bits mask. */ /* Values for "inv" field in struct ipt_ip. */ #define IPT_INV_VIA_IN 0x01 /* Invert the sense of IN IFACE. */ diff --git a/include/linux/netfilter_ipv6/ip6_tables.h b/include/linux/netfilter_ipv6/ip6_tables.h index f1ce3b009853..58c72a52dc65 100644 --- a/include/linux/netfilter_ipv6/ip6_tables.h +++ b/include/linux/netfilter_ipv6/ip6_tables.h @@ -111,7 +111,8 @@ struct ip6t_counters #define IP6T_F_PROTO 0x01 /* Set if rule cares about upper protocols */ #define IP6T_F_TOS 0x02 /* Match the TOS. */ -#define IP6T_F_MASK 0x03 /* All possible flag bits mask. */ +#define IP6T_F_GOTO 0x04 /* Set if jump is a goto */ +#define IP6T_F_MASK 0x07 /* All possible flag bits mask. */ /* Values for "inv" field in struct ip6t_ip6. */ #define IP6T_INV_VIA_IN 0x01 /* Invert the sense of IN IFACE. 
*/ diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index ff8d85d2070d..eef99a1b5de6 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -340,8 +340,8 @@ ipt_do_table(struct sk_buff **pskb, back->comefrom); continue; } - if (table_base + v - != (void *)e + e->next_offset) { + if (table_base + v != (void *)e + e->next_offset + && !(e->ip.flags & IPT_F_GOTO)) { /* Save old back ptr in next entry */ struct ipt_entry *next = (void *)e + e->next_offset; diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index 41a67cf6e33a..1cb8adb2787f 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -433,8 +433,8 @@ ip6t_do_table(struct sk_buff **pskb, back->comefrom); continue; } - if (table_base + v - != (void *)e + e->next_offset) { + if (table_base + v != (void *)e + e->next_offset + && !(e->ipv6.flags & IP6T_F_GOTO)) { /* Save old back ptr in next entry */ struct ip6t_entry *next = (void *)e + e->next_offset; From 58e45131dc269eff0983c6d44494f9e687686900 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Sun, 21 Aug 2005 23:46:01 -0700 Subject: [PATCH 447/584] [DCCP]: Fix printf format warnings on 64-bit. Signed-off-by: David S. Miller --- net/dccp/input.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/net/dccp/input.c b/net/dccp/input.c index 5847cf454e26..85402532e4e9 100644 --- a/net/dccp/input.c +++ b/net/dccp/input.c @@ -141,10 +141,16 @@ static int dccp_check_seqno(struct sock *sk, struct sk_buff *skb) "(P.ackno %s or LAWL(%llu) <= P.ackno(%llu) <= S.AWH(%llu), " "sending SYNC...\n", dccp_packet_name(dh->dccph_type), - lswl, DCCP_SKB_CB(skb)->dccpd_seq, dp->dccps_swh, + (unsigned long long) lswl, + (unsigned long long) + DCCP_SKB_CB(skb)->dccpd_seq, + (unsigned long long) dp->dccps_swh, (DCCP_SKB_CB(skb)->dccpd_ack_seq == DCCP_PKT_WITHOUT_ACK_SEQ) ? 
"doesn't exist" : "exists", - lawl, DCCP_SKB_CB(skb)->dccpd_ack_seq, dp->dccps_awh); + (unsigned long long) lawl, + (unsigned long long) + DCCP_SKB_CB(skb)->dccpd_ack_seq, + (unsigned long long) dp->dccps_awh); dccp_send_sync(sk, DCCP_SKB_CB(skb)->dccpd_seq, DCCP_PKT_SYNC); return -1; } From 7ad07e7cf343181002c10c39d3f57a88e4903d4f Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 23 Aug 2005 21:50:06 -0700 Subject: [PATCH 448/584] [DCCP]: Implement the CLOSING timer So that we retransmit CLOSE/CLOSEREQ packets till they elicit an answer or we hit a timeout. Most of the machinery uses TCP approaches, this code has to be polished & audited, but this is better than we had before. Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. Miller --- net/dccp/dccp.h | 2 +- net/dccp/input.c | 26 +++++++++++--------------- net/dccp/output.c | 23 ++++++++++++----------- net/dccp/proto.c | 28 +++++++++++++++++++++++----- 4 files changed, 47 insertions(+), 32 deletions(-) diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h index 33968a9422f2..53994f10ced5 100644 --- a/net/dccp/dccp.h +++ b/net/dccp/dccp.h @@ -255,7 +255,7 @@ extern int dccp_v4_checksum(const struct sk_buff *skb, extern int dccp_v4_send_reset(struct sock *sk, enum dccp_reset_codes code); -extern void dccp_send_close(struct sock *sk); +extern void dccp_send_close(struct sock *sk, const int active); struct dccp_skb_cb { __u8 dccpd_type; diff --git a/net/dccp/input.c b/net/dccp/input.c index 85402532e4e9..02af05ec23a2 100644 --- a/net/dccp/input.c +++ b/net/dccp/input.c @@ -31,14 +31,9 @@ static void dccp_fin(struct sock *sk, struct sk_buff *skb) static void dccp_rcv_close(struct sock *sk, struct sk_buff *skb) { - switch (sk->sk_state) { - case DCCP_PARTOPEN: - case DCCP_OPEN: - dccp_v4_send_reset(sk, DCCP_RESET_CODE_CLOSED); - dccp_fin(sk, skb); - dccp_set_state(sk, DCCP_CLOSED); - break; - } + dccp_v4_send_reset(sk, DCCP_RESET_CODE_CLOSED); + dccp_fin(sk, skb); + dccp_set_state(sk, 
DCCP_CLOSED); } static void dccp_rcv_closereq(struct sock *sk, struct sk_buff *skb) @@ -54,13 +49,8 @@ static void dccp_rcv_closereq(struct sock *sk, struct sk_buff *skb) return; } - switch (sk->sk_state) { - case DCCP_PARTOPEN: - case DCCP_OPEN: - dccp_set_state(sk, DCCP_CLOSING); - dccp_send_close(sk); - break; - } + dccp_set_state(sk, DCCP_CLOSING); + dccp_send_close(sk, 0); } static inline void dccp_event_ack_recv(struct sock *sk, struct sk_buff *skb) @@ -562,6 +552,12 @@ int dccp_rcv_state_process(struct sock *sk, struct sk_buff *skb, dccp_send_sync(sk, DCCP_SKB_CB(skb)->dccpd_seq, DCCP_PKT_SYNC); goto discard; + } else if (dh->dccph_type == DCCP_PKT_CLOSEREQ) { + dccp_rcv_closereq(sk, skb); + goto discard; + } else if (dh->dccph_type == DCCP_PKT_CLOSE) { + dccp_rcv_close(sk, skb); + return 0; } switch (sk->sk_state) { diff --git a/net/dccp/output.c b/net/dccp/output.c index 708fc3c0a969..630ca7741022 100644 --- a/net/dccp/output.c +++ b/net/dccp/output.c @@ -96,8 +96,7 @@ int dccp_transmit_skb(struct sock *sk, struct sk_buff *skb) dh->dccph_checksum = dccp_v4_checksum(skb, inet->saddr, inet->daddr); - if (dcb->dccpd_type == DCCP_PKT_ACK || - dcb->dccpd_type == DCCP_PKT_DATAACK) + if (set_ack) dccp_event_ack_sent(sk); DCCP_INC_STATS(DCCP_MIB_OUTSEGS); @@ -429,18 +428,15 @@ void dccp_send_sync(struct sock *sk, const u64 seq, * cannot be allowed to fail queueing a DCCP_PKT_CLOSE/CLOSEREQ frame under * any circumstances. */ -void dccp_send_close(struct sock *sk) +void dccp_send_close(struct sock *sk, const int active) { struct dccp_sock *dp = dccp_sk(sk); struct sk_buff *skb; + const unsigned int prio = active ? GFP_KERNEL : GFP_ATOMIC; - /* Socket is locked, keep trying until memory is available. */ - for (;;) { - skb = alloc_skb(sk->sk_prot->max_header, GFP_KERNEL); - if (skb != NULL) - break; - yield(); - } + skb = alloc_skb(sk->sk_prot->max_header, prio); + if (skb == NULL) + return; /* Reserve space for headers and prepare control bits. 
*/ skb_reserve(skb, sk->sk_prot->max_header); @@ -449,7 +445,12 @@ void dccp_send_close(struct sock *sk) DCCP_PKT_CLOSE : DCCP_PKT_CLOSEREQ; skb_set_owner_w(skb, sk); - dccp_transmit_skb(sk, skb); + if (active) { + BUG_TRAP(sk->sk_send_head == NULL); + sk->sk_send_head = skb; + dccp_transmit_skb(sk, skb_clone(skb, prio)); + } else + dccp_transmit_skb(sk, skb); ccid_hc_rx_exit(dp->dccps_hc_rx_ccid, sk); ccid_hc_tx_exit(dp->dccps_hc_tx_ccid, sk); diff --git a/net/dccp/proto.c b/net/dccp/proto.c index 8b613c3017c5..a3f8a8095f81 100644 --- a/net/dccp/proto.c +++ b/net/dccp/proto.c @@ -402,12 +402,15 @@ void dccp_close(struct sock *sk, long timeout) /* Check zero linger _after_ checking for unread data. */ sk->sk_prot->disconnect(sk, 0); } else if (dccp_close_state(sk)) { - dccp_send_close(sk); + dccp_send_close(sk, 1); } sk_stream_wait_close(sk, timeout); adjudge_to_death: + /* + * It is the last release_sock in its life. It will remove backlog. + */ release_sock(sk); /* * Now socket is owned by kernel and we acquire BH lock @@ -419,11 +422,26 @@ adjudge_to_death: sock_hold(sk); sock_orphan(sk); - - if (sk->sk_state != DCCP_CLOSED) - dccp_set_state(sk, DCCP_CLOSED); - atomic_inc(&dccp_orphan_count); + /* + * The last release_sock may have processed the CLOSE or RESET + * packet moving sock to CLOSED state, if not we have to fire + * the CLOSE/CLOSEREQ retransmission timer, see "8.3. Termination" + * in draft-ietf-dccp-spec-11. -acme + */ + if (sk->sk_state == DCCP_CLOSING) { + /* FIXME: should start at 2 * RTT */ + /* Timer for repeating the CLOSE/CLOSEREQ until an answer. 
*/ + inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, + inet_csk(sk)->icsk_rto, + DCCP_RTO_MAX); +#if 0 + /* Yeah, we should use sk->sk_prot->orphan_count, etc */ + dccp_set_state(sk, DCCP_CLOSED); +#endif + } + + atomic_inc(sk->sk_prot->orphan_count); if (sk->sk_state == DCCP_CLOSED) inet_csk_destroy_sock(sk); From 20472af986569b0615bd77f0fd7ca9e3d33e9895 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 23 Aug 2005 21:50:21 -0700 Subject: [PATCH 449/584] [DCCP]: Fix skb leak in dccp_sendmsg Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. Miller --- net/dccp/proto.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/net/dccp/proto.c b/net/dccp/proto.c index a3f8a8095f81..2b6db18e607f 100644 --- a/net/dccp/proto.c +++ b/net/dccp/proto.c @@ -206,6 +206,18 @@ int dccp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, goto out_discard; rc = dccp_write_xmit(sk, skb, len); + /* + * XXX we don't use sk_write_queue, so just discard the packet. + * Current plan however is to _use_ sk_write_queue with + * an algorithm similar to tcp_sendmsg, where the main difference + * is that in DCCP we have to respect packet boundaries, so + * no coalescing of skbs. + * + * This bug was _quickly_ found & fixed by just looking at an OSTRA + * generated callgraph 8) -acme + */ + if (rc != 0) + goto out_discard; out_release: release_sock(sk); return rc ? : len; From a4beb1b64f5846e216bf2c439022df480151902a Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 23 Aug 2005 21:50:45 -0700 Subject: [PATCH 450/584] [DCCP]: Send a DATAACK packet when we have a TIMESTAMP_ECHO pending Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. 
Miller --- net/dccp/output.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/dccp/output.c b/net/dccp/output.c index 630ca7741022..f96dedd3ad5e 100644 --- a/net/dccp/output.c +++ b/net/dccp/output.c @@ -171,6 +171,7 @@ int dccp_write_xmit(struct sock *sk, struct sk_buff *skb, const int len) * dccps_ack_pending or use icsk. */ } else if (inet_csk_ack_scheduled(sk) || + dp->dccps_timestamp_echo != 0 || (dp->dccps_options.dccpo_send_ack_vector && ap->dccpap_buf_ackno != DCCP_MAX_SEQNO + 1 && ap->dccpap_ack_seqno == DCCP_MAX_SEQNO + 1)) From 012e13eac7579fcc7618df4ca1d5af3cdc03748c Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 23 Aug 2005 21:51:13 -0700 Subject: [PATCH 451/584] [CCID]: Make ccid_hc_[rt]x_exit accept NULL arguments Just like kfree, etc it will just not call the CCID exit routines when the private data area is set to NULL. Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. Miller --- net/dccp/ccid.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/net/dccp/ccid.h b/net/dccp/ccid.h index 95eb47d85517..c6767b282244 100644 --- a/net/dccp/ccid.h +++ b/net/dccp/ccid.h @@ -97,13 +97,15 @@ static inline int ccid_hc_tx_init(struct ccid *ccid, struct sock *sk) static inline void ccid_hc_rx_exit(struct ccid *ccid, struct sock *sk) { - if (ccid->ccid_hc_rx_exit != NULL) + if (ccid->ccid_hc_rx_exit != NULL && + dccp_sk(sk)->dccps_hc_rx_ccid_private != NULL) ccid->ccid_hc_rx_exit(sk); } static inline void ccid_hc_tx_exit(struct ccid *ccid, struct sock *sk) { - if (ccid->ccid_hc_tx_exit != NULL) + if (ccid->ccid_hc_tx_exit != NULL && + dccp_sk(sk)->dccps_hc_tx_ccid_private != NULL) ccid->ccid_hc_tx_exit(sk); } From d4b81ff70547b40c9b0742b163e8354560003cc0 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 23 Aug 2005 21:51:36 -0700 Subject: [PATCH 452/584] [DCCP]: Export dccp_insert_option_timestamp to CCIDs And don't insert a TIMESTAMP option in all packets, leave the decision to the 
CCIDs. Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. Miller --- net/dccp/dccp.h | 2 ++ net/dccp/options.c | 8 ++++---- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h index 53994f10ced5..c6ba07ea1a9f 100644 --- a/net/dccp/dccp.h +++ b/net/dccp/dccp.h @@ -364,6 +364,8 @@ extern void dccp_insert_options(struct sock *sk, struct sk_buff *skb); extern void dccp_insert_option_elapsed_time(struct sock *sk, struct sk_buff *skb, u32 elapsed_time); +extern void dccp_insert_option_timestamp(struct sock *sk, + struct sk_buff *skb); extern void dccp_insert_option(struct sock *sk, struct sk_buff *skb, unsigned char option, const void *value, unsigned char len); diff --git a/net/dccp/options.c b/net/dccp/options.c index 7ecffdf85756..eabcc8f1c625 100644 --- a/net/dccp/options.c +++ b/net/dccp/options.c @@ -348,7 +348,7 @@ void dccp_insert_option_elapsed_time(struct sock *sk, (unsigned long long) DCCP_SKB_CB(skb)->dccpd_seq); } -EXPORT_SYMBOL(dccp_insert_option_elapsed_time); +EXPORT_SYMBOL_GPL(dccp_insert_option_elapsed_time); static void dccp_insert_option_ack_vector(struct sock *sk, struct sk_buff *skb) { @@ -426,8 +426,7 @@ static void dccp_insert_option_ack_vector(struct sock *sk, struct sk_buff *skb) (unsigned long long) ap->dccpap_ack_ackno); } -static inline void dccp_insert_option_timestamp(struct sock *sk, - struct sk_buff *skb) +void dccp_insert_option_timestamp(struct sock *sk, struct sk_buff *skb) { struct timeval tv; u32 now; @@ -441,6 +440,8 @@ static inline void dccp_insert_option_timestamp(struct sock *sk, dccp_insert_option(sk, skb, DCCPO_TIMESTAMP, &now, sizeof(now)); } +EXPORT_SYMBOL_GPL(dccp_insert_option_timestamp); + static void dccp_insert_option_timestamp_echo(struct sock *sk, struct sk_buff *skb) { @@ -504,7 +505,6 @@ void dccp_insert_options(struct sock *sk, struct sk_buff *skb) DCCP_MAX_SEQNO + 1)) dccp_insert_option_ack_vector(sk, skb); - dccp_insert_option_timestamp(sk, skb); if 
(dp->dccps_timestamp_echo != 0) dccp_insert_option_timestamp_echo(sk, skb); } From 4fded33b3e8177d1d2eec0ccc69af8dfe8b4c3c3 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 23 Aug 2005 21:51:59 -0700 Subject: [PATCH 453/584] [CCID3]: Calculate the RTT in the RX half connection Using TIMESTAMP_ECHO and ELAPSED_TIME options received. Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. Miller --- net/dccp/ccids/ccid3.c | 67 ++++++++++++++++++++++++------------------ 1 file changed, 38 insertions(+), 29 deletions(-) diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c index ffd5b449f5f1..48c36afa4934 100644 --- a/net/dccp/ccids/ccid3.c +++ b/net/dccp/ccids/ccid3.c @@ -1010,7 +1010,6 @@ static void ccid3_hc_tx_packet_sent(struct sock *sk, int more, int len) struct ccid3_hc_tx_sock *hctx = dp->dccps_hc_tx_ccid_private; struct timeval now; -// ccid3_pr_debug("%s, sk=%p, more=%d, len=%d\n", dccp_role(sk), sk, more, len); BUG_ON(hctx == NULL); if (hctx->ccid3hctx_state == TFRC_SSTATE_TERM) { @@ -1562,23 +1561,27 @@ static void ccid3_hc_rx_send_feedback(struct sock *sk) static void ccid3_hc_rx_insert_options(struct sock *sk, struct sk_buff *skb) { const struct dccp_sock *dp = dccp_sk(sk); + u32 x_recv, pinv; struct ccid3_hc_rx_sock *hcrx = dp->dccps_hc_rx_ccid_private; if (hcrx == NULL || !(sk->sk_state == DCCP_OPEN || sk->sk_state == DCCP_PARTOPEN)) return; - if (hcrx->ccid3hcrx_elapsed_time != 0 && !dccp_packet_without_ack(skb)) - dccp_insert_option_elapsed_time(sk, skb, hcrx->ccid3hcrx_elapsed_time); - - if (DCCP_SKB_CB(skb)->dccpd_type != DCCP_PKT_DATA) { - const u32 x_recv = htonl(hcrx->ccid3hcrx_x_recv); - const u32 pinv = htonl(hcrx->ccid3hcrx_pinv); - - dccp_insert_option(sk, skb, TFRC_OPT_LOSS_EVENT_RATE, &pinv, sizeof(pinv)); - dccp_insert_option(sk, skb, TFRC_OPT_RECEIVE_RATE, &x_recv, sizeof(x_recv)); - } - DCCP_SKB_CB(skb)->dccpd_ccval = hcrx->ccid3hcrx_last_counter; + + if (dccp_packet_without_ack(skb)) + return; + + if 
(hcrx->ccid3hcrx_elapsed_time != 0) + dccp_insert_option_elapsed_time(sk, skb, + hcrx->ccid3hcrx_elapsed_time); + dccp_insert_option_timestamp(sk, skb); + x_recv = htonl(hcrx->ccid3hcrx_x_recv); + pinv = htonl(hcrx->ccid3hcrx_pinv); + dccp_insert_option(sk, skb, TFRC_OPT_LOSS_EVENT_RATE, + &pinv, sizeof(pinv)); + dccp_insert_option(sk, skb, TFRC_OPT_RECEIVE_RATE, + &x_recv, sizeof(x_recv)); } /* Weights used to calculate loss event rate */ @@ -1860,8 +1863,10 @@ static void ccid3_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb) { struct dccp_sock *dp = dccp_sk(sk); struct ccid3_hc_rx_sock *hcrx = dp->dccps_hc_rx_ccid_private; + const struct dccp_options_received *opt_recv; struct dccp_rx_hist_entry *packet; struct timeval now; + u32 now_usecs; u8 win_count; u32 p_prev; int ins; @@ -1876,24 +1881,25 @@ static void ccid3_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb) BUG_ON(!(hcrx->ccid3hcrx_state == TFRC_RSTATE_NO_DATA || hcrx->ccid3hcrx_state == TFRC_RSTATE_DATA)); + opt_recv = &dp->dccps_options_received; + switch (DCCP_SKB_CB(skb)->dccpd_type) { case DCCP_PKT_ACK: if (hcrx->ccid3hcrx_state == TFRC_RSTATE_NO_DATA) return; case DCCP_PKT_DATAACK: - if (dp->dccps_options_received.dccpor_timestamp_echo == 0) + if (opt_recv->dccpor_timestamp_echo == 0) break; p_prev = hcrx->ccid3hcrx_rtt; do_gettimeofday(&now); - /* hcrx->ccid3hcrx_rtt = now - dp->dccps_options_received.dccpor_timestamp_echo - - usecs_to_jiffies(dp->dccps_options_received.dccpor_elapsed_time * 10); - FIXME - I think above code is broken - have to look at options more, will also need - to fix pr_debug below */ + now_usecs = now.tv_sec * USEC_PER_SEC + now.tv_usec; + hcrx->ccid3hcrx_rtt = now_usecs - + (opt_recv->dccpor_timestamp_echo - + opt_recv->dccpor_elapsed_time) * 10; if (p_prev != hcrx->ccid3hcrx_rtt) - ccid3_pr_debug("%s, sk=%p, New RTT estimate=%lu jiffies, tstamp_echo=%u, elapsed time=%u\n", - dccp_role(sk), sk, hcrx->ccid3hcrx_rtt, - 
dp->dccps_options_received.dccpor_timestamp_echo, - dp->dccps_options_received.dccpor_elapsed_time); + ccid3_pr_debug("%s, New RTT=%luus, elapsed time=%u\n", + dccp_role(sk), hcrx->ccid3hcrx_rtt, + opt_recv->dccpor_elapsed_time); break; case DCCP_PKT_DATA: break; @@ -1904,8 +1910,7 @@ static void ccid3_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb) return; } - packet = dccp_rx_hist_entry_new(ccid3_rx_hist, - dp->dccps_options_received.dccpor_ndp, + packet = dccp_rx_hist_entry_new(ccid3_rx_hist, opt_recv->dccpor_ndp, skb, SLAB_ATOMIC); if (packet == NULL) { ccid3_pr_debug("%s, sk=%p, Not enough mem to add rx packet to history (consider it lost)!", @@ -1930,9 +1935,9 @@ static void ccid3_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb) case TFRC_RSTATE_DATA: hcrx->ccid3hcrx_bytes_recv += skb->len - dccp_hdr(skb)->dccph_doff * 4; if (ins == 0) { - do_gettimeofday(&now); - if ((now_delta(hcrx->ccid3hcrx_tstamp_last_ack)) >= hcrx->ccid3hcrx_rtt) { - hcrx->ccid3hcrx_tstamp_last_ack = now; + if (now_delta(hcrx->ccid3hcrx_tstamp_last_ack) >= + hcrx->ccid3hcrx_rtt) { + do_gettimeofday(&hcrx->ccid3hcrx_tstamp_last_ack); ccid3_hc_rx_send_feedback(sk); } return; @@ -1946,8 +1951,8 @@ static void ccid3_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb) } /* Dealing with packet loss */ - ccid3_pr_debug("%s, sk=%p(%s), skb=%p, data loss! Reacting...\n", - dccp_role(sk), sk, dccp_state_name(sk->sk_state), skb); + ccid3_pr_debug("%s, sk=%p(%s), data loss! Reacting...\n", + dccp_role(sk), sk, dccp_state_name(sk->sk_state)); ccid3_hc_rx_detect_loss(sk); p_prev = hcrx->ccid3hcrx_p; @@ -1985,7 +1990,11 @@ static int ccid3_hc_rx_init(struct sock *sk) hcrx->ccid3hcrx_state = TFRC_RSTATE_NO_DATA; INIT_LIST_HEAD(&hcrx->ccid3hcrx_hist); INIT_LIST_HEAD(&hcrx->ccid3hcrx_loss_interval_hist); - + /* + * XXX this seems to be paranoid, need to think more about this, for + * now start with something different than zero. 
-acme + */ + hcrx->ccid3hcrx_rtt = USEC_PER_SEC / 5; return 0; } From 2babe1f6fea717c36c008c878fe095d1ca5696c1 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 23 Aug 2005 21:52:35 -0700 Subject: [PATCH 454/584] [DCCP]: Introduce dccp_get_info And also hc_tx and hc_rx get_info functions for the CCIDs to fill in information that is specific to them. For now reusing struct tcp_info, later I'll try to figure out a better solution, for now it's really nice to get this kind of info: [root@qemu ~]# ./ss -danemi State Recv-Q Send-Q Local Addr:Port Peer Addr:Port LISTEN 0 0 *:5001 *:* ino:628 sk:c1340040 mem:(r0,w0,f0,t0) cwnd:0 ssthresh:0 ESTAB 0 0 172.20.0.2:5001 172.20.0.1:32785 ino:629 sk:c13409a0 mem:(r0,w0,f0,t0) ts rto:1000 rtt:0.004/0 cwnd:0 ssthresh:0 rcv_rtt:61.377 This, for instance, shows that we're not congestion controlling ACKs, as the above output is in the ttcp receiving host, and ttcp is a one way app, i.e. the receiver never calls sendmsg, so ccid_hc_tx_send_packet is never called, so the TX half connection stays in TFRC_SSTATE_NO_SENT state and hctx_rtt is never calculated, stays with the value set in ccid3_hc_tx_init, 4us, as shown above in milliseconds (0.004ms), upcoming patches will fix this. rcv_rtt seems sane tho, matching ping results :-) Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. 
Miller --- net/dccp/ccid.h | 18 ++++++++++++++++++ net/dccp/ccids/ccid3.c | 27 +++++++++++++++++++++++++++ net/dccp/diag.c | 26 +++++++++++++++++++++++++- 3 files changed, 70 insertions(+), 1 deletion(-) diff --git a/net/dccp/ccid.h b/net/dccp/ccid.h index c6767b282244..962f1e9e2f7e 100644 --- a/net/dccp/ccid.h +++ b/net/dccp/ccid.h @@ -50,6 +50,10 @@ struct ccid { struct sk_buff *skb, int len); void (*ccid_hc_tx_packet_sent)(struct sock *sk, int more, int len); + void (*ccid_hc_rx_get_info)(struct sock *sk, + struct tcp_info *info); + void (*ccid_hc_tx_get_info)(struct sock *sk, + struct tcp_info *info); }; extern int ccid_register(struct ccid *ccid); @@ -159,4 +163,18 @@ static inline void ccid_hc_rx_insert_options(struct ccid *ccid, struct sock *sk, if (ccid->ccid_hc_rx_insert_options != NULL) ccid->ccid_hc_rx_insert_options(sk, skb); } + +static inline void ccid_hc_rx_get_info(struct ccid *ccid, struct sock *sk, + struct tcp_info *info) +{ + if (ccid->ccid_hc_rx_get_info != NULL) + ccid->ccid_hc_rx_get_info(sk, info); +} + +static inline void ccid_hc_tx_get_info(struct ccid *ccid, struct sock *sk, + struct tcp_info *info) +{ + if (ccid->ccid_hc_tx_get_info != NULL) + ccid->ccid_hc_tx_get_info(sk, info); +} #endif /* _CCID_H */ diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c index 48c36afa4934..fe4cc85f5bcc 100644 --- a/net/dccp/ccids/ccid3.c +++ b/net/dccp/ccids/ccid3.c @@ -2020,6 +2020,31 @@ static void ccid3_hc_rx_exit(struct sock *sk) dp->dccps_hc_rx_ccid_private = NULL; } +static void ccid3_hc_rx_get_info(struct sock *sk, struct tcp_info *info) +{ + const struct dccp_sock *dp = dccp_sk(sk); + const struct ccid3_hc_rx_sock *hcrx = dp->dccps_hc_rx_ccid_private; + + if (hcrx == NULL) + return; + + info->tcpi_ca_state = hcrx->ccid3hcrx_state; + info->tcpi_options |= TCPI_OPT_TIMESTAMPS; + info->tcpi_rcv_rtt = hcrx->ccid3hcrx_rtt; +} + +static void ccid3_hc_tx_get_info(struct sock *sk, struct tcp_info *info) +{ + const struct dccp_sock *dp = 
dccp_sk(sk); + const struct ccid3_hc_tx_sock *hctx = dp->dccps_hc_tx_ccid_private; + + if (hctx == NULL) + return; + + info->tcpi_rto = hctx->ccid3hctx_t_rto; + info->tcpi_rtt = hctx->ccid3hctx_rtt; +} + static struct ccid ccid3 = { .ccid_id = 3, .ccid_name = "ccid3", @@ -2037,6 +2062,8 @@ static struct ccid ccid3 = { .ccid_hc_rx_exit = ccid3_hc_rx_exit, .ccid_hc_rx_insert_options = ccid3_hc_rx_insert_options, .ccid_hc_rx_packet_recv = ccid3_hc_rx_packet_recv, + .ccid_hc_rx_get_info = ccid3_hc_rx_get_info, + .ccid_hc_tx_get_info = ccid3_hc_tx_get_info, }; module_param(ccid3_debug, int, 0444); diff --git a/net/dccp/diag.c b/net/dccp/diag.c index 0b10c176c35a..f675d8e642d3 100644 --- a/net/dccp/diag.c +++ b/net/dccp/diag.c @@ -14,19 +14,43 @@ #include #include +#include "ccid.h" #include "dccp.h" +static void dccp_get_info(struct sock *sk, struct tcp_info *info) +{ + struct dccp_sock *dp = dccp_sk(sk); + const struct inet_connection_sock *icsk = inet_csk(sk); + + memset(info, 0, sizeof(*info)); + + info->tcpi_state = sk->sk_state; + info->tcpi_retransmits = icsk->icsk_retransmits; + info->tcpi_probes = icsk->icsk_probes_out; + info->tcpi_backoff = icsk->icsk_backoff; + info->tcpi_pmtu = dp->dccps_pmtu_cookie; + + if (dp->dccps_options.dccpo_send_ack_vector) + info->tcpi_options |= TCPI_OPT_SACK; + + ccid_hc_rx_get_info(dp->dccps_hc_rx_ccid, sk, info); + ccid_hc_tx_get_info(dp->dccps_hc_tx_ccid, sk, info); +} + static void dccp_diag_get_info(struct sock *sk, struct inet_diag_msg *r, void *_info) { r->idiag_rqueue = r->idiag_wqueue = 0; + + if (_info != NULL) + dccp_get_info(sk, _info); } static struct inet_diag_handler dccp_diag_handler = { .idiag_hashinfo = &dccp_hashinfo, .idiag_get_info = dccp_diag_get_info, .idiag_type = DCCPDIAG_GETSOCK, - .idiag_info_size = 0, + .idiag_info_size = sizeof(struct tcp_info), }; static int __init dccp_diag_init(void) From dc40c7bc76054f5e4382835ca2bafb895b993a8a Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 23 
Aug 2005 21:52:58 -0700 Subject: [PATCH 455/584] [ICSK]: Generalise tcp_listen_poll Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. Miller --- include/net/inet_connection_sock.h | 10 ++++++++++ net/ipv4/tcp.c | 11 +---------- 2 files changed, 11 insertions(+), 10 deletions(-) diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h index 4d7e708c07d1..8a87a3a4f107 100644 --- a/include/net/inet_connection_sock.h +++ b/include/net/inet_connection_sock.h @@ -260,6 +260,16 @@ extern void inet_csk_reqsk_queue_prune(struct sock *parent, const unsigned long max_rto); extern void inet_csk_destroy_sock(struct sock *sk); + +/* + * LISTEN is a special case for poll.. + */ +static inline unsigned int inet_csk_listen_poll(const struct sock *sk) +{ + return !reqsk_queue_empty(&inet_csk(sk)->icsk_accept_queue) ? + (POLLIN | POLLRDNORM) : 0; +} + extern int inet_csk_listen_start(struct sock *sk, const int nr_table_entries); extern void inet_csk_listen_stop(struct sock *sk); diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 02848e72e9c1..68626de6d69c 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -309,15 +309,6 @@ void tcp_enter_memory_pressure(void) EXPORT_SYMBOL(tcp_enter_memory_pressure); -/* - * LISTEN is a special case for poll.. - */ -static __inline__ unsigned int tcp_listen_poll(struct sock *sk, - poll_table *wait) -{ - return !reqsk_queue_empty(&inet_csk(sk)->icsk_accept_queue) ? (POLLIN | POLLRDNORM) : 0; -} - /* * Wait for a TCP event. * @@ -333,7 +324,7 @@ unsigned int tcp_poll(struct file *file, struct socket *sock, poll_table *wait) poll_wait(file, sk->sk_sleep, wait); if (sk->sk_state == TCP_LISTEN) - return tcp_listen_poll(sk, wait); + return inet_csk_listen_poll(sk); /* Socket is not locked. 
We are protected from async events by poll logic and correct handling of state changes From 8efa544f9c84919c047dc2f96e308c902e8fb1a4 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 23 Aug 2005 21:54:00 -0700 Subject: [PATCH 456/584] [DCCP]: Call the HC exit routines at dccp_v4_destroy_sock Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. Miller --- net/dccp/ipv4.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index 647e669a1266..3cf2cbcdcafd 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -1303,6 +1303,8 @@ static int dccp_v4_destroy_sock(struct sock *sk) if (inet_csk(sk)->icsk_bind_hash != NULL) inet_put_port(&dccp_hashinfo, sk); + ccid_hc_rx_exit(dp->dccps_hc_rx_ccid, sk); + ccid_hc_tx_exit(dp->dccps_hc_tx_ccid, sk); dccp_ackpkts_free(dp->dccps_hc_rx_ackpkts); dp->dccps_hc_rx_ackpkts = NULL; ccid_exit(dp->dccps_hc_rx_ccid, sk); From 331968bd0c1b2437f3ad773cbf55f2e0737bafc0 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 23 Aug 2005 21:54:23 -0700 Subject: [PATCH 457/584] [DCCP]: Initial dccp_poll implementation Tested with a patched netcat, no horror stories so far 8) Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. 
Miller --- net/dccp/input.c | 1 + net/dccp/proto.c | 59 +++++++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 59 insertions(+), 1 deletion(-) diff --git a/net/dccp/input.c b/net/dccp/input.c index 02af05ec23a2..ef29cef1dafe 100644 --- a/net/dccp/input.c +++ b/net/dccp/input.c @@ -34,6 +34,7 @@ static void dccp_rcv_close(struct sock *sk, struct sk_buff *skb) dccp_v4_send_reset(sk, DCCP_RESET_CODE_CLOSED); dccp_fin(sk, skb); dccp_set_state(sk, DCCP_CLOSED); + sk_wake_async(sk, 1, POLL_HUP); } static void dccp_rcv_closereq(struct sock *sk, struct sk_buff *skb) diff --git a/net/dccp/proto.c b/net/dccp/proto.c index 2b6db18e607f..600dda51d995 100644 --- a/net/dccp/proto.c +++ b/net/dccp/proto.c @@ -140,6 +140,62 @@ int dccp_disconnect(struct sock *sk, int flags) return err; } +/* + * Wait for a DCCP event. + * + * Note that we don't need to lock the socket, as the upper poll layers + * take care of normal races (between the test and the event) and we don't + * go look at any of the socket buffers directly. + */ +static unsigned int dccp_poll(struct file *file, struct socket *sock, + poll_table *wait) +{ + unsigned int mask; + struct sock *sk = sock->sk; + + poll_wait(file, sk->sk_sleep, wait); + if (sk->sk_state == DCCP_LISTEN) + return inet_csk_listen_poll(sk); + + /* Socket is not locked. We are protected from async events + by poll logic and correct handling of state changes + made by another threads is impossible in any case. + */ + + mask = 0; + if (sk->sk_err) + mask = POLLERR; + + if (sk->sk_shutdown == SHUTDOWN_MASK || sk->sk_state == DCCP_CLOSED) + mask |= POLLHUP; + if (sk->sk_shutdown & RCV_SHUTDOWN) + mask |= POLLIN | POLLRDNORM; + + /* Connected? 
*/ + if ((1 << sk->sk_state) & ~(DCCPF_REQUESTING | DCCPF_RESPOND)) { + if (atomic_read(&sk->sk_rmem_alloc) > 0) + mask |= POLLIN | POLLRDNORM; + + if (!(sk->sk_shutdown & SEND_SHUTDOWN)) { + if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk)) { + mask |= POLLOUT | POLLWRNORM; + } else { /* send SIGIO later */ + set_bit(SOCK_ASYNC_NOSPACE, + &sk->sk_socket->flags); + set_bit(SOCK_NOSPACE, &sk->sk_socket->flags); + + /* Race breaker. If space is freed after + * wspace test but before the flags are set, + * IO signal will be lost. + */ + if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk)) + mask |= POLLOUT | POLLWRNORM; + } + } + } + return mask; +} + int dccp_ioctl(struct sock *sk, int cmd, unsigned long arg) { dccp_pr_debug("entry\n"); @@ -478,7 +534,8 @@ static struct proto_ops inet_dccp_ops = { .socketpair = sock_no_socketpair, .accept = inet_accept, .getname = inet_getname, - .poll = sock_no_poll, + /* FIXME: work on tcp_poll to rename it to inet_csk_poll */ + .poll = dccp_poll, .ioctl = inet_ioctl, /* FIXME: work on inet_listen to rename it to sock_common_listen */ .listen = inet_dccp_listen, From 0c7770c740156c8802c23d24fc094d06967d997d Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Tue, 23 Aug 2005 21:59:41 -0700 Subject: [PATCH 458/584] [IPV4]: FIB trie cleanup This is a redo of earlier cleanup stuff: * replace DBG() macro with pr_debug() * get rid of duplicate extern's that are already in fib_lookup.h * use BUG_ON and WARN_ON * don't use BUG checks for null pointers where next statement would get a fault anyway * remove debug printout when rebalance causes deep tree * remove trailing blanks Signed-off-by: Stephen Hemminger Signed-off-by: Robert Olsson Signed-off-by: David S. 
Miller --- net/ipv4/fib_trie.c | 100 ++++++++++++++------------------------------ 1 file changed, 31 insertions(+), 69 deletions(-) diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index 395f64df6f9a..9c4c7f0367b0 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -157,10 +157,6 @@ struct trie { unsigned int revision; }; -static int trie_debug = 0; - -#define DBG(x...) do { if (trie_debug) printk(x); } while (0) - static void put_child(struct trie *t, struct tnode *tn, int i, struct node *n); static void tnode_put_child_reorg(struct tnode *tn, int i, struct node *n, int wasfull); static struct node *resize(struct trie *t, struct tnode *tn); @@ -168,12 +164,6 @@ static struct tnode *inflate(struct trie *t, struct tnode *tn); static struct tnode *halve(struct trie *t, struct tnode *tn); static void tnode_free(struct tnode *tn); static void trie_dump_seq(struct seq_file *seq, struct trie *t); -extern struct fib_alias *fib_find_alias(struct list_head *fah, u8 tos, u32 prio); -extern int fib_detect_death(struct fib_info *fi, int order, - struct fib_info **last_resort, int *last_idx, int *dflt); - -extern void rtmsg_fib(int event, u32 key, struct fib_alias *fa, int z, int tb_id, - struct nlmsghdr *n, struct netlink_skb_parms *req); static kmem_cache_t *fn_alias_kmem; static struct trie *trie_local = NULL, *trie_main = NULL; @@ -294,11 +284,9 @@ static void fn_free_alias(struct fib_alias *fa) */ -static void check_tnode(struct tnode *tn) +static inline void check_tnode(const struct tnode *tn) { - if (tn && tn->pos+tn->bits > 32) { - printk("TNODE ERROR tn=%p, pos=%d, bits=%d\n", tn, tn->pos, tn->bits); - } + WARN_ON(tn && tn->pos+tn->bits > 32); } static int halve_threshold = 25; @@ -374,21 +362,19 @@ static struct tnode* tnode_new(t_key key, int pos, int bits) tn->empty_children = 1<= 1<bits) { - printk("bits=%d, i=%d\n", tn->bits, i); - BUG(); - } + BUG_ON(i >= 1<bits); + write_lock_bh(&fib_lock); chi = tn->child[i]; @@ -459,8 +443,8 @@ static struct 
node *resize(struct trie *t, struct tnode *tn) if (!tn) return NULL; - DBG("In tnode_resize %p inflate_threshold=%d threshold=%d\n", - tn, inflate_threshold, halve_threshold); + pr_debug("In tnode_resize %p inflate_threshold=%d threshold=%d\n", + tn, inflate_threshold, halve_threshold); /* No children */ if (tn->empty_children == tnode_child_length(tn)) { @@ -625,11 +609,11 @@ static struct tnode *inflate(struct trie *t, struct tnode *tn) int olen = tnode_child_length(tn); int i; - DBG("In inflate\n"); + pr_debug("In inflate\n"); tn = tnode_new(oldtnode->key, oldtnode->pos, oldtnode->bits + 1); - if (!tn) + if (!tn) return ERR_PTR(-ENOMEM); /* @@ -749,12 +733,12 @@ nomem: int size = tnode_child_length(tn); int j; - for(j = 0; j < size; j++) + for (j = 0; j < size; j++) if (tn->child[j]) tnode_free((struct tnode *)tn->child[j]); tnode_free(tn); - + return ERR_PTR(-ENOMEM); } } @@ -766,7 +750,7 @@ static struct tnode *halve(struct trie *t, struct tnode *tn) int i; int olen = tnode_child_length(tn); - DBG("In halve\n"); + pr_debug("In halve\n"); tn = tnode_new(oldtnode->key, oldtnode->pos, oldtnode->bits - 1); @@ -785,14 +769,14 @@ static struct tnode *halve(struct trie *t, struct tnode *tn) right = tnode_get_child(oldtnode, i+1); /* Two nonempty children */ - if (left && right) { + if (left && right) { struct tnode *newn; - + newn = tnode_new(left->key, tn->pos + tn->bits, 1); - - if (!newn) + + if (!newn) goto nomem; - + put_child(t, tn, i/2, (struct node *)newn); } @@ -810,7 +794,7 @@ static struct tnode *halve(struct trie *t, struct tnode *tn) continue; put_child(t, tn, i/2, right); continue; - } + } if (right == NULL) { put_child(t, tn, i/2, left); @@ -820,9 +804,6 @@ static struct tnode *halve(struct trie *t, struct tnode *tn) /* Two nonempty children */ newBinNode = (struct tnode *) tnode_get_child(tn, i/2); put_child(t, tn, i/2, NULL); - - BUG_ON(!newBinNode); - put_child(t, newBinNode, 0, left); put_child(t, newBinNode, 1, right); put_child(t, tn, i/2, 
resize(t, newBinNode)); @@ -834,12 +815,12 @@ nomem: int size = tnode_child_length(tn); int j; - for(j = 0; j < size; j++) + for (j = 0; j < size; j++) if (tn->child[j]) tnode_free((struct tnode *)tn->child[j]); tnode_free(tn); - + return ERR_PTR(-ENOMEM); } } @@ -939,22 +920,10 @@ static struct node *trie_rebalance(struct trie *t, struct tnode *tn) t_key cindex, key; struct tnode *tp = NULL; - BUG_ON(!tn); - key = tn->key; i = 0; while (tn != NULL && NODE_PARENT(tn) != NULL) { - if (i > 10) { - printk("Rebalance tn=%p \n", tn); - if (tn) - printk("tn->parent=%p \n", NODE_PARENT(tn)); - - printk("Rebalance tp=%p \n", tp); - if (tp) - printk("tp->parent=%p \n", NODE_PARENT(tp)); - } - BUG_ON(i > 12); /* Why is this a bug? -ojn */ i++; @@ -1019,10 +988,7 @@ fib_insert_node(struct trie *t, int *err, u32 key, int plen) pos = tn->pos + tn->bits; n = tnode_get_child(tn, tkey_extract_bits(key, tn->pos, tn->bits)); - if (n && NODE_PARENT(n) != tn) { - printk("BUG tn=%p, n->parent=%p\n", tn, NODE_PARENT(n)); - BUG(); - } + BUG_ON(n && NODE_PARENT(n) != tn); } else break; } @@ -1076,8 +1042,6 @@ fib_insert_node(struct trie *t, int *err, u32 key, int plen) NODE_SET_PARENT(l, tp); - BUG_ON(!tp); - cindex = tkey_extract_bits(key, tp->pos, tp->bits); put_child(t, (struct tnode *)tp, cindex, (struct node *)l); } else { @@ -1158,7 +1122,7 @@ fn_trie_insert(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta, key = ntohl(key); - DBG("Insert table=%d %08x/%d\n", tb->tb_id, key, plen); + pr_debug("Insert table=%d %08x/%d\n", tb->tb_id, key, plen); mask = ntohl(inet_make_mask(plen)); @@ -1282,7 +1246,8 @@ err: return err; } -static inline int check_leaf(struct trie *t, struct leaf *l, t_key key, int *plen, const struct flowi *flp, +static inline int check_leaf(struct trie *t, struct leaf *l, + t_key key, int *plen, const struct flowi *flp, struct fib_result *res) { int err, i; @@ -1511,7 +1476,7 @@ static int trie_leaf_remove(struct trie *t, t_key key) struct node *n = 
t->trie; struct leaf *l; - DBG("entering trie_leaf_remove(%p)\n", n); + pr_debug("entering trie_leaf_remove(%p)\n", n); /* Note that in the case skipped bits, those bits are *not* checked! * When we finish this, we will have NULL or a T_LEAF, and the @@ -1523,10 +1488,7 @@ static int trie_leaf_remove(struct trie *t, t_key key) check_tnode(tn); n = tnode_get_child(tn ,tkey_extract_bits(key, tn->pos, tn->bits)); - if (n && NODE_PARENT(n) != tn) { - printk("BUG tn=%p, n->parent=%p\n", tn, NODE_PARENT(n)); - BUG(); - } + BUG_ON(n && NODE_PARENT(n) != tn); } l = (struct leaf *) n; @@ -1594,7 +1556,7 @@ fn_trie_delete(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta, if (!fa) return -ESRCH; - DBG("Deleting %08x/%d tos=%d t=%p\n", key, plen, tos, t); + pr_debug("Deleting %08x/%d tos=%d t=%p\n", key, plen, tos, t); fa_to_delete = NULL; fa_head = fa->fa_list.prev; @@ -1762,7 +1724,7 @@ static int fn_trie_flush(struct fib_table *tb) if (ll && hlist_empty(&ll->list)) trie_leaf_remove(t, ll->key); - DBG("trie_flush found=%d\n", found); + pr_debug("trie_flush found=%d\n", found); return found; } From 98a82febb6340466824c3a453738d4fbd05db81a Mon Sep 17 00:00:00 2001 From: Ralf Baechle Date: Wed, 24 Aug 2005 11:35:51 -0700 Subject: [PATCH 459/584] [AX25/NETROM]: Cleanup direct calls into IP stack Get rid of the calls to ip_rcv and arp_rcv which were layering violations anyway. With those being replaced by netif_rx, less parts of AX.25 and relatives depend on INET support actually being enabled. This also will make PF_PACKET sockets work for IP and ARP packets received over AX.25 and for IP packets over NET/ROM. Signed-off-by: Ralf Baechle DL5RB Signed-off-by: David S. 
Miller --- net/ax25/ax25_in.c | 13 +++---------- net/netrom/af_netrom.c | 5 ++--- net/netrom/nr_dev.c | 5 ++--- 3 files changed, 7 insertions(+), 16 deletions(-) diff --git a/net/ax25/ax25_in.c b/net/ax25/ax25_in.c index 0357705d575d..810c9c76c2e0 100644 --- a/net/ax25/ax25_in.c +++ b/net/ax25/ax25_in.c @@ -9,7 +9,6 @@ * Copyright (C) Joerg Reuter DL1BKE (jreuter@yaina.de) * Copyright (C) Hans-Joachim Hetscher DD8NE (dd8ne@bnv-bamberg.de) */ -#include #include #include #include @@ -26,9 +25,7 @@ #include #include #include -#include /* For ip_rcv */ #include -#include /* For arp_rcv */ #include #include #include @@ -114,7 +111,6 @@ int ax25_rx_iframe(ax25_cb *ax25, struct sk_buff *skb) pid = *skb->data; -#ifdef CONFIG_INET if (pid == AX25_P_IP) { /* working around a TCP bug to keep additional listeners * happy. TCP re-uses the buffer and destroys the original @@ -132,10 +128,9 @@ int ax25_rx_iframe(ax25_cb *ax25, struct sk_buff *skb) skb->dev = ax25->ax25_dev->dev; skb->pkt_type = PACKET_HOST; skb->protocol = htons(ETH_P_IP); - ip_rcv(skb, skb->dev, NULL, skb->dev); /* Wrong ptype */ + netif_rx(skb); return 1; } -#endif if (pid == AX25_P_SEGMENT) { skb_pull(skb, 1); /* Remove PID */ return ax25_rx_fragment(ax25, skb); @@ -250,7 +245,6 @@ static int ax25_rcv(struct sk_buff *skb, struct net_device *dev, /* Now we are pointing at the pid byte */ switch (skb->data[1]) { -#ifdef CONFIG_INET case AX25_P_IP: skb_pull(skb,2); /* drop PID/CTRL */ skb->h.raw = skb->data; @@ -258,7 +252,7 @@ static int ax25_rcv(struct sk_buff *skb, struct net_device *dev, skb->dev = dev; skb->pkt_type = PACKET_HOST; skb->protocol = htons(ETH_P_IP); - ip_rcv(skb, dev, ptype, dev); /* Note ptype here is the wrong one, fix me later */ + netif_rx(skb); break; case AX25_P_ARP: @@ -268,9 +262,8 @@ static int ax25_rcv(struct sk_buff *skb, struct net_device *dev, skb->dev = dev; skb->pkt_type = PACKET_HOST; skb->protocol = htons(ETH_P_ARP); - arp_rcv(skb, dev, ptype, dev); /* Note ptype here is 
wrong... */ + netif_rx(skb); break; -#endif case AX25_P_TEXT: /* Now find a suitable dgram socket */ sk = ax25_get_socket(&dest, &src, SOCK_DGRAM); diff --git a/net/netrom/af_netrom.c b/net/netrom/af_netrom.c index 9aa8b14a81ab..4b53de982114 100644 --- a/net/netrom/af_netrom.c +++ b/net/netrom/af_netrom.c @@ -858,17 +858,16 @@ int nr_rx_frame(struct sk_buff *skb, struct net_device *dev) frametype = skb->data[19] & 0x0F; flags = skb->data[19] & 0xF0; -#ifdef CONFIG_INET /* * Check for an incoming IP over NET/ROM frame. */ - if (frametype == NR_PROTOEXT && circuit_index == NR_PROTO_IP && circuit_id == NR_PROTO_IP) { + if (frametype == NR_PROTOEXT && + circuit_index == NR_PROTO_IP && circuit_id == NR_PROTO_IP) { skb_pull(skb, NR_NETWORK_LEN + NR_TRANSPORT_LEN); skb->h.raw = skb->data; return nr_rx_ip(skb, dev); } -#endif /* * Find an existing socket connection, based on circuit ID, if it's diff --git a/net/netrom/nr_dev.c b/net/netrom/nr_dev.c index 83eb41d9b937..263da4c26494 100644 --- a/net/netrom/nr_dev.c +++ b/net/netrom/nr_dev.c @@ -38,8 +38,6 @@ #include #include -#ifdef CONFIG_INET - /* * Only allow IP over NET/ROM frames through if the netrom device is up. */ @@ -64,11 +62,12 @@ int nr_rx_ip(struct sk_buff *skb, struct net_device *dev) skb->nh.raw = skb->data; skb->pkt_type = PACKET_HOST; - ip_rcv(skb, skb->dev, NULL, skb->dev); + netif_rx(skb); return 1; } +#ifdef CONFIG_INET static int nr_rebuild_header(struct sk_buff *skb) { From c91326db013ddff478da8a8d66fb99ef4579f19a Mon Sep 17 00:00:00 2001 From: Ralf Baechle Date: Wed, 24 Aug 2005 11:37:45 -0700 Subject: [PATCH 460/584] [AX25/NETROM/ROSE]: Kill net/ip.h inclusion All these are claiming to include to get ip_rcv() but in fact don't need the header at all, so away with the inclusion. Signed-off-by: Ralf Baechle DL5RB Signed-off-by: David S. 
Miller --- net/ax25/ax25_ds_in.c | 1 - net/ax25/ax25_std_in.c | 1 - net/netrom/nr_in.c | 1 - net/rose/rose_in.c | 1 - 4 files changed, 4 deletions(-) diff --git a/net/ax25/ax25_ds_in.c b/net/ax25/ax25_ds_in.c index 5d0f8fb9d7aa..edcaa897027c 100644 --- a/net/ax25/ax25_ds_in.c +++ b/net/ax25/ax25_ds_in.c @@ -22,7 +22,6 @@ #include #include #include -#include /* For ip_rcv */ #include #include #include diff --git a/net/ax25/ax25_std_in.c b/net/ax25/ax25_std_in.c index 83a33387c061..f6ed283e9de8 100644 --- a/net/ax25/ax25_std_in.c +++ b/net/ax25/ax25_std_in.c @@ -29,7 +29,6 @@ #include #include #include -#include /* For ip_rcv */ #include #include #include diff --git a/net/netrom/nr_in.c b/net/netrom/nr_in.c index 2fcba9e24b2e..64b81a796907 100644 --- a/net/netrom/nr_in.c +++ b/net/netrom/nr_in.c @@ -23,7 +23,6 @@ #include #include #include -#include /* For ip_rcv */ #include #include #include diff --git a/net/rose/rose_in.c b/net/rose/rose_in.c index a52417bd0a16..8348d33f1efe 100644 --- a/net/rose/rose_in.c +++ b/net/rose/rose_in.c @@ -26,7 +26,6 @@ #include #include #include -#include /* For ip_rcv */ #include #include #include From 3625796806419d97641d90e0f197eab9b952212e Mon Sep 17 00:00:00 2001 From: Ralf Baechle Date: Wed, 24 Aug 2005 11:38:53 -0700 Subject: [PATCH 461/584] [IPV4]: Module export of ip_rcv() no longer needed. With ip_rcv nowhere outside the IP stack being used anymore it's EXPORT_SYMBOL is not needed any longer either. Signed-off-by: Ralf Baechle DL5RB Signed-off-by: David S. 
Miller --- net/ipv4/ip_input.c | 1 - 1 file changed, 1 deletion(-) diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c index 7e78095baef1..220a8b5920ea 100644 --- a/net/ipv4/ip_input.c +++ b/net/ipv4/ip_input.c @@ -442,5 +442,4 @@ out: return NET_RX_DROP; } -EXPORT_SYMBOL(ip_rcv); EXPORT_SYMBOL(ip_statistics); From e5b4376074e02b783e56a8f7c42d544e18112c4e Mon Sep 17 00:00:00 2001 From: Robert Olsson Date: Thu, 25 Aug 2005 13:01:03 -0700 Subject: [PATCH 462/584] [IPV4]: Prepare FIB core for RCU. * RCU versions of hlist_***_rcu * fib_alias partial rcu port just whats needed now. Signed-off-by: Robert Olsson Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- include/linux/list.h | 21 +++++++++++++++++++++ net/ipv4/fib_lookup.h | 1 + net/ipv4/fib_semantics.c | 3 ++- 3 files changed, 24 insertions(+), 1 deletion(-) diff --git a/include/linux/list.h b/include/linux/list.h index 0f2435f92db3..9b9b0eec1e8a 100644 --- a/include/linux/list.h +++ b/include/linux/list.h @@ -634,6 +634,27 @@ static inline void hlist_add_after(struct hlist_node *n, next->next->pprev = &next->next; } +static inline void hlist_add_before_rcu(struct hlist_node *n, + struct hlist_node *next) +{ + n->pprev = next->pprev; + n->next = next; + smp_wmb(); + next->pprev = &n->next; + *(n->pprev) = n; +} + +static inline void hlist_add_after_rcu(struct hlist_node *prev, + struct hlist_node *n) +{ + n->next = prev->next; + n->pprev = &prev->next; + smp_wmb(); + prev->next = n; + if (n->next) + n->next->pprev = &n->next; +} + #define hlist_entry(ptr, type, member) container_of(ptr,type,member) #define hlist_for_each(pos, head) \ diff --git a/net/ipv4/fib_lookup.h b/net/ipv4/fib_lookup.h index b729d97cfa93..ef6609ea0eb7 100644 --- a/net/ipv4/fib_lookup.h +++ b/net/ipv4/fib_lookup.h @@ -7,6 +7,7 @@ struct fib_alias { struct list_head fa_list; + struct rcu_head rcu; struct fib_info *fa_info; u8 fa_tos; u8 fa_type; diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index 
7e4651b3caa8..d41219e8037c 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -854,6 +854,7 @@ failure: return NULL; } +/* Note! fib_semantic_match intentionally uses RCU list functions. */ int fib_semantic_match(struct list_head *head, const struct flowi *flp, struct fib_result *res, __u32 zone, __u32 mask, int prefixlen) @@ -861,7 +862,7 @@ int fib_semantic_match(struct list_head *head, const struct flowi *flp, struct fib_alias *fa; int nh_sel = 0; - list_for_each_entry(fa, head, fa_list) { + list_for_each_entry_rcu(fa, head, fa_list) { int err; if (fa->fa_tos && From 2373ce1ca04dd46bf2b8b0f9a799eb2a90da92fb Mon Sep 17 00:00:00 2001 From: Robert Olsson Date: Thu, 25 Aug 2005 13:01:29 -0700 Subject: [PATCH 463/584] [IPV4]: Convert FIB Trie to RCU. * Removes RW-lock * Proteced read functions uses rcu_dereference proteced with rcu_read_lock() * writing of procted pointer w. rcu_assigen_pointer * Insert/Replace atomic list_replace_rcu * A BUG_ON condition removed.in trie_rebalance With help from Paul E. McKenney. Signed-off-by: Robert Olsson Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- net/ipv4/fib_trie.c | 413 +++++++++++++++++++++++--------------------- 1 file changed, 214 insertions(+), 199 deletions(-) diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index 9c4c7f0367b0..ff21748248e4 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -43,7 +43,7 @@ * 2 of the License, or (at your option) any later version. 
*/ -#define VERSION "0.325" +#define VERSION "0.402" #include #include @@ -62,6 +62,7 @@ #include #include #include +#include #include #include #include @@ -81,22 +82,19 @@ #define MASK_PFX(k, l) (((l)==0)?0:(k >> (KEYLENGTH-l)) << (KEYLENGTH-l)) #define TKEY_GET_MASK(offset, bits) (((bits)==0)?0:((t_key)(-1) << (KEYLENGTH - bits) >> offset)) -static DEFINE_RWLOCK(fib_lock); - typedef unsigned int t_key; #define T_TNODE 0 #define T_LEAF 1 #define NODE_TYPE_MASK 0x1UL #define NODE_PARENT(node) \ - ((struct tnode *)((node)->parent & ~NODE_TYPE_MASK)) -#define NODE_SET_PARENT(node, ptr) \ - ((node)->parent = (((unsigned long)(ptr)) | \ - ((node)->parent & NODE_TYPE_MASK))) -#define NODE_INIT_PARENT(node, type) \ - ((node)->parent = (type)) -#define NODE_TYPE(node) \ - ((node)->parent & NODE_TYPE_MASK) + ((struct tnode *)rcu_dereference(((node)->parent & ~NODE_TYPE_MASK))) + +#define NODE_TYPE(node) ((node)->parent & NODE_TYPE_MASK) + +#define NODE_SET_PARENT(node, ptr) \ + rcu_assign_pointer((node)->parent, \ + ((unsigned long)(ptr)) | NODE_TYPE(node)) #define IS_TNODE(n) (!(n->parent & T_LEAF)) #define IS_LEAF(n) (n->parent & T_LEAF) @@ -110,10 +108,12 @@ struct leaf { t_key key; unsigned long parent; struct hlist_head list; + struct rcu_head rcu; }; struct leaf_info { struct hlist_node hlist; + struct rcu_head rcu; int plen; struct list_head falh; }; @@ -125,6 +125,7 @@ struct tnode { unsigned short bits:5; /* 2log(KEYLENGTH) bits needed */ unsigned short full_children; /* KEYLENGTH bits needed */ unsigned short empty_children; /* KEYLENGTH bits needed */ + struct rcu_head rcu; struct node *child[0]; }; @@ -168,11 +169,14 @@ static void trie_dump_seq(struct seq_file *seq, struct trie *t); static kmem_cache_t *fn_alias_kmem; static struct trie *trie_local = NULL, *trie_main = NULL; + +/* rcu_read_lock needs to be hold by caller from readside */ + static inline struct node *tnode_get_child(struct tnode *tn, int i) { BUG_ON(i >= 1 << tn->bits); - return tn->child[i]; + 
return rcu_dereference(tn->child[i]); } static inline int tnode_child_length(const struct tnode *tn) @@ -213,14 +217,6 @@ static inline int tkey_mismatch(t_key a, int offset, t_key b) return i; } -/* Candidate for fib_semantics */ - -static void fn_free_alias(struct fib_alias *fa) -{ - fib_release_info(fa->fa_info); - kmem_cache_free(fn_alias_kmem, fa); -} - /* To understand this stuff, an understanding of keys and all their bits is necessary. Every node in the trie has a key associated with it, but not @@ -292,11 +288,74 @@ static inline void check_tnode(const struct tnode *tn) static int halve_threshold = 25; static int inflate_threshold = 50; + +static void __alias_free_mem(struct rcu_head *head) +{ + struct fib_alias *fa = container_of(head, struct fib_alias, rcu); + kmem_cache_free(fn_alias_kmem, fa); +} + +static inline void alias_free_mem_rcu(struct fib_alias *fa) +{ + call_rcu(&fa->rcu, __alias_free_mem); +} + +static void __leaf_free_rcu(struct rcu_head *head) +{ + kfree(container_of(head, struct leaf, rcu)); +} + +static inline void free_leaf(struct leaf *leaf) +{ + call_rcu(&leaf->rcu, __leaf_free_rcu); +} + +static void __leaf_info_free_rcu(struct rcu_head *head) +{ + kfree(container_of(head, struct leaf_info, rcu)); +} + +static inline void free_leaf_info(struct leaf_info *leaf) +{ + call_rcu(&leaf->rcu, __leaf_info_free_rcu); +} + +static struct tnode *tnode_alloc(unsigned int size) +{ + struct page *pages; + + if (size <= PAGE_SIZE) + return kcalloc(size, 1, GFP_KERNEL); + + pages = alloc_pages(GFP_KERNEL|__GFP_ZERO, get_order(size)); + if (!pages) + return NULL; + + return page_address(pages); +} + +static void __tnode_free_rcu(struct rcu_head *head) +{ + struct tnode *tn = container_of(head, struct tnode, rcu); + unsigned int size = sizeof(struct tnode) + + (1 << tn->bits) * sizeof(struct node *); + + if (size <= PAGE_SIZE) + kfree(tn); + else + free_pages((unsigned long)tn, get_order(size)); +} + +static inline void tnode_free(struct tnode *tn) +{ 
+ call_rcu(&tn->rcu, __tnode_free_rcu); +} + static struct leaf *leaf_new(void) { struct leaf *l = kmalloc(sizeof(struct leaf), GFP_KERNEL); if (l) { - NODE_INIT_PARENT(l, T_LEAF); + l->parent = T_LEAF; INIT_HLIST_HEAD(&l->list); } return l; @@ -305,45 +364,11 @@ static struct leaf *leaf_new(void) static struct leaf_info *leaf_info_new(int plen) { struct leaf_info *li = kmalloc(sizeof(struct leaf_info), GFP_KERNEL); - - if (!li) - return NULL; - - li->plen = plen; - INIT_LIST_HEAD(&li->falh); - - return li; -} - -static inline void free_leaf(struct leaf *l) -{ - kfree(l); -} - -static inline void free_leaf_info(struct leaf_info *li) -{ - kfree(li); -} - -static struct tnode *tnode_alloc(unsigned int size) -{ - if (size <= PAGE_SIZE) { - return kmalloc(size, GFP_KERNEL); - } else { - return (struct tnode *) - __get_free_pages(GFP_KERNEL, get_order(size)); + if (li) { + li->plen = plen; + INIT_LIST_HEAD(&li->falh); } -} - -static void __tnode_free(struct tnode *tn) -{ - unsigned int size = sizeof(struct tnode) + - (1 << tn->bits) * sizeof(struct node *); - - if (size <= PAGE_SIZE) - kfree(tn); - else - free_pages((unsigned long)tn, get_order(size)); + return li; } static struct tnode* tnode_new(t_key key, int pos, int bits) @@ -354,7 +379,7 @@ static struct tnode* tnode_new(t_key key, int pos, int bits) if (tn) { memset(tn, 0, sz); - NODE_INIT_PARENT(tn, T_TNODE); + tn->parent = T_TNODE; tn->pos = pos; tn->bits = bits; tn->key = key; @@ -367,17 +392,6 @@ static struct tnode* tnode_new(t_key key, int pos, int bits) return tn; } -static void tnode_free(struct tnode *tn) -{ - if (IS_LEAF(tn)) { - free_leaf((struct leaf *)tn); - pr_debug("FL %p \n", tn); - } else { - __tnode_free(tn); - pr_debug("FT %p \n", tn); - } -} - /* * Check whether a tnode 'n' is "full", i.e. it is an internal node * and no bits are skipped. See discussion in dyntree paper p. 
6 @@ -403,13 +417,11 @@ static inline void put_child(struct trie *t, struct tnode *tn, int i, struct nod static void tnode_put_child_reorg(struct tnode *tn, int i, struct node *n, int wasfull) { - struct node *chi; + struct node *chi = tn->child[i]; int isfull; BUG_ON(i >= 1<bits); - write_lock_bh(&fib_lock); - chi = tn->child[i]; /* update emptyChildren */ if (n == NULL && chi != NULL) @@ -430,8 +442,7 @@ static void tnode_put_child_reorg(struct tnode *tn, int i, struct node *n, int w if (n) NODE_SET_PARENT(n, tn); - tn->child[i] = n; - write_unlock_bh(&fib_lock); + rcu_assign_pointer(tn->child[i], n); } static struct node *resize(struct trie *t, struct tnode *tn) @@ -456,17 +467,12 @@ static struct node *resize(struct trie *t, struct tnode *tn) for (i = 0; i < tnode_child_length(tn); i++) { struct node *n; - write_lock_bh(&fib_lock); n = tn->child[i]; - if (!n) { - write_unlock_bh(&fib_lock); + if (!n) continue; - } /* compress one level */ - NODE_INIT_PARENT(n, NODE_TYPE(n)); - - write_unlock_bh(&fib_lock); + NODE_SET_PARENT(n, NULL); tnode_free(tn); return n; } @@ -577,24 +583,17 @@ static struct node *resize(struct trie *t, struct tnode *tn) /* Only one child remains */ - if (tn->empty_children == tnode_child_length(tn) - 1) for (i = 0; i < tnode_child_length(tn); i++) { struct node *n; - write_lock_bh(&fib_lock); - n = tn->child[i]; - if (!n) { - write_unlock_bh(&fib_lock); + if (!n) continue; - } /* compress one level */ - NODE_INIT_PARENT(n, NODE_TYPE(n)); - - write_unlock_bh(&fib_lock); + NODE_SET_PARENT(n, NULL); tnode_free(tn); return n; } @@ -831,19 +830,22 @@ static void trie_init(struct trie *t) return; t->size = 0; - t->trie = NULL; + rcu_assign_pointer(t->trie, NULL); t->revision = 0; #ifdef CONFIG_IP_FIB_TRIE_STATS memset(&t->stats, 0, sizeof(struct trie_use_stats)); #endif } +/* readside most use rcu_read_lock currently dump routines + via get_fa_head and dump */ + static struct leaf_info *find_leaf_info(struct hlist_head *head, int plen) { struct 
hlist_node *node; struct leaf_info *li; - hlist_for_each_entry(li, node, head, hlist) + hlist_for_each_entry_rcu(li, node, head, hlist) if (li->plen == plen) return li; @@ -862,28 +864,27 @@ static inline struct list_head * get_fa_head(struct leaf *l, int plen) static void insert_leaf_info(struct hlist_head *head, struct leaf_info *new) { - struct leaf_info *li = NULL, *last = NULL; - struct hlist_node *node; + struct leaf_info *li = NULL, *last = NULL; + struct hlist_node *node; - write_lock_bh(&fib_lock); + if (hlist_empty(head)) { + hlist_add_head_rcu(&new->hlist, head); + } else { + hlist_for_each_entry(li, node, head, hlist) { + if (new->plen > li->plen) + break; - if (hlist_empty(head)) { - hlist_add_head(&new->hlist, head); - } else { - hlist_for_each_entry(li, node, head, hlist) { - if (new->plen > li->plen) - break; - - last = li; - } - if (last) - hlist_add_after(&last->hlist, &new->hlist); - else - hlist_add_before(&new->hlist, &li->hlist); - } - write_unlock_bh(&fib_lock); + last = li; + } + if (last) + hlist_add_after_rcu(&last->hlist, &new->hlist); + else + hlist_add_before_rcu(&new->hlist, &li->hlist); + } } +/* rcu_read_lock needs to be hold by caller from readside */ + static struct leaf * fib_find_node(struct trie *t, u32 key) { @@ -892,7 +893,7 @@ fib_find_node(struct trie *t, u32 key) struct node *n; pos = 0; - n = t->trie; + n = rcu_dereference(t->trie); while (n != NULL && NODE_TYPE(n) == T_TNODE) { tn = (struct tnode *) n; @@ -915,17 +916,13 @@ fib_find_node(struct trie *t, u32 key) static struct node *trie_rebalance(struct trie *t, struct tnode *tn) { - int i; int wasfull; t_key cindex, key; struct tnode *tp = NULL; key = tn->key; - i = 0; while (tn != NULL && NODE_PARENT(tn) != NULL) { - BUG_ON(i > 12); /* Why is this a bug? 
-ojn */ - i++; tp = NODE_PARENT(tn); cindex = tkey_extract_bits(key, tp->pos, tp->bits); @@ -945,6 +942,8 @@ static struct node *trie_rebalance(struct trie *t, struct tnode *tn) return (struct node*) tn; } +/* only used from updater-side */ + static struct list_head * fib_insert_node(struct trie *t, int *err, u32 key, int plen) { @@ -1081,7 +1080,7 @@ fib_insert_node(struct trie *t, int *err, u32 key, int plen) cindex = tkey_extract_bits(key, tp->pos, tp->bits); put_child(t, (struct tnode *)tp, cindex, (struct node *)tn); } else { - t->trie = (struct node*) tn; /* First tnode */ + rcu_assign_pointer(t->trie, (struct node *)tn); /* First tnode */ tp = tn; } } @@ -1091,7 +1090,8 @@ fib_insert_node(struct trie *t, int *err, u32 key, int plen) tp, tp->pos, tp->bits, key, plen); /* Rebalance the trie */ - t->trie = trie_rebalance(t, tp); + + rcu_assign_pointer(t->trie, trie_rebalance(t, tp)); done: t->revision++; err: @@ -1166,16 +1166,21 @@ fn_trie_insert(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta, struct fib_info *fi_drop; u8 state; - write_lock_bh(&fib_lock); + err = -ENOBUFS; + new_fa = kmem_cache_alloc(fn_alias_kmem, SLAB_KERNEL); + if (new_fa == NULL) + goto out; fi_drop = fa->fa_info; - fa->fa_info = fi; - fa->fa_type = type; - fa->fa_scope = r->rtm_scope; + new_fa->fa_tos = fa->fa_tos; + new_fa->fa_info = fi; + new_fa->fa_type = type; + new_fa->fa_scope = r->rtm_scope; state = fa->fa_state; - fa->fa_state &= ~FA_S_ACCESSED; + new_fa->fa_state &= ~FA_S_ACCESSED; - write_unlock_bh(&fib_lock); + list_replace_rcu(&fa->fa_list, &new_fa->fa_list); + alias_free_mem_rcu(fa); fib_release_info(fi_drop); if (state & FA_S_ACCESSED) @@ -1227,11 +1232,8 @@ fn_trie_insert(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta, goto out_free_new_fa; } - write_lock_bh(&fib_lock); - - list_add_tail(&new_fa->fa_list, (fa ? &fa->fa_list : fa_head)); - - write_unlock_bh(&fib_lock); + list_add_tail_rcu(&new_fa->fa_list, + (fa ? 
&fa->fa_list : fa_head)); rt_cache_flush(-1); rtmsg_fib(RTM_NEWROUTE, htonl(key), new_fa, plen, tb->tb_id, nlhdr, req); @@ -1246,6 +1248,8 @@ err: return err; } + +/* should be clalled with rcu_read_lock */ static inline int check_leaf(struct trie *t, struct leaf *l, t_key key, int *plen, const struct flowi *flp, struct fib_result *res) @@ -1256,7 +1260,7 @@ static inline int check_leaf(struct trie *t, struct leaf *l, struct hlist_head *hhead = &l->list; struct hlist_node *node; - hlist_for_each_entry(li, node, hhead, hlist) { + hlist_for_each_entry_rcu(li, node, hhead, hlist) { i = li->plen; mask = ntohl(inet_make_mask(i)); if (l->key != (key & mask)) @@ -1292,10 +1296,9 @@ fn_trie_lookup(struct fib_table *tb, const struct flowi *flp, struct fib_result t_key node_prefix, key_prefix, pref_mismatch; int mp; - n = t->trie; - - read_lock(&fib_lock); + rcu_read_lock(); + n = rcu_dereference(t->trie); if (!n) goto failed; @@ -1465,10 +1468,11 @@ backtrace: failed: ret = 1; found: - read_unlock(&fib_lock); + rcu_read_unlock(); return ret; } +/* only called from updater side */ static int trie_leaf_remove(struct trie *t, t_key key) { t_key cindex; @@ -1503,15 +1507,17 @@ static int trie_leaf_remove(struct trie *t, t_key key) t->revision++; t->size--; + preempt_disable(); tp = NODE_PARENT(n); tnode_free((struct tnode *) n); if (tp) { cindex = tkey_extract_bits(key, tp->pos, tp->bits); put_child(t, (struct tnode *)tp, cindex, NULL); - t->trie = trie_rebalance(t, tp); + rcu_assign_pointer(t->trie, trie_rebalance(t, tp)); } else - t->trie = NULL; + rcu_assign_pointer(t->trie, NULL); + preempt_enable(); return 1; } @@ -1527,7 +1533,6 @@ fn_trie_delete(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta, struct fib_alias *fa, *fa_to_delete; struct list_head *fa_head; struct leaf *l; - int kill_li = 0; struct leaf_info *li; @@ -1560,6 +1565,7 @@ fn_trie_delete(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta, fa_to_delete = NULL; fa_head = fa->fa_list.prev; + 
list_for_each_entry(fa, fa_head, fa_list) { struct fib_info *fi = fa->fa_info; @@ -1587,18 +1593,12 @@ fn_trie_delete(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta, l = fib_find_node(t, key); li = find_leaf_info(&l->list, plen); - write_lock_bh(&fib_lock); - - list_del(&fa->fa_list); + list_del_rcu(&fa->fa_list); if (list_empty(fa_head)) { - hlist_del(&li->hlist); - kill_li = 1; - } - write_unlock_bh(&fib_lock); - - if (kill_li) + hlist_del_rcu(&li->hlist); free_leaf_info(li); + } if (hlist_empty(&l->list)) trie_leaf_remove(t, key); @@ -1606,7 +1606,8 @@ fn_trie_delete(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta, if (fa->fa_state & FA_S_ACCESSED) rt_cache_flush(-1); - fn_free_alias(fa); + fib_release_info(fa->fa_info); + alias_free_mem_rcu(fa); return 0; } @@ -1618,12 +1619,10 @@ static int trie_flush_list(struct trie *t, struct list_head *head) list_for_each_entry_safe(fa, fa_node, head, fa_list) { struct fib_info *fi = fa->fa_info; - if (fi && (fi->fib_flags&RTNH_F_DEAD)) { - write_lock_bh(&fib_lock); - list_del(&fa->fa_list); - write_unlock_bh(&fib_lock); - - fn_free_alias(fa); + if (fi && (fi->fib_flags & RTNH_F_DEAD)) { + list_del_rcu(&fa->fa_list); + fib_release_info(fa->fa_info); + alias_free_mem_rcu(fa); found++; } } @@ -1641,30 +1640,30 @@ static int trie_flush_leaf(struct trie *t, struct leaf *l) found += trie_flush_list(t, &li->falh); if (list_empty(&li->falh)) { - write_lock_bh(&fib_lock); - hlist_del(&li->hlist); - write_unlock_bh(&fib_lock); - + hlist_del_rcu(&li->hlist); free_leaf_info(li); } } return found; } +/* rcu_read_lock needs to be hold by caller from readside */ + static struct leaf *nextleaf(struct trie *t, struct leaf *thisleaf) { struct node *c = (struct node *) thisleaf; struct tnode *p; int idx; + struct node *trie = rcu_dereference(t->trie); if (c == NULL) { - if (t->trie == NULL) + if (trie == NULL) return NULL; - if (IS_LEAF(t->trie)) /* trie w. 
just a leaf */ - return (struct leaf *) t->trie; + if (IS_LEAF(trie)) /* trie w. just a leaf */ + return (struct leaf *) trie; - p = (struct tnode*) t->trie; /* Start */ + p = (struct tnode*) trie; /* Start */ } else p = (struct tnode *) NODE_PARENT(c); @@ -1679,23 +1678,26 @@ static struct leaf *nextleaf(struct trie *t, struct leaf *thisleaf) last = 1 << p->bits; for (idx = pos; idx < last ; idx++) { - if (!p->child[idx]) + c = rcu_dereference(p->child[idx]); + + if (!c) continue; /* Decend if tnode */ - while (IS_TNODE(p->child[idx])) { - p = (struct tnode*) p->child[idx]; - idx = 0; + while (IS_TNODE(c)) { + p = (struct tnode *) c; + idx = 0; /* Rightmost non-NULL branch */ if (p && IS_TNODE(p)) - while (p->child[idx] == NULL && idx < (1 << p->bits)) idx++; + while (!(c = rcu_dereference(p->child[idx])) + && idx < (1<bits)) idx++; /* Done with this tnode? */ - if (idx >= (1 << p->bits) || p->child[idx] == NULL) + if (idx >= (1 << p->bits) || !c) goto up; } - return (struct leaf*) p->child[idx]; + return (struct leaf *) c; } up: /* No more children go up one step */ @@ -1713,6 +1715,7 @@ static int fn_trie_flush(struct fib_table *tb) t->revision++; + rcu_read_lock(); for (h = 0; (l = nextleaf(t, l)) != NULL; h++) { found += trie_flush_leaf(t, l); @@ -1720,6 +1723,7 @@ static int fn_trie_flush(struct fib_table *tb) trie_leaf_remove(t, ll->key); ll = l; } + rcu_read_unlock(); if (ll && hlist_empty(&ll->list)) trie_leaf_remove(t, ll->key); @@ -1745,7 +1749,7 @@ fn_trie_select_default(struct fib_table *tb, const struct flowi *flp, struct fib last_resort = NULL; order = -1; - read_lock(&fib_lock); + rcu_read_lock(); l = fib_find_node(t, 0); if (!l) @@ -1758,7 +1762,7 @@ fn_trie_select_default(struct fib_table *tb, const struct flowi *flp, struct fib if (list_empty(fa_head)) goto out; - list_for_each_entry(fa, fa_head, fa_list) { + list_for_each_entry_rcu(fa, fa_head, fa_list) { struct fib_info *next_fi = fa->fa_info; if (fa->fa_scope != res->scope || @@ -1809,7 
+1813,7 @@ fn_trie_select_default(struct fib_table *tb, const struct flowi *flp, struct fib } trie_last_dflt = last_idx; out:; - read_unlock(&fib_lock); + rcu_read_unlock(); } static int fn_trie_dump_fa(t_key key, int plen, struct list_head *fah, struct fib_table *tb, @@ -1823,7 +1827,9 @@ static int fn_trie_dump_fa(t_key key, int plen, struct list_head *fah, struct fi s_i = cb->args[3]; i = 0; - list_for_each_entry(fa, fah, fa_list) { + /* rcu_read_lock is hold by caller */ + + list_for_each_entry_rcu(fa, fah, fa_list) { if (i < s_i) { i++; continue; @@ -1898,7 +1904,7 @@ static int fn_trie_dump(struct fib_table *tb, struct sk_buff *skb, struct netlin s_m = cb->args[1]; - read_lock(&fib_lock); + rcu_read_lock(); for (m = 0; m <= 32; m++) { if (m < s_m) continue; @@ -1911,11 +1917,11 @@ static int fn_trie_dump(struct fib_table *tb, struct sk_buff *skb, struct netlin goto out; } } - read_unlock(&fib_lock); + rcu_read_unlock(); cb->args[1] = m; return skb->len; out: - read_unlock(&fib_lock); + rcu_read_unlock(); return -1; } @@ -2016,7 +2022,7 @@ static void printnode_seq(struct seq_file *seq, int indent, struct node *n, putspace_seq(seq, indent+2); seq_printf(seq, "{/%d...dumping}\n", i); - list_for_each_entry(fa, fa_head, fa_list) { + list_for_each_entry_rcu(fa, fa_head, fa_list) { putspace_seq(seq, indent+2); if (fa->fa_info == NULL) { seq_printf(seq, "Error fa_info=NULL\n"); @@ -2056,28 +2062,28 @@ static void printnode_seq(struct seq_file *seq, int indent, struct node *n, static void trie_dump_seq(struct seq_file *seq, struct trie *t) { - struct node *n = t->trie; + struct node *n; int cindex = 0; int indent = 1; int pend = 0; int depth = 0; struct tnode *tn; - read_lock(&fib_lock); - + rcu_read_lock(); + n = rcu_dereference(t->trie); seq_printf(seq, "------ trie_dump of t=%p ------\n", t); if (!n) { seq_printf(seq, "------ trie is empty\n"); - read_unlock(&fib_lock); + rcu_read_unlock(); return; } printnode_seq(seq, indent, n, pend, cindex, 0); if 
(!IS_TNODE(n)) { - read_unlock(&fib_lock); + rcu_read_unlock(); return; } @@ -2088,26 +2094,32 @@ static void trie_dump_seq(struct seq_file *seq, struct trie *t) depth++; while (tn && cindex < (1 << tn->bits)) { - if (tn->child[cindex]) { + struct node *child = rcu_dereference(tn->child[cindex]); + if (!child) + cindex++; + else { /* Got a child */ + printnode_seq(seq, indent, child, pend, + cindex, tn->bits); - printnode_seq(seq, indent, tn->child[cindex], pend, cindex, tn->bits); - if (IS_LEAF(tn->child[cindex])) { + if (IS_LEAF(child)) cindex++; - } else { + + else { /* * New tnode. Decend one level */ depth++; - tn = (struct tnode *)tn->child[cindex]; - pend = tn->pos + tn->bits; - putspace_seq(seq, indent); seq_printf(seq, "\\--\n"); + n = child; + tn = (struct tnode *)n; + pend = tn->pos+tn->bits; + putspace_seq(seq, indent); + seq_printf(seq, "\\--\n"); indent += 3; cindex = 0; } - } else - cindex++; + } /* * Test if we are done @@ -2132,8 +2144,7 @@ static void trie_dump_seq(struct seq_file *seq, struct trie *t) depth--; } } - - read_unlock(&fib_lock); + rcu_read_unlock(); } static struct trie_stat *trie_stat_new(void) @@ -2159,7 +2170,7 @@ static struct trie_stat *trie_stat_new(void) static struct trie_stat *trie_collect_stats(struct trie *t) { - struct node *n = t->trie; + struct node *n; struct trie_stat *s = trie_stat_new(); int cindex = 0; int pend = 0; @@ -2167,11 +2178,13 @@ static struct trie_stat *trie_collect_stats(struct trie *t) if (!s) return NULL; + + rcu_read_lock(); + n = rcu_dereference(t->trie); + if (!n) return s; - read_lock(&fib_lock); - if (IS_TNODE(n)) { struct tnode *tn = (struct tnode *)n; pend = tn->pos+tn->bits; @@ -2179,7 +2192,9 @@ static struct trie_stat *trie_collect_stats(struct trie *t) depth++; while (tn && cindex < (1 << tn->bits)) { - if (tn->child[cindex]) { + struct node *ch = rcu_dereference(tn->child[cindex]); + if (ch) { + /* Got a child */ if (IS_LEAF(tn->child[cindex])) { @@ -2199,7 +2214,7 @@ static struct 
trie_stat *trie_collect_stats(struct trie *t) s->nodesizes[tn->bits]++; depth++; - n = tn->child[cindex]; + n = ch; tn = (struct tnode *)n; pend = tn->pos+tn->bits; @@ -2236,7 +2251,7 @@ static struct trie_stat *trie_collect_stats(struct trie *t) } } - read_unlock(&fib_lock); + rcu_read_unlock(); return s; } From afdc08b9f9a7174d7912a160f657f39d46379b5e Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Thu, 25 Aug 2005 15:34:29 -0700 Subject: [PATCH 464/584] [BNX2]: Fix rtnl deadlock in bnx2_close This fixes an rtnl deadlock problem when flush_scheduled_work() is called from bnx2_close(). In rare cases, linkwatch_event() may be on the workqueue from a previous close of a different device and it will try to get the rtnl lock which is already held by dev_close(). The fix is to set a flag if we are in the reset task which is run from the workqueue. bnx2_close() will loop until the flag is cleared. As suggested by Jeff Garzik, the loop is changed to call msleep(1) instead of yield() in the original patch. flush_scheduled_work() is also moved to bnx2_remove_one() before the netdev is freed. Signed-off-by: Michael Chan Signed-off-by: David S. Miller --- drivers/net/bnx2.c | 15 ++++++++++++++- drivers/net/bnx2.h | 1 + 2 files changed, 15 insertions(+), 1 deletion(-) diff --git a/drivers/net/bnx2.c b/drivers/net/bnx2.c index 3a9d6a8b90a2..635a5856102b 100644 --- a/drivers/net/bnx2.c +++ b/drivers/net/bnx2.c @@ -3975,12 +3975,17 @@ bnx2_reset_task(void *data) { struct bnx2 *bp = data; + if (!netif_running(bp->dev)) + return; + + bp->in_reset_task = 1; bnx2_netif_stop(bp); bnx2_init_nic(bp); atomic_set(&bp->intr_sem, 1); bnx2_netif_start(bp); + bp->in_reset_task = 0; } static void @@ -4172,7 +4177,13 @@ bnx2_close(struct net_device *dev) struct bnx2 *bp = dev->priv; u32 reset_code; - flush_scheduled_work(); + /* Calling flush_scheduled_work() may deadlock because + * linkwatch_event() may be on the workqueue and it will try to get + * the rtnl_lock which we are holding. 
+ */ + while (bp->in_reset_task) + msleep(1); + bnx2_netif_stop(bp); del_timer_sync(&bp->timer); if (bp->wol) @@ -5453,6 +5464,8 @@ bnx2_remove_one(struct pci_dev *pdev) struct net_device *dev = pci_get_drvdata(pdev); struct bnx2 *bp = dev->priv; + flush_scheduled_work(); + unregister_netdev(dev); if (bp->regview) diff --git a/drivers/net/bnx2.h b/drivers/net/bnx2.h index 8214a2853d0d..63b94ca0018b 100644 --- a/drivers/net/bnx2.h +++ b/drivers/net/bnx2.h @@ -3874,6 +3874,7 @@ struct bnx2 { int timer_interval; struct timer_list timer; struct work_struct reset_task; + int in_reset_task; /* Used to synchronize phy accesses. */ spinlock_t phy_lock; From cd339a0ed61097d92ce03b6d1042b1e4d58535e7 Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Thu, 25 Aug 2005 15:35:24 -0700 Subject: [PATCH 465/584] [BNX2]: speedup serdes linkup This speeds up link-up time on 5706 SerDes if the link partner does not autoneg, a rather common scenario in blade servers. Some blade servers use IPMI for keyboard input and it's important to minimize link disruptions. The speedup is achieved by shortening the timer to (HZ / 3) during the transient period right after initiating a SerDes autoneg. If autoneg does not complete, parallel detect can be done sooner. After the transient period is over, the timer goes back to its normal HZ interval. As suggested by Jeff Garzik, the timer initialization is moved to bnx2_init_board() from bnx2_open(). An eeprom bit is also added to allow default forced SerDes speed for even faster link-up time. Signed-off-by: Michael Chan Signed-off-by: David S. 
Miller --- drivers/net/bnx2.c | 52 +++++++++++++++++++++++++++++++++++----------- drivers/net/bnx2.h | 6 +++++- 2 files changed, 45 insertions(+), 13 deletions(-) diff --git a/drivers/net/bnx2.c b/drivers/net/bnx2.c index 635a5856102b..015ff7906601 100644 --- a/drivers/net/bnx2.c +++ b/drivers/net/bnx2.c @@ -806,7 +806,19 @@ bnx2_setup_serdes_phy(struct bnx2 *bp) bnx2_write_phy(bp, MII_ADVERTISE, new_adv); bnx2_write_phy(bp, MII_BMCR, bmcr | BMCR_ANRESTART | BMCR_ANENABLE); - bp->serdes_an_pending = SERDES_AN_TIMEOUT / bp->timer_interval; + if (CHIP_NUM(bp) == CHIP_NUM_5706) { + /* Speed up link-up time when the link partner + * does not autonegotiate which is very common + * in blade servers. Some blade servers use + * IPMI for kerboard input and it's important + * to minimize link disruptions. Autoneg. involves + * exchanging base pages plus 3 next pages and + * normally completes in about 120 msec. + */ + bp->current_interval = SERDES_AN_TIMEOUT; + bp->serdes_an_pending = 1; + mod_timer(&bp->timer, jiffies + bp->current_interval); + } } return 0; @@ -3800,6 +3812,9 @@ bnx2_timer(unsigned long data) struct bnx2 *bp = (struct bnx2 *) data; u32 msg; + if (!netif_running(bp->dev)) + return; + if (atomic_read(&bp->intr_sem) != 0) goto bnx2_restart_timer; @@ -3817,6 +3832,8 @@ bnx2_timer(unsigned long data) else if ((bp->link_up == 0) && (bp->autoneg & AUTONEG_SPEED)) { u32 bmcr; + bp->current_interval = bp->timer_interval; + bnx2_read_phy(bp, MII_BMCR, &bmcr); if (bmcr & BMCR_ANENABLE) { @@ -3859,14 +3876,14 @@ bnx2_timer(unsigned long data) } } + else + bp->current_interval = bp->timer_interval; spin_unlock_irqrestore(&bp->phy_lock, flags); } bnx2_restart_timer: - bp->timer.expires = RUN_AT(bp->timer_interval); - - add_timer(&bp->timer); + mod_timer(&bp->timer, jiffies + bp->current_interval); } /* Called with rtnl_lock */ @@ -3919,12 +3936,7 @@ bnx2_open(struct net_device *dev) return rc; } - init_timer(&bp->timer); - - bp->timer.expires = 
RUN_AT(bp->timer_interval); - bp->timer.data = (unsigned long) bp; - bp->timer.function = bnx2_timer; - add_timer(&bp->timer); + mod_timer(&bp->timer, jiffies + bp->current_interval); atomic_set(&bp->intr_sem, 0); @@ -4485,8 +4497,9 @@ bnx2_nway_reset(struct net_device *dev) spin_lock_irq(&bp->phy_lock); if (CHIP_NUM(bp) == CHIP_NUM_5706) { - bp->serdes_an_pending = SERDES_AN_TIMEOUT / - bp->timer_interval; + bp->current_interval = SERDES_AN_TIMEOUT; + bp->serdes_an_pending = 1; + mod_timer(&bp->timer, jiffies + bp->current_interval); } } @@ -5315,6 +5328,7 @@ bnx2_init_board(struct pci_dev *pdev, struct net_device *dev) bp->stats_ticks = 1000000 & 0xffff00; bp->timer_interval = HZ; + bp->current_interval = HZ; /* Disable WOL support if we are running on a SERDES chip. */ if (CHIP_BOND_ID(bp) & CHIP_BOND_ID_SERDES_BIT) { @@ -5338,6 +5352,15 @@ bnx2_init_board(struct pci_dev *pdev, struct net_device *dev) bp->req_line_speed = 0; if (bp->phy_flags & PHY_SERDES_FLAG) { bp->advertising = ETHTOOL_ALL_FIBRE_SPEED | ADVERTISED_Autoneg; + + reg = REG_RD_IND(bp, HOST_VIEW_SHMEM_BASE + + BNX2_PORT_HW_CFG_CONFIG); + reg &= BNX2_PORT_HW_CFG_CFG_DFLT_LINK_MASK; + if (reg == BNX2_PORT_HW_CFG_CFG_DFLT_LINK_1G) { + bp->autoneg = 0; + bp->req_line_speed = bp->line_speed = SPEED_1000; + bp->req_duplex = DUPLEX_FULL; + } } else { bp->advertising = ETHTOOL_ALL_COPPER_SPEED | ADVERTISED_Autoneg; @@ -5345,6 +5368,11 @@ bnx2_init_board(struct pci_dev *pdev, struct net_device *dev) bp->req_flow_ctrl = FLOW_CTRL_RX | FLOW_CTRL_TX; + init_timer(&bp->timer); + bp->timer.expires = RUN_AT(bp->timer_interval); + bp->timer.data = (unsigned long) bp; + bp->timer.function = bnx2_timer; + return 0; err_out_unmap: diff --git a/drivers/net/bnx2.h b/drivers/net/bnx2.h index 63b94ca0018b..e1fb099acbf2 100644 --- a/drivers/net/bnx2.h +++ b/drivers/net/bnx2.h @@ -3872,6 +3872,7 @@ struct bnx2 { char *name; int timer_interval; + int current_interval; struct timer_list timer; struct work_struct reset_task; 
int in_reset_task; @@ -3986,7 +3987,7 @@ struct bnx2 { #define PHY_LOOPBACK 2 u8 serdes_an_pending; -#define SERDES_AN_TIMEOUT (2 * HZ) +#define SERDES_AN_TIMEOUT (HZ / 3) u8 mac_addr[8]; @@ -4172,6 +4173,9 @@ struct fw_info { #define BNX2_PORT_HW_CFG_MAC_LOWER 0x00000054 #define BNX2_PORT_HW_CFG_CONFIG 0x00000058 +#define BNX2_PORT_HW_CFG_CFG_DFLT_LINK_MASK 0x001f0000 +#define BNX2_PORT_HW_CFG_CFG_DFLT_LINK_AN 0x00000000 +#define BNX2_PORT_HW_CFG_CFG_DFLT_LINK_1G 0x00030000 #define BNX2_PORT_HW_CFG_IMD_MAC_A_UPPER 0x00000068 #define BNX2_PORT_HW_CFG_IMD_MAC_A_LOWER 0x0000006c From e89bbf1049aac3625fdafe3657ed8d7d5373d351 Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Thu, 25 Aug 2005 15:36:58 -0700 Subject: [PATCH 466/584] [BNX2]: remove atomics in tx Remove atomic operations in the fast tx path. Expensive atomic operations were used to keep track of the number of available tx descriptors. The new code uses the difference between the consumer and producer index to determine the number of free tx descriptors. As suggested by Jeff Garzik, the name of the inline function is changed to all lower case. Signed-off-by: Michael Chan Signed-off-by: David S. 
Miller --- drivers/net/bnx2.c | 35 +++++++++++++++++------------------ drivers/net/bnx2.h | 3 +-- 2 files changed, 18 insertions(+), 20 deletions(-) diff --git a/drivers/net/bnx2.c b/drivers/net/bnx2.c index 015ff7906601..da903b3ebfb0 100644 --- a/drivers/net/bnx2.c +++ b/drivers/net/bnx2.c @@ -107,6 +107,15 @@ static struct flash_spec flash_table[] = MODULE_DEVICE_TABLE(pci, bnx2_pci_tbl); +static inline u32 bnx2_tx_avail(struct bnx2 *bp) +{ + u32 diff = TX_RING_IDX(bp->tx_prod) - TX_RING_IDX(bp->tx_cons); + + if (diff > MAX_TX_DESC_CNT) + diff = (diff & MAX_TX_DESC_CNT) - 1; + return (bp->tx_ring_size - diff); +} + static u32 bnx2_reg_rd_ind(struct bnx2 *bp, u32 offset) { @@ -1338,22 +1347,19 @@ bnx2_tx_int(struct bnx2 *bp) } } - atomic_add(tx_free_bd, &bp->tx_avail_bd); + bp->tx_cons = sw_cons; if (unlikely(netif_queue_stopped(bp->dev))) { unsigned long flags; spin_lock_irqsave(&bp->tx_lock, flags); if ((netif_queue_stopped(bp->dev)) && - (atomic_read(&bp->tx_avail_bd) > MAX_SKB_FRAGS)) { + (bnx2_tx_avail(bp) > MAX_SKB_FRAGS)) { netif_wake_queue(bp->dev); } spin_unlock_irqrestore(&bp->tx_lock, flags); } - - bp->tx_cons = sw_cons; - } static inline void @@ -2971,7 +2977,6 @@ bnx2_init_tx_ring(struct bnx2 *bp) bp->tx_prod = 0; bp->tx_cons = 0; bp->tx_prod_bseq = 0; - atomic_set(&bp->tx_avail_bd, bp->tx_ring_size); val = BNX2_L2CTX_TYPE_TYPE_L2; val |= BNX2_L2CTX_TYPE_SIZE_L2; @@ -4057,9 +4062,7 @@ bnx2_start_xmit(struct sk_buff *skb, struct net_device *dev) u16 prod, ring_prod; int i; - if (unlikely(atomic_read(&bp->tx_avail_bd) < - (skb_shinfo(skb)->nr_frags + 1))) { - + if (unlikely(bnx2_tx_avail(bp) < (skb_shinfo(skb)->nr_frags + 1))) { netif_stop_queue(dev); printk(KERN_ERR PFX "%s: BUG! 
Tx ring full when queue awake!\n", dev->name); @@ -4156,8 +4159,6 @@ bnx2_start_xmit(struct sk_buff *skb, struct net_device *dev) prod = NEXT_TX_BD(prod); bp->tx_prod_bseq += skb->len; - atomic_sub(last_frag + 1, &bp->tx_avail_bd); - REG_WR16(bp, MB_TX_CID_ADDR + BNX2_L2CTX_TX_HOST_BIDX, prod); REG_WR(bp, MB_TX_CID_ADDR + BNX2_L2CTX_TX_HOST_BSEQ, bp->tx_prod_bseq); @@ -4166,16 +4167,14 @@ bnx2_start_xmit(struct sk_buff *skb, struct net_device *dev) bp->tx_prod = prod; dev->trans_start = jiffies; - if (unlikely(atomic_read(&bp->tx_avail_bd) <= MAX_SKB_FRAGS)) { + if (unlikely(bnx2_tx_avail(bp) <= MAX_SKB_FRAGS)) { unsigned long flags; spin_lock_irqsave(&bp->tx_lock, flags); - if (atomic_read(&bp->tx_avail_bd) <= MAX_SKB_FRAGS) { - netif_stop_queue(dev); - - if (atomic_read(&bp->tx_avail_bd) > MAX_SKB_FRAGS) - netif_wake_queue(dev); - } + netif_stop_queue(dev); + + if (bnx2_tx_avail(bp) > MAX_SKB_FRAGS) + netif_wake_queue(dev); spin_unlock_irqrestore(&bp->tx_lock, flags); } diff --git a/drivers/net/bnx2.h b/drivers/net/bnx2.h index e1fb099acbf2..9ad3f5740cd8 100644 --- a/drivers/net/bnx2.h +++ b/drivers/net/bnx2.h @@ -3841,12 +3841,12 @@ struct bnx2 { struct status_block *status_blk; u32 last_status_idx; - atomic_t tx_avail_bd; struct tx_bd *tx_desc_ring; struct sw_bd *tx_buf_ring; u32 tx_prod_bseq; u16 tx_prod; u16 tx_cons; + int tx_ring_size; #ifdef BCM_VLAN struct vlan_group *vlgrp; @@ -3929,7 +3929,6 @@ struct bnx2 { u16 fw_wr_seq; u16 fw_drv_pulse_wr_seq; - int tx_ring_size; dma_addr_t tx_desc_mapping; From c770a65cee7cc250d7bccd99fa55e742988ae4e0 Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Thu, 25 Aug 2005 15:38:39 -0700 Subject: [PATCH 467/584] [BNX2]: change irq locks to bh locks Change all locks from spin_lock_irqsave() to spin_lock_bh(). All places that require spinlocks are in BH context. Signed-off-by: Michael Chan Signed-off-by: David S. 
Miller --- drivers/net/bnx2.c | 56 ++++++++++++++++++++-------------------------- 1 file changed, 24 insertions(+), 32 deletions(-) diff --git a/drivers/net/bnx2.c b/drivers/net/bnx2.c index da903b3ebfb0..418190b79f6c 100644 --- a/drivers/net/bnx2.c +++ b/drivers/net/bnx2.c @@ -1350,15 +1350,13 @@ bnx2_tx_int(struct bnx2 *bp) bp->tx_cons = sw_cons; if (unlikely(netif_queue_stopped(bp->dev))) { - unsigned long flags; - - spin_lock_irqsave(&bp->tx_lock, flags); + spin_lock(&bp->tx_lock); if ((netif_queue_stopped(bp->dev)) && (bnx2_tx_avail(bp) > MAX_SKB_FRAGS)) { netif_wake_queue(bp->dev); } - spin_unlock_irqrestore(&bp->tx_lock, flags); + spin_unlock(&bp->tx_lock); } } @@ -1598,11 +1596,9 @@ bnx2_poll(struct net_device *dev, int *budget) (bp->status_blk->status_attn_bits_ack & STATUS_ATTN_BITS_LINK_STATE)) { - unsigned long flags; - - spin_lock_irqsave(&bp->phy_lock, flags); + spin_lock(&bp->phy_lock); bnx2_phy_int(bp); - spin_unlock_irqrestore(&bp->phy_lock, flags); + spin_unlock(&bp->phy_lock); } if (bp->status_blk->status_tx_quick_consumer_index0 != bp->tx_cons) { @@ -1645,9 +1641,8 @@ bnx2_set_rx_mode(struct net_device *dev) struct bnx2 *bp = dev->priv; u32 rx_mode, sort_mode; int i; - unsigned long flags; - spin_lock_irqsave(&bp->phy_lock, flags); + spin_lock_bh(&bp->phy_lock); rx_mode = bp->rx_mode & ~(BNX2_EMAC_RX_MODE_PROMISCUOUS | BNX2_EMAC_RX_MODE_KEEP_VLAN_TAG); @@ -1708,7 +1703,7 @@ bnx2_set_rx_mode(struct net_device *dev) REG_WR(bp, BNX2_RPM_SORT_USER0, sort_mode); REG_WR(bp, BNX2_RPM_SORT_USER0, sort_mode | BNX2_RPM_SORT_USER0_ENA); - spin_unlock_irqrestore(&bp->phy_lock, flags); + spin_unlock_bh(&bp->phy_lock); } static void @@ -3768,10 +3763,10 @@ bnx2_test_link(struct bnx2 *bp) { u32 bmsr; - spin_lock_irq(&bp->phy_lock); + spin_lock_bh(&bp->phy_lock); bnx2_read_phy(bp, MII_BMSR, &bmsr); bnx2_read_phy(bp, MII_BMSR, &bmsr); - spin_unlock_irq(&bp->phy_lock); + spin_unlock_bh(&bp->phy_lock); if (bmsr & BMSR_LSTATUS) { return 0; @@ -3828,9 +3823,8 @@ 
bnx2_timer(unsigned long data) if ((bp->phy_flags & PHY_SERDES_FLAG) && (CHIP_NUM(bp) == CHIP_NUM_5706)) { - unsigned long flags; - spin_lock_irqsave(&bp->phy_lock, flags); + spin_lock(&bp->phy_lock); if (bp->serdes_an_pending) { bp->serdes_an_pending--; } @@ -3884,7 +3878,7 @@ bnx2_timer(unsigned long data) else bp->current_interval = bp->timer_interval; - spin_unlock_irqrestore(&bp->phy_lock, flags); + spin_unlock(&bp->phy_lock); } bnx2_restart_timer: @@ -4168,14 +4162,12 @@ bnx2_start_xmit(struct sk_buff *skb, struct net_device *dev) dev->trans_start = jiffies; if (unlikely(bnx2_tx_avail(bp) <= MAX_SKB_FRAGS)) { - unsigned long flags; - - spin_lock_irqsave(&bp->tx_lock, flags); + spin_lock(&bp->tx_lock); netif_stop_queue(dev); if (bnx2_tx_avail(bp) > MAX_SKB_FRAGS) netif_wake_queue(dev); - spin_unlock_irqrestore(&bp->tx_lock, flags); + spin_unlock(&bp->tx_lock); } return NETDEV_TX_OK; @@ -4411,11 +4403,11 @@ bnx2_set_settings(struct net_device *dev, struct ethtool_cmd *cmd) bp->req_line_speed = req_line_speed; bp->req_duplex = req_duplex; - spin_lock_irq(&bp->phy_lock); + spin_lock_bh(&bp->phy_lock); bnx2_setup_phy(bp); - spin_unlock_irq(&bp->phy_lock); + spin_unlock_bh(&bp->phy_lock); return 0; } @@ -4485,16 +4477,16 @@ bnx2_nway_reset(struct net_device *dev) return -EINVAL; } - spin_lock_irq(&bp->phy_lock); + spin_lock_bh(&bp->phy_lock); /* Force a link down visible on the other side */ if (bp->phy_flags & PHY_SERDES_FLAG) { bnx2_write_phy(bp, MII_BMCR, BMCR_LOOPBACK); - spin_unlock_irq(&bp->phy_lock); + spin_unlock_bh(&bp->phy_lock); msleep(20); - spin_lock_irq(&bp->phy_lock); + spin_lock_bh(&bp->phy_lock); if (CHIP_NUM(bp) == CHIP_NUM_5706) { bp->current_interval = SERDES_AN_TIMEOUT; bp->serdes_an_pending = 1; @@ -4506,7 +4498,7 @@ bnx2_nway_reset(struct net_device *dev) bmcr &= ~BMCR_LOOPBACK; bnx2_write_phy(bp, MII_BMCR, bmcr | BMCR_ANRESTART | BMCR_ANENABLE); - spin_unlock_irq(&bp->phy_lock); + spin_unlock_bh(&bp->phy_lock); return 0; } @@ -4692,11 
+4684,11 @@ bnx2_set_pauseparam(struct net_device *dev, struct ethtool_pauseparam *epause) bp->autoneg &= ~AUTONEG_FLOW_CTRL; } - spin_lock_irq(&bp->phy_lock); + spin_lock_bh(&bp->phy_lock); bnx2_setup_phy(bp); - spin_unlock_irq(&bp->phy_lock); + spin_unlock_bh(&bp->phy_lock); return 0; } @@ -5046,9 +5038,9 @@ bnx2_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) case SIOCGMIIREG: { u32 mii_regval; - spin_lock_irq(&bp->phy_lock); + spin_lock_bh(&bp->phy_lock); err = bnx2_read_phy(bp, data->reg_num & 0x1f, &mii_regval); - spin_unlock_irq(&bp->phy_lock); + spin_unlock_bh(&bp->phy_lock); data->val_out = mii_regval; @@ -5059,9 +5051,9 @@ bnx2_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) if (!capable(CAP_NET_ADMIN)) return -EPERM; - spin_lock_irq(&bp->phy_lock); + spin_lock_bh(&bp->phy_lock); err = bnx2_write_phy(bp, data->reg_num & 0x1f, data->val_in); - spin_unlock_irq(&bp->phy_lock); + spin_unlock_bh(&bp->phy_lock); return err; From 73eef4cddb2738c4e8c5ef157ebb1b19d6c9272f Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Thu, 25 Aug 2005 15:39:15 -0700 Subject: [PATCH 468/584] [BNX2]: update version and minor fixes Update version and add 4 minor fixes, the last 2 were suggested by Jeff Garzik: 1. check for a valid ethernet address before setting it 2. zero out bp->regview if init_one encounters an error and unmaps the IO address. This prevents remove_one from unmapping again. 3. use netif_rx_schedule() instead of hand coding the same. 4. use IRQ_HANDLED and IRQ_NONE. Signed-off-by: Michael Chan Signed-off-by: David S. 
Miller --- drivers/net/bnx2.c | 32 +++++++++++++++----------------- 1 file changed, 15 insertions(+), 17 deletions(-) diff --git a/drivers/net/bnx2.c b/drivers/net/bnx2.c index 418190b79f6c..7babf6af4e28 100644 --- a/drivers/net/bnx2.c +++ b/drivers/net/bnx2.c @@ -14,8 +14,8 @@ #define DRV_MODULE_NAME "bnx2" #define PFX DRV_MODULE_NAME ": " -#define DRV_MODULE_VERSION "1.2.19" -#define DRV_MODULE_RELDATE "May 23, 2005" +#define DRV_MODULE_VERSION "1.2.20" +#define DRV_MODULE_RELDATE "August 22, 2005" #define RUN_AT(x) (jiffies + (x)) @@ -1538,15 +1538,12 @@ bnx2_msi(int irq, void *dev_instance, struct pt_regs *regs) BNX2_PCICFG_INT_ACK_CMD_MASK_INT); /* Return here if interrupt is disabled. */ - if (unlikely(atomic_read(&bp->intr_sem) != 0)) { - return IRQ_RETVAL(1); - } + if (unlikely(atomic_read(&bp->intr_sem) != 0)) + return IRQ_HANDLED; - if (netif_rx_schedule_prep(dev)) { - __netif_rx_schedule(dev); - } + netif_rx_schedule(dev); - return IRQ_RETVAL(1); + return IRQ_HANDLED; } static irqreturn_t @@ -1564,22 +1561,19 @@ bnx2_interrupt(int irq, void *dev_instance, struct pt_regs *regs) if ((bp->status_blk->status_idx == bp->last_status_idx) || (REG_RD(bp, BNX2_PCICFG_MISC_STATUS) & BNX2_PCICFG_MISC_STATUS_INTA_VALUE)) - return IRQ_RETVAL(0); + return IRQ_NONE; REG_WR(bp, BNX2_PCICFG_INT_ACK_CMD, BNX2_PCICFG_INT_ACK_CMD_USE_INT_HC_PARAM | BNX2_PCICFG_INT_ACK_CMD_MASK_INT); /* Return here if interrupt is shared and is disabled. 
*/ - if (unlikely(atomic_read(&bp->intr_sem) != 0)) { - return IRQ_RETVAL(1); - } + if (unlikely(atomic_read(&bp->intr_sem) != 0)) + return IRQ_HANDLED; - if (netif_rx_schedule_prep(dev)) { - __netif_rx_schedule(dev); - } + netif_rx_schedule(dev); - return IRQ_RETVAL(1); + return IRQ_HANDLED; } static int @@ -5071,6 +5065,9 @@ bnx2_change_mac_addr(struct net_device *dev, void *p) struct sockaddr *addr = p; struct bnx2 *bp = dev->priv; + if (!is_valid_ether_addr(addr->sa_data)) + return -EINVAL; + memcpy(dev->dev_addr, addr->sa_data, dev->addr_len); if (netif_running(dev)) bnx2_set_mac_addr(bp); @@ -5369,6 +5366,7 @@ bnx2_init_board(struct pci_dev *pdev, struct net_device *dev) err_out_unmap: if (bp->regview) { iounmap(bp->regview); + bp->regview = NULL; } err_out_release: From 57bf1451ac79640c5a0a4f31284c43539fac2903 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Thu, 25 Aug 2005 16:06:19 -0700 Subject: [PATCH 469/584] [NET]: net/802: more endian annotations The rest of endian warnings now belongs to tr.c exclusively. Signed-off-by: Alexey Dobriyan Signed-off-by: David S. 
Miller --- include/linux/hippidevice.h | 3 +-- include/linux/if_fc.h | 2 +- include/linux/if_fddi.h | 2 +- include/linux/if_hippi.h | 6 +++--- net/802/fc.c | 2 +- net/802/fddi.c | 4 ++-- net/802/hippi.c | 2 +- 7 files changed, 10 insertions(+), 11 deletions(-) diff --git a/include/linux/hippidevice.h b/include/linux/hippidevice.h index 9bc3b688d2ee..bab303dafd6e 100644 --- a/include/linux/hippidevice.h +++ b/include/linux/hippidevice.h @@ -31,8 +31,7 @@ struct hippi_cb { __u32 ifield; }; -extern unsigned short hippi_type_trans(struct sk_buff *skb, - struct net_device *dev); +extern __be16 hippi_type_trans(struct sk_buff *skb, struct net_device *dev); extern struct net_device *alloc_hippi_dev(int sizeof_priv); #endif diff --git a/include/linux/if_fc.h b/include/linux/if_fc.h index 33330b458b95..376a34ea4723 100644 --- a/include/linux/if_fc.h +++ b/include/linux/if_fc.h @@ -44,7 +44,7 @@ struct fcllc { __u8 ssap; /* source SAP */ __u8 llc; /* LLC control field */ __u8 protid[3]; /* protocol id */ - __u16 ethertype; /* ether type field */ + __be16 ethertype; /* ether type field */ }; #endif /* _LINUX_IF_FC_H */ diff --git a/include/linux/if_fddi.h b/include/linux/if_fddi.h index a912818e6361..1288a161bc0b 100644 --- a/include/linux/if_fddi.h +++ b/include/linux/if_fddi.h @@ -85,7 +85,7 @@ struct fddi_snap_hdr __u8 ssap; /* always 0xAA */ __u8 ctrl; /* always 0x03 */ __u8 oui[FDDI_K_OUI_LEN]; /* organizational universal id */ - __u16 ethertype; /* packet type ID field */ + __be16 ethertype; /* packet type ID field */ } __attribute__ ((packed)); /* Define FDDI LLC frame header */ diff --git a/include/linux/if_hippi.h b/include/linux/if_hippi.h index c8ca72c46f76..94d31ca7d71a 100644 --- a/include/linux/if_hippi.h +++ b/include/linux/if_hippi.h @@ -102,9 +102,9 @@ struct hippi_fp_hdr #error "Please fix " #endif #else - __u32 fixed; + __be32 fixed; #endif - __u32 d2_size; + __be32 d2_size; } __attribute__ ((packed)); struct hippi_le_hdr @@ -144,7 +144,7 @@ struct 
hippi_snap_hdr __u8 ssap; /* always 0xAA */ __u8 ctrl; /* always 0x03 */ __u8 oui[HIPPI_OUI_LEN]; /* organizational universal id (zero)*/ - __u16 ethertype; /* packet type ID field */ + __be16 ethertype; /* packet type ID field */ } __attribute__ ((packed)); struct hippi_hdr diff --git a/net/802/fc.c b/net/802/fc.c index 640d34e026c2..282c4ab1abe6 100644 --- a/net/802/fc.c +++ b/net/802/fc.c @@ -87,7 +87,7 @@ static int fc_rebuild_header(struct sk_buff *skb) struct fch_hdr *fch=(struct fch_hdr *)skb->data; struct fcllc *fcllc=(struct fcllc *)(skb->data+sizeof(struct fch_hdr)); if(fcllc->ethertype != htons(ETH_P_IP)) { - printk("fc_rebuild_header: Don't know how to resolve type %04X addresses ?\n",(unsigned int)htons(fcllc->ethertype)); + printk("fc_rebuild_header: Don't know how to resolve type %04X addresses ?\n", ntohs(fcllc->ethertype)); return 0; } #ifdef CONFIG_INET diff --git a/net/802/fddi.c b/net/802/fddi.c index 5ce24c4bb840..ac242a4bc346 100644 --- a/net/802/fddi.c +++ b/net/802/fddi.c @@ -108,8 +108,8 @@ static int fddi_rebuild_header(struct sk_buff *skb) else #endif { - printk("%s: Don't know how to resolve type %02X addresses.\n", - skb->dev->name, htons(fddi->hdr.llc_snap.ethertype)); + printk("%s: Don't know how to resolve type %04X addresses.\n", + skb->dev->name, ntohs(fddi->hdr.llc_snap.ethertype)); return(0); } } diff --git a/net/802/hippi.c b/net/802/hippi.c index cb45ae1310cb..6d7fed3dd99a 100644 --- a/net/802/hippi.c +++ b/net/802/hippi.c @@ -124,7 +124,7 @@ static int hippi_rebuild_header(struct sk_buff *skb) * Determine the packet's protocol ID. */ -unsigned short hippi_type_trans(struct sk_buff *skb, struct net_device *dev) +__be16 hippi_type_trans(struct sk_buff *skb, struct net_device *dev) { struct hippi_hdr *hip; From cf4ef01440ca5c6d96f2ea2b793a37a0a863a045 Mon Sep 17 00:00:00 2001 From: "Paul E. 
McKenney" Date: Thu, 25 Aug 2005 16:08:37 -0700 Subject: [PATCH 470/584] [LIST]: Add docbook header comments for hlist_add_{before,after}_rcu() Signed-off-by: David S. Miller --- include/linux/list.h | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/include/linux/list.h b/include/linux/list.h index 9b9b0eec1e8a..e6ec59682274 100644 --- a/include/linux/list.h +++ b/include/linux/list.h @@ -634,6 +634,21 @@ static inline void hlist_add_after(struct hlist_node *n, next->next->pprev = &next->next; } +/** + * hlist_add_before_rcu - adds the specified element to the specified hlist + * before the specified node while permitting racing traversals. + * @n: the new element to add to the hash list. + * @next: the existing element to add the new element before. + * + * The caller must take whatever precautions are necessary + * (such as holding appropriate locks) to avoid racing + * with another list-mutation primitive, such as hlist_add_head_rcu() + * or hlist_del_rcu(), running on this same list. + * However, it is perfectly legal to run concurrently with + * the _rcu list-traversal primitives, such as + * hlist_for_each_rcu(), used to prevent memory-consistency + * problems on Alpha CPUs. + */ static inline void hlist_add_before_rcu(struct hlist_node *n, struct hlist_node *next) { @@ -644,6 +659,21 @@ static inline void hlist_add_before_rcu(struct hlist_node *n, *(n->pprev) = n; } +/** + * hlist_add_after_rcu - adds the specified element to the specified hlist + * after the specified node while permitting racing traversals. + * @prev: the existing element to add the new element after. + * @n: the new element to add to the hash list. + * + * The caller must take whatever precautions are necessary + * (such as holding appropriate locks) to avoid racing + * with another list-mutation primitive, such as hlist_add_head_rcu() + * or hlist_del_rcu(), running on this same list. 
+ * However, it is perfectly legal to run concurrently with + * the _rcu list-traversal primitives, such as + * hlist_for_each_rcu(), used to prevent memory-consistency + * problems on Alpha CPUs. + */ static inline void hlist_add_after_rcu(struct hlist_node *prev, struct hlist_node *n) { From 8082e4ed0a61da347f1c7f210493c4e9e55c8cd0 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Thu, 25 Aug 2005 16:12:22 -0700 Subject: [PATCH 471/584] [LIB]: Boyer-Moore extension for textsearch infrastructure strike #2 Attached the implementation of the Boyer-Moore string search algorithm for the new textsearch infrastructure. I've added as well a note about the limitations that this approach presents, as Thomas has remarked. Signed-off-by: Pablo Neira Ayuso Signed-off-by: David S. Miller --- lib/Kconfig | 10 +++ lib/Makefile | 1 + lib/ts_bm.c | 185 +++++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 196 insertions(+) create mode 100644 lib/ts_bm.c diff --git a/lib/Kconfig b/lib/Kconfig index eeb429a52152..b62f685f797f 100644 --- a/lib/Kconfig +++ b/lib/Kconfig @@ -72,6 +72,16 @@ config TEXTSEARCH config TEXTSEARCH_KMP tristate +config TEXTSEARCH_BM + depends on TEXTSEARCH + tristate "Boyer-Moore" + help + Say Y here if you want to be able to search text using the + Boyer-Moore textsearch algorithm. + + To compile this code as a module, choose M here: the + module will be called ts_bm. 
+ config TEXTSEARCH_FSM tristate diff --git a/lib/Makefile b/lib/Makefile index f28d9031303c..52f83380f704 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -38,6 +38,7 @@ obj-$(CONFIG_REED_SOLOMON) += reed_solomon/ obj-$(CONFIG_TEXTSEARCH) += textsearch.o obj-$(CONFIG_TEXTSEARCH_KMP) += ts_kmp.o +obj-$(CONFIG_TEXTSEARCH_BM) += ts_bm.o obj-$(CONFIG_TEXTSEARCH_FSM) += ts_fsm.o hostprogs-y := gen_crc32table diff --git a/lib/ts_bm.c b/lib/ts_bm.c new file mode 100644 index 000000000000..2cc79112ecc3 --- /dev/null +++ b/lib/ts_bm.c @@ -0,0 +1,185 @@ +/* + * lib/ts_bm.c Boyer-Moore text search implementation + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * Authors: Pablo Neira Ayuso + * + * ========================================================================== + * + * Implements Boyer-Moore string matching algorithm: + * + * [1] A Fast String Searching Algorithm, R.S. Boyer and Moore. + * Communications of the Association for Computing Machinery, + * 20(10), 1977, pp. 762-772. + * http://www.cs.utexas.edu/users/moore/publications/fstrpos.pdf + * + * [2] Handbook of Exact String Matching Algorithms, Thierry Lecroq, 2004 + * http://www-igm.univ-mlv.fr/~lecroq/string/string.pdf + * + * Note: Since Boyer-Moore (BM) performs searches for matchings from right + * to left, it's still possible that a matching could be spread over + * multiple blocks, in that case this algorithm won't find any coincidence. + * + * If you're willing to ensure that such thing won't ever happen, use the + * Knuth-Morris-Pratt (KMP) implementation instead. In conclusion, choose + * the proper string search algorithm depending on your setting. + * + * Say you're using the textsearch infrastructure for filtering, NIDS or + * any similar security focused purpose, then go KMP.
Otherwise, if you + * really care about performance, say you're classifying packets to apply + * Quality of Service (QoS) policies, and you don't mind possible + * matches spread over multiple fragments, then go BM. + */ + +#include <linux/config.h> +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/types.h> +#include <linux/string.h> +#include <linux/textsearch.h> + +/* Alphabet size, use ASCII */ +#define ASIZE 256 + +#if 0 +#define DEBUGP printk +#else +#define DEBUGP(args, format...) +#endif + +struct ts_bm +{ + u8 * pattern; + unsigned int patlen; + unsigned int bad_shift[ASIZE]; + unsigned int good_shift[0]; +}; + +static unsigned int bm_find(struct ts_config *conf, struct ts_state *state) +{ + struct ts_bm *bm = ts_config_priv(conf); + unsigned int i, text_len, consumed = state->offset; + const u8 *text; + int shift = bm->patlen, bs; + + for (;;) { + text_len = conf->get_next_block(consumed, &text, conf, state); + + if (unlikely(text_len == 0)) + break; + + while (shift < text_len) { + DEBUGP("Searching in position %d (%c)\n", + shift, text[shift]); + for (i = 0; i < bm->patlen; i++) + if (text[shift-i] != bm->pattern[bm->patlen-1-i]) + goto next; + + /* London calling... */ + DEBUGP("found!\n"); + return consumed += (shift-(bm->patlen-1)); + +next: bs = bm->bad_shift[text[shift-i]]; + + /* Now jumping to...
*/ + shift = max_t(int, shift-i+bs, shift+bm->good_shift[i]); + } + consumed += text_len; + } + + return UINT_MAX; +} + +static void compute_prefix_tbl(struct ts_bm *bm, const u8 *pattern, + unsigned int len) +{ + int i, j, ended, l[ASIZE]; + + for (i = 0; i < ASIZE; i++) + bm->bad_shift[i] = len; + for (i = 0; i < len - 1; i++) + bm->bad_shift[pattern[i]] = len - 1 - i; + + /* Compute the good shift array, used to match reocurrences + * of a subpattern */ + for (i = 1; i < bm->patlen; i++) { + for (j = 0; j < bm->patlen && bm->pattern[bm->patlen - 1 - j] + == bm->pattern[bm->patlen - 1 - i - j]; j++); + l[i] = j; + } + + bm->good_shift[0] = 1; + for (i = 1; i < bm->patlen; i++) + bm->good_shift[i] = bm->patlen; + for (i = bm->patlen - 1; i > 0; i--) + bm->good_shift[l[i]] = i; + ended = 0; + for (i = 0; i < bm->patlen; i++) { + if (l[i] == bm->patlen - 1 - i) + ended = i; + if (ended) + bm->good_shift[i] = ended; + } +} + +static struct ts_config *bm_init(const void *pattern, unsigned int len, + int gfp_mask) +{ + struct ts_config *conf; + struct ts_bm *bm; + unsigned int prefix_tbl_len = len * sizeof(unsigned int); + size_t priv_size = sizeof(*bm) + len + prefix_tbl_len; + + conf = alloc_ts_config(priv_size, gfp_mask); + if (IS_ERR(conf)) + return conf; + + bm = ts_config_priv(conf); + bm->patlen = len; + bm->pattern = (u8 *) bm->good_shift + prefix_tbl_len; + compute_prefix_tbl(bm, pattern, len); + memcpy(bm->pattern, pattern, len); + + return conf; +} + +static void *bm_get_pattern(struct ts_config *conf) +{ + struct ts_bm *bm = ts_config_priv(conf); + return bm->pattern; +} + +static unsigned int bm_get_pattern_len(struct ts_config *conf) +{ + struct ts_bm *bm = ts_config_priv(conf); + return bm->patlen; +} + +static struct ts_ops bm_ops = { + .name = "bm", + .find = bm_find, + .init = bm_init, + .get_pattern = bm_get_pattern, + .get_pattern_len = bm_get_pattern_len, + .owner = THIS_MODULE, + .list = LIST_HEAD_INIT(bm_ops.list) +}; + +static int __init 
init_bm(void) +{ + return textsearch_register(&bm_ops); +} + +static void __exit exit_bm(void) +{ + textsearch_unregister(&bm_ops); +} + +MODULE_LICENSE("GPL"); + +module_init(init_bm); +module_exit(exit_bm); From 29cb9f9c5502f6218cd3ea574efe46a5e55522d2 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Thu, 25 Aug 2005 16:23:11 -0700 Subject: [PATCH 472/584] [LIB]: Make TEXTSEARCH_BM plain tristate like the others And select it when the relevant modules are enabled. Signed-off-by: David S. Miller --- lib/Kconfig | 9 +-------- net/ipv4/netfilter/Kconfig | 1 + net/sched/Kconfig | 1 + 3 files changed, 3 insertions(+), 8 deletions(-) diff --git a/lib/Kconfig b/lib/Kconfig index b62f685f797f..e43197efeb9c 100644 --- a/lib/Kconfig +++ b/lib/Kconfig @@ -73,14 +73,7 @@ config TEXTSEARCH_KMP tristate config TEXTSEARCH_BM - depends on TEXTSEARCH - tristate "Boyer-Moore" - help - Say Y here if you want to be able to search text using the - Boyer-Moore textsearch algorithm. - - To compile this code as a module, choose M here: the - module will be called ts_bm. 
+ tristate config TEXTSEARCH_FSM tristate diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig index f2bea6ecb226..c4213f3de505 100644 --- a/net/ipv4/netfilter/Kconfig +++ b/net/ipv4/netfilter/Kconfig @@ -415,6 +415,7 @@ config IP_NF_MATCH_STRING depends on IP_NF_IPTABLES select TEXTSEARCH select TEXTSEARCH_KMP + select TEXTSEARCH_BM select TEXTSEARCH_FSM help This option adds a `string' match, which allows you to look for diff --git a/net/sched/Kconfig b/net/sched/Kconfig index 59d3e71f8b85..45d3bc0812c8 100644 --- a/net/sched/Kconfig +++ b/net/sched/Kconfig @@ -491,6 +491,7 @@ config NET_EMATCH_TEXT depends on NET_EMATCH select TEXTSEARCH select TEXTSEARCH_KMP + select TEXTSEARCH_BM select TEXTSEARCH_FSM ---help--- Say Y here if you want to be ablt to classify packets based on From ba89966c1984513f4f2cc0a6c182266be44ddd03 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 26 Aug 2005 12:05:31 -0700 Subject: [PATCH 473/584] [NET]: use __read_mostly on kmem_cache_t , DEFINE_SNMP_STAT pointers This patch puts mostly read only data in the right section (read_mostly), to help sharing of these data between CPUS without memory ping pongs. On one of my production machine, tcp_statistics was sitting in a heavily modified cache line, so *every* SNMP update had to force a reload. Signed-off-by: Eric Dumazet Signed-off-by: David S. 
Miller --- net/bridge/br_fdb.c | 2 +- net/core/flow.c | 2 +- net/core/skbuff.c | 4 ++-- net/dccp/ccids/ccid3.c | 2 +- net/dccp/proto.c | 2 +- net/decnet/dn_table.c | 2 +- net/ipv4/af_inet.c | 2 +- net/ipv4/fib_hash.c | 4 ++-- net/ipv4/fib_trie.c | 2 +- net/ipv4/icmp.c | 2 +- net/ipv4/inetpeer.c | 2 +- net/ipv4/ip_input.c | 2 +- net/ipv4/ipmr.c | 2 +- net/ipv4/ipvs/ip_vs_conn.c | 2 +- net/ipv4/netfilter/ip_conntrack_core.c | 4 ++-- net/ipv4/netfilter/ipt_hashlimit.c | 2 +- net/ipv4/tcp.c | 2 +- net/ipv4/udp.c | 2 +- net/ipv6/icmp.c | 2 +- net/ipv6/ip6_fib.c | 2 +- net/ipv6/ipv6_sockglue.c | 2 +- net/ipv6/udp.c | 2 +- net/ipv6/xfrm6_tunnel.c | 2 +- net/sctp/protocol.c | 6 +++--- net/socket.c | 4 ++-- net/sunrpc/rpc_pipe.c | 4 ++-- net/sunrpc/sched.c | 8 ++++---- net/xfrm/xfrm_input.c | 2 +- net/xfrm/xfrm_policy.c | 2 +- 29 files changed, 39 insertions(+), 39 deletions(-) diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c index e6c2200b7ca3..24396b914d11 100644 --- a/net/bridge/br_fdb.c +++ b/net/bridge/br_fdb.c @@ -23,7 +23,7 @@ #include #include "br_private.h" -static kmem_cache_t *br_fdb_cache; +static kmem_cache_t *br_fdb_cache __read_mostly; static int fdb_insert(struct net_bridge *br, struct net_bridge_port *source, const unsigned char *addr); diff --git a/net/core/flow.c b/net/core/flow.c index f289570b15a3..7e95b39de9fd 100644 --- a/net/core/flow.c +++ b/net/core/flow.c @@ -42,7 +42,7 @@ static DEFINE_PER_CPU(struct flow_cache_entry **, flow_tables) = { NULL }; #define flow_table(cpu) (per_cpu(flow_tables, cpu)) -static kmem_cache_t *flow_cachep; +static kmem_cache_t *flow_cachep __read_mostly; static int flow_lwm, flow_hwm; diff --git a/net/core/skbuff.c b/net/core/skbuff.c index b853a9b29eb6..f80a28785610 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -68,8 +68,8 @@ #include #include -static kmem_cache_t *skbuff_head_cache; -static kmem_cache_t *skbuff_fclone_cache; +static kmem_cache_t *skbuff_head_cache __read_mostly; +static kmem_cache_t 
*skbuff_fclone_cache __read_mostly; struct timeval __read_mostly skb_tv_base; diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c index fe4cc85f5bcc..cf93b019ecbe 100644 --- a/net/dccp/ccids/ccid3.c +++ b/net/dccp/ccids/ccid3.c @@ -85,7 +85,7 @@ static int ccid3_debug; static struct dccp_tx_hist *ccid3_tx_hist; static struct dccp_rx_hist *ccid3_rx_hist; -static kmem_cache_t *ccid3_loss_interval_hist_slab; +static kmem_cache_t *ccid3_loss_interval_hist_slab __read_mostly; static inline struct ccid3_loss_interval_hist_entry * ccid3_loss_interval_hist_entry_new(const unsigned int __nocast prio) diff --git a/net/dccp/proto.c b/net/dccp/proto.c index 600dda51d995..f97e92ea34f3 100644 --- a/net/dccp/proto.c +++ b/net/dccp/proto.c @@ -39,7 +39,7 @@ #include "ccid.h" #include "dccp.h" -DEFINE_SNMP_STAT(struct dccp_mib, dccp_statistics); +DEFINE_SNMP_STAT(struct dccp_mib, dccp_statistics) __read_mostly; atomic_t dccp_orphan_count = ATOMIC_INIT(0); diff --git a/net/decnet/dn_table.c b/net/decnet/dn_table.c index 73a88489ff3e..eeba56f99323 100644 --- a/net/decnet/dn_table.c +++ b/net/decnet/dn_table.c @@ -79,7 +79,7 @@ for( ; ((f) = *(fp)) != NULL && dn_key_eq((f)->fn_key, (key)); (fp) = &(f)->fn_n static DEFINE_RWLOCK(dn_fib_tables_lock); struct dn_fib_table *dn_fib_tables[RT_TABLE_MAX + 1]; -static kmem_cache_t *dn_hash_kmem; +static kmem_cache_t *dn_hash_kmem __read_mostly; static int dn_fib_hash_zombies; static inline dn_fib_idx_t dn_hash(dn_fib_key_t key, struct dn_zone *dz) diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 5810f9d14914..bf147f8db399 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -113,7 +113,7 @@ #include #endif -DEFINE_SNMP_STAT(struct linux_mib, net_statistics); +DEFINE_SNMP_STAT(struct linux_mib, net_statistics) __read_mostly; extern void ip_mc_drop_socket(struct sock *sk); diff --git a/net/ipv4/fib_hash.c b/net/ipv4/fib_hash.c index b10d6bb5ef3d..2a8c9afc3695 100644 --- a/net/ipv4/fib_hash.c +++ b/net/ipv4/fib_hash.c @@ 
-45,8 +45,8 @@ #include "fib_lookup.h" -static kmem_cache_t *fn_hash_kmem; -static kmem_cache_t *fn_alias_kmem; +static kmem_cache_t *fn_hash_kmem __read_mostly; +static kmem_cache_t *fn_alias_kmem __read_mostly; struct fib_node { struct hlist_node fn_hash; diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index ff21748248e4..b2dea4e5da77 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -166,7 +166,7 @@ static struct tnode *halve(struct trie *t, struct tnode *tn); static void tnode_free(struct tnode *tn); static void trie_dump_seq(struct seq_file *seq, struct trie *t); -static kmem_cache_t *fn_alias_kmem; +static kmem_cache_t *fn_alias_kmem __read_mostly; static struct trie *trie_local = NULL, *trie_main = NULL; diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index 25f66b750fd8..24eb56ae1b5a 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -114,7 +114,7 @@ struct icmp_bxm { /* * Statistics */ -DEFINE_SNMP_STAT(struct icmp_mib, icmp_statistics); +DEFINE_SNMP_STAT(struct icmp_mib, icmp_statistics) __read_mostly; /* An array of errno for error messages from dest unreach. */ /* RFC 1122: 3.2.2.1 States that NET_UNREACH, HOST_UNREACH and SR_FAILED MUST be considered 'transient errs'. */ diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c index 4410b9dc03e9..f84ba9c96551 100644 --- a/net/ipv4/inetpeer.c +++ b/net/ipv4/inetpeer.c @@ -73,7 +73,7 @@ /* Exported for inet_getid inline function. 
*/ DEFINE_SPINLOCK(inet_peer_idlock); -static kmem_cache_t *peer_cachep; +static kmem_cache_t *peer_cachep __read_mostly; #define node_height(x) x->avl_height static struct inet_peer peer_fake_node = { diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c index 220a8b5920ea..473d0f2b2e0d 100644 --- a/net/ipv4/ip_input.c +++ b/net/ipv4/ip_input.c @@ -150,7 +150,7 @@ * SNMP management statistics */ -DEFINE_SNMP_STAT(struct ipstats_mib, ip_statistics); +DEFINE_SNMP_STAT(struct ipstats_mib, ip_statistics) __read_mostly; /* * Process Router Attention IP option diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index dc806b578427..9dbf5909f3a6 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -103,7 +103,7 @@ static DEFINE_SPINLOCK(mfc_unres_lock); In this case data path is free of exclusive locks at all. */ -static kmem_cache_t *mrt_cachep; +static kmem_cache_t *mrt_cachep __read_mostly; static int ip_mr_forward(struct sk_buff *skb, struct mfc_cache *cache, int local); static int ipmr_cache_report(struct sk_buff *pkt, vifi_t vifi, int assert); diff --git a/net/ipv4/ipvs/ip_vs_conn.c b/net/ipv4/ipvs/ip_vs_conn.c index d0145a8b1551..e11952ea17af 100644 --- a/net/ipv4/ipvs/ip_vs_conn.c +++ b/net/ipv4/ipvs/ip_vs_conn.c @@ -40,7 +40,7 @@ static struct list_head *ip_vs_conn_tab; /* SLAB cache for IPVS connections */ -static kmem_cache_t *ip_vs_conn_cachep; +static kmem_cache_t *ip_vs_conn_cachep __read_mostly; /* counter for current IPVS connections */ static atomic_t ip_vs_conn_count = ATOMIC_INIT(0); diff --git a/net/ipv4/netfilter/ip_conntrack_core.c b/net/ipv4/netfilter/ip_conntrack_core.c index 285743bfbed3..a0648600190e 100644 --- a/net/ipv4/netfilter/ip_conntrack_core.c +++ b/net/ipv4/netfilter/ip_conntrack_core.c @@ -70,8 +70,8 @@ static LIST_HEAD(helpers); unsigned int ip_conntrack_htable_size = 0; int ip_conntrack_max; struct list_head *ip_conntrack_hash; -static kmem_cache_t *ip_conntrack_cachep; -static kmem_cache_t *ip_conntrack_expect_cachep; +static 
kmem_cache_t *ip_conntrack_cachep __read_mostly; +static kmem_cache_t *ip_conntrack_expect_cachep __read_mostly; struct ip_conntrack ip_conntrack_untracked; unsigned int ip_ct_log_invalid; static LIST_HEAD(unconfirmed); diff --git a/net/ipv4/netfilter/ipt_hashlimit.c b/net/ipv4/netfilter/ipt_hashlimit.c index 564b49bfebcf..2dd1cccbdab9 100644 --- a/net/ipv4/netfilter/ipt_hashlimit.c +++ b/net/ipv4/netfilter/ipt_hashlimit.c @@ -94,7 +94,7 @@ struct ipt_hashlimit_htable { static DEFINE_SPINLOCK(hashlimit_lock); /* protects htables list */ static DECLARE_MUTEX(hlimit_mutex); /* additional checkentry protection */ static HLIST_HEAD(hashlimit_htables); -static kmem_cache_t *hashlimit_cachep; +static kmem_cache_t *hashlimit_cachep __read_mostly; static inline int dst_cmp(const struct dsthash_ent *ent, struct dsthash_dst *b) { diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 68626de6d69c..02fdda68718d 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -269,7 +269,7 @@ int sysctl_tcp_fin_timeout = TCP_FIN_TIMEOUT; -DEFINE_SNMP_STAT(struct tcp_mib, tcp_statistics); +DEFINE_SNMP_STAT(struct tcp_mib, tcp_statistics) __read_mostly; atomic_t tcp_orphan_count = ATOMIC_INIT(0); diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 3a5bbbe7dd85..e5beca7de86c 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -113,7 +113,7 @@ * Snmp MIB for the UDP layer */ -DEFINE_SNMP_STAT(struct udp_mib, udp_statistics); +DEFINE_SNMP_STAT(struct udp_mib, udp_statistics) __read_mostly; struct hlist_head udp_hash[UDP_HTABLE_SIZE]; DEFINE_RWLOCK(udp_hash_lock); diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index ff685f229b69..5176fc655ea9 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -67,7 +67,7 @@ #include #include -DEFINE_SNMP_STAT(struct icmpv6_mib, icmpv6_statistics); +DEFINE_SNMP_STAT(struct icmpv6_mib, icmpv6_statistics) __read_mostly; /* * The ICMP socket(s). 
This is the most convenient way to flow control diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index 1b354aa97934..16af874c9e8f 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -49,7 +49,7 @@ struct rt6_statistics rt6_stats; -static kmem_cache_t * fib6_node_kmem; +static kmem_cache_t * fib6_node_kmem __read_mostly; enum fib_walk_state_t { diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index 7516b8829a9d..76466af8331e 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c @@ -55,7 +55,7 @@ #include -DEFINE_SNMP_STAT(struct ipstats_mib, ipv6_statistics); +DEFINE_SNMP_STAT(struct ipstats_mib, ipv6_statistics) __read_mostly; static struct packet_type ipv6_packet_type = { .type = __constant_htons(ETH_P_IPV6), diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 67d9a04b6902..390d750449ce 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -59,7 +59,7 @@ #include #include -DEFINE_SNMP_STAT(struct udp_mib, udp_stats_in6); +DEFINE_SNMP_STAT(struct udp_mib, udp_stats_in6) __read_mostly; /* Grrr, addr_type already calculated by caller, but I don't want * to add some silly "cookie" argument to this method just for that. diff --git a/net/ipv6/xfrm6_tunnel.c b/net/ipv6/xfrm6_tunnel.c index 60c26c87277e..fbef7826a74f 100644 --- a/net/ipv6/xfrm6_tunnel.c +++ b/net/ipv6/xfrm6_tunnel.c @@ -79,7 +79,7 @@ static u32 xfrm6_tunnel_spi; #define XFRM6_TUNNEL_SPI_MIN 1 #define XFRM6_TUNNEL_SPI_MAX 0xffffffff -static kmem_cache_t *xfrm6_tunnel_spi_kmem; +static kmem_cache_t *xfrm6_tunnel_spi_kmem __read_mostly; #define XFRM6_TUNNEL_SPI_BYADDR_HSIZE 256 #define XFRM6_TUNNEL_SPI_BYSPI_HSIZE 256 diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index 7d8ec6526347..e7025be77691 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -62,7 +62,7 @@ /* Global data structures. 
*/ struct sctp_globals sctp_globals; struct proc_dir_entry *proc_net_sctp; -DEFINE_SNMP_STAT(struct sctp_mib, sctp_statistics); +DEFINE_SNMP_STAT(struct sctp_mib, sctp_statistics) __read_mostly; struct idr sctp_assocs_id; DEFINE_SPINLOCK(sctp_assocs_id_lock); @@ -78,8 +78,8 @@ static struct sctp_pf *sctp_pf_inet_specific; static struct sctp_af *sctp_af_v4_specific; static struct sctp_af *sctp_af_v6_specific; -kmem_cache_t *sctp_chunk_cachep; -kmem_cache_t *sctp_bucket_cachep; +kmem_cache_t *sctp_chunk_cachep __read_mostly; +kmem_cache_t *sctp_bucket_cachep __read_mostly; extern int sctp_snmp_proc_init(void); extern int sctp_snmp_proc_exit(void); diff --git a/net/socket.c b/net/socket.c index ce69b7862f59..94fe638b4d72 100644 --- a/net/socket.c +++ b/net/socket.c @@ -274,7 +274,7 @@ int move_addr_to_user(void *kaddr, int klen, void __user *uaddr, int __user *ule #define SOCKFS_MAGIC 0x534F434B -static kmem_cache_t * sock_inode_cachep; +static kmem_cache_t * sock_inode_cachep __read_mostly; static struct inode *sock_alloc_inode(struct super_block *sb) { @@ -333,7 +333,7 @@ static struct super_block *sockfs_get_sb(struct file_system_type *fs_type, return get_sb_pseudo(fs_type, "socket:", &sockfs_ops, SOCKFS_MAGIC); } -static struct vfsmount *sock_mnt; +static struct vfsmount *sock_mnt __read_mostly; static struct file_system_type sock_fs_type = { .name = "sockfs", diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c index 554f224c0445..fe1a73ce6cff 100644 --- a/net/sunrpc/rpc_pipe.c +++ b/net/sunrpc/rpc_pipe.c @@ -28,13 +28,13 @@ #include #include -static struct vfsmount *rpc_mount; +static struct vfsmount *rpc_mount __read_mostly; static int rpc_mount_count; static struct file_system_type rpc_pipe_fs_type; -static kmem_cache_t *rpc_inode_cachep; +static kmem_cache_t *rpc_inode_cachep __read_mostly; #define RPC_UPCALL_TIMEOUT (30*HZ) diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c index 2d9eb7fbd521..f3104035e35d 100644 --- a/net/sunrpc/sched.c +++ 
b/net/sunrpc/sched.c @@ -34,10 +34,10 @@ static int rpc_task_id; #define RPC_BUFFER_MAXSIZE (2048) #define RPC_BUFFER_POOLSIZE (8) #define RPC_TASK_POOLSIZE (8) -static kmem_cache_t *rpc_task_slabp; -static kmem_cache_t *rpc_buffer_slabp; -static mempool_t *rpc_task_mempool; -static mempool_t *rpc_buffer_mempool; +static kmem_cache_t *rpc_task_slabp __read_mostly; +static kmem_cache_t *rpc_buffer_slabp __read_mostly; +static mempool_t *rpc_task_mempool __read_mostly; +static mempool_t *rpc_buffer_mempool __read_mostly; static void __rpc_default_timer(struct rpc_task *task); static void rpciod_killall(void); diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c index c58a6f05a0b6..2407a7072327 100644 --- a/net/xfrm/xfrm_input.c +++ b/net/xfrm/xfrm_input.c @@ -12,7 +12,7 @@ #include #include -static kmem_cache_t *secpath_cachep; +static kmem_cache_t *secpath_cachep __read_mostly; void __secpath_destroy(struct sec_path *sp) { diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index d65ed8684fc1..83c8135e1764 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -37,7 +37,7 @@ EXPORT_SYMBOL(xfrm_policy_list); static DEFINE_RWLOCK(xfrm_policy_afinfo_lock); static struct xfrm_policy_afinfo *xfrm_policy_afinfo[NPROTO]; -static kmem_cache_t *xfrm_dst_cache; +static kmem_cache_t *xfrm_dst_cache __read_mostly; static struct work_struct xfrm_policy_gc_work; static struct list_head xfrm_policy_gc_list = From 75b3f207b433dcb807fcf0f47de1c8398571ba5f Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Sat, 27 Aug 2005 02:35:30 -0300 Subject: [PATCH 474/584] [DCCP]: Make the Debug Menu available when DCCP is statically linked too Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. 
Miller --- net/dccp/Kconfig | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/dccp/Kconfig b/net/dccp/Kconfig index 3023f702eb87..187ac182e24b 100644 --- a/net/dccp/Kconfig +++ b/net/dccp/Kconfig @@ -27,7 +27,7 @@ config INET_DCCP_DIAG source "net/dccp/ccids/Kconfig" menu "DCCP Kernel Hacking" - depends on IP_DCCP=m && DEBUG_KERNEL=y + depends on IP_DCCP && DEBUG_KERNEL=y config IP_DCCP_DEBUG bool "DCCP debug messages" @@ -37,7 +37,7 @@ config IP_DCCP_DEBUG Just say N. config IP_DCCP_UNLOAD_HACK - depends on IP_DCCP_CCID3=m + depends on IP_DCCP=m && IP_DCCP_CCID3=m bool "DCCP control sock unload hack" ---help--- Enable this to be able to unload the dccp module when the it From d6809c12b3334a929c39bf08ea63bd819e0500f7 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Sat, 27 Aug 2005 03:06:35 -0300 Subject: [PATCH 475/584] [DCCP]: Introduce dccp_wait_for_ccid and use it in dccp_write_xmit This is not quite what I think we should have long term but improves performance for now, so lets use it till we get CCID3 working well, then we can think about using sk_write_queue, perhaps using some ideas from Juwen Lai's old stack for 2.4.20. Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. Miller --- net/dccp/ccids/ccid3.c | 2 +- net/dccp/dccp.h | 3 +-- net/dccp/output.c | 61 ++++++++++++++++++++++++++++++++++++++++-- net/dccp/proto.c | 2 +- 4 files changed, 62 insertions(+), 6 deletions(-) diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c index cf93b019ecbe..9866dc175258 100644 --- a/net/dccp/ccids/ccid3.c +++ b/net/dccp/ccids/ccid3.c @@ -985,7 +985,7 @@ static int ccid3_hc_tx_send_packet(struct sock *sk, ccid3_pr_debug("send_packet delay=%ld\n", delay); delay /= -1000; /* divide by -1000 is to convert to ms and get sign right */ - rc = delay > 0 ? -EAGAIN : 0; + rc = delay > 0 ? 
delay : 0; break; default: printk(KERN_CRIT "%s: %s, sk=%p, Illegal state (%d)!\n", diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h index c6ba07ea1a9f..6ba21509e797 100644 --- a/net/dccp/dccp.h +++ b/net/dccp/dccp.h @@ -126,8 +126,7 @@ extern void dccp_send_delayed_ack(struct sock *sk); extern void dccp_send_sync(struct sock *sk, const u64 seq, const enum dccp_pkt_type pkt_type); -extern int dccp_write_xmit(struct sock *sk, struct sk_buff *skb, - const int len); +extern int dccp_write_xmit(struct sock *sk, struct sk_buff *skb, long *timeo); extern void dccp_init_xmit_timers(struct sock *sk); static inline void dccp_clear_xmit_timers(struct sock *sk) diff --git a/net/dccp/output.c b/net/dccp/output.c index f96dedd3ad5e..116f6db5678d 100644 --- a/net/dccp/output.c +++ b/net/dccp/output.c @@ -150,14 +150,71 @@ unsigned int dccp_sync_mss(struct sock *sk, u32 pmtu) return mss_now; } -int dccp_write_xmit(struct sock *sk, struct sk_buff *skb, const int len) +/** + * dccp_wait_for_ccid - Wait for ccid to tell us we can send a packet + * @sk: socket to wait for + * @timeo: for how long + */ +static int dccp_wait_for_ccid(struct sock *sk, struct sk_buff *skb, + long *timeo) +{ + struct dccp_sock *dp = dccp_sk(sk); + DEFINE_WAIT(wait); + long delay; + int rc; + + while (1) { + prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE); + + if (sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN)) + goto do_error; + if (!*timeo) + goto do_nonblock; + if (signal_pending(current)) + goto do_interrupted; + + rc = ccid_hc_tx_send_packet(dp->dccps_hc_tx_ccid, sk, skb, + skb->len); + if (rc <= 0) + break; + delay = msecs_to_jiffies(rc); + if (delay > *timeo || delay < 0) + goto do_nonblock; + + sk->sk_write_pending++; + release_sock(sk); + *timeo -= schedule_timeout(delay); + lock_sock(sk); + sk->sk_write_pending--; + } +out: + finish_wait(sk->sk_sleep, &wait); + return rc; + +do_error: + rc = -EPIPE; + goto out; +do_nonblock: + rc = -EAGAIN; + goto out; +do_interrupted: + rc = 
sock_intr_errno(*timeo); + goto out; +} + +int dccp_write_xmit(struct sock *sk, struct sk_buff *skb, long *timeo) { const struct dccp_sock *dp = dccp_sk(sk); - int err = ccid_hc_tx_send_packet(dp->dccps_hc_tx_ccid, sk, skb, len); + int err = ccid_hc_tx_send_packet(dp->dccps_hc_tx_ccid, sk, skb, + skb->len); + + if (err > 0) + err = dccp_wait_for_ccid(sk, skb, timeo); if (err == 0) { const struct dccp_ackpkts *ap = dp->dccps_hc_rx_ackpkts; struct dccp_skb_cb *dcb = DCCP_SKB_CB(skb); + const int len = skb->len; if (sk->sk_state == DCCP_PARTOPEN) { /* See 8.1.5. Handshake Completion */ diff --git a/net/dccp/proto.c b/net/dccp/proto.c index f97e92ea34f3..f4da6561e40c 100644 --- a/net/dccp/proto.c +++ b/net/dccp/proto.c @@ -261,7 +261,7 @@ int dccp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, if (rc != 0) goto out_discard; - rc = dccp_write_xmit(sk, skb, len); + rc = dccp_write_xmit(sk, skb, &timeo); /* * XXX we don't use sk_write_queue, so just discard the packet. * Current plan however is to _use_ sk_write_queue with From 1f2333aea3269e196c44ae9a220e714cc1427792 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Sat, 27 Aug 2005 03:51:58 -0300 Subject: [PATCH 476/584] [CCID3]: Reflow to mostly fit under 80 columns No code changes. Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. 
Miller --- net/dccp/ccids/ccid3.c | 290 +++++++++++++++++++++++++---------------- 1 file changed, 176 insertions(+), 114 deletions(-) diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c index 9866dc175258..225c53013172 100644 --- a/net/dccp/ccids/ccid3.c +++ b/net/dccp/ccids/ccid3.c @@ -142,14 +142,16 @@ static const char *ccid3_tx_state_name(enum ccid3_hc_tx_states state) } #endif -static inline void ccid3_hc_tx_set_state(struct sock *sk, enum ccid3_hc_tx_states state) +static inline void ccid3_hc_tx_set_state(struct sock *sk, + enum ccid3_hc_tx_states state) { struct dccp_sock *dp = dccp_sk(sk); struct ccid3_hc_tx_sock *hctx = dp->dccps_hc_tx_ccid_private; enum ccid3_hc_tx_states oldstate = hctx->ccid3hctx_state; ccid3_pr_debug("%s(%p) %-8.8s -> %s\n", - dccp_role(sk), sk, ccid3_tx_state_name(oldstate), ccid3_tx_state_name(state)); + dccp_role(sk), sk, ccid3_tx_state_name(oldstate), + ccid3_tx_state_name(state)); WARN_ON(state == oldstate); hctx->ccid3hctx_state = state; } @@ -785,7 +787,8 @@ static inline void ccid3_calc_new_t_ipi(struct ccid3_hc_tx_sock *hctx) /* Calculate new delta by delta = min(t_ipi / 2, t_gran / 2) */ static inline void ccid3_calc_new_delta(struct ccid3_hc_tx_sock *hctx) { - hctx->ccid3hctx_delta = min_t(u32, hctx->ccid3hctx_t_ipi / 2, TFRC_OPSYS_HALF_TIME_GRAN); + hctx->ccid3hctx_delta = min_t(u32, hctx->ccid3hctx_t_ipi / 2, + TFRC_OPSYS_HALF_TIME_GRAN); } @@ -804,20 +807,25 @@ static void ccid3_hc_tx_update_x(struct sock *sk) struct dccp_sock *dp = dccp_sk(sk); struct ccid3_hc_tx_sock *hctx = dp->dccps_hc_tx_ccid_private; - if (hctx->ccid3hctx_p >= TFRC_SMALLEST_P) { /* to avoid large error in calcX */ + /* To avoid large error in calcX */ + if (hctx->ccid3hctx_p >= TFRC_SMALLEST_P) { hctx->ccid3hctx_x_calc = ccid3_calc_x(hctx->ccid3hctx_s, hctx->ccid3hctx_rtt, hctx->ccid3hctx_p); - hctx->ccid3hctx_x = max_t(u32, min_t(u32, hctx->ccid3hctx_x_calc, 2 * hctx->ccid3hctx_x_recv), - hctx->ccid3hctx_s / TFRC_MAX_BACK_OFF_TIME); + 
hctx->ccid3hctx_x = max_t(u32, min_t(u32, hctx->ccid3hctx_x_calc, + 2 * hctx->ccid3hctx_x_recv), + (hctx->ccid3hctx_s / + TFRC_MAX_BACK_OFF_TIME)); } else if (now_delta(hctx->ccid3hctx_t_ld) >= hctx->ccid3hctx_rtt) { u32 rtt = hctx->ccid3hctx_rtt; if (rtt < 10) { rtt = 10; } /* avoid divide by zero below */ - hctx->ccid3hctx_x = max_t(u32, min_t(u32, 2 * hctx->ccid3hctx_x_recv, 2 * hctx->ccid3hctx_x), - (hctx->ccid3hctx_s * 100000) / (rtt / 10)); + hctx->ccid3hctx_x = max_t(u32, min_t(u32, 2 * hctx->ccid3hctx_x_recv, + 2 * hctx->ccid3hctx_x), + ((hctx->ccid3hctx_s * 100000) / + (rtt / 10))); /* Using 100000 and 10 to avoid 32 bit overflow for jumbo frames */ do_gettimeofday(&hctx->ccid3hctx_t_ld); } @@ -840,7 +848,8 @@ static void ccid3_hc_tx_no_feedback_timer(unsigned long data) if (sock_owned_by_user(sk)) { /* Try again later. */ /* XXX: set some sensible MIB */ - sk_reset_timer(sk, &hctx->ccid3hctx_no_feedback_timer, jiffies + HZ / 5); + sk_reset_timer(sk, &hctx->ccid3hctx_no_feedback_timer, + jiffies + HZ / 5); goto out; } @@ -858,27 +867,38 @@ static void ccid3_hc_tx_no_feedback_timer(unsigned long data) case TFRC_SSTATE_NO_FBACK: /* Halve send rate */ hctx->ccid3hctx_x /= 2; - if (hctx->ccid3hctx_x < (hctx->ccid3hctx_s / TFRC_MAX_BACK_OFF_TIME)) - hctx->ccid3hctx_x = hctx->ccid3hctx_s / TFRC_MAX_BACK_OFF_TIME; + if (hctx->ccid3hctx_x < + (hctx->ccid3hctx_s / TFRC_MAX_BACK_OFF_TIME)) + hctx->ccid3hctx_x = (hctx->ccid3hctx_s / + TFRC_MAX_BACK_OFF_TIME); - ccid3_pr_debug("%s, sk=%p, state=%s, updated tx rate to %d bytes/s\n", - dccp_role(sk), sk, ccid3_tx_state_name(hctx->ccid3hctx_state), + ccid3_pr_debug("%s, sk=%p, state=%s, updated tx rate to %d " + "bytes/s\n", + dccp_role(sk), sk, + ccid3_tx_state_name(hctx->ccid3hctx_state), hctx->ccid3hctx_x); - next_tmout = max_t(u32, 2 * (hctx->ccid3hctx_s * 100000) - / (hctx->ccid3hctx_x / 10), TFRC_INITIAL_TIMEOUT); + next_tmout = max_t(u32, 2 * (hctx->ccid3hctx_s * 100000) / (hctx->ccid3hctx_x / 10), + 
TFRC_INITIAL_TIMEOUT); /* do above maths with 100000 and 10 to prevent overflow on 32 bit */ - /* FIXME - not sure above calculation is correct. See section 5 of CCID3 11 - * should adjust tx_t_ipi and double that to achieve it really */ + /* + * FIXME - not sure above calculation is correct. See section + * 5 of CCID3 11 should adjust tx_t_ipi and double that to + * achieve it really + */ break; case TFRC_SSTATE_FBACK: - /* Check if IDLE since last timeout and recv rate is less than 4 packets per RTT */ + /* + * Check if IDLE since last timeout and recv rate is less than + * 4 packets per RTT + */ rtt = hctx->ccid3hctx_rtt; if (rtt < 10) rtt = 10; /* stop divide by zero below */ - if (!hctx->ccid3hctx_idle || (hctx->ccid3hctx_x_recv >= - 4 * (hctx->ccid3hctx_s * 100000) / (rtt / 10))) { - ccid3_pr_debug("%s, sk=%p, state=%s, not idle\n", dccp_role(sk), sk, + if (!hctx->ccid3hctx_idle || + (hctx->ccid3hctx_x_recv >= 4 * (hctx->ccid3hctx_s * 100000) / (rtt / 10))) { + ccid3_pr_debug("%s, sk=%p, state=%s, not idle\n", + dccp_role(sk), sk, ccid3_tx_state_name(hctx->ccid3hctx_state)); /* Halve sending rate */ @@ -887,7 +907,8 @@ static void ccid3_hc_tx_no_feedback_timer(unsigned long data) * Else * X_recv = X_calc / 4; */ - BUG_ON(hctx->ccid3hctx_p >= TFRC_SMALLEST_P && hctx->ccid3hctx_x_calc == 0); + BUG_ON(hctx->ccid3hctx_p >= TFRC_SMALLEST_P && + hctx->ccid3hctx_x_calc == 0); /* check also if p is zero -> x_calc is infinity? 
*/ if (hctx->ccid3hctx_p < TFRC_SMALLEST_P || @@ -916,7 +937,7 @@ static void ccid3_hc_tx_no_feedback_timer(unsigned long data) } sk_reset_timer(sk, &hctx->ccid3hctx_no_feedback_timer, - jiffies + max_t(u32, 1, usecs_to_jiffies(next_tmout))); + jiffies + max_t(u32, 1, usecs_to_jiffies(next_tmout))); hctx->ccid3hctx_idle = 1; out: bh_unlock_sock(sk); @@ -933,24 +954,27 @@ static int ccid3_hc_tx_send_packet(struct sock *sk, long delay; int rc = -ENOTCONN; -// ccid3_pr_debug("%s, sk=%p, skb=%p, len=%d\n", dccp_role(sk), sk, skb, len); + /* Check if pure ACK or Terminating*/ + /* - * check if pure ACK or Terminating */ - /* XXX: We only call this function for DATA and DATAACK, on, these packets can have - * zero length, but why the comment about "pure ACK"? + * XXX: We only call this function for DATA and DATAACK, on, these + * packets can have zero length, but why the comment about "pure ACK"? */ - if (hctx == NULL || len == 0 || hctx->ccid3hctx_state == TFRC_SSTATE_TERM) + if (hctx == NULL || len == 0 || + hctx->ccid3hctx_state == TFRC_SSTATE_TERM) goto out; /* See if last packet allocated was not sent */ new_packet = dccp_tx_hist_head(&hctx->ccid3hctx_hist); if (new_packet == NULL || new_packet->dccphtx_sent) { - new_packet = dccp_tx_hist_entry_new(ccid3_tx_hist, SLAB_ATOMIC); + new_packet = dccp_tx_hist_entry_new(ccid3_tx_hist, + SLAB_ATOMIC); rc = -ENOBUFS; if (new_packet == NULL) { ccid3_pr_debug("%s, sk=%p, not enough mem to add " - "to history, send refused\n", dccp_role(sk), sk); + "to history, send refused\n", + dccp_role(sk), sk); goto out; } @@ -961,12 +985,13 @@ static int ccid3_hc_tx_send_packet(struct sock *sk, switch (hctx->ccid3hctx_state) { case TFRC_SSTATE_NO_SENT: - ccid3_pr_debug("%s, sk=%p, first packet(%llu)\n", dccp_role(sk), sk, - dp->dccps_gss); + ccid3_pr_debug("%s, sk=%p, first packet(%llu)\n", + dccp_role(sk), sk, dp->dccps_gss); hctx->ccid3hctx_no_feedback_timer.function = ccid3_hc_tx_no_feedback_timer; 
hctx->ccid3hctx_no_feedback_timer.data = (unsigned long)sk; - sk_reset_timer(sk, &hctx->ccid3hctx_no_feedback_timer, jiffies + usecs_to_jiffies(TFRC_INITIAL_TIMEOUT)); + sk_reset_timer(sk, &hctx->ccid3hctx_no_feedback_timer, + jiffies + usecs_to_jiffies(TFRC_INITIAL_TIMEOUT)); hctx->ccid3hctx_last_win_count = 0; hctx->ccid3hctx_t_last_win_count = now; ccid3_hc_tx_set_state(sk, TFRC_SSTATE_NO_FBACK); @@ -981,7 +1006,7 @@ static int ccid3_hc_tx_send_packet(struct sock *sk, break; case TFRC_SSTATE_NO_FBACK: case TFRC_SSTATE_FBACK: - delay = (now_delta(hctx->ccid3hctx_t_nom) - hctx->ccid3hctx_delta); + delay = now_delta(hctx->ccid3hctx_t_nom) - hctx->ccid3hctx_delta; ccid3_pr_debug("send_packet delay=%ld\n", delay); delay /= -1000; /* divide by -1000 is to convert to ms and get sign right */ @@ -1027,41 +1052,35 @@ static void ccid3_hc_tx_packet_sent(struct sock *sk, int more, int len) packet = dccp_tx_hist_head(&hctx->ccid3hctx_hist); if (packet == NULL) { - printk(KERN_CRIT "%s: packet doesn't exists in history!\n", __FUNCTION__); + printk(KERN_CRIT "%s: packet doesn't exists in " + "history!\n", __FUNCTION__); return; } if (packet->dccphtx_sent) { - printk(KERN_CRIT "%s: no unsent packet in history!\n", __FUNCTION__); + printk(KERN_CRIT "%s: no unsent packet in history!\n", + __FUNCTION__); return; } packet->dccphtx_tstamp = now; packet->dccphtx_seqno = dp->dccps_gss; -#if 0 - ccid3_pr_debug("%s, sk=%p, seqno=%llu inserted!\n", - dccp_role(sk), sk, packet->dccphtx_seqno); -#endif /* - * Check if win_count have changed */ - /* COMPLIANCE_BEGIN - * Algorithm in "8.1. Window Counter Valuer" in draft-ietf-dccp-ccid3-11.txt + * Check if win_count have changed + * Algorithm in "8.1. 
Window Counter Valuer" in + * draft-ietf-dccp-ccid3-11.txt */ - quarter_rtt = now_delta(hctx->ccid3hctx_t_last_win_count) / (hctx->ccid3hctx_rtt / 4); + quarter_rtt = now_delta(hctx->ccid3hctx_t_last_win_count) / + (hctx->ccid3hctx_rtt / 4); if (quarter_rtt > 0) { hctx->ccid3hctx_t_last_win_count = now; hctx->ccid3hctx_last_win_count = (hctx->ccid3hctx_last_win_count + min_t(unsigned long, quarter_rtt, 5)) % 16; - ccid3_pr_debug("%s, sk=%p, window changed from %u to %u!\n", + ccid3_pr_debug("%s, sk=%p, window changed from " + "%u to %u!\n", dccp_role(sk), sk, packet->dccphtx_ccval, hctx->ccid3hctx_last_win_count); } - /* COMPLIANCE_END */ -#if 0 - ccid3_pr_debug("%s, sk=%p, packet sent (%llu,%u)\n", - dccp_role(sk), sk, - packet->dccphtx_seqno, - packet->dccphtx_ccval); -#endif + hctx->ccid3hctx_idle = 0; packet->dccphtx_rtt = hctx->ccid3hctx_rtt; packet->dccphtx_sent = 1; @@ -1073,7 +1092,8 @@ static void ccid3_hc_tx_packet_sent(struct sock *sk, int more, int len) case TFRC_SSTATE_NO_SENT: /* if first wasn't pure ack */ if (len != 0) - printk(KERN_CRIT "%s: %s, First packet sent is noted as a data packet\n", + printk(KERN_CRIT "%s: %s, First packet sent is noted " + "as a data packet\n", __FUNCTION__, dccp_role(sk)); return; case TFRC_SSTATE_NO_FBACK: @@ -1105,16 +1125,13 @@ static void ccid3_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb) u32 pinv; u32 x_recv; u32 r_sample; -#if 0 - ccid3_pr_debug("%s, sk=%p(%s), skb=%p(%s)\n", - dccp_role(sk), sk, dccp_state_name(sk->sk_state), - skb, dccp_packet_name(DCCP_SKB_CB(skb)->dccpd_type)); -#endif + if (hctx == NULL) return; if (hctx->ccid3hctx_state == TFRC_SSTATE_TERM) { - ccid3_pr_debug("%s, sk=%p, received a packet when terminating!\n", dccp_role(sk), sk); + ccid3_pr_debug("%s, sk=%p, received a packet when " + "terminating!\n", dccp_role(sk), sk); return; } @@ -1141,8 +1158,10 @@ static void ccid3_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb) packet = 
dccp_tx_hist_find_entry(&hctx->ccid3hctx_hist, DCCP_SKB_CB(skb)->dccpd_ack_seq); if (packet == NULL) { - ccid3_pr_debug("%s, sk=%p, seqno %llu(%s) does't exist in history!\n", - dccp_role(sk), sk, DCCP_SKB_CB(skb)->dccpd_ack_seq, + ccid3_pr_debug("%s, sk=%p, seqno %llu(%s) does't " + "exist in history!\n", + dccp_role(sk), sk, + DCCP_SKB_CB(skb)->dccpd_ack_seq, dccp_packet_name(DCCP_SKB_CB(skb)->dccpd_type)); return; } @@ -1164,7 +1183,8 @@ static void ccid3_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb) ccid3_hc_tx_set_state(sk, TFRC_SSTATE_FBACK); hctx->ccid3hctx_rtt = r_sample; } else - hctx->ccid3hctx_rtt = (hctx->ccid3hctx_rtt * 9) / 10 + r_sample / 10; + hctx->ccid3hctx_rtt = (hctx->ccid3hctx_rtt * 9) / 10 + + r_sample / 10; /* * XXX: this is to avoid a division by zero in ccid3_hc_tx_packet_sent @@ -1173,17 +1193,16 @@ static void ccid3_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb) if (hctx->ccid3hctx_rtt < 4) hctx->ccid3hctx_rtt = 4; - ccid3_pr_debug("%s, sk=%p, New RTT estimate=%uus, r_sample=%us\n", - dccp_role(sk), sk, - hctx->ccid3hctx_rtt, - r_sample); + ccid3_pr_debug("%s, sk=%p, New RTT estimate=%uus, " + "r_sample=%us\n", dccp_role(sk), sk, + hctx->ccid3hctx_rtt, r_sample); /* Update timeout interval */ hctx->ccid3hctx_t_rto = max_t(u32, 4 * hctx->ccid3hctx_rtt, USEC_PER_SEC); /* Update receive rate */ - hctx->ccid3hctx_x_recv = x_recv; /* x_recv in bytes per second */ + hctx->ccid3hctx_x_recv = x_recv;/* X_recv in bytes per sec */ /* Update loss event rate */ if (pinv == ~0 || pinv == 0) @@ -1193,7 +1212,8 @@ static void ccid3_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb) if (hctx->ccid3hctx_p < TFRC_SMALLEST_P) { hctx->ccid3hctx_p = TFRC_SMALLEST_P; - ccid3_pr_debug("%s, sk=%p, Smallest p used!\n", dccp_role(sk), sk); + ccid3_pr_debug("%s, sk=%p, Smallest p used!\n", + dccp_role(sk), sk); } } @@ -1220,22 +1240,27 @@ static void ccid3_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb) &hctx->ccid3hctx_hist, 
packet); if (hctx->ccid3hctx_x < 10) { - ccid3_pr_debug("ccid3_hc_tx_packet_recv hctx->ccid3hctx_x < 10\n"); + ccid3_pr_debug("ccid3_hc_tx_packet_recv hctx_x < 10\n"); hctx->ccid3hctx_x = 10; } /* to prevent divide by zero below */ - /* Schedule no feedback timer to expire in max(4 * R, 2 * s / X) */ + /* + * Schedule no feedback timer to expire in + * max(4 * R, 2 * s / X) + */ next_tmout = max(hctx->ccid3hctx_t_rto, (2 * (hctx->ccid3hctx_s * 100000) / (hctx->ccid3hctx_x / 10))); /* maths with 100000 and 10 is to prevent overflow with 32 bit */ - ccid3_pr_debug("%s, sk=%p, Scheduled no feedback timer to expire in %lu jiffies (%luus)\n", - dccp_role(sk), sk, usecs_to_jiffies(next_tmout), next_tmout); + ccid3_pr_debug("%s, sk=%p, Scheduled no feedback timer to " + "expire in %lu jiffies (%luus)\n", + dccp_role(sk), sk, + usecs_to_jiffies(next_tmout), next_tmout); sk_reset_timer(sk, &hctx->ccid3hctx_no_feedback_timer, - jiffies + max_t(u32,1,usecs_to_jiffies(next_tmout))); + jiffies + max_t(u32, 1, usecs_to_jiffies(next_tmout))); /* set idle flag */ hctx->ccid3hctx_idle = 1; @@ -1253,14 +1278,16 @@ static void ccid3_hc_tx_insert_options(struct sock *sk, struct sk_buff *skb) const struct dccp_sock *dp = dccp_sk(sk); struct ccid3_hc_tx_sock *hctx = dp->dccps_hc_tx_ccid_private; - if (hctx == NULL || !(sk->sk_state == DCCP_OPEN || sk->sk_state == DCCP_PARTOPEN)) + if (hctx == NULL || !(sk->sk_state == DCCP_OPEN || + sk->sk_state == DCCP_PARTOPEN)) return; DCCP_SKB_CB(skb)->dccpd_ccval = hctx->ccid3hctx_last_win_count; } static int ccid3_hc_tx_parse_options(struct sock *sk, unsigned char option, - unsigned char len, u16 idx, unsigned char *value) + unsigned char len, u16 idx, + unsigned char *value) { int rc = 0; struct dccp_sock *dp = dccp_sk(sk); @@ -1283,7 +1310,8 @@ static int ccid3_hc_tx_parse_options(struct sock *sk, unsigned char option, switch (option) { case TFRC_OPT_LOSS_EVENT_RATE: if (len != 4) { - ccid3_pr_debug("%s, sk=%p, invalid len for 
TFRC_OPT_LOSS_EVENT_RATE\n", + ccid3_pr_debug("%s, sk=%p, invalid len for " + "TFRC_OPT_LOSS_EVENT_RATE\n", dccp_role(sk), sk); rc = -EINVAL; } else { @@ -1303,7 +1331,8 @@ static int ccid3_hc_tx_parse_options(struct sock *sk, unsigned char option, break; case TFRC_OPT_RECEIVE_RATE: if (len != 4) { - ccid3_pr_debug("%s, sk=%p, invalid len for TFRC_OPT_RECEIVE_RATE\n", + ccid3_pr_debug("%s, sk=%p, invalid len for " + "TFRC_OPT_RECEIVE_RATE\n", dccp_role(sk), sk); rc = -EINVAL; } else { @@ -1325,7 +1354,8 @@ static int ccid3_hc_tx_init(struct sock *sk) ccid3_pr_debug("%s, sk=%p\n", dccp_role(sk), sk); - hctx = dp->dccps_hc_tx_ccid_private = kmalloc(sizeof(*hctx), gfp_any()); + hctx = dp->dccps_hc_tx_ccid_private = kmalloc(sizeof(*hctx), + gfp_any()); if (hctx == NULL) return -ENOMEM; @@ -1337,8 +1367,10 @@ static int ccid3_hc_tx_init(struct sock *sk) else hctx->ccid3hctx_s = TFRC_STD_PACKET_SIZE; - hctx->ccid3hctx_x = hctx->ccid3hctx_s; /* set transmission rate to 1 packet per second */ - hctx->ccid3hctx_rtt = 4; /* See ccid3_hc_tx_packet_sent win_count calculatation */ + /* Set transmission rate to 1 packet per second */ + hctx->ccid3hctx_x = hctx->ccid3hctx_s; + /* See ccid3_hc_tx_packet_sent win_count calculatation */ + hctx->ccid3hctx_rtt = 4; hctx->ccid3hctx_t_rto = USEC_PER_SEC; hctx->ccid3hctx_state = TFRC_SSTATE_NO_SENT; INIT_LIST_HEAD(&hctx->ccid3hctx_hist); @@ -1389,14 +1421,16 @@ static const char *ccid3_rx_state_name(enum ccid3_hc_rx_states state) } #endif -static inline void ccid3_hc_rx_set_state(struct sock *sk, enum ccid3_hc_rx_states state) +static inline void ccid3_hc_rx_set_state(struct sock *sk, + enum ccid3_hc_rx_states state) { struct dccp_sock *dp = dccp_sk(sk); struct ccid3_hc_rx_sock *hcrx = dp->dccps_hc_rx_ccid_private; enum ccid3_hc_rx_states oldstate = hcrx->ccid3hcrx_state; ccid3_pr_debug("%s(%p) %-8.8s -> %s\n", - dccp_role(sk), sk, ccid3_rx_state_name(oldstate), ccid3_rx_state_name(state)); + dccp_role(sk), sk, 
ccid3_rx_state_name(oldstate), + ccid3_rx_state_name(state)); WARN_ON(state == oldstate); hcrx->ccid3hcrx_state = state; } @@ -1434,9 +1468,12 @@ static int ccid3_hc_rx_add_hist(struct sock *sk, num_later++; if (num_later == TFRC_RECV_NUM_LATE_LOSS) { - dccp_rx_hist_entry_delete(ccid3_rx_hist, packet); - ccid3_pr_debug("%s, sk=%p, packet(%llu) already lost!\n", - dccp_role(sk), sk, seqno); + dccp_rx_hist_entry_delete(ccid3_rx_hist, + packet); + ccid3_pr_debug("%s, sk=%p, packet" + "(%llu) already lost!\n", + dccp_role(sk), sk, + seqno); return 1; } } @@ -1444,12 +1481,18 @@ static int ccid3_hc_rx_add_hist(struct sock *sk, if (num_later < TFRC_RECV_NUM_LATE_LOSS) dccp_rx_hist_add_entry(&hcrx->ccid3hcrx_hist, packet); - /* FIXME: else what? should we destroy the packet like above? */ + /* + * FIXME: else what? should we destroy the packet + * like above? + */ } } trim_history: - /* Trim history (remove all packets after the NUM_LATE_LOSS + 1 data packets) */ + /* + * Trim history (remove all packets after the NUM_LATE_LOSS + 1 + * data packets) + */ num_later = TFRC_RECV_NUM_LATE_LOSS + 1; if (!list_empty(&hcrx->ccid3hcrx_loss_interval_hist)) { @@ -1489,15 +1532,18 @@ trim_history: if (tmp < 0) tmp += TFRC_WIN_COUNT_LIMIT; if (tmp > TFRC_WIN_COUNT_PER_RTT + 1) { - /* we have found a packet older than one rtt - * remove the rest */ + /* + * We have found a packet older + * than one rtt remove the rest + */ step = 3; } else /* OK, find next data packet */ num_later = 1; break; case 3: list_del_init(&entry->dccphrx_node); - dccp_rx_hist_entry_delete(ccid3_rx_hist, entry); + dccp_rx_hist_entry_delete(ccid3_rx_hist, + entry); break; } } else if (dccp_rx_hist_entry_data_packet(entry)) @@ -1564,7 +1610,8 @@ static void ccid3_hc_rx_insert_options(struct sock *sk, struct sk_buff *skb) u32 x_recv, pinv; struct ccid3_hc_rx_sock *hcrx = dp->dccps_hc_rx_ccid_private; - if (hcrx == NULL || !(sk->sk_state == DCCP_OPEN || sk->sk_state == DCCP_PARTOPEN)) + if (hcrx == NULL || 
!(sk->sk_state == DCCP_OPEN || + sk->sk_state == DCCP_PARTOPEN)) return; DCCP_SKB_CB(skb)->dccpd_ccval = hcrx->ccid3hcrx_last_counter; @@ -1658,13 +1705,15 @@ static u32 ccid3_hc_rx_calc_first_li(struct sock *sk) } if (step == 0) { - printk(KERN_CRIT "%s: %s, sk=%p, packet history contains no data packets!\n", + printk(KERN_CRIT "%s: %s, sk=%p, packet history contains no " + "data packets!\n", __FUNCTION__, dccp_role(sk), sk); return ~0; } if (interval == 0) { - ccid3_pr_debug("%s, sk=%p, Could not find a win_count interval > 0. Defaulting to 1\n", + ccid3_pr_debug("%s, sk=%p, Could not find a win_count " + "interval > 0. Defaulting to 1\n", dccp_role(sk), sk); interval = 1; } @@ -1688,8 +1737,8 @@ found: fval = (hcrx->ccid3hcrx_s * 100000) / tmp2; /* do not alter order above or you will get overflow on 32 bit */ p = calcx_reverse_lookup(fval); - ccid3_pr_debug("%s, sk=%p, receive rate=%u bytes/s, implied loss rate=%u\n",\ - dccp_role(sk), sk, x_recv, p); + ccid3_pr_debug("%s, sk=%p, receive rate=%u bytes/s, implied " + "loss rate=%u\n", dccp_role(sk), sk, x_recv, p); if (p == 0) return ~0; @@ -1704,25 +1753,31 @@ static void ccid3_hc_rx_update_li(struct sock *sk, u64 seq_loss, u8 win_loss) struct ccid3_loss_interval_hist_entry *li_entry; if (seq_loss != DCCP_MAX_SEQNO + 1) { - ccid3_pr_debug("%s, sk=%p, seq_loss=%llu, win_loss=%u, packet loss detected\n", + ccid3_pr_debug("%s, sk=%p, seq_loss=%llu, win_loss=%u, " + "packet loss detected\n", dccp_role(sk), sk, seq_loss, win_loss); if (list_empty(&hcrx->ccid3hcrx_loss_interval_hist)) { struct ccid3_loss_interval_hist_entry *li_tail = NULL; int i; - ccid3_pr_debug("%s, sk=%p, first loss event detected, creating history\n", dccp_role(sk), sk); + ccid3_pr_debug("%s, sk=%p, first loss event detected, " + "creating history\n", + dccp_role(sk), sk); for (i = 0; i <= TFRC_RECV_IVAL_F_LENGTH; ++i) { li_entry = ccid3_loss_interval_hist_entry_new(SLAB_ATOMIC); if (li_entry == NULL) { 
ccid3_loss_interval_history_delete(&hcrx->ccid3hcrx_loss_interval_hist); - ccid3_pr_debug("%s, sk=%p, not enough mem for creating history\n", + ccid3_pr_debug("%s, sk=%p, not enough " + "mem for creating " + "history\n", dccp_role(sk), sk); return; } if (li_tail == NULL) li_tail = li_entry; - list_add(&li_entry->ccid3lih_node, &hcrx->ccid3hcrx_loss_interval_hist); + list_add(&li_entry->ccid3lih_node, + &hcrx->ccid3hcrx_loss_interval_hist); } li_entry->ccid3lih_seqno = seq_loss; @@ -1772,11 +1827,13 @@ static void ccid3_hc_rx_detect_loss(struct sock *sk) if (list_empty(&hcrx->ccid3hcrx_loss_interval_hist)) { /* no loss event have occured yet */ ccid3_pr_debug("%s, sk=%p, TODO: find a lost data " - "packet by comparing to initial seqno\n", + "packet by comparing to initial " + "seqno\n", dccp_role(sk), sk); goto out_update_li; } else { - pr_info("%s: %s, sk=%p, ERROR! Less than 4 data packets in history", + pr_info("%s: %s, sk=%p, ERROR! Less than 4 data " + "packets in history", __FUNCTION__, dccp_role(sk), sk); return; } @@ -1831,7 +1888,9 @@ static u32 ccid3_hc_rx_calc_i_mean(struct sock *sk) u32 i_tot1 = 0; u32 w_tot = 0; - list_for_each_entry_safe(li_entry, li_next, &hcrx->ccid3hcrx_loss_interval_hist, ccid3lih_node) { + list_for_each_entry_safe(li_entry, li_next, + &hcrx->ccid3hcrx_loss_interval_hist, + ccid3lih_node) { if (i < TFRC_RECV_IVAL_F_LENGTH) { i_tot0 += li_entry->ccid3lih_interval * ccid3_hc_rx_w[i]; w_tot += ccid3_hc_rx_w[i]; @@ -1845,7 +1904,8 @@ static u32 ccid3_hc_rx_calc_i_mean(struct sock *sk) } if (i != TFRC_RECV_IVAL_F_LENGTH) { - pr_info("%s: %s, sk=%p, ERROR! Missing entry in interval history!\n", + pr_info("%s: %s, sk=%p, ERROR! 
Missing entry in " + "interval history!\n", __FUNCTION__, dccp_role(sk), sk); return 0; } @@ -1870,11 +1930,7 @@ static void ccid3_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb) u8 win_count; u32 p_prev; int ins; -#if 0 - ccid3_pr_debug("%s, sk=%p(%s), skb=%p(%s)\n", - dccp_role(sk), sk, dccp_state_name(sk->sk_state), - skb, dccp_packet_name(DCCP_SKB_CB(skb)->dccpd_type)); -#endif + if (hcrx == NULL) return; @@ -1913,7 +1969,8 @@ static void ccid3_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb) packet = dccp_rx_hist_entry_new(ccid3_rx_hist, opt_recv->dccpor_ndp, skb, SLAB_ATOMIC); if (packet == NULL) { - ccid3_pr_debug("%s, sk=%p, Not enough mem to add rx packet to history (consider it lost)!", + ccid3_pr_debug("%s, sk=%p, Not enough mem to add rx packet " + "to history (consider it lost)!", dccp_role(sk), sk); return; } @@ -1927,13 +1984,16 @@ static void ccid3_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb) switch (hcrx->ccid3hcrx_state) { case TFRC_RSTATE_NO_DATA: - ccid3_pr_debug("%s, sk=%p(%s), skb=%p, sending initial feedback\n", - dccp_role(sk), sk, dccp_state_name(sk->sk_state), skb); + ccid3_pr_debug("%s, sk=%p(%s), skb=%p, sending initial " + "feedback\n", + dccp_role(sk), sk, + dccp_state_name(sk->sk_state), skb); ccid3_hc_rx_send_feedback(sk); ccid3_hc_rx_set_state(sk, TFRC_RSTATE_DATA); return; case TFRC_RSTATE_DATA: - hcrx->ccid3hcrx_bytes_recv += skb->len - dccp_hdr(skb)->dccph_doff * 4; + hcrx->ccid3hcrx_bytes_recv += skb->len - + dccp_hdr(skb)->dccph_doff * 4; if (ins == 0) { if (now_delta(hcrx->ccid3hcrx_tstamp_last_ack) >= hcrx->ccid3hcrx_rtt) { @@ -1975,7 +2035,8 @@ static int ccid3_hc_rx_init(struct sock *sk) ccid3_pr_debug("%s, sk=%p\n", dccp_role(sk), sk); - hcrx = dp->dccps_hc_rx_ccid_private = kmalloc(sizeof(*hcrx), gfp_any()); + hcrx = dp->dccps_hc_rx_ccid_private = kmalloc(sizeof(*hcrx), + gfp_any()); if (hcrx == NULL) return -ENOMEM; @@ -2135,7 +2196,8 @@ static __exit void ccid3_module_exit(void) } 
module_exit(ccid3_module_exit); -MODULE_AUTHOR("Ian McDonald & Arnaldo Carvalho de Melo "); +MODULE_AUTHOR("Ian McDonald , " + "Arnaldo Carvalho de Melo "); MODULE_DESCRIPTION("DCCP TFRC CCID3 CCID"); MODULE_LICENSE("GPL"); MODULE_ALIAS("net-dccp-ccid-3"); From b6ee3d4ada4e85d9b9b9164c1327ef0850c79d5e Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Sat, 27 Aug 2005 18:18:18 -0300 Subject: [PATCH 477/584] [CCID3]: Reorganise timeval handling Introducing functions to add to or subtract from a timeval variable and renaming now_delta to timeval_now_delta that calls do_gettimeofday and then timeval_delta, that should be used when there are several deltas made relative to the current time or setting variables to it, so as to avoid calling do_gettimeofday excessively. I'm leaving these "timeval_" prefixed functions internal to DCCP for a while till we're sure there are no subtle bugs in it. It also is more correct as it checks if the number of usecs added to or subtracted from a tv_usec field is more than 2 seconds. Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S.
Miller --- net/dccp/ccids/ccid3.c | 127 ++++++++++++++++++----------------------- net/dccp/dccp.h | 44 ++++++++++++-- net/dccp/options.c | 5 +- 3 files changed, 98 insertions(+), 78 deletions(-) diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c index 225c53013172..60e3a5f9fcb4 100644 --- a/net/dccp/ccids/ccid3.c +++ b/net/dccp/ccids/ccid3.c @@ -156,26 +156,6 @@ static inline void ccid3_hc_tx_set_state(struct sock *sk, hctx->ccid3hctx_state = state; } -static void timeval_sub(struct timeval large, struct timeval small, - struct timeval *result) -{ - result->tv_sec = large.tv_sec-small.tv_sec; - if (large.tv_usec < small.tv_usec) { - (result->tv_sec)--; - result->tv_usec = USEC_PER_SEC + - large.tv_usec - small.tv_usec; - } else - result->tv_usec = large.tv_usec-small.tv_usec; -} - -static inline void timeval_fix(struct timeval *tv) -{ - if (tv->tv_usec >= USEC_PER_SEC) { - tv->tv_sec++; - tv->tv_usec -= USEC_PER_SEC; - } -} - #define CALCX_ARRSIZE 500 #define CALCX_SPLIT 50000 @@ -816,18 +796,22 @@ static void ccid3_hc_tx_update_x(struct sock *sk) 2 * hctx->ccid3hctx_x_recv), (hctx->ccid3hctx_s / TFRC_MAX_BACK_OFF_TIME)); - } else if (now_delta(hctx->ccid3hctx_t_ld) >= hctx->ccid3hctx_rtt) { - u32 rtt = hctx->ccid3hctx_rtt; - if (rtt < 10) { - rtt = 10; - } /* avoid divide by zero below */ - - hctx->ccid3hctx_x = max_t(u32, min_t(u32, 2 * hctx->ccid3hctx_x_recv, - 2 * hctx->ccid3hctx_x), - ((hctx->ccid3hctx_s * 100000) / - (rtt / 10))); - /* Using 100000 and 10 to avoid 32 bit overflow for jumbo frames */ - do_gettimeofday(&hctx->ccid3hctx_t_ld); + } else { + struct timeval now; + + do_gettimeofday(&now); + if (timeval_delta(&now, &hctx->ccid3hctx_t_ld) >= + hctx->ccid3hctx_rtt) { + /* Avoid divide by zero below */ + const u32 rtt = max_t(u32, hctx->ccid3hctx_rtt, 10); + + hctx->ccid3hctx_x = max_t(u32, min_t(u32, 2 * hctx->ccid3hctx_x_recv, + 2 * hctx->ccid3hctx_x), + ((hctx->ccid3hctx_s * 100000) / + (rtt / 10))); + /* Using 100000 and 10 to avoid 32 
bit overflow for jumbo frames */ + hctx->ccid3hctx_t_ld = now; + } } if (hctx->ccid3hctx_x == 0) { @@ -999,14 +983,15 @@ static int ccid3_hc_tx_send_packet(struct sock *sk, /* Set nominal send time for initial packet */ hctx->ccid3hctx_t_nom = now; - (hctx->ccid3hctx_t_nom).tv_usec += hctx->ccid3hctx_t_ipi; - timeval_fix(&(hctx->ccid3hctx_t_nom)); + timeval_add_usecs(&hctx->ccid3hctx_t_nom, + hctx->ccid3hctx_t_ipi); ccid3_calc_new_delta(hctx); rc = 0; break; case TFRC_SSTATE_NO_FBACK: case TFRC_SSTATE_FBACK: - delay = now_delta(hctx->ccid3hctx_t_nom) - hctx->ccid3hctx_delta; + delay = (timeval_delta(&now, &hctx->ccid3hctx_t_nom) - + hctx->ccid3hctx_delta); ccid3_pr_debug("send_packet delay=%ld\n", delay); delay /= -1000; /* divide by -1000 is to convert to ms and get sign right */ @@ -1068,7 +1053,7 @@ static void ccid3_hc_tx_packet_sent(struct sock *sk, int more, int len) * Algorithm in "8.1. Window Counter Valuer" in * draft-ietf-dccp-ccid3-11.txt */ - quarter_rtt = now_delta(hctx->ccid3hctx_t_last_win_count) / + quarter_rtt = timeval_delta(&now, &hctx->ccid3hctx_t_last_win_count) / (hctx->ccid3hctx_rtt / 4); if (quarter_rtt > 0) { hctx->ccid3hctx_t_last_win_count = now; @@ -1102,8 +1087,8 @@ static void ccid3_hc_tx_packet_sent(struct sock *sk, int more, int len) hctx->ccid3hctx_t_nom = now; ccid3_calc_new_t_ipi(hctx); ccid3_calc_new_delta(hctx); - (hctx->ccid3hctx_t_nom).tv_usec += hctx->ccid3hctx_t_ipi; - timeval_fix(&(hctx->ccid3hctx_t_nom)); + timeval_add_usecs(&hctx->ccid3hctx_t_nom, + hctx->ccid3hctx_t_ipi); } break; default: @@ -1167,7 +1152,7 @@ static void ccid3_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb) } /* Update RTT */ - r_sample = now_delta(packet->dccphtx_tstamp); + r_sample = timeval_now_delta(&packet->dccphtx_tstamp); /* FIXME: */ // r_sample -= usecs_to_jiffies(t_elapsed * 10); @@ -1224,15 +1209,11 @@ static void ccid3_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb) ccid3_hc_tx_update_x(sk); /* Update next send time */ - if 
(hctx->ccid3hctx_t_ipi > (hctx->ccid3hctx_t_nom).tv_usec) { - hctx->ccid3hctx_t_nom.tv_usec += USEC_PER_SEC; - (hctx->ccid3hctx_t_nom).tv_sec--; - } - /* FIXME - if no feedback then t_ipi can go > 1 second */ - (hctx->ccid3hctx_t_nom).tv_usec -= hctx->ccid3hctx_t_ipi; + timeval_sub_usecs(&hctx->ccid3hctx_t_nom, + hctx->ccid3hctx_t_ipi); ccid3_calc_new_t_ipi(hctx); - (hctx->ccid3hctx_t_nom).tv_usec += hctx->ccid3hctx_t_ipi; - timeval_fix(&(hctx->ccid3hctx_t_nom)); + timeval_add_usecs(&hctx->ccid3hctx_t_nom, + hctx->ccid3hctx_t_ipi); ccid3_calc_new_delta(hctx); /* remove all packets older than the one acked from history */ @@ -1559,20 +1540,24 @@ static void ccid3_hc_rx_send_feedback(struct sock *sk) struct dccp_sock *dp = dccp_sk(sk); struct ccid3_hc_rx_sock *hcrx = dp->dccps_hc_rx_ccid_private; struct dccp_rx_hist_entry *packet; + struct timeval now; ccid3_pr_debug("%s, sk=%p\n", dccp_role(sk), sk); + do_gettimeofday(&now); + switch (hcrx->ccid3hcrx_state) { case TFRC_RSTATE_NO_DATA: hcrx->ccid3hcrx_x_recv = 0; break; case TFRC_RSTATE_DATA: { - u32 delta = now_delta(hcrx->ccid3hcrx_tstamp_last_feedback); + const u32 delta = timeval_delta(&now, + &hcrx->ccid3hcrx_tstamp_last_feedback); - if (delta == 0) - delta = 1; /* to prevent divide by zero */ hcrx->ccid3hcrx_x_recv = (hcrx->ccid3hcrx_bytes_recv * - USEC_PER_SEC) / delta; + USEC_PER_SEC); + if (likely(delta > 1)) + hcrx->ccid3hcrx_x_recv /= delta; } break; default: @@ -1590,13 +1575,14 @@ static void ccid3_hc_rx_send_feedback(struct sock *sk) return; } - do_gettimeofday(&(hcrx->ccid3hcrx_tstamp_last_feedback)); + hcrx->ccid3hcrx_tstamp_last_feedback = now; hcrx->ccid3hcrx_last_counter = packet->dccphrx_ccval; hcrx->ccid3hcrx_seqno_last_counter = packet->dccphrx_seqno; hcrx->ccid3hcrx_bytes_recv = 0; /* Convert to multiples of 10us */ - hcrx->ccid3hcrx_elapsed_time = now_delta(packet->dccphrx_tstamp) / 10; + hcrx->ccid3hcrx_elapsed_time = + timeval_delta(&now, &packet->dccphrx_tstamp) / 10; if (hcrx->ccid3hcrx_p 
== 0) hcrx->ccid3hcrx_pinv = ~0; else @@ -1676,7 +1662,7 @@ static u32 ccid3_hc_rx_calc_first_li(struct sock *sk) struct ccid3_hc_rx_sock *hcrx = dp->dccps_hc_rx_ccid_private; struct dccp_rx_hist_entry *entry, *next, *tail = NULL; u32 rtt, delta, x_recv, fval, p, tmp2; - struct timeval tstamp = { 0 }, tmp_tv; + struct timeval tstamp = { 0, }; int interval = 0; int win_count = 0; int step = 0; @@ -1718,18 +1704,16 @@ static u32 ccid3_hc_rx_calc_first_li(struct sock *sk) interval = 1; } found: - timeval_sub(tstamp,tail->dccphrx_tstamp,&tmp_tv); - rtt = (tmp_tv.tv_sec * USEC_PER_SEC + tmp_tv.tv_usec) * 4 / interval; + rtt = timeval_delta(&tstamp, &tail->dccphrx_tstamp) * 4 / interval; ccid3_pr_debug("%s, sk=%p, approximated RTT to %uus\n", dccp_role(sk), sk, rtt); if (rtt == 0) rtt = 1; - delta = now_delta(hcrx->ccid3hcrx_tstamp_last_feedback); - if (delta == 0) - delta = 1; - - x_recv = (hcrx->ccid3hcrx_bytes_recv * USEC_PER_SEC) / delta; + delta = timeval_now_delta(&hcrx->ccid3hcrx_tstamp_last_feedback); + x_recv = hcrx->ccid3hcrx_bytes_recv * USEC_PER_SEC; + if (likely(delta > 1)) + x_recv /= delta; tmp1 = (u64)x_recv * (u64)rtt; do_div(tmp1,10000000); @@ -1926,7 +1910,6 @@ static void ccid3_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb) const struct dccp_options_received *opt_recv; struct dccp_rx_hist_entry *packet; struct timeval now; - u32 now_usecs; u8 win_count; u32 p_prev; int ins; @@ -1948,8 +1931,7 @@ static void ccid3_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb) break; p_prev = hcrx->ccid3hcrx_rtt; do_gettimeofday(&now); - now_usecs = now.tv_sec * USEC_PER_SEC + now.tv_usec; - hcrx->ccid3hcrx_rtt = now_usecs - + hcrx->ccid3hcrx_rtt = timeval_usecs(&now) - (opt_recv->dccpor_timestamp_echo - opt_recv->dccpor_elapsed_time) * 10; if (p_prev != hcrx->ccid3hcrx_rtt) @@ -1994,15 +1976,16 @@ static void ccid3_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb) case TFRC_RSTATE_DATA: hcrx->ccid3hcrx_bytes_recv += skb->len - 
dccp_hdr(skb)->dccph_doff * 4; - if (ins == 0) { - if (now_delta(hcrx->ccid3hcrx_tstamp_last_ack) >= - hcrx->ccid3hcrx_rtt) { - do_gettimeofday(&hcrx->ccid3hcrx_tstamp_last_ack); - ccid3_hc_rx_send_feedback(sk); - } - return; + if (ins != 0) + break; + + do_gettimeofday(&now); + if (timeval_delta(&now, &hcrx->ccid3hcrx_tstamp_last_ack) >= + hcrx->ccid3hcrx_rtt) { + hcrx->ccid3hcrx_tstamp_last_ack = now; + ccid3_hc_rx_send_feedback(sk); } - break; + return; default: printk(KERN_CRIT "%s: %s, sk=%p, Illegal state (%d)!\n", __FUNCTION__, dccp_role(sk), sk, hcrx->ccid3hcrx_state); diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h index 6ba21509e797..5cd9e794bbe2 100644 --- a/net/dccp/dccp.h +++ b/net/dccp/dccp.h @@ -429,17 +429,53 @@ extern int dccp_ackpkts_add(struct dccp_ackpkts *ap, u64 ackno, u8 state); extern void dccp_ackpkts_check_rcv_ackno(struct dccp_ackpkts *ap, struct sock *sk, u64 ackno); +static inline suseconds_t timeval_usecs(const struct timeval *tv) +{ + return tv->tv_sec * USEC_PER_SEC + tv->tv_usec; +} + +static inline suseconds_t timeval_delta(const struct timeval *large, + const struct timeval *small) +{ + time_t secs = large->tv_sec - small->tv_sec; + suseconds_t usecs = large->tv_usec - small->tv_usec; + + if (usecs < 0) { + secs--; + usecs += USEC_PER_SEC; + } + return secs * USEC_PER_SEC + usecs; +} + +static inline void timeval_add_usecs(struct timeval *tv, + const suseconds_t usecs) +{ + tv->tv_usec += usecs; + while (tv->tv_usec >= USEC_PER_SEC) { + tv->tv_sec++; + tv->tv_usec -= USEC_PER_SEC; + } +} + +static inline void timeval_sub_usecs(struct timeval *tv, + const suseconds_t usecs) +{ + tv->tv_usec -= usecs; + while (tv->tv_usec < 0) { + tv->tv_sec--; + tv->tv_usec += USEC_PER_SEC; + } +} + /* * Returns the difference in usecs between timeval * passed in and current time */ -static inline u32 now_delta(struct timeval tv) +static inline suseconds_t timeval_now_delta(const struct timeval *tv) { struct timeval now; - do_gettimeofday(&now); - 
return (now.tv_sec - tv.tv_sec) * USEC_PER_SEC + - (now.tv_usec - tv.tv_usec); + return timeval_delta(&now, tv); } #ifdef CONFIG_IP_DCCP_DEBUG diff --git a/net/dccp/options.c b/net/dccp/options.c index eabcc8f1c625..382c5894acb2 100644 --- a/net/dccp/options.c +++ b/net/dccp/options.c @@ -359,7 +359,7 @@ static void dccp_insert_option_ack_vector(struct sock *sk, struct sk_buff *skb) #endif struct dccp_ackpkts *ap = dp->dccps_hc_rx_ackpkts; int len = ap->dccpap_buf_vector_len + 2; - const u32 elapsed_time = now_delta(ap->dccpap_time) / 10; + const u32 elapsed_time = timeval_now_delta(&ap->dccpap_time) / 10; unsigned char *to, *from; if (elapsed_time != 0) @@ -451,7 +451,8 @@ static void dccp_insert_option_timestamp_echo(struct sock *sk, "CLIENT TX opt: " : "server TX opt: "; #endif u32 tstamp_echo; - const u32 elapsed_time = now_delta(dp->dccps_timestamp_time) / 10; + const u32 elapsed_time = + timeval_now_delta(&dp->dccps_timestamp_time) / 10; const int elapsed_time_len = dccp_elapsed_time_len(elapsed_time); const int len = 6 + elapsed_time_len; unsigned char *to; From 6b5e633ab1525b4def3f36b53903b00586e9966d Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Sat, 27 Aug 2005 20:11:28 -0300 Subject: [PATCH 478/584] [CCID3]: Introduce usecs_div To avoid open coding this all over the place. Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. Miller --- net/dccp/ccids/ccid3.c | 109 +++++++++++++++-------------------------- 1 file changed, 39 insertions(+), 70 deletions(-) diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c index 60e3a5f9fcb4..ff41977f1edc 100644 --- a/net/dccp/ccids/ccid3.c +++ b/net/dccp/ccids/ccid3.c @@ -40,6 +40,15 @@ #include "../packet_history.h" #include "ccid3.h" +/* + * Reason for maths with 10 here is to avoid 32 bit overflow when a is big. + */ +static inline u32 usecs_div(const u32 a, const u32 b) +{ + const u32 tmp = a * (USEC_PER_SEC / 10); + return b > 20 ? 
tmp / (b / 10) : tmp; +} + #ifdef CCID3_DEBUG extern int ccid3_debug; @@ -748,20 +757,13 @@ static u32 ccid3_calc_x(u16 s, u32 R, u32 p) /* Calculate new t_ipi (inter packet interval) by t_ipi = s / X_inst */ static inline void ccid3_calc_new_t_ipi(struct ccid3_hc_tx_sock *hctx) { - if (hctx->ccid3hctx_state == TFRC_SSTATE_NO_FBACK) - return; - /* if no feedback spec says t_ipi is 1 second (set elsewhere and then - * doubles after every no feedback timer (separate function) */ - - if (hctx->ccid3hctx_x < 10) { - ccid3_pr_debug("ccid3_calc_new_t_ipi - ccid3hctx_x < 10\n"); - hctx->ccid3hctx_x = 10; - } - hctx->ccid3hctx_t_ipi = (hctx->ccid3hctx_s * 100000) - / (hctx->ccid3hctx_x / 10); - /* reason for above maths with 10 in there is to avoid 32 bit - * overflow for jumbo packets */ - + /* + * If no feedback spec says t_ipi is 1 second (set elsewhere and then + * doubles after every no feedback timer (separate function) + */ + if (hctx->ccid3hctx_state != TFRC_SSTATE_NO_FBACK) + hctx->ccid3hctx_t_ipi = usecs_div(hctx->ccid3hctx_s, + hctx->ccid3hctx_x); } /* Calculate new delta by delta = min(t_ipi / 2, t_gran / 2) */ @@ -769,7 +771,6 @@ static inline void ccid3_calc_new_delta(struct ccid3_hc_tx_sock *hctx) { hctx->ccid3hctx_delta = min_t(u32, hctx->ccid3hctx_t_ipi / 2, TFRC_OPSYS_HALF_TIME_GRAN); - } /* @@ -802,22 +803,13 @@ static void ccid3_hc_tx_update_x(struct sock *sk) do_gettimeofday(&now); if (timeval_delta(&now, &hctx->ccid3hctx_t_ld) >= hctx->ccid3hctx_rtt) { - /* Avoid divide by zero below */ - const u32 rtt = max_t(u32, hctx->ccid3hctx_rtt, 10); - - hctx->ccid3hctx_x = max_t(u32, min_t(u32, 2 * hctx->ccid3hctx_x_recv, - 2 * hctx->ccid3hctx_x), - ((hctx->ccid3hctx_s * 100000) / - (rtt / 10))); - /* Using 100000 and 10 to avoid 32 bit overflow for jumbo frames */ + hctx->ccid3hctx_x = max_t(u32, min_t(u32, hctx->ccid3hctx_x_recv, + hctx->ccid3hctx_x) * 2, + usecs_div(hctx->ccid3hctx_s, + hctx->ccid3hctx_rtt)); hctx->ccid3hctx_t_ld = now; } } - - if 
(hctx->ccid3hctx_x == 0) { - ccid3_pr_debug("ccid3hctx_x = 0!\n"); - hctx->ccid3hctx_x = 1; - } } static void ccid3_hc_tx_no_feedback_timer(unsigned long data) @@ -826,7 +818,6 @@ static void ccid3_hc_tx_no_feedback_timer(unsigned long data) struct dccp_sock *dp = dccp_sk(sk); unsigned long next_tmout = 0; struct ccid3_hc_tx_sock *hctx = dp->dccps_hc_tx_ccid_private; - u32 rtt; bh_lock_sock(sk); if (sock_owned_by_user(sk)) { @@ -840,19 +831,14 @@ static void ccid3_hc_tx_no_feedback_timer(unsigned long data) ccid3_pr_debug("%s, sk=%p, state=%s\n", dccp_role(sk), sk, ccid3_tx_state_name(hctx->ccid3hctx_state)); - if (hctx->ccid3hctx_x < 10) { - ccid3_pr_debug("TFRC_SSTATE_NO_FBACK ccid3hctx_x < 10\n"); - hctx->ccid3hctx_x = 10; - } - switch (hctx->ccid3hctx_state) { case TFRC_SSTATE_TERM: goto out; case TFRC_SSTATE_NO_FBACK: /* Halve send rate */ hctx->ccid3hctx_x /= 2; - if (hctx->ccid3hctx_x < - (hctx->ccid3hctx_s / TFRC_MAX_BACK_OFF_TIME)) + if (hctx->ccid3hctx_x < (hctx->ccid3hctx_s / + TFRC_MAX_BACK_OFF_TIME)) hctx->ccid3hctx_x = (hctx->ccid3hctx_s / TFRC_MAX_BACK_OFF_TIME); @@ -861,9 +847,9 @@ static void ccid3_hc_tx_no_feedback_timer(unsigned long data) dccp_role(sk), sk, ccid3_tx_state_name(hctx->ccid3hctx_state), hctx->ccid3hctx_x); - next_tmout = max_t(u32, 2 * (hctx->ccid3hctx_s * 100000) / (hctx->ccid3hctx_x / 10), + next_tmout = max_t(u32, 2 * usecs_div(hctx->ccid3hctx_s, + hctx->ccid3hctx_x), TFRC_INITIAL_TIMEOUT); - /* do above maths with 100000 and 10 to prevent overflow on 32 bit */ /* * FIXME - not sure above calculation is correct. 
See section * 5 of CCID3 11 should adjust tx_t_ipi and double that to @@ -875,12 +861,9 @@ static void ccid3_hc_tx_no_feedback_timer(unsigned long data) * Check if IDLE since last timeout and recv rate is less than * 4 packets per RTT */ - rtt = hctx->ccid3hctx_rtt; - if (rtt < 10) - rtt = 10; - /* stop divide by zero below */ if (!hctx->ccid3hctx_idle || - (hctx->ccid3hctx_x_recv >= 4 * (hctx->ccid3hctx_s * 100000) / (rtt / 10))) { + (hctx->ccid3hctx_x_recv >= + 4 * usecs_div(hctx->ccid3hctx_s, hctx->ccid3hctx_rtt))) { ccid3_pr_debug("%s, sk=%p, state=%s, not idle\n", dccp_role(sk), sk, ccid3_tx_state_name(hctx->ccid3hctx_state)); @@ -905,13 +888,13 @@ static void ccid3_hc_tx_no_feedback_timer(unsigned long data) /* Update sending rate */ ccid3_hc_tx_update_x(sk); } - if (hctx->ccid3hctx_x == 0) { - ccid3_pr_debug("TFRC_SSTATE_FBACK ccid3hctx_x = 0!\n"); - hctx->ccid3hctx_x = 10; - } - /* Schedule no feedback timer to expire in max(4 * R, 2 * s / X) */ + /* + * Schedule no feedback timer to expire in + * max(4 * R, 2 * s / X) + */ next_tmout = max_t(u32, hctx->ccid3hctx_t_rto, - 2 * (hctx->ccid3hctx_s * 100000) / (hctx->ccid3hctx_x / 10)); + 2 * usecs_div(hctx->ccid3hctx_s, + hctx->ccid3hctx_x)); break; default: printk(KERN_CRIT "%s: %s, sk=%p, Illegal state (%d)!\n", @@ -1053,8 +1036,10 @@ static void ccid3_hc_tx_packet_sent(struct sock *sk, int more, int len) * Algorithm in "8.1. 
Window Counter Valuer" in * draft-ietf-dccp-ccid3-11.txt */ - quarter_rtt = timeval_delta(&now, &hctx->ccid3hctx_t_last_win_count) / - (hctx->ccid3hctx_rtt / 4); + quarter_rtt = timeval_delta(&now, &hctx->ccid3hctx_t_last_win_count); + if (likely(hctx->ccid3hctx_rtt > 8)) + quarter_rtt /= hctx->ccid3hctx_rtt / 4; + if (quarter_rtt > 0) { hctx->ccid3hctx_t_last_win_count = now; hctx->ccid3hctx_last_win_count = (hctx->ccid3hctx_last_win_count + @@ -1171,13 +1156,6 @@ static void ccid3_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb) hctx->ccid3hctx_rtt = (hctx->ccid3hctx_rtt * 9) / 10 + r_sample / 10; - /* - * XXX: this is to avoid a division by zero in ccid3_hc_tx_packet_sent - * implemention of the new window count. - */ - if (hctx->ccid3hctx_rtt < 4) - hctx->ccid3hctx_rtt = 4; - ccid3_pr_debug("%s, sk=%p, New RTT estimate=%uus, " "r_sample=%us\n", dccp_role(sk), sk, hctx->ccid3hctx_rtt, r_sample); @@ -1220,21 +1198,14 @@ static void ccid3_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb) dccp_tx_hist_purge_older(ccid3_tx_hist, &hctx->ccid3hctx_hist, packet); - if (hctx->ccid3hctx_x < 10) { - ccid3_pr_debug("ccid3_hc_tx_packet_recv hctx_x < 10\n"); - hctx->ccid3hctx_x = 10; - } - /* to prevent divide by zero below */ - /* * Schedule no feedback timer to expire in * max(4 * R, 2 * s / X) */ next_tmout = max(hctx->ccid3hctx_t_rto, - (2 * (hctx->ccid3hctx_s * 100000) / - (hctx->ccid3hctx_x / 10))); - /* maths with 100000 and 10 is to prevent overflow with 32 bit */ - + 2 * usecs_div(hctx->ccid3hctx_s, + hctx->ccid3hctx_x)); + ccid3_pr_debug("%s, sk=%p, Scheduled no feedback timer to " "expire in %lu jiffies (%luus)\n", dccp_role(sk), sk, @@ -1350,8 +1321,6 @@ static int ccid3_hc_tx_init(struct sock *sk) /* Set transmission rate to 1 packet per second */ hctx->ccid3hctx_x = hctx->ccid3hctx_s; - /* See ccid3_hc_tx_packet_sent win_count calculatation */ - hctx->ccid3hctx_rtt = 4; hctx->ccid3hctx_t_rto = USEC_PER_SEC; hctx->ccid3hctx_state = 
TFRC_SSTATE_NO_SENT; INIT_LIST_HEAD(&hctx->ccid3hctx_hist); From cfc3c525a3b434cabf92bf7054f2c6c93497fbea Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Sat, 27 Aug 2005 20:20:37 -0300 Subject: [PATCH 479/584] [CCID3]: Move the CCID3 defines to ccid3.h Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. Miller --- net/dccp/ccids/ccid3.c | 35 ++--------------------------------- net/dccp/ccids/ccid3.h | 42 +++++++++++++++++++++++++++++++++++++----- 2 files changed, 39 insertions(+), 38 deletions(-) diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c index ff41977f1edc..cfd11234d8f9 100644 --- a/net/dccp/ccids/ccid3.c +++ b/net/dccp/ccids/ccid3.c @@ -49,9 +49,9 @@ static inline u32 usecs_div(const u32 a, const u32 b) return b > 20 ? tmp / (b / 10) : tmp; } -#ifdef CCID3_DEBUG -extern int ccid3_debug; +static int ccid3_debug; +#ifdef CCID3_DEBUG #define ccid3_pr_debug(format, a...) \ do { if (ccid3_debug) \ printk(KERN_DEBUG "%s: " format, __FUNCTION__, ##a); \ @@ -60,37 +60,6 @@ extern int ccid3_debug; #define ccid3_pr_debug(format, a...) 
#endif -#define TFRC_MIN_PACKET_SIZE 16 -#define TFRC_STD_PACKET_SIZE 256 -#define TFRC_MAX_PACKET_SIZE 65535 - -#define TFRC_INITIAL_TIMEOUT (2 * USEC_PER_SEC) -/* two seconds as per CCID3 spec 11 */ - -#define TFRC_OPSYS_HALF_TIME_GRAN (USEC_PER_SEC / (2 * HZ)) -/* above is in usecs - half the scheduling granularity as per RFC3448 4.6 */ - -#define TFRC_WIN_COUNT_PER_RTT 4 -#define TFRC_WIN_COUNT_LIMIT 16 - -#define TFRC_MAX_BACK_OFF_TIME 64 -/* above is in seconds */ - -#define TFRC_SMALLEST_P 40 - -#define TFRC_RECV_IVAL_F_LENGTH 8 /* length(w[]) */ - -/* Number of later packets received before one is considered lost */ -#define TFRC_RECV_NUM_LATE_LOSS 3 - -enum ccid3_options { - TFRC_OPT_LOSS_EVENT_RATE = 192, - TFRC_OPT_LOSS_INTERVALS = 193, - TFRC_OPT_RECEIVE_RATE = 194, -}; - -static int ccid3_debug; - static struct dccp_tx_hist *ccid3_tx_hist; static struct dccp_rx_hist *ccid3_rx_hist; diff --git a/net/dccp/ccids/ccid3.h b/net/dccp/ccids/ccid3.h index 5ef72cda7cd7..f8965700bbe9 100644 --- a/net/dccp/ccids/ccid3.h +++ b/net/dccp/ccids/ccid3.h @@ -36,8 +36,39 @@ #ifndef _DCCP_CCID3_H_ #define _DCCP_CCID3_H_ -#include +#include #include +#include +#include + +#define TFRC_MIN_PACKET_SIZE 16 +#define TFRC_STD_PACKET_SIZE 256 +#define TFRC_MAX_PACKET_SIZE 65535 + +/* Two seconds as per CCID3 spec */ +#define TFRC_INITIAL_TIMEOUT (2 * USEC_PER_SEC) + +/* In usecs - half the scheduling granularity as per RFC3448 4.6 */ +#define TFRC_OPSYS_HALF_TIME_GRAN (USEC_PER_SEC / (2 * HZ)) + +#define TFRC_WIN_COUNT_PER_RTT 4 +#define TFRC_WIN_COUNT_LIMIT 16 + +/* In seconds */ +#define TFRC_MAX_BACK_OFF_TIME 64 + +#define TFRC_SMALLEST_P 40 + +#define TFRC_RECV_IVAL_F_LENGTH 8 + +/* Number of later packets received before one is considered lost */ +#define TFRC_RECV_NUM_LATE_LOSS 3 + +enum ccid3_options { + TFRC_OPT_LOSS_EVENT_RATE = 192, + TFRC_OPT_LOSS_INTERVALS = 193, + TFRC_OPT_RECEIVE_RATE = 194, +}; struct ccid3_options_received { u64 ccid3or_seqno:48, @@ -47,7 +78,7 
@@ struct ccid3_options_received { u32 ccid3or_receive_rate; }; -/** struct ccid3_hc_tx_sock - CCID3 sender half connection congestion control block +/** struct ccid3_hc_tx_sock - CCID3 sender half connection sock * * @ccid3hctx_state - Sender state * @ccid3hctx_x - Current sending rate @@ -57,7 +88,8 @@ struct ccid3_options_received { * @ccid3hctx_rtt - Estimate of current round trip time in usecs * @@ccid3hctx_p - Current loss event rate (0-1) scaled by 1000000 * @ccid3hctx_last_win_count - Last window counter sent - * @ccid3hctx_t_last_win_count - Timestamp of earliest packet with last_win_count value sent + * @ccid3hctx_t_last_win_count - Timestamp of earliest packet + * with last_win_count value sent * @ccid3hctx_no_feedback_timer - Handle to no feedback timer * @ccid3hctx_idle - FIXME * @ccid3hctx_t_ld - Time last doubled during slow start @@ -112,9 +144,9 @@ struct ccid3_hc_rx_sock { }; #define ccid3_hc_tx_field(s,field) (s->dccps_hc_tx_ccid_private == NULL ? 0 : \ - ((struct ccid3_hc_tx_sock *)s->dccps_hc_tx_ccid_private)->ccid3hctx_##field) + ((struct ccid3_hc_tx_sock *)s->dccps_hc_tx_ccid_private)->ccid3hctx_##field) #define ccid3_hc_rx_field(s,field) (s->dccps_hc_rx_ccid_private == NULL ? 0 : \ - ((struct ccid3_hc_rx_sock *)s->dccps_hc_rx_ccid_private)->ccid3hcrx_##field) + ((struct ccid3_hc_rx_sock *)s->dccps_hc_rx_ccid_private)->ccid3hcrx_##field) #endif /* _DCCP_CCID3_H_ */ From ae6706f0678b89de07ad3b456893cc883584f711 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Sat, 27 Aug 2005 23:03:09 -0300 Subject: [PATCH 480/584] [CCID3]: Move the loss interval code to loss_interval.[ch] And put this into net/dccp/ccids/lib/, where packet_history.[ch] will also be moved and then we'll have a tfrc_lib.ko module that will be used by dccp_ccid3.ko and other CCIDs that are variations of TFRC (RFC 3448). Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. 
Miller --- net/dccp/ccids/Makefile | 2 +- net/dccp/ccids/ccid3.c | 155 +++++------------------------ net/dccp/ccids/ccid3.h | 11 +- net/dccp/ccids/lib/loss_interval.c | 144 +++++++++++++++++++++++++++ net/dccp/ccids/lib/loss_interval.h | 61 ++++++++++++ 5 files changed, 234 insertions(+), 139 deletions(-) create mode 100644 net/dccp/ccids/lib/loss_interval.c create mode 100644 net/dccp/ccids/lib/loss_interval.h diff --git a/net/dccp/ccids/Makefile b/net/dccp/ccids/Makefile index 1c720131c5db..323b68f3b607 100644 --- a/net/dccp/ccids/Makefile +++ b/net/dccp/ccids/Makefile @@ -1,3 +1,3 @@ obj-$(CONFIG_IP_DCCP_CCID3) += dccp_ccid3.o -dccp_ccid3-y := ccid3.o +dccp_ccid3-y := ccid3.o lib/loss_interval.o diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c index cfd11234d8f9..7468928b83c6 100644 --- a/net/dccp/ccids/ccid3.c +++ b/net/dccp/ccids/ccid3.c @@ -38,6 +38,7 @@ #include "../ccid.h" #include "../dccp.h" #include "../packet_history.h" +#include "lib/loss_interval.h" #include "ccid3.h" /* @@ -62,30 +63,7 @@ static int ccid3_debug; static struct dccp_tx_hist *ccid3_tx_hist; static struct dccp_rx_hist *ccid3_rx_hist; - -static kmem_cache_t *ccid3_loss_interval_hist_slab __read_mostly; - -static inline struct ccid3_loss_interval_hist_entry * - ccid3_loss_interval_hist_entry_new(const unsigned int __nocast prio) -{ - return kmem_cache_alloc(ccid3_loss_interval_hist_slab, prio); -} - -static inline void ccid3_loss_interval_hist_entry_delete(struct ccid3_loss_interval_hist_entry *entry) -{ - if (entry != NULL) - kmem_cache_free(ccid3_loss_interval_hist_slab, entry); -} - -static void ccid3_loss_interval_history_delete(struct list_head *hist) -{ - struct ccid3_loss_interval_hist_entry *entry, *next; - - list_for_each_entry_safe(entry, next, hist, ccid3lih_node) { - list_del_init(&entry->ccid3lih_node); - kmem_cache_free(ccid3_loss_interval_hist_slab, entry); - } -} +static struct dccp_li_hist *ccid3_li_hist; static int ccid3_init(struct sock *sk) { @@ -1414,7 
+1392,7 @@ trim_history: */ num_later = TFRC_RECV_NUM_LATE_LOSS + 1; - if (!list_empty(&hcrx->ccid3hcrx_loss_interval_hist)) { + if (!list_empty(&hcrx->ccid3hcrx_li_hist)) { list_for_each_entry_safe(entry, next, &hcrx->ccid3hcrx_hist, dccphrx_node) { if (num_later == 0) { @@ -1555,15 +1533,6 @@ static void ccid3_hc_rx_insert_options(struct sock *sk, struct sk_buff *skb) &x_recv, sizeof(x_recv)); } -/* Weights used to calculate loss event rate */ -/* - * These are integers as per section 8 of RFC3448. We can then divide by 4 * - * when we use it. - */ -static const int ccid3_hc_rx_w[TFRC_RECV_IVAL_F_LENGTH] = { - 4, 4, 4, 4, 3, 2, 1, 1, -}; - /* * args: fvalue - function value to match * returns: p closest to that value @@ -1672,41 +1641,17 @@ static void ccid3_hc_rx_update_li(struct sock *sk, u64 seq_loss, u8 win_loss) { struct dccp_sock *dp = dccp_sk(sk); struct ccid3_hc_rx_sock *hcrx = dp->dccps_hc_rx_ccid_private; - struct ccid3_loss_interval_hist_entry *li_entry; - if (seq_loss != DCCP_MAX_SEQNO + 1) { - ccid3_pr_debug("%s, sk=%p, seq_loss=%llu, win_loss=%u, " - "packet loss detected\n", - dccp_role(sk), sk, seq_loss, win_loss); - - if (list_empty(&hcrx->ccid3hcrx_loss_interval_hist)) { - struct ccid3_loss_interval_hist_entry *li_tail = NULL; - int i; + if (seq_loss != DCCP_MAX_SEQNO + 1 && + list_empty(&hcrx->ccid3hcrx_li_hist)) { + struct dccp_li_hist_entry *li_tail; - ccid3_pr_debug("%s, sk=%p, first loss event detected, " - "creating history\n", - dccp_role(sk), sk); - for (i = 0; i <= TFRC_RECV_IVAL_F_LENGTH; ++i) { - li_entry = ccid3_loss_interval_hist_entry_new(SLAB_ATOMIC); - if (li_entry == NULL) { - ccid3_loss_interval_history_delete(&hcrx->ccid3hcrx_loss_interval_hist); - ccid3_pr_debug("%s, sk=%p, not enough " - "mem for creating " - "history\n", - dccp_role(sk), sk); - return; - } - if (li_tail == NULL) - li_tail = li_entry; - list_add(&li_entry->ccid3lih_node, - &hcrx->ccid3hcrx_loss_interval_hist); - } - - li_entry->ccid3lih_seqno = seq_loss; - 
li_entry->ccid3lih_win_count = win_loss; - - li_tail->ccid3lih_interval = ccid3_hc_rx_calc_first_li(sk); - } + li_tail = dccp_li_hist_interval_new(ccid3_li_hist, + &hcrx->ccid3hcrx_li_hist, + seq_loss, win_loss); + if (li_tail == NULL) + return; + li_tail->dccplih_interval = ccid3_hc_rx_calc_first_li(sk); } /* FIXME: find end of interval */ } @@ -1746,12 +1691,11 @@ static void ccid3_hc_rx_detect_loss(struct sock *sk) } if (a_loss == NULL) { - if (list_empty(&hcrx->ccid3hcrx_loss_interval_hist)) { + if (list_empty(&hcrx->ccid3hcrx_li_hist)) { /* no loss event have occured yet */ - ccid3_pr_debug("%s, sk=%p, TODO: find a lost data " - "packet by comparing to initial " - "seqno\n", - dccp_role(sk), sk); + LIMIT_NETDEBUG("%s: TODO: find a lost data packet by " + "comparing to initial seqno\n", + dccp_role(sk)); goto out_update_li; } else { pr_info("%s: %s, sk=%p, ERROR! Less than 4 data " @@ -1799,48 +1743,6 @@ out_update_li: ccid3_hc_rx_update_li(sk, seq_loss, win_loss); } -static u32 ccid3_hc_rx_calc_i_mean(struct sock *sk) -{ - struct dccp_sock *dp = dccp_sk(sk); - struct ccid3_hc_rx_sock *hcrx = dp->dccps_hc_rx_ccid_private; - struct ccid3_loss_interval_hist_entry *li_entry, *li_next; - int i = 0; - u32 i_tot; - u32 i_tot0 = 0; - u32 i_tot1 = 0; - u32 w_tot = 0; - - list_for_each_entry_safe(li_entry, li_next, - &hcrx->ccid3hcrx_loss_interval_hist, - ccid3lih_node) { - if (i < TFRC_RECV_IVAL_F_LENGTH) { - i_tot0 += li_entry->ccid3lih_interval * ccid3_hc_rx_w[i]; - w_tot += ccid3_hc_rx_w[i]; - } - - if (i != 0) - i_tot1 += li_entry->ccid3lih_interval * ccid3_hc_rx_w[i - 1]; - - if (++i > TFRC_RECV_IVAL_F_LENGTH) - break; - } - - if (i != TFRC_RECV_IVAL_F_LENGTH) { - pr_info("%s: %s, sk=%p, ERROR! Missing entry in " - "interval history!\n", - __FUNCTION__, dccp_role(sk), sk); - return 0; - } - - i_tot = max(i_tot0, i_tot1); - - /* FIXME: Why do we do this? 
-Ian McDonald */ - if (i_tot * 4 < w_tot) - i_tot = w_tot * 4; - - return i_tot * 4 / w_tot; -} - static void ccid3_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb) { struct dccp_sock *dp = dccp_sk(sk); @@ -1939,9 +1841,9 @@ static void ccid3_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb) p_prev = hcrx->ccid3hcrx_p; /* Calculate loss event rate */ - if (!list_empty(&hcrx->ccid3hcrx_loss_interval_hist)) + if (!list_empty(&hcrx->ccid3hcrx_li_hist)) /* Scaling up by 1000000 as fixed decimal */ - hcrx->ccid3hcrx_p = 1000000 / ccid3_hc_rx_calc_i_mean(sk); + hcrx->ccid3hcrx_p = 1000000 / dccp_li_hist_calc_i_mean(&hcrx->ccid3hcrx_li_hist); if (hcrx->ccid3hcrx_p > p_prev) { ccid3_hc_rx_send_feedback(sk); @@ -1971,7 +1873,7 @@ static int ccid3_hc_rx_init(struct sock *sk) hcrx->ccid3hcrx_state = TFRC_RSTATE_NO_DATA; INIT_LIST_HEAD(&hcrx->ccid3hcrx_hist); - INIT_LIST_HEAD(&hcrx->ccid3hcrx_loss_interval_hist); + INIT_LIST_HEAD(&hcrx->ccid3hcrx_li_hist); /* * XXX this seems to be paranoid, need to think more about this, for * now start with something different than zero. 
-acme @@ -1996,7 +1898,7 @@ static void ccid3_hc_rx_exit(struct sock *sk) dccp_rx_hist_purge(ccid3_rx_hist, &hcrx->ccid3hcrx_hist); /* Empty loss interval history */ - ccid3_loss_interval_history_delete(&hcrx->ccid3hcrx_loss_interval_hist); + dccp_li_hist_purge(ccid3_li_hist, &hcrx->ccid3hcrx_li_hist); kfree(dp->dccps_hc_rx_ccid_private); dp->dccps_hc_rx_ccid_private = NULL; @@ -2063,11 +1965,8 @@ static __init int ccid3_module_init(void) if (ccid3_tx_hist == NULL) goto out_free_rx; - ccid3_loss_interval_hist_slab = kmem_cache_create("li_hist_ccid3", - sizeof(struct ccid3_loss_interval_hist_entry), - 0, SLAB_HWCACHE_ALIGN, - NULL, NULL); - if (ccid3_loss_interval_hist_slab == NULL) + ccid3_li_hist = dccp_li_hist_new("ccid3"); + if (ccid3_li_hist == NULL) goto out_free_tx; rc = ccid_register(&ccid3); @@ -2077,8 +1976,8 @@ out: return rc; out_free_loss_interval_history: - kmem_cache_destroy(ccid3_loss_interval_hist_slab); - ccid3_loss_interval_hist_slab = NULL; + dccp_li_hist_delete(ccid3_li_hist); + ccid3_li_hist = NULL; out_free_tx: dccp_tx_hist_delete(ccid3_tx_hist); ccid3_tx_hist = NULL; @@ -2110,9 +2009,9 @@ static __exit void ccid3_module_exit(void) dccp_rx_hist_delete(ccid3_rx_hist); ccid3_rx_hist = NULL; } - if (ccid3_loss_interval_hist_slab != NULL) { - kmem_cache_destroy(ccid3_loss_interval_hist_slab); - ccid3_loss_interval_hist_slab = NULL; + if (ccid3_li_hist != NULL) { + dccp_li_hist_delete(ccid3_li_hist); + ccid3_li_hist = NULL; } } module_exit(ccid3_module_exit); diff --git a/net/dccp/ccids/ccid3.h b/net/dccp/ccids/ccid3.h index f8965700bbe9..f68d0b4e31e9 100644 --- a/net/dccp/ccids/ccid3.h +++ b/net/dccp/ccids/ccid3.h @@ -59,8 +59,6 @@ #define TFRC_SMALLEST_P 40 -#define TFRC_RECV_IVAL_F_LENGTH 8 - /* Number of later packets received before one is considered lost */ #define TFRC_RECV_NUM_LATE_LOSS 3 @@ -119,13 +117,6 @@ struct ccid3_hc_tx_sock { struct ccid3_options_received ccid3hctx_options_received; }; -struct ccid3_loss_interval_hist_entry { - 
struct list_head ccid3lih_node; - u64 ccid3lih_seqno:48, - ccid3lih_win_count:4; - u32 ccid3lih_interval; -}; - struct ccid3_hc_rx_sock { u64 ccid3hcrx_seqno_last_counter:48, ccid3hcrx_state:8, @@ -136,7 +127,7 @@ struct ccid3_hc_rx_sock { struct timeval ccid3hcrx_tstamp_last_feedback; struct timeval ccid3hcrx_tstamp_last_ack; struct list_head ccid3hcrx_hist; - struct list_head ccid3hcrx_loss_interval_hist; + struct list_head ccid3hcrx_li_hist; u16 ccid3hcrx_s; u32 ccid3hcrx_pinv; u32 ccid3hcrx_elapsed_time; diff --git a/net/dccp/ccids/lib/loss_interval.c b/net/dccp/ccids/lib/loss_interval.c new file mode 100644 index 000000000000..4c01a54143ad --- /dev/null +++ b/net/dccp/ccids/lib/loss_interval.c @@ -0,0 +1,144 @@ +/* + * net/dccp/ccids/lib/loss_interval.c + * + * Copyright (c) 2005 The University of Waikato, Hamilton, New Zealand. + * Copyright (c) 2005 Ian McDonald + * Copyright (c) 2005 Arnaldo Carvalho de Melo + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ */ + +#include +#include + +#include "loss_interval.h" + +struct dccp_li_hist *dccp_li_hist_new(const char *name) +{ + struct dccp_li_hist *hist = kmalloc(sizeof(*hist), GFP_ATOMIC); + static const char dccp_li_hist_mask[] = "li_hist_%s"; + char *slab_name; + + if (hist == NULL) + goto out; + + slab_name = kmalloc(strlen(name) + sizeof(dccp_li_hist_mask) - 1, + GFP_ATOMIC); + if (slab_name == NULL) + goto out_free_hist; + + sprintf(slab_name, dccp_li_hist_mask, name); + hist->dccplih_slab = kmem_cache_create(slab_name, + sizeof(struct dccp_li_hist_entry), + 0, SLAB_HWCACHE_ALIGN, + NULL, NULL); + if (hist->dccplih_slab == NULL) + goto out_free_slab_name; +out: + return hist; +out_free_slab_name: + kfree(slab_name); +out_free_hist: + kfree(hist); + hist = NULL; + goto out; +} + +EXPORT_SYMBOL_GPL(dccp_li_hist_new); + +void dccp_li_hist_delete(struct dccp_li_hist *hist) +{ + const char* name = kmem_cache_name(hist->dccplih_slab); + + kmem_cache_destroy(hist->dccplih_slab); + kfree(name); + kfree(hist); +} + +EXPORT_SYMBOL_GPL(dccp_li_hist_delete); + +void dccp_li_hist_purge(struct dccp_li_hist *hist, struct list_head *list) +{ + struct dccp_li_hist_entry *entry, *next; + + list_for_each_entry_safe(entry, next, list, dccplih_node) { + list_del_init(&entry->dccplih_node); + kmem_cache_free(hist->dccplih_slab, entry); + } +} + +EXPORT_SYMBOL_GPL(dccp_li_hist_purge); + +/* Weights used to calculate loss event rate */ +/* + * These are integers as per section 8 of RFC3448. We can then divide by 4 * + * when we use it. 
+ */ +static const int dccp_li_hist_w[DCCP_LI_HIST_IVAL_F_LENGTH] = { + 4, 4, 4, 4, 3, 2, 1, 1, +}; + +u32 dccp_li_hist_calc_i_mean(struct list_head *list) +{ + struct dccp_li_hist_entry *li_entry, *li_next; + int i = 0; + u32 i_tot; + u32 i_tot0 = 0; + u32 i_tot1 = 0; + u32 w_tot = 0; + + list_for_each_entry_safe(li_entry, li_next, list, dccplih_node) { + if (i < DCCP_LI_HIST_IVAL_F_LENGTH) { + i_tot0 += li_entry->dccplih_interval * dccp_li_hist_w[i]; + w_tot += dccp_li_hist_w[i]; + } + + if (i != 0) + i_tot1 += li_entry->dccplih_interval * dccp_li_hist_w[i - 1]; + + if (++i > DCCP_LI_HIST_IVAL_F_LENGTH) + break; + } + + if (i != DCCP_LI_HIST_IVAL_F_LENGTH) + return 0; + + i_tot = max(i_tot0, i_tot1); + + /* FIXME: Why do we do this? -Ian McDonald */ + if (i_tot * 4 < w_tot) + i_tot = w_tot * 4; + + return i_tot * 4 / w_tot; +} + +EXPORT_SYMBOL_GPL(dccp_li_hist_calc_i_mean); + +struct dccp_li_hist_entry *dccp_li_hist_interval_new(struct dccp_li_hist *hist, + struct list_head *list, + const u64 seq_loss, + const u8 win_loss) +{ + struct dccp_li_hist_entry *tail = NULL, *entry; + int i; + + for (i = 0; i <= DCCP_LI_HIST_IVAL_F_LENGTH; ++i) { + entry = dccp_li_hist_entry_new(hist, SLAB_ATOMIC); + if (entry == NULL) { + dccp_li_hist_purge(hist, list); + return NULL; + } + if (tail == NULL) + tail = entry; + list_add(&entry->dccplih_node, list); + } + + entry->dccplih_seqno = seq_loss; + entry->dccplih_win_count = win_loss; + return tail; +} + +EXPORT_SYMBOL_GPL(dccp_li_hist_interval_new); diff --git a/net/dccp/ccids/lib/loss_interval.h b/net/dccp/ccids/lib/loss_interval.h new file mode 100644 index 000000000000..13ad47ba1420 --- /dev/null +++ b/net/dccp/ccids/lib/loss_interval.h @@ -0,0 +1,61 @@ +#ifndef _DCCP_LI_HIST_ +#define _DCCP_LI_HIST_ +/* + * net/dccp/ccids/lib/loss_interval.h + * + * Copyright (c) 2005 The University of Waikato, Hamilton, New Zealand. 
+ * Copyright (c) 2005 Ian McDonald + * Copyright (c) 2005 Arnaldo Carvalho de Melo + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + */ + +#include +#include +#include +#include + +#define DCCP_LI_HIST_IVAL_F_LENGTH 8 + +struct dccp_li_hist { + kmem_cache_t *dccplih_slab; +}; + +extern struct dccp_li_hist *dccp_li_hist_new(const char *name); +extern void dccp_li_hist_delete(struct dccp_li_hist *hist); + +struct dccp_li_hist_entry { + struct list_head dccplih_node; + u64 dccplih_seqno:48, + dccplih_win_count:4; + u32 dccplih_interval; +}; + +static inline struct dccp_li_hist_entry * + dccp_li_hist_entry_new(struct dccp_li_hist *hist, + const unsigned int __nocast prio) +{ + return kmem_cache_alloc(hist->dccplih_slab, prio); +} + +static inline void dccp_li_hist_entry_delete(struct dccp_li_hist *hist, + struct dccp_li_hist_entry *entry) +{ + if (entry != NULL) + kmem_cache_free(hist->dccplih_slab, entry); +} + +extern void dccp_li_hist_purge(struct dccp_li_hist *hist, + struct list_head *list); + +extern u32 dccp_li_hist_calc_i_mean(struct list_head *list); + +extern struct dccp_li_hist_entry * + dccp_li_hist_interval_new(struct dccp_li_hist *hist, + struct list_head *list, + const u64 seq_loss, + const u8 win_loss); +#endif /* _DCCP_LI_HIST_ */ From 4524b259541e1eea07020af825d8e7b0e4faaec5 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Sat, 27 Aug 2005 23:18:26 -0300 Subject: [PATCH 481/584] [DCCP]: Just move packet_history.[ch] to net/dccp/ccids/lib/ Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. 
Miller --- net/dccp/Makefile | 2 +- net/dccp/ccids/Makefile | 2 +- net/dccp/ccids/ccid3.c | 2 +- net/dccp/{ => ccids/lib}/packet_history.c | 0 net/dccp/{ => ccids/lib}/packet_history.h | 2 +- 5 files changed, 4 insertions(+), 4 deletions(-) rename net/dccp/{ => ccids/lib}/packet_history.c (100%) rename net/dccp/{ => ccids/lib}/packet_history.h (99%) diff --git a/net/dccp/Makefile b/net/dccp/Makefile index 44a867f29184..fb97bb042455 100644 --- a/net/dccp/Makefile +++ b/net/dccp/Makefile @@ -1,7 +1,7 @@ obj-$(CONFIG_IP_DCCP) += dccp.o dccp-y := ccid.o input.o ipv4.o minisocks.o options.o output.o proto.o \ - timer.o packet_history.o + timer.o obj-$(CONFIG_INET_DCCP_DIAG) += dccp_diag.o diff --git a/net/dccp/ccids/Makefile b/net/dccp/ccids/Makefile index 323b68f3b607..29eb1b61fdbd 100644 --- a/net/dccp/ccids/Makefile +++ b/net/dccp/ccids/Makefile @@ -1,3 +1,3 @@ obj-$(CONFIG_IP_DCCP_CCID3) += dccp_ccid3.o -dccp_ccid3-y := ccid3.o lib/loss_interval.o +dccp_ccid3-y := ccid3.o lib/loss_interval.o lib/packet_history.o diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c index 7468928b83c6..12548fbde86b 100644 --- a/net/dccp/ccids/ccid3.c +++ b/net/dccp/ccids/ccid3.c @@ -37,7 +37,7 @@ #include #include "../ccid.h" #include "../dccp.h" -#include "../packet_history.h" +#include "lib/packet_history.h" #include "lib/loss_interval.h" #include "ccid3.h" diff --git a/net/dccp/packet_history.c b/net/dccp/ccids/lib/packet_history.c similarity index 100% rename from net/dccp/packet_history.c rename to net/dccp/ccids/lib/packet_history.c diff --git a/net/dccp/packet_history.h b/net/dccp/ccids/lib/packet_history.h similarity index 99% rename from net/dccp/packet_history.h rename to net/dccp/ccids/lib/packet_history.h index 2e5ba343e3dd..235828d822d9 100644 --- a/net/dccp/packet_history.h +++ b/net/dccp/ccids/lib/packet_history.h @@ -42,7 +42,7 @@ #include #include -#include "dccp.h" +#include "../../dccp.h" struct dccp_tx_hist_entry { struct list_head dccphtx_node; From 
5cea0ddce56ff3406a81fbbab80ef45c65701673 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Sat, 27 Aug 2005 23:50:46 -0300 Subject: [PATCH 482/584] [DCCP]: Introduce dccp_tfrc_lib module with net/dccp/ccids/lib/*.c I'll now take a look at the other proposed TFRC DCCP CCIDs to find more code that is now in ccid3.c and move to this module, the loss event rate, calc_X, etc most probably will be moved there. The main goal of these changes is to pave the way for the implementation of more TFRC based DCCP CCIDs and to shrink ccid3.c, reducing its complexity and helping in getting it rock solid. Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. Miller --- net/dccp/ccids/Kconfig | 4 ++++ net/dccp/ccids/Makefile | 4 +++- net/dccp/ccids/lib/Makefile | 3 +++ net/dccp/ccids/lib/packet_history.c | 6 ++++++ 4 files changed, 16 insertions(+), 1 deletion(-) create mode 100644 net/dccp/ccids/lib/Makefile diff --git a/net/dccp/ccids/Kconfig b/net/dccp/ccids/Kconfig index 67f9c06bd179..7684d83946a4 100644 --- a/net/dccp/ccids/Kconfig +++ b/net/dccp/ccids/Kconfig @@ -22,4 +22,8 @@ config IP_DCCP_CCID3 If in doubt, say M. 
+config IP_DCCP_TFRC_LIB + depends on IP_DCCP_CCID3 + def_tristate IP_DCCP_CCID3 + endmenu diff --git a/net/dccp/ccids/Makefile b/net/dccp/ccids/Makefile index 29eb1b61fdbd..956f79f50743 100644 --- a/net/dccp/ccids/Makefile +++ b/net/dccp/ccids/Makefile @@ -1,3 +1,5 @@ obj-$(CONFIG_IP_DCCP_CCID3) += dccp_ccid3.o -dccp_ccid3-y := ccid3.o lib/loss_interval.o lib/packet_history.o +dccp_ccid3-y := ccid3.o + +obj-y += lib/ diff --git a/net/dccp/ccids/lib/Makefile b/net/dccp/ccids/lib/Makefile new file mode 100644 index 000000000000..e9a91e238c88 --- /dev/null +++ b/net/dccp/ccids/lib/Makefile @@ -0,0 +1,3 @@ +obj-$(CONFIG_IP_DCCP_TFRC_LIB) += dccp_tfrc_lib.o + +dccp_tfrc_lib-y := loss_interval.o packet_history.o diff --git a/net/dccp/ccids/lib/packet_history.c b/net/dccp/ccids/lib/packet_history.c index 2d9ef5ae0bf6..f252a9555e31 100644 --- a/net/dccp/ccids/lib/packet_history.c +++ b/net/dccp/ccids/lib/packet_history.c @@ -35,6 +35,7 @@ */ #include +#include #include #include "packet_history.h" @@ -197,3 +198,8 @@ void dccp_tx_hist_purge(struct dccp_tx_hist *hist, struct list_head *list) } EXPORT_SYMBOL_GPL(dccp_tx_hist_purge); + +MODULE_AUTHOR("Ian McDonald , " + "Arnaldo Carvalho de Melo "); +MODULE_DESCRIPTION("DCCP TFRC library"); +MODULE_LICENSE("GPL"); From 36729c1a73c354a155db18d64d9e79b86c446fcf Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Sun, 28 Aug 2005 00:47:15 -0300 Subject: [PATCH 483/584] [DCCP]: Move the calc_X routines to dccp_tfrc_lib Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. 
Miller --- net/dccp/ccids/ccid3.c | 624 +--------------------------- net/dccp/ccids/lib/Makefile | 2 +- net/dccp/ccids/lib/tfrc.h | 22 + net/dccp/ccids/lib/tfrc_equation.c | 644 +++++++++++++++++++++++++++++ 4 files changed, 672 insertions(+), 620 deletions(-) create mode 100644 net/dccp/ccids/lib/tfrc.h create mode 100644 net/dccp/ccids/lib/tfrc_equation.c diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c index 12548fbde86b..a215c46d6f1b 100644 --- a/net/dccp/ccids/ccid3.c +++ b/net/dccp/ccids/ccid3.c @@ -39,6 +39,7 @@ #include "../dccp.h" #include "lib/packet_history.h" #include "lib/loss_interval.h" +#include "lib/tfrc.h" #include "ccid3.h" /* @@ -112,595 +113,6 @@ static inline void ccid3_hc_tx_set_state(struct sock *sk, hctx->ccid3hctx_state = state; } -#define CALCX_ARRSIZE 500 - -#define CALCX_SPLIT 50000 -/* equivalent to 0.05 */ - -static const u32 calcx_lookup[CALCX_ARRSIZE][2] = { - { 37172 , 8172 }, - { 53499 , 11567 }, - { 66664 , 14180 }, - { 78298 , 16388 }, - { 89021 , 18339 }, - { 99147 , 20108 }, - { 108858 , 21738 }, - { 118273 , 23260 }, - { 127474 , 24693 }, - { 136520 , 26052 }, - { 145456 , 27348 }, - { 154316 , 28589 }, - { 163130 , 29783 }, - { 171919 , 30935 }, - { 180704 , 32049 }, - { 189502 , 33130 }, - { 198328 , 34180 }, - { 207194 , 35202 }, - { 216114 , 36198 }, - { 225097 , 37172 }, - { 234153 , 38123 }, - { 243294 , 39055 }, - { 252527 , 39968 }, - { 261861 , 40864 }, - { 271305 , 41743 }, - { 280866 , 42607 }, - { 290553 , 43457 }, - { 300372 , 44293 }, - { 310333 , 45117 }, - { 320441 , 45929 }, - { 330705 , 46729 }, - { 341131 , 47518 }, - { 351728 , 48297 }, - { 362501 , 49066 }, - { 373460 , 49826 }, - { 384609 , 50577 }, - { 395958 , 51320 }, - { 407513 , 52054 }, - { 419281 , 52780 }, - { 431270 , 53499 }, - { 443487 , 54211 }, - { 455940 , 54916 }, - { 468635 , 55614 }, - { 481581 , 56306 }, - { 494785 , 56991 }, - { 508254 , 57671 }, - { 521996 , 58345 }, - { 536019 , 59014 }, - { 550331 , 59677 }, - { 564939 , 
60335 }, - { 579851 , 60988 }, - { 595075 , 61636 }, - { 610619 , 62279 }, - { 626491 , 62918 }, - { 642700 , 63553 }, - { 659253 , 64183 }, - { 676158 , 64809 }, - { 693424 , 65431 }, - { 711060 , 66050 }, - { 729073 , 66664 }, - { 747472 , 67275 }, - { 766266 , 67882 }, - { 785464 , 68486 }, - { 805073 , 69087 }, - { 825103 , 69684 }, - { 845562 , 70278 }, - { 866460 , 70868 }, - { 887805 , 71456 }, - { 909606 , 72041 }, - { 931873 , 72623 }, - { 954614 , 73202 }, - { 977839 , 73778 }, - { 1001557 , 74352 }, - { 1025777 , 74923 }, - { 1050508 , 75492 }, - { 1075761 , 76058 }, - { 1101544 , 76621 }, - { 1127867 , 77183 }, - { 1154739 , 77741 }, - { 1182172 , 78298 }, - { 1210173 , 78852 }, - { 1238753 , 79405 }, - { 1267922 , 79955 }, - { 1297689 , 80503 }, - { 1328066 , 81049 }, - { 1359060 , 81593 }, - { 1390684 , 82135 }, - { 1422947 , 82675 }, - { 1455859 , 83213 }, - { 1489430 , 83750 }, - { 1523671 , 84284 }, - { 1558593 , 84817 }, - { 1594205 , 85348 }, - { 1630518 , 85878 }, - { 1667543 , 86406 }, - { 1705290 , 86932 }, - { 1743770 , 87457 }, - { 1782994 , 87980 }, - { 1822973 , 88501 }, - { 1863717 , 89021 }, - { 1905237 , 89540 }, - { 1947545 , 90057 }, - { 1990650 , 90573 }, - { 2034566 , 91087 }, - { 2079301 , 91600 }, - { 2124869 , 92111 }, - { 2171279 , 92622 }, - { 2218543 , 93131 }, - { 2266673 , 93639 }, - { 2315680 , 94145 }, - { 2365575 , 94650 }, - { 2416371 , 95154 }, - { 2468077 , 95657 }, - { 2520707 , 96159 }, - { 2574271 , 96660 }, - { 2628782 , 97159 }, - { 2684250 , 97658 }, - { 2740689 , 98155 }, - { 2798110 , 98651 }, - { 2856524 , 99147 }, - { 2915944 , 99641 }, - { 2976382 , 100134 }, - { 3037850 , 100626 }, - { 3100360 , 101117 }, - { 3163924 , 101608 }, - { 3228554 , 102097 }, - { 3294263 , 102586 }, - { 3361063 , 103073 }, - { 3428966 , 103560 }, - { 3497984 , 104045 }, - { 3568131 , 104530 }, - { 3639419 , 105014 }, - { 3711860 , 105498 }, - { 3785467 , 105980 }, - { 3860253 , 106462 }, - { 3936229 , 106942 }, - { 4013410 , 
107422 }, - { 4091808 , 107902 }, - { 4171435 , 108380 }, - { 4252306 , 108858 }, - { 4334431 , 109335 }, - { 4417825 , 109811 }, - { 4502501 , 110287 }, - { 4588472 , 110762 }, - { 4675750 , 111236 }, - { 4764349 , 111709 }, - { 4854283 , 112182 }, - { 4945564 , 112654 }, - { 5038206 , 113126 }, - { 5132223 , 113597 }, - { 5227627 , 114067 }, - { 5324432 , 114537 }, - { 5422652 , 115006 }, - { 5522299 , 115474 }, - { 5623389 , 115942 }, - { 5725934 , 116409 }, - { 5829948 , 116876 }, - { 5935446 , 117342 }, - { 6042439 , 117808 }, - { 6150943 , 118273 }, - { 6260972 , 118738 }, - { 6372538 , 119202 }, - { 6485657 , 119665 }, - { 6600342 , 120128 }, - { 6716607 , 120591 }, - { 6834467 , 121053 }, - { 6953935 , 121514 }, - { 7075025 , 121976 }, - { 7197752 , 122436 }, - { 7322131 , 122896 }, - { 7448175 , 123356 }, - { 7575898 , 123815 }, - { 7705316 , 124274 }, - { 7836442 , 124733 }, - { 7969291 , 125191 }, - { 8103877 , 125648 }, - { 8240216 , 126105 }, - { 8378321 , 126562 }, - { 8518208 , 127018 }, - { 8659890 , 127474 }, - { 8803384 , 127930 }, - { 8948702 , 128385 }, - { 9095861 , 128840 }, - { 9244875 , 129294 }, - { 9395760 , 129748 }, - { 9548529 , 130202 }, - { 9703198 , 130655 }, - { 9859782 , 131108 }, - { 10018296 , 131561 }, - { 10178755 , 132014 }, - { 10341174 , 132466 }, - { 10505569 , 132917 }, - { 10671954 , 133369 }, - { 10840345 , 133820 }, - { 11010757 , 134271 }, - { 11183206 , 134721 }, - { 11357706 , 135171 }, - { 11534274 , 135621 }, - { 11712924 , 136071 }, - { 11893673 , 136520 }, - { 12076536 , 136969 }, - { 12261527 , 137418 }, - { 12448664 , 137867 }, - { 12637961 , 138315 }, - { 12829435 , 138763 }, - { 13023101 , 139211 }, - { 13218974 , 139658 }, - { 13417071 , 140106 }, - { 13617407 , 140553 }, - { 13819999 , 140999 }, - { 14024862 , 141446 }, - { 14232012 , 141892 }, - { 14441465 , 142339 }, - { 14653238 , 142785 }, - { 14867346 , 143230 }, - { 15083805 , 143676 }, - { 15302632 , 144121 }, - { 15523842 , 144566 }, - { 15747453 , 
145011 }, - { 15973479 , 145456 }, - { 16201939 , 145900 }, - { 16432847 , 146345 }, - { 16666221 , 146789 }, - { 16902076 , 147233 }, - { 17140429 , 147677 }, - { 17381297 , 148121 }, - { 17624696 , 148564 }, - { 17870643 , 149007 }, - { 18119154 , 149451 }, - { 18370247 , 149894 }, - { 18623936 , 150336 }, - { 18880241 , 150779 }, - { 19139176 , 151222 }, - { 19400759 , 151664 }, - { 19665007 , 152107 }, - { 19931936 , 152549 }, - { 20201564 , 152991 }, - { 20473907 , 153433 }, - { 20748982 , 153875 }, - { 21026807 , 154316 }, - { 21307399 , 154758 }, - { 21590773 , 155199 }, - { 21876949 , 155641 }, - { 22165941 , 156082 }, - { 22457769 , 156523 }, - { 22752449 , 156964 }, - { 23049999 , 157405 }, - { 23350435 , 157846 }, - { 23653774 , 158287 }, - { 23960036 , 158727 }, - { 24269236 , 159168 }, - { 24581392 , 159608 }, - { 24896521 , 160049 }, - { 25214642 , 160489 }, - { 25535772 , 160929 }, - { 25859927 , 161370 }, - { 26187127 , 161810 }, - { 26517388 , 162250 }, - { 26850728 , 162690 }, - { 27187165 , 163130 }, - { 27526716 , 163569 }, - { 27869400 , 164009 }, - { 28215234 , 164449 }, - { 28564236 , 164889 }, - { 28916423 , 165328 }, - { 29271815 , 165768 }, - { 29630428 , 166208 }, - { 29992281 , 166647 }, - { 30357392 , 167087 }, - { 30725779 , 167526 }, - { 31097459 , 167965 }, - { 31472452 , 168405 }, - { 31850774 , 168844 }, - { 32232445 , 169283 }, - { 32617482 , 169723 }, - { 33005904 , 170162 }, - { 33397730 , 170601 }, - { 33792976 , 171041 }, - { 34191663 , 171480 }, - { 34593807 , 171919 }, - { 34999428 , 172358 }, - { 35408544 , 172797 }, - { 35821174 , 173237 }, - { 36237335 , 173676 }, - { 36657047 , 174115 }, - { 37080329 , 174554 }, - { 37507197 , 174993 }, - { 37937673 , 175433 }, - { 38371773 , 175872 }, - { 38809517 , 176311 }, - { 39250924 , 176750 }, - { 39696012 , 177190 }, - { 40144800 , 177629 }, - { 40597308 , 178068 }, - { 41053553 , 178507 }, - { 41513554 , 178947 }, - { 41977332 , 179386 }, - { 42444904 , 179825 }, - { 42916290 , 
180265 }, - { 43391509 , 180704 }, - { 43870579 , 181144 }, - { 44353520 , 181583 }, - { 44840352 , 182023 }, - { 45331092 , 182462 }, - { 45825761 , 182902 }, - { 46324378 , 183342 }, - { 46826961 , 183781 }, - { 47333531 , 184221 }, - { 47844106 , 184661 }, - { 48358706 , 185101 }, - { 48877350 , 185541 }, - { 49400058 , 185981 }, - { 49926849 , 186421 }, - { 50457743 , 186861 }, - { 50992759 , 187301 }, - { 51531916 , 187741 }, - { 52075235 , 188181 }, - { 52622735 , 188622 }, - { 53174435 , 189062 }, - { 53730355 , 189502 }, - { 54290515 , 189943 }, - { 54854935 , 190383 }, - { 55423634 , 190824 }, - { 55996633 , 191265 }, - { 56573950 , 191706 }, - { 57155606 , 192146 }, - { 57741621 , 192587 }, - { 58332014 , 193028 }, - { 58926806 , 193470 }, - { 59526017 , 193911 }, - { 60129666 , 194352 }, - { 60737774 , 194793 }, - { 61350361 , 195235 }, - { 61967446 , 195677 }, - { 62589050 , 196118 }, - { 63215194 , 196560 }, - { 63845897 , 197002 }, - { 64481179 , 197444 }, - { 65121061 , 197886 }, - { 65765563 , 198328 }, - { 66414705 , 198770 }, - { 67068508 , 199213 }, - { 67726992 , 199655 }, - { 68390177 , 200098 }, - { 69058085 , 200540 }, - { 69730735 , 200983 }, - { 70408147 , 201426 }, - { 71090343 , 201869 }, - { 71777343 , 202312 }, - { 72469168 , 202755 }, - { 73165837 , 203199 }, - { 73867373 , 203642 }, - { 74573795 , 204086 }, - { 75285124 , 204529 }, - { 76001380 , 204973 }, - { 76722586 , 205417 }, - { 77448761 , 205861 }, - { 78179926 , 206306 }, - { 78916102 , 206750 }, - { 79657310 , 207194 }, - { 80403571 , 207639 }, - { 81154906 , 208084 }, - { 81911335 , 208529 }, - { 82672880 , 208974 }, - { 83439562 , 209419 }, - { 84211402 , 209864 }, - { 84988421 , 210309 }, - { 85770640 , 210755 }, - { 86558080 , 211201 }, - { 87350762 , 211647 }, - { 88148708 , 212093 }, - { 88951938 , 212539 }, - { 89760475 , 212985 }, - { 90574339 , 213432 }, - { 91393551 , 213878 }, - { 92218133 , 214325 }, - { 93048107 , 214772 }, - { 93883493 , 215219 }, - { 94724314 , 
215666 }, - { 95570590 , 216114 }, - { 96422343 , 216561 }, - { 97279594 , 217009 }, - { 98142366 , 217457 }, - { 99010679 , 217905 }, - { 99884556 , 218353 }, - { 100764018 , 218801 }, - { 101649086 , 219250 }, - { 102539782 , 219698 }, - { 103436128 , 220147 }, - { 104338146 , 220596 }, - { 105245857 , 221046 }, - { 106159284 , 221495 }, - { 107078448 , 221945 }, - { 108003370 , 222394 }, - { 108934074 , 222844 }, - { 109870580 , 223294 }, - { 110812910 , 223745 }, - { 111761087 , 224195 }, - { 112715133 , 224646 }, - { 113675069 , 225097 }, - { 114640918 , 225548 }, - { 115612702 , 225999 }, - { 116590442 , 226450 }, - { 117574162 , 226902 }, - { 118563882 , 227353 }, - { 119559626 , 227805 }, - { 120561415 , 228258 }, - { 121569272 , 228710 }, - { 122583219 , 229162 }, - { 123603278 , 229615 }, - { 124629471 , 230068 }, - { 125661822 , 230521 }, - { 126700352 , 230974 }, - { 127745083 , 231428 }, - { 128796039 , 231882 }, - { 129853241 , 232336 }, - { 130916713 , 232790 }, - { 131986475 , 233244 }, - { 133062553 , 233699 }, - { 134144966 , 234153 }, - { 135233739 , 234608 }, - { 136328894 , 235064 }, - { 137430453 , 235519 }, - { 138538440 , 235975 }, - { 139652876 , 236430 }, - { 140773786 , 236886 }, - { 141901190 , 237343 }, - { 143035113 , 237799 }, - { 144175576 , 238256 }, - { 145322604 , 238713 }, - { 146476218 , 239170 }, - { 147636442 , 239627 }, - { 148803298 , 240085 }, - { 149976809 , 240542 }, - { 151156999 , 241000 }, - { 152343890 , 241459 }, - { 153537506 , 241917 }, - { 154737869 , 242376 }, - { 155945002 , 242835 }, - { 157158929 , 243294 }, - { 158379673 , 243753 }, - { 159607257 , 244213 }, - { 160841704 , 244673 }, - { 162083037 , 245133 }, - { 163331279 , 245593 }, - { 164586455 , 246054 }, - { 165848586 , 246514 }, - { 167117696 , 246975 }, - { 168393810 , 247437 }, - { 169676949 , 247898 }, - { 170967138 , 248360 }, - { 172264399 , 248822 }, - { 173568757 , 249284 }, - { 174880235 , 249747 }, - { 176198856 , 250209 }, - { 177524643 , 
250672 }, - { 178857621 , 251136 }, - { 180197813 , 251599 }, - { 181545242 , 252063 }, - { 182899933 , 252527 }, - { 184261908 , 252991 }, - { 185631191 , 253456 }, - { 187007807 , 253920 }, - { 188391778 , 254385 }, - { 189783129 , 254851 }, - { 191181884 , 255316 }, - { 192588065 , 255782 }, - { 194001698 , 256248 }, - { 195422805 , 256714 }, - { 196851411 , 257181 }, - { 198287540 , 257648 }, - { 199731215 , 258115 }, - { 201182461 , 258582 }, - { 202641302 , 259050 }, - { 204107760 , 259518 }, - { 205581862 , 259986 }, - { 207063630 , 260454 }, - { 208553088 , 260923 }, - { 210050262 , 261392 }, - { 211555174 , 261861 }, - { 213067849 , 262331 }, - { 214588312 , 262800 }, - { 216116586 , 263270 }, - { 217652696 , 263741 }, - { 219196666 , 264211 }, - { 220748520 , 264682 }, - { 222308282 , 265153 }, - { 223875978 , 265625 }, - { 225451630 , 266097 }, - { 227035265 , 266569 }, - { 228626905 , 267041 }, - { 230226576 , 267514 }, - { 231834302 , 267986 }, - { 233450107 , 268460 }, - { 235074016 , 268933 }, - { 236706054 , 269407 }, - { 238346244 , 269881 }, - { 239994613 , 270355 }, - { 241651183 , 270830 }, - { 243315981 , 271305 } -}; - -/* Calculate the send rate as per section 3.1 of RFC3448 - -Returns send rate in bytes per second - -Integer maths and lookups are used as not allowed floating point in kernel - -The function for Xcalc as per section 3.1 of RFC3448 is: - -X = s - ------------------------------------------------------------- - R*sqrt(2*b*p/3) + (t_RTO * (3*sqrt(3*b*p/8) * p * (1+32*p^2))) - -where -X is the trasmit rate in bytes/second -s is the packet size in bytes -R is the round trip time in seconds -p is the loss event rate, between 0 and 1.0, of the number of loss events - as a fraction of the number of packets transmitted -t_RTO is the TCP retransmission timeout value in seconds -b is the number of packets acknowledged by a single TCP acknowledgement - -we can assume that b = 1 and t_RTO is 4 * R. 
With this the equation becomes: - -X = s - ----------------------------------------------------------------------- - R * sqrt(2 * p / 3) + (12 * R * (sqrt(3 * p / 8) * p * (1 + 32 * p^2))) - - -which we can break down into: - -X = s - -------- - R * f(p) - -where f(p) = sqrt(2 * p / 3) + (12 * sqrt(3 * p / 8) * p * (1 + 32 * p * p)) - -Function parameters: -s - bytes -R - RTT in usecs -p - loss rate (decimal fraction multiplied by 1,000,000) - -Returns Xcalc in bytes per second - -DON'T alter this code unless you run test cases against it as the code -has been manipulated to stop underflow/overlow. - -*/ -static u32 ccid3_calc_x(u16 s, u32 R, u32 p) -{ - int index; - u32 f; - u64 tmp1, tmp2; - - if (p < CALCX_SPLIT) - index = (p / (CALCX_SPLIT / CALCX_ARRSIZE)) - 1; - else - index = (p / (1000000 / CALCX_ARRSIZE)) - 1; - - if (index < 0) - /* p should be 0 unless there is a bug in my code */ - index = 0; - - if (R == 0) - R = 1; /* RTT can't be zero or else divide by zero */ - - BUG_ON(index >= CALCX_ARRSIZE); - - if (p >= CALCX_SPLIT) - f = calcx_lookup[index][0]; - else - f = calcx_lookup[index][1]; - - tmp1 = ((u64)s * 100000000); - tmp2 = ((u64)R * (u64)f); - do_div(tmp2,10000); - do_div(tmp1,tmp2); - /* don't alter above math unless you test due to overflow on 32 bit */ - - return (u32)tmp1; -} - /* Calculate new t_ipi (inter packet interval) by t_ipi = s / X_inst */ static inline void ccid3_calc_new_t_ipi(struct ccid3_hc_tx_sock *hctx) { @@ -737,9 +149,9 @@ static void ccid3_hc_tx_update_x(struct sock *sk) /* To avoid large error in calcX */ if (hctx->ccid3hctx_p >= TFRC_SMALLEST_P) { - hctx->ccid3hctx_x_calc = ccid3_calc_x(hctx->ccid3hctx_s, - hctx->ccid3hctx_rtt, - hctx->ccid3hctx_p); + hctx->ccid3hctx_x_calc = tfrc_calc_x(hctx->ccid3hctx_s, + hctx->ccid3hctx_rtt, + hctx->ccid3hctx_p); hctx->ccid3hctx_x = max_t(u32, min_t(u32, hctx->ccid3hctx_x_calc, 2 * hctx->ccid3hctx_x_recv), (hctx->ccid3hctx_s / @@ -1533,32 +945,6 @@ static void 
ccid3_hc_rx_insert_options(struct sock *sk, struct sk_buff *skb) &x_recv, sizeof(x_recv)); } -/* - * args: fvalue - function value to match - * returns: p closest to that value - * - * both fvalue and p are multiplied by 1,000,000 to use ints - */ -static u32 calcx_reverse_lookup(u32 fvalue) { - int ctr = 0; - int small; - - if (fvalue < calcx_lookup[0][1]) - return 0; - if (fvalue <= calcx_lookup[CALCX_ARRSIZE-1][1]) - small = 1; - else if (fvalue > calcx_lookup[CALCX_ARRSIZE-1][0]) - return 1000000; - else - small = 0; - while (fvalue > calcx_lookup[ctr][small]) - ctr++; - if (small) - return (CALCX_SPLIT * ctr / CALCX_ARRSIZE); - else - return (1000000 * ctr / CALCX_ARRSIZE) ; -} - /* calculate first loss interval * * returns estimated loss interval in usecs */ @@ -1627,7 +1013,7 @@ found: tmp2 = (u32)tmp1; fval = (hcrx->ccid3hcrx_s * 100000) / tmp2; /* do not alter order above or you will get overflow on 32 bit */ - p = calcx_reverse_lookup(fval); + p = tfrc_calc_x_reverse_lookup(fval); ccid3_pr_debug("%s, sk=%p, receive rate=%u bytes/s, implied " "loss rate=%u\n", dccp_role(sk), sk, x_recv, p); diff --git a/net/dccp/ccids/lib/Makefile b/net/dccp/ccids/lib/Makefile index e9a91e238c88..5f940a6cbaca 100644 --- a/net/dccp/ccids/lib/Makefile +++ b/net/dccp/ccids/lib/Makefile @@ -1,3 +1,3 @@ obj-$(CONFIG_IP_DCCP_TFRC_LIB) += dccp_tfrc_lib.o -dccp_tfrc_lib-y := loss_interval.o packet_history.o +dccp_tfrc_lib-y := loss_interval.o packet_history.o tfrc_equation.o diff --git a/net/dccp/ccids/lib/tfrc.h b/net/dccp/ccids/lib/tfrc.h new file mode 100644 index 000000000000..130c4c40cfe3 --- /dev/null +++ b/net/dccp/ccids/lib/tfrc.h @@ -0,0 +1,22 @@ +#ifndef _TFRC_H_ +#define _TFRC_H_ +/* + * net/dccp/ccids/lib/tfrc.h + * + * Copyright (c) 2005 The University of Waikato, Hamilton, New Zealand. 
+ * Copyright (c) 2005 Ian McDonald + * Copyright (c) 2005 Arnaldo Carvalho de Melo + * Copyright (c) 2003 Nils-Erik Mattsson, Joacim Haggmark, Magnus Erixzon + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + */ + +#include + +extern u32 tfrc_calc_x(u16 s, u32 R, u32 p); +extern u32 tfrc_calc_x_reverse_lookup(u32 fvalue); + +#endif /* _TFRC_H_ */ diff --git a/net/dccp/ccids/lib/tfrc_equation.c b/net/dccp/ccids/lib/tfrc_equation.c new file mode 100644 index 000000000000..d2b5933b4510 --- /dev/null +++ b/net/dccp/ccids/lib/tfrc_equation.c @@ -0,0 +1,644 @@ +/* + * net/dccp/ccids/lib/tfrc_equation.c + * + * Copyright (c) 2005 The University of Waikato, Hamilton, New Zealand. + * Copyright (c) 2005 Ian McDonald + * Copyright (c) 2005 Arnaldo Carvalho de Melo + * Copyright (c) 2003 Nils-Erik Mattsson, Joacim Haggmark, Magnus Erixzon + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ */ + +#include +#include + +#include +#include + +#include "tfrc.h" + +#define TFRC_CALC_X_ARRSIZE 500 + +#define TFRC_CALC_X_SPLIT 50000 +/* equivalent to 0.05 */ + +static const u32 tfrc_calc_x_lookup[TFRC_CALC_X_ARRSIZE][2] = { + { 37172, 8172 }, + { 53499, 11567 }, + { 66664, 14180 }, + { 78298, 16388 }, + { 89021, 18339 }, + { 99147, 20108 }, + { 108858, 21738 }, + { 118273, 23260 }, + { 127474, 24693 }, + { 136520, 26052 }, + { 145456, 27348 }, + { 154316, 28589 }, + { 163130, 29783 }, + { 171919, 30935 }, + { 180704, 32049 }, + { 189502, 33130 }, + { 198328, 34180 }, + { 207194, 35202 }, + { 216114, 36198 }, + { 225097, 37172 }, + { 234153, 38123 }, + { 243294, 39055 }, + { 252527, 39968 }, + { 261861, 40864 }, + { 271305, 41743 }, + { 280866, 42607 }, + { 290553, 43457 }, + { 300372, 44293 }, + { 310333, 45117 }, + { 320441, 45929 }, + { 330705, 46729 }, + { 341131, 47518 }, + { 351728, 48297 }, + { 362501, 49066 }, + { 373460, 49826 }, + { 384609, 50577 }, + { 395958, 51320 }, + { 407513, 52054 }, + { 419281, 52780 }, + { 431270, 53499 }, + { 443487, 54211 }, + { 455940, 54916 }, + { 468635, 55614 }, + { 481581, 56306 }, + { 494785, 56991 }, + { 508254, 57671 }, + { 521996, 58345 }, + { 536019, 59014 }, + { 550331, 59677 }, + { 564939, 60335 }, + { 579851, 60988 }, + { 595075, 61636 }, + { 610619, 62279 }, + { 626491, 62918 }, + { 642700, 63553 }, + { 659253, 64183 }, + { 676158, 64809 }, + { 693424, 65431 }, + { 711060, 66050 }, + { 729073, 66664 }, + { 747472, 67275 }, + { 766266, 67882 }, + { 785464, 68486 }, + { 805073, 69087 }, + { 825103, 69684 }, + { 845562, 70278 }, + { 866460, 70868 }, + { 887805, 71456 }, + { 909606, 72041 }, + { 931873, 72623 }, + { 954614, 73202 }, + { 977839, 73778 }, + { 1001557, 74352 }, + { 1025777, 74923 }, + { 1050508, 75492 }, + { 1075761, 76058 }, + { 1101544, 76621 }, + { 1127867, 77183 }, + { 1154739, 77741 }, + { 1182172, 78298 }, + { 1210173, 78852 }, + { 1238753, 79405 }, + { 1267922, 79955 }, + { 1297689, 80503 
}, + { 1328066, 81049 }, + { 1359060, 81593 }, + { 1390684, 82135 }, + { 1422947, 82675 }, + { 1455859, 83213 }, + { 1489430, 83750 }, + { 1523671, 84284 }, + { 1558593, 84817 }, + { 1594205, 85348 }, + { 1630518, 85878 }, + { 1667543, 86406 }, + { 1705290, 86932 }, + { 1743770, 87457 }, + { 1782994, 87980 }, + { 1822973, 88501 }, + { 1863717, 89021 }, + { 1905237, 89540 }, + { 1947545, 90057 }, + { 1990650, 90573 }, + { 2034566, 91087 }, + { 2079301, 91600 }, + { 2124869, 92111 }, + { 2171279, 92622 }, + { 2218543, 93131 }, + { 2266673, 93639 }, + { 2315680, 94145 }, + { 2365575, 94650 }, + { 2416371, 95154 }, + { 2468077, 95657 }, + { 2520707, 96159 }, + { 2574271, 96660 }, + { 2628782, 97159 }, + { 2684250, 97658 }, + { 2740689, 98155 }, + { 2798110, 98651 }, + { 2856524, 99147 }, + { 2915944, 99641 }, + { 2976382, 100134 }, + { 3037850, 100626 }, + { 3100360, 101117 }, + { 3163924, 101608 }, + { 3228554, 102097 }, + { 3294263, 102586 }, + { 3361063, 103073 }, + { 3428966, 103560 }, + { 3497984, 104045 }, + { 3568131, 104530 }, + { 3639419, 105014 }, + { 3711860, 105498 }, + { 3785467, 105980 }, + { 3860253, 106462 }, + { 3936229, 106942 }, + { 4013410, 107422 }, + { 4091808, 107902 }, + { 4171435, 108380 }, + { 4252306, 108858 }, + { 4334431, 109335 }, + { 4417825, 109811 }, + { 4502501, 110287 }, + { 4588472, 110762 }, + { 4675750, 111236 }, + { 4764349, 111709 }, + { 4854283, 112182 }, + { 4945564, 112654 }, + { 5038206, 113126 }, + { 5132223, 113597 }, + { 5227627, 114067 }, + { 5324432, 114537 }, + { 5422652, 115006 }, + { 5522299, 115474 }, + { 5623389, 115942 }, + { 5725934, 116409 }, + { 5829948, 116876 }, + { 5935446, 117342 }, + { 6042439, 117808 }, + { 6150943, 118273 }, + { 6260972, 118738 }, + { 6372538, 119202 }, + { 6485657, 119665 }, + { 6600342, 120128 }, + { 6716607, 120591 }, + { 6834467, 121053 }, + { 6953935, 121514 }, + { 7075025, 121976 }, + { 7197752, 122436 }, + { 7322131, 122896 }, + { 7448175, 123356 }, + { 7575898, 123815 }, + { 
7705316, 124274 }, + { 7836442, 124733 }, + { 7969291, 125191 }, + { 8103877, 125648 }, + { 8240216, 126105 }, + { 8378321, 126562 }, + { 8518208, 127018 }, + { 8659890, 127474 }, + { 8803384, 127930 }, + { 8948702, 128385 }, + { 9095861, 128840 }, + { 9244875, 129294 }, + { 9395760, 129748 }, + { 9548529, 130202 }, + { 9703198, 130655 }, + { 9859782, 131108 }, + { 10018296, 131561 }, + { 10178755, 132014 }, + { 10341174, 132466 }, + { 10505569, 132917 }, + { 10671954, 133369 }, + { 10840345, 133820 }, + { 11010757, 134271 }, + { 11183206, 134721 }, + { 11357706, 135171 }, + { 11534274, 135621 }, + { 11712924, 136071 }, + { 11893673, 136520 }, + { 12076536, 136969 }, + { 12261527, 137418 }, + { 12448664, 137867 }, + { 12637961, 138315 }, + { 12829435, 138763 }, + { 13023101, 139211 }, + { 13218974, 139658 }, + { 13417071, 140106 }, + { 13617407, 140553 }, + { 13819999, 140999 }, + { 14024862, 141446 }, + { 14232012, 141892 }, + { 14441465, 142339 }, + { 14653238, 142785 }, + { 14867346, 143230 }, + { 15083805, 143676 }, + { 15302632, 144121 }, + { 15523842, 144566 }, + { 15747453, 145011 }, + { 15973479, 145456 }, + { 16201939, 145900 }, + { 16432847, 146345 }, + { 16666221, 146789 }, + { 16902076, 147233 }, + { 17140429, 147677 }, + { 17381297, 148121 }, + { 17624696, 148564 }, + { 17870643, 149007 }, + { 18119154, 149451 }, + { 18370247, 149894 }, + { 18623936, 150336 }, + { 18880241, 150779 }, + { 19139176, 151222 }, + { 19400759, 151664 }, + { 19665007, 152107 }, + { 19931936, 152549 }, + { 20201564, 152991 }, + { 20473907, 153433 }, + { 20748982, 153875 }, + { 21026807, 154316 }, + { 21307399, 154758 }, + { 21590773, 155199 }, + { 21876949, 155641 }, + { 22165941, 156082 }, + { 22457769, 156523 }, + { 22752449, 156964 }, + { 23049999, 157405 }, + { 23350435, 157846 }, + { 23653774, 158287 }, + { 23960036, 158727 }, + { 24269236, 159168 }, + { 24581392, 159608 }, + { 24896521, 160049 }, + { 25214642, 160489 }, + { 25535772, 160929 }, + { 25859927, 161370 }, + { 
26187127, 161810 }, + { 26517388, 162250 }, + { 26850728, 162690 }, + { 27187165, 163130 }, + { 27526716, 163569 }, + { 27869400, 164009 }, + { 28215234, 164449 }, + { 28564236, 164889 }, + { 28916423, 165328 }, + { 29271815, 165768 }, + { 29630428, 166208 }, + { 29992281, 166647 }, + { 30357392, 167087 }, + { 30725779, 167526 }, + { 31097459, 167965 }, + { 31472452, 168405 }, + { 31850774, 168844 }, + { 32232445, 169283 }, + { 32617482, 169723 }, + { 33005904, 170162 }, + { 33397730, 170601 }, + { 33792976, 171041 }, + { 34191663, 171480 }, + { 34593807, 171919 }, + { 34999428, 172358 }, + { 35408544, 172797 }, + { 35821174, 173237 }, + { 36237335, 173676 }, + { 36657047, 174115 }, + { 37080329, 174554 }, + { 37507197, 174993 }, + { 37937673, 175433 }, + { 38371773, 175872 }, + { 38809517, 176311 }, + { 39250924, 176750 }, + { 39696012, 177190 }, + { 40144800, 177629 }, + { 40597308, 178068 }, + { 41053553, 178507 }, + { 41513554, 178947 }, + { 41977332, 179386 }, + { 42444904, 179825 }, + { 42916290, 180265 }, + { 43391509, 180704 }, + { 43870579, 181144 }, + { 44353520, 181583 }, + { 44840352, 182023 }, + { 45331092, 182462 }, + { 45825761, 182902 }, + { 46324378, 183342 }, + { 46826961, 183781 }, + { 47333531, 184221 }, + { 47844106, 184661 }, + { 48358706, 185101 }, + { 48877350, 185541 }, + { 49400058, 185981 }, + { 49926849, 186421 }, + { 50457743, 186861 }, + { 50992759, 187301 }, + { 51531916, 187741 }, + { 52075235, 188181 }, + { 52622735, 188622 }, + { 53174435, 189062 }, + { 53730355, 189502 }, + { 54290515, 189943 }, + { 54854935, 190383 }, + { 55423634, 190824 }, + { 55996633, 191265 }, + { 56573950, 191706 }, + { 57155606, 192146 }, + { 57741621, 192587 }, + { 58332014, 193028 }, + { 58926806, 193470 }, + { 59526017, 193911 }, + { 60129666, 194352 }, + { 60737774, 194793 }, + { 61350361, 195235 }, + { 61967446, 195677 }, + { 62589050, 196118 }, + { 63215194, 196560 }, + { 63845897, 197002 }, + { 64481179, 197444 }, + { 65121061, 197886 }, + { 
65765563, 198328 }, + { 66414705, 198770 }, + { 67068508, 199213 }, + { 67726992, 199655 }, + { 68390177, 200098 }, + { 69058085, 200540 }, + { 69730735, 200983 }, + { 70408147, 201426 }, + { 71090343, 201869 }, + { 71777343, 202312 }, + { 72469168, 202755 }, + { 73165837, 203199 }, + { 73867373, 203642 }, + { 74573795, 204086 }, + { 75285124, 204529 }, + { 76001380, 204973 }, + { 76722586, 205417 }, + { 77448761, 205861 }, + { 78179926, 206306 }, + { 78916102, 206750 }, + { 79657310, 207194 }, + { 80403571, 207639 }, + { 81154906, 208084 }, + { 81911335, 208529 }, + { 82672880, 208974 }, + { 83439562, 209419 }, + { 84211402, 209864 }, + { 84988421, 210309 }, + { 85770640, 210755 }, + { 86558080, 211201 }, + { 87350762, 211647 }, + { 88148708, 212093 }, + { 88951938, 212539 }, + { 89760475, 212985 }, + { 90574339, 213432 }, + { 91393551, 213878 }, + { 92218133, 214325 }, + { 93048107, 214772 }, + { 93883493, 215219 }, + { 94724314, 215666 }, + { 95570590, 216114 }, + { 96422343, 216561 }, + { 97279594, 217009 }, + { 98142366, 217457 }, + { 99010679, 217905 }, + { 99884556, 218353 }, + { 100764018, 218801 }, + { 101649086, 219250 }, + { 102539782, 219698 }, + { 103436128, 220147 }, + { 104338146, 220596 }, + { 105245857, 221046 }, + { 106159284, 221495 }, + { 107078448, 221945 }, + { 108003370, 222394 }, + { 108934074, 222844 }, + { 109870580, 223294 }, + { 110812910, 223745 }, + { 111761087, 224195 }, + { 112715133, 224646 }, + { 113675069, 225097 }, + { 114640918, 225548 }, + { 115612702, 225999 }, + { 116590442, 226450 }, + { 117574162, 226902 }, + { 118563882, 227353 }, + { 119559626, 227805 }, + { 120561415, 228258 }, + { 121569272, 228710 }, + { 122583219, 229162 }, + { 123603278, 229615 }, + { 124629471, 230068 }, + { 125661822, 230521 }, + { 126700352, 230974 }, + { 127745083, 231428 }, + { 128796039, 231882 }, + { 129853241, 232336 }, + { 130916713, 232790 }, + { 131986475, 233244 }, + { 133062553, 233699 }, + { 134144966, 234153 }, + { 135233739, 234608 }, 
+ { 136328894, 235064 }, + { 137430453, 235519 }, + { 138538440, 235975 }, + { 139652876, 236430 }, + { 140773786, 236886 }, + { 141901190, 237343 }, + { 143035113, 237799 }, + { 144175576, 238256 }, + { 145322604, 238713 }, + { 146476218, 239170 }, + { 147636442, 239627 }, + { 148803298, 240085 }, + { 149976809, 240542 }, + { 151156999, 241000 }, + { 152343890, 241459 }, + { 153537506, 241917 }, + { 154737869, 242376 }, + { 155945002, 242835 }, + { 157158929, 243294 }, + { 158379673, 243753 }, + { 159607257, 244213 }, + { 160841704, 244673 }, + { 162083037, 245133 }, + { 163331279, 245593 }, + { 164586455, 246054 }, + { 165848586, 246514 }, + { 167117696, 246975 }, + { 168393810, 247437 }, + { 169676949, 247898 }, + { 170967138, 248360 }, + { 172264399, 248822 }, + { 173568757, 249284 }, + { 174880235, 249747 }, + { 176198856, 250209 }, + { 177524643, 250672 }, + { 178857621, 251136 }, + { 180197813, 251599 }, + { 181545242, 252063 }, + { 182899933, 252527 }, + { 184261908, 252991 }, + { 185631191, 253456 }, + { 187007807, 253920 }, + { 188391778, 254385 }, + { 189783129, 254851 }, + { 191181884, 255316 }, + { 192588065, 255782 }, + { 194001698, 256248 }, + { 195422805, 256714 }, + { 196851411, 257181 }, + { 198287540, 257648 }, + { 199731215, 258115 }, + { 201182461, 258582 }, + { 202641302, 259050 }, + { 204107760, 259518 }, + { 205581862, 259986 }, + { 207063630, 260454 }, + { 208553088, 260923 }, + { 210050262, 261392 }, + { 211555174, 261861 }, + { 213067849, 262331 }, + { 214588312, 262800 }, + { 216116586, 263270 }, + { 217652696, 263741 }, + { 219196666, 264211 }, + { 220748520, 264682 }, + { 222308282, 265153 }, + { 223875978, 265625 }, + { 225451630, 266097 }, + { 227035265, 266569 }, + { 228626905, 267041 }, + { 230226576, 267514 }, + { 231834302, 267986 }, + { 233450107, 268460 }, + { 235074016, 268933 }, + { 236706054, 269407 }, + { 238346244, 269881 }, + { 239994613, 270355 }, + { 241651183, 270830 }, + { 243315981, 271305 } +}; + +/* Calculate the 
send rate as per section 3.1 of RFC3448 + +Returns send rate in bytes per second + +Integer maths and lookups are used as floating point is not allowed in the kernel + +The function for Xcalc as per section 3.1 of RFC3448 is: + +X = s + ------------------------------------------------------------- + R*sqrt(2*b*p/3) + (t_RTO * (3*sqrt(3*b*p/8) * p * (1+32*p^2))) + +where +X is the transmit rate in bytes/second +s is the packet size in bytes +R is the round trip time in seconds +p is the loss event rate, between 0 and 1.0, of the number of loss events + as a fraction of the number of packets transmitted +t_RTO is the TCP retransmission timeout value in seconds +b is the number of packets acknowledged by a single TCP acknowledgement + +we can assume that b = 1 and t_RTO is 4 * R. With this the equation becomes: + +X = s + ----------------------------------------------------------------------- + R * sqrt(2 * p / 3) + (12 * R * (sqrt(3 * p / 8) * p * (1 + 32 * p^2))) + + +which we can break down into: + +X = s + -------- + R * f(p) + +where f(p) = sqrt(2 * p / 3) + (12 * sqrt(3 * p / 8) * p * (1 + 32 * p * p)) + +Function parameters: +s - bytes +R - RTT in usecs +p - loss rate (decimal fraction multiplied by 1,000,000) + +Returns Xcalc in bytes per second + +DON'T alter this code unless you run test cases against it as the code +has been manipulated to stop underflow/overflow. 
+ +*/ +u32 tfrc_calc_x(u16 s, u32 R, u32 p) +{ + int index; + u32 f; + u64 tmp1, tmp2; + + if (p < TFRC_CALC_X_SPLIT) + index = (p / (TFRC_CALC_X_SPLIT / TFRC_CALC_X_ARRSIZE)) - 1; + else + index = (p / (1000000 / TFRC_CALC_X_ARRSIZE)) - 1; + + if (index < 0) + /* p should be 0 unless there is a bug in my code */ + index = 0; + + if (R == 0) + R = 1; /* RTT can't be zero or else divide by zero */ + + BUG_ON(index >= TFRC_CALC_X_ARRSIZE); + + if (p >= TFRC_CALC_X_SPLIT) + f = tfrc_calc_x_lookup[index][0]; + else + f = tfrc_calc_x_lookup[index][1]; + + tmp1 = ((u64)s * 100000000); + tmp2 = ((u64)R * (u64)f); + do_div(tmp2, 10000); + do_div(tmp1, tmp2); + /* Don't alter above math unless you test due to overflow on 32 bit */ + + return (u32)tmp1; +} + +EXPORT_SYMBOL_GPL(tfrc_calc_x); + +/* + * args: fvalue - function value to match + * returns: p closest to that value + * + * both fvalue and p are multiplied by 1,000,000 to use ints + */ +u32 tfrc_calc_x_reverse_lookup(u32 fvalue) +{ + int ctr = 0; + int small; + + if (fvalue < tfrc_calc_x_lookup[0][1]) + return 0; + + if (fvalue <= tfrc_calc_x_lookup[TFRC_CALC_X_ARRSIZE - 1][1]) + small = 1; + else if (fvalue > tfrc_calc_x_lookup[TFRC_CALC_X_ARRSIZE - 1][0]) + return 1000000; + else + small = 0; + + while (fvalue > tfrc_calc_x_lookup[ctr][small]) + ctr++; + + if (small) + return TFRC_CALC_X_SPLIT * ctr / TFRC_CALC_X_ARRSIZE; + else + return 1000000 * ctr / TFRC_CALC_X_ARRSIZE; +} + +EXPORT_SYMBOL_GPL(tfrc_calc_x_reverse_lookup); From 072ab6c68e3dd158b68d97eaff16734474d2f8f8 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Sun, 28 Aug 2005 01:19:14 -0300 Subject: [PATCH 484/584] [CCID3]: Move ccid3_hc_rx_add_hist to packet_history.c Renaming it to dccp_rx_hist_add_packet. Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. 
Miller --- net/dccp/ccids/ccid3.c | 122 +--------------------------- net/dccp/ccids/ccid3.h | 6 -- net/dccp/ccids/lib/packet_history.c | 111 +++++++++++++++++++++++++ net/dccp/ccids/lib/packet_history.h | 11 +++ 4 files changed, 124 insertions(+), 126 deletions(-) diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c index a215c46d6f1b..849f5580efbd 100644 --- a/net/dccp/ccids/ccid3.c +++ b/net/dccp/ccids/ccid3.c @@ -744,125 +744,6 @@ static inline void ccid3_hc_rx_set_state(struct sock *sk, hcrx->ccid3hcrx_state = state; } -static int ccid3_hc_rx_add_hist(struct sock *sk, - struct dccp_rx_hist_entry *packet) -{ - struct dccp_sock *dp = dccp_sk(sk); - struct ccid3_hc_rx_sock *hcrx = dp->dccps_hc_rx_ccid_private; - struct dccp_rx_hist_entry *entry, *next, *iter; - u8 num_later = 0; - - iter = dccp_rx_hist_head(&hcrx->ccid3hcrx_hist); - if (iter == NULL) - dccp_rx_hist_add_entry(&hcrx->ccid3hcrx_hist, packet); - else { - const u64 seqno = packet->dccphrx_seqno; - - if (after48(seqno, iter->dccphrx_seqno)) - dccp_rx_hist_add_entry(&hcrx->ccid3hcrx_hist, packet); - else { - if (dccp_rx_hist_entry_data_packet(iter)) - num_later = 1; - - list_for_each_entry_continue(iter, - &hcrx->ccid3hcrx_hist, - dccphrx_node) { - if (after48(seqno, iter->dccphrx_seqno)) { - dccp_rx_hist_add_entry(&iter->dccphrx_node, - packet); - goto trim_history; - } - - if (dccp_rx_hist_entry_data_packet(iter)) - num_later++; - - if (num_later == TFRC_RECV_NUM_LATE_LOSS) { - dccp_rx_hist_entry_delete(ccid3_rx_hist, - packet); - ccid3_pr_debug("%s, sk=%p, packet" - "(%llu) already lost!\n", - dccp_role(sk), sk, - seqno); - return 1; - } - } - - if (num_later < TFRC_RECV_NUM_LATE_LOSS) - dccp_rx_hist_add_entry(&hcrx->ccid3hcrx_hist, - packet); - /* - * FIXME: else what? should we destroy the packet - * like above? 
- */ - } - } - -trim_history: - /* - * Trim history (remove all packets after the NUM_LATE_LOSS + 1 - * data packets) - */ - num_later = TFRC_RECV_NUM_LATE_LOSS + 1; - - if (!list_empty(&hcrx->ccid3hcrx_li_hist)) { - list_for_each_entry_safe(entry, next, &hcrx->ccid3hcrx_hist, - dccphrx_node) { - if (num_later == 0) { - list_del_init(&entry->dccphrx_node); - dccp_rx_hist_entry_delete(ccid3_rx_hist, entry); - } else if (dccp_rx_hist_entry_data_packet(entry)) - --num_later; - } - } else { - int step = 0; - u8 win_count = 0; /* Not needed, but lets shut up gcc */ - int tmp; - /* - * We have no loss interval history so we need at least one - * rtt:s of data packets to approximate rtt. - */ - list_for_each_entry_safe(entry, next, &hcrx->ccid3hcrx_hist, - dccphrx_node) { - if (num_later == 0) { - switch (step) { - case 0: - step = 1; - /* OK, find next data packet */ - num_later = 1; - break; - case 1: - step = 2; - /* OK, find next data packet */ - num_later = 1; - win_count = entry->dccphrx_ccval; - break; - case 2: - tmp = win_count - entry->dccphrx_ccval; - if (tmp < 0) - tmp += TFRC_WIN_COUNT_LIMIT; - if (tmp > TFRC_WIN_COUNT_PER_RTT + 1) { - /* - * We have found a packet older - * than one rtt remove the rest - */ - step = 3; - } else /* OK, find next data packet */ - num_later = 1; - break; - case 3: - list_del_init(&entry->dccphrx_node); - dccp_rx_hist_entry_delete(ccid3_rx_hist, - entry); - break; - } - } else if (dccp_rx_hist_entry_data_packet(entry)) - --num_later; - } - } - - return 0; -} - static void ccid3_hc_rx_send_feedback(struct sock *sk) { struct dccp_sock *dp = dccp_sk(sk); @@ -1185,7 +1066,8 @@ static void ccid3_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb) win_count = packet->dccphrx_ccval; - ins = ccid3_hc_rx_add_hist(sk, packet); + ins = dccp_rx_hist_add_packet(ccid3_rx_hist, &hcrx->ccid3hcrx_hist, + &hcrx->ccid3hcrx_li_hist, packet); if (DCCP_SKB_CB(skb)->dccpd_type == DCCP_PKT_ACK) return; diff --git a/net/dccp/ccids/ccid3.h 
b/net/dccp/ccids/ccid3.h index f68d0b4e31e9..ee8cbace6630 100644 --- a/net/dccp/ccids/ccid3.h +++ b/net/dccp/ccids/ccid3.h @@ -51,17 +51,11 @@ /* In usecs - half the scheduling granularity as per RFC3448 4.6 */ #define TFRC_OPSYS_HALF_TIME_GRAN (USEC_PER_SEC / (2 * HZ)) -#define TFRC_WIN_COUNT_PER_RTT 4 -#define TFRC_WIN_COUNT_LIMIT 16 - /* In seconds */ #define TFRC_MAX_BACK_OFF_TIME 64 #define TFRC_SMALLEST_P 40 -/* Number of later packets received before one is considered lost */ -#define TFRC_RECV_NUM_LATE_LOSS 3 - enum ccid3_options { TFRC_OPT_LOSS_EVENT_RATE = 192, TFRC_OPT_LOSS_INTERVALS = 193, diff --git a/net/dccp/ccids/lib/packet_history.c b/net/dccp/ccids/lib/packet_history.c index f252a9555e31..e2576b45ac0b 100644 --- a/net/dccp/ccids/lib/packet_history.c +++ b/net/dccp/ccids/lib/packet_history.c @@ -113,6 +113,117 @@ struct dccp_rx_hist_entry * EXPORT_SYMBOL_GPL(dccp_rx_hist_find_data_packet); +int dccp_rx_hist_add_packet(struct dccp_rx_hist *hist, + struct list_head *rx_list, + struct list_head *li_list, + struct dccp_rx_hist_entry *packet) +{ + struct dccp_rx_hist_entry *entry, *next, *iter; + u8 num_later = 0; + + iter = dccp_rx_hist_head(rx_list); + if (iter == NULL) + dccp_rx_hist_add_entry(rx_list, packet); + else { + const u64 seqno = packet->dccphrx_seqno; + + if (after48(seqno, iter->dccphrx_seqno)) + dccp_rx_hist_add_entry(rx_list, packet); + else { + if (dccp_rx_hist_entry_data_packet(iter)) + num_later = 1; + + list_for_each_entry_continue(iter, rx_list, + dccphrx_node) { + if (after48(seqno, iter->dccphrx_seqno)) { + dccp_rx_hist_add_entry(&iter->dccphrx_node, + packet); + goto trim_history; + } + + if (dccp_rx_hist_entry_data_packet(iter)) + num_later++; + + if (num_later == TFRC_RECV_NUM_LATE_LOSS) { + dccp_rx_hist_entry_delete(hist, packet); + return 1; + } + } + + if (num_later < TFRC_RECV_NUM_LATE_LOSS) + dccp_rx_hist_add_entry(rx_list, packet); + /* + * FIXME: else what? should we destroy the packet + * like above? 
+ */ + } + } + +trim_history: + /* + * Trim history (remove all packets after the NUM_LATE_LOSS + 1 + * data packets) + */ + num_later = TFRC_RECV_NUM_LATE_LOSS + 1; + + if (!list_empty(li_list)) { + list_for_each_entry_safe(entry, next, rx_list, dccphrx_node) { + if (num_later == 0) { + list_del_init(&entry->dccphrx_node); + dccp_rx_hist_entry_delete(hist, entry); + } else if (dccp_rx_hist_entry_data_packet(entry)) + --num_later; + } + } else { + int step = 0; + u8 win_count = 0; /* Not needed, but lets shut up gcc */ + int tmp; + /* + * We have no loss interval history so we need at least one + * rtt:s of data packets to approximate rtt. + */ + list_for_each_entry_safe(entry, next, rx_list, dccphrx_node) { + if (num_later == 0) { + switch (step) { + case 0: + step = 1; + /* OK, find next data packet */ + num_later = 1; + break; + case 1: + step = 2; + /* OK, find next data packet */ + num_later = 1; + win_count = entry->dccphrx_ccval; + break; + case 2: + tmp = win_count - entry->dccphrx_ccval; + if (tmp < 0) + tmp += TFRC_WIN_COUNT_LIMIT; + if (tmp > TFRC_WIN_COUNT_PER_RTT + 1) { + /* + * We have found a packet older + * than one rtt remove the rest + */ + step = 3; + } else /* OK, find next data packet */ + num_later = 1; + break; + case 3: + list_del_init(&entry->dccphrx_node); + dccp_rx_hist_entry_delete(hist, entry); + break; + } + } else if (dccp_rx_hist_entry_data_packet(entry)) + --num_later; + } + } + + return 0; +} + +EXPORT_SYMBOL_GPL(dccp_rx_hist_add_packet); + struct dccp_tx_hist *dccp_tx_hist_new(const char *name) { struct dccp_tx_hist *hist = kmalloc(sizeof(*hist), GFP_ATOMIC); diff --git a/net/dccp/ccids/lib/packet_history.h b/net/dccp/ccids/lib/packet_history.h index 235828d822d9..ebfcb8e2c676 100644 --- a/net/dccp/ccids/lib/packet_history.h +++ b/net/dccp/ccids/lib/packet_history.h @@ -44,6 +44,12 @@ #include "../../dccp.h" +/* Number of later packets received before one is considered lost */ +#define TFRC_RECV_NUM_LATE_LOSS 3 + +#define 
TFRC_WIN_COUNT_PER_RTT 4 +#define TFRC_WIN_COUNT_LIMIT 16 + struct dccp_tx_hist_entry { struct list_head dccphtx_node; u64 dccphtx_seqno:48, @@ -182,4 +188,9 @@ static inline int entry->dccphrx_type == DCCP_PKT_DATAACK; } +extern int dccp_rx_hist_add_packet(struct dccp_rx_hist *hist, + struct list_head *rx_list, + struct list_head *li_list, + struct dccp_rx_hist_entry *packet); + #endif /* _DCCP_PKT_HIST_ */ From 29e4f8b3c340c4b2a0c6dd197b985e03826afd13 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Sun, 28 Aug 2005 02:00:28 -0300 Subject: [PATCH 485/584] [CCID3]: Move ccid3_hc_rx_detect_loss to packet_history.c Renaming it to dccp_rx_hist_detect_loss. Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. Miller --- net/dccp/ccids/ccid3.c | 83 ++--------------------------- net/dccp/ccids/lib/packet_history.c | 82 ++++++++++++++++++++++++++++ net/dccp/ccids/lib/packet_history.h | 3 ++ 3 files changed, 89 insertions(+), 79 deletions(-) diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c index 849f5580efbd..4ff6ede0f07d 100644 --- a/net/dccp/ccids/ccid3.c +++ b/net/dccp/ccids/ccid3.c @@ -927,86 +927,11 @@ static void ccid3_hc_rx_detect_loss(struct sock *sk) { struct dccp_sock *dp = dccp_sk(sk); struct ccid3_hc_rx_sock *hcrx = dp->dccps_hc_rx_ccid_private; - struct dccp_rx_hist_entry *entry, *next, *packet; - struct dccp_rx_hist_entry *a_loss = NULL; - struct dccp_rx_hist_entry *b_loss = NULL; - u64 seq_loss = DCCP_MAX_SEQNO + 1; - u8 win_loss = 0; - u8 num_later = TFRC_RECV_NUM_LATE_LOSS; + u8 win_loss; + const u64 seq_loss = dccp_rx_hist_detect_loss(&hcrx->ccid3hcrx_hist, + &hcrx->ccid3hcrx_li_hist, + &win_loss); - list_for_each_entry_safe(entry, next, &hcrx->ccid3hcrx_hist, - dccphrx_node) { - if (num_later == 0) { - b_loss = entry; - break; - } else if (dccp_rx_hist_entry_data_packet(entry)) - --num_later; - } - - if (b_loss == NULL) - goto out_update_li; - - num_later = 1; - - list_for_each_entry_safe_continue(entry, next, 
&hcrx->ccid3hcrx_hist, - dccphrx_node) { - if (num_later == 0) { - a_loss = entry; - break; - } else if (dccp_rx_hist_entry_data_packet(entry)) - --num_later; - } - - if (a_loss == NULL) { - if (list_empty(&hcrx->ccid3hcrx_li_hist)) { - /* no loss event have occured yet */ - LIMIT_NETDEBUG("%s: TODO: find a lost data packet by " - "comparing to initial seqno\n", - dccp_role(sk)); - goto out_update_li; - } else { - pr_info("%s: %s, sk=%p, ERROR! Less than 4 data " - "packets in history", - __FUNCTION__, dccp_role(sk), sk); - return; - } - } - - /* Locate a lost data packet */ - entry = packet = b_loss; - list_for_each_entry_safe_continue(entry, next, &hcrx->ccid3hcrx_hist, - dccphrx_node) { - u64 delta = dccp_delta_seqno(entry->dccphrx_seqno, - packet->dccphrx_seqno); - - if (delta != 0) { - if (dccp_rx_hist_entry_data_packet(packet)) - --delta; - /* - * FIXME: check this, probably this % usage is because - * in earlier drafts the ndp count was just 8 bits - * long, but now it cam be up to 24 bits long. 
- */ -#if 0 - if (delta % DCCP_NDP_LIMIT != - (packet->dccphrx_ndp - - entry->dccphrx_ndp) % DCCP_NDP_LIMIT) -#endif - if (delta != - packet->dccphrx_ndp - entry->dccphrx_ndp) { - seq_loss = entry->dccphrx_seqno; - dccp_inc_seqno(&seq_loss); - } - } - packet = entry; - if (packet == a_loss) - break; - } - - if (seq_loss != DCCP_MAX_SEQNO + 1) - win_loss = a_loss->dccphrx_ccval; - -out_update_li: ccid3_hc_rx_update_li(sk, seq_loss, win_loss); } diff --git a/net/dccp/ccids/lib/packet_history.c b/net/dccp/ccids/lib/packet_history.c index e2576b45ac0b..d3f9d2053830 100644 --- a/net/dccp/ccids/lib/packet_history.c +++ b/net/dccp/ccids/lib/packet_history.c @@ -224,6 +224,88 @@ trim_history: EXPORT_SYMBOL_GPL(dccp_rx_hist_add_packet); +u64 dccp_rx_hist_detect_loss(struct list_head *rx_list, + struct list_head *li_list, u8 *win_loss) +{ + struct dccp_rx_hist_entry *entry, *next, *packet; + struct dccp_rx_hist_entry *a_loss = NULL; + struct dccp_rx_hist_entry *b_loss = NULL; + u64 seq_loss = DCCP_MAX_SEQNO + 1; + u8 num_later = TFRC_RECV_NUM_LATE_LOSS; + + list_for_each_entry_safe(entry, next, rx_list, dccphrx_node) { + if (num_later == 0) { + b_loss = entry; + break; + } else if (dccp_rx_hist_entry_data_packet(entry)) + --num_later; + } + + if (b_loss == NULL) + goto out; + + num_later = 1; + list_for_each_entry_safe_continue(entry, next, rx_list, dccphrx_node) { + if (num_later == 0) { + a_loss = entry; + break; + } else if (dccp_rx_hist_entry_data_packet(entry)) + --num_later; + } + + if (a_loss == NULL) { + if (list_empty(li_list)) { + /* no loss event have occured yet */ + LIMIT_NETDEBUG("%s: TODO: find a lost data packet by " + "comparing to initial seqno\n", + __FUNCTION__); + goto out; + } else { + LIMIT_NETDEBUG("%s: Less than 4 data pkts in history!", + __FUNCTION__); + goto out; + } + } + + /* Locate a lost data packet */ + entry = packet = b_loss; + list_for_each_entry_safe_continue(entry, next, rx_list, dccphrx_node) { + u64 delta = 
dccp_delta_seqno(entry->dccphrx_seqno, + packet->dccphrx_seqno); + + if (delta != 0) { + if (dccp_rx_hist_entry_data_packet(packet)) + --delta; + /* + * FIXME: check this, probably this % usage is because + * in earlier drafts the ndp count was just 8 bits + * long, but now it cam be up to 24 bits long. + */ +#if 0 + if (delta % DCCP_NDP_LIMIT != + (packet->dccphrx_ndp - + entry->dccphrx_ndp) % DCCP_NDP_LIMIT) +#endif + if (delta != packet->dccphrx_ndp - entry->dccphrx_ndp) { + seq_loss = entry->dccphrx_seqno; + dccp_inc_seqno(&seq_loss); + } + } + packet = entry; + if (packet == a_loss) + break; + } +out: + if (seq_loss != DCCP_MAX_SEQNO + 1) + *win_loss = a_loss->dccphrx_ccval; + else + *win_loss = 0; /* Paranoia */ + + return seq_loss; +} + +EXPORT_SYMBOL_GPL(dccp_rx_hist_detect_loss); + struct dccp_tx_hist *dccp_tx_hist_new(const char *name) { struct dccp_tx_hist *hist = kmalloc(sizeof(*hist), GFP_ATOMIC); diff --git a/net/dccp/ccids/lib/packet_history.h b/net/dccp/ccids/lib/packet_history.h index ebfcb8e2c676..fb90a91aa93d 100644 --- a/net/dccp/ccids/lib/packet_history.h +++ b/net/dccp/ccids/lib/packet_history.h @@ -193,4 +193,7 @@ extern int dccp_rx_hist_add_packet(struct dccp_rx_hist *hist, struct list_head *li_list, struct dccp_rx_hist_entry *packet); +extern u64 dccp_rx_hist_detect_loss(struct list_head *rx_list, + struct list_head *li_list, u8 *win_loss); + #endif /* _DCCP_PKT_HIST_ */ From 5f2c3b910744f68e1a507f027398f404b3feb5fb Mon Sep 17 00:00:00 2001 From: Harald Welte Date: Sat, 27 Aug 2005 22:37:03 -0700 Subject: [PATCH 486/584] [NETFILTER]: Add new iptables TTL target This new iptables target allows manipulation of the TTL of an IPv4 packet. Signed-off-by: Harald Welte Signed-off-by: David S. 
Miller --- include/linux/netfilter_ipv4/ipt_TTL.h | 21 +++++ net/ipv4/netfilter/Kconfig | 14 +++ net/ipv4/netfilter/Makefile | 1 + net/ipv4/netfilter/ipt_TTL.c | 119 +++++++++++++++++++++++++ 4 files changed, 155 insertions(+) create mode 100644 include/linux/netfilter_ipv4/ipt_TTL.h create mode 100644 net/ipv4/netfilter/ipt_TTL.c diff --git a/include/linux/netfilter_ipv4/ipt_TTL.h b/include/linux/netfilter_ipv4/ipt_TTL.h new file mode 100644 index 000000000000..ee6611edc112 --- /dev/null +++ b/include/linux/netfilter_ipv4/ipt_TTL.h @@ -0,0 +1,21 @@ +/* TTL modification module for IP tables + * (C) 2000 by Harald Welte */ + +#ifndef _IPT_TTL_H +#define _IPT_TTL_H + +enum { + IPT_TTL_SET = 0, + IPT_TTL_INC, + IPT_TTL_DEC +}; + +#define IPT_TTL_MAXMODE IPT_TTL_DEC + +struct ipt_TTL_info { + u_int8_t mode; + u_int8_t ttl; +}; + + +#endif diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig index c4213f3de505..e046f5521814 100644 --- a/net/ipv4/netfilter/Kconfig +++ b/net/ipv4/netfilter/Kconfig @@ -664,6 +664,20 @@ config IP_NF_TARGET_CLASSIFY To compile it as a module, choose M here. If unsure, say N. +config IP_NF_TARGET_TTL + tristate 'TTL target support' + depends on IP_NF_MANGLE + help + This option adds a `TTL' target, which enables the user to modify + the TTL value of the IP header. + + While it is safe to decrement/lower the TTL, this target also enables + functionality to increment and set the TTL value of the IP header to + arbitrary values. This is EXTREMELY DANGEROUS since you can easily + create immortal packets that loop forever on the network. + + To compile it as a module, choose M here. If unsure, say N. 
+ config IP_NF_TARGET_CONNMARK tristate 'CONNMARK target support' depends on IP_NF_CONNTRACK_MARK && IP_NF_MANGLE diff --git a/net/ipv4/netfilter/Makefile b/net/ipv4/netfilter/Makefile index 89cae69ee208..a7bd38f50522 100644 --- a/net/ipv4/netfilter/Makefile +++ b/net/ipv4/netfilter/Makefile @@ -85,6 +85,7 @@ obj-$(CONFIG_IP_NF_TARGET_ULOG) += ipt_ULOG.o obj-$(CONFIG_IP_NF_TARGET_TCPMSS) += ipt_TCPMSS.o obj-$(CONFIG_IP_NF_TARGET_NOTRACK) += ipt_NOTRACK.o obj-$(CONFIG_IP_NF_TARGET_CLUSTERIP) += ipt_CLUSTERIP.o +obj-$(CONFIG_IP_NF_TARGET_TTL) += ipt_TTL.o # generic ARP tables obj-$(CONFIG_IP_NF_ARPTABLES) += arp_tables.o diff --git a/net/ipv4/netfilter/ipt_TTL.c b/net/ipv4/netfilter/ipt_TTL.c new file mode 100644 index 000000000000..b9ae6a9382f3 --- /dev/null +++ b/net/ipv4/netfilter/ipt_TTL.c @@ -0,0 +1,119 @@ +/* TTL modification target for IP tables + * (C) 2000,2005 by Harald Welte + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ * + */ + +#include +#include +#include +#include + +#include +#include + +MODULE_AUTHOR("Harald Welte "); +MODULE_DESCRIPTION("IP tables TTL modification module"); +MODULE_LICENSE("GPL"); + +static unsigned int +ipt_ttl_target(struct sk_buff **pskb, const struct net_device *in, + const struct net_device *out, unsigned int hooknum, + const void *targinfo, void *userinfo) +{ + struct iphdr *iph; + const struct ipt_TTL_info *info = targinfo; + u_int16_t diffs[2]; + int new_ttl; + + if (!skb_make_writable(pskb, (*pskb)->len)) + return NF_DROP; + + iph = (*pskb)->nh.iph; + + switch (info->mode) { + case IPT_TTL_SET: + new_ttl = info->ttl; + break; + case IPT_TTL_INC: + new_ttl = iph->ttl + info->ttl; + if (new_ttl > 255) + new_ttl = 255; + break; + case IPT_TTL_DEC: + new_ttl = iph->ttl - info->ttl; + if (new_ttl < 0) + new_ttl = 0; + break; + default: + new_ttl = iph->ttl; + break; + } + + if (new_ttl != iph->ttl) { + diffs[0] = htons(((unsigned)iph->ttl) << 8) ^ 0xFFFF; + iph->ttl = new_ttl; + diffs[1] = htons(((unsigned)iph->ttl) << 8); + iph->check = csum_fold(csum_partial((char *)diffs, + sizeof(diffs), + iph->check^0xFFFF)); + } + + return IPT_CONTINUE; +} + +static int ipt_ttl_checkentry(const char *tablename, + const struct ipt_entry *e, + void *targinfo, + unsigned int targinfosize, + unsigned int hook_mask) +{ + struct ipt_TTL_info *info = targinfo; + + if (targinfosize != IPT_ALIGN(sizeof(struct ipt_TTL_info))) { + printk(KERN_WARNING "ipt_TTL: targinfosize %u != %Zu\n", + targinfosize, + IPT_ALIGN(sizeof(struct ipt_TTL_info))); + return 0; + } + + if (strcmp(tablename, "mangle")) { + printk(KERN_WARNING "ipt_TTL: can only be called from " + "\"mangle\" table, not \"%s\"\n", tablename); + return 0; + } + + if (info->mode > IPT_TTL_MAXMODE) { + printk(KERN_WARNING "ipt_TTL: invalid or unknown Mode %u\n", + info->mode); + return 0; + } + + if ((info->mode != IPT_TTL_SET) && (info->ttl == 0)) + return 0; + + return 1; +} + +static struct ipt_target ipt_TTL = { 
+ .name = "TTL", + .target = ipt_ttl_target, + .checkentry = ipt_ttl_checkentry, + .me = THIS_MODULE, +}; + +static int __init init(void) +{ + return ipt_register_target(&ipt_TTL); +} + +static void __exit fini(void) +{ + ipt_unregister_target(&ipt_TTL); +} + +module_init(init); +module_exit(fini); From 0ac4f893f20ed524198da5ebf591fc0b9e2ced2f Mon Sep 17 00:00:00 2001 From: Harald Welte Date: Sat, 27 Aug 2005 22:37:30 -0700 Subject: [PATCH 487/584] [NETFILTER6]: Add new ip6tables HOPLIMIT target This target allows users to modify the hoplimit header field of the IPv6 header. Signed-off-by: Harald Welte Signed-off-by: David S. Miller --- include/linux/netfilter_ipv6/ip6t_HL.h | 22 +++++ net/ipv6/netfilter/Kconfig | 16 ++++ net/ipv6/netfilter/Makefile | 1 + net/ipv6/netfilter/ip6t_HL.c | 118 +++++++++++++++++++++++++ 4 files changed, 157 insertions(+) create mode 100644 include/linux/netfilter_ipv6/ip6t_HL.h create mode 100644 net/ipv6/netfilter/ip6t_HL.c diff --git a/include/linux/netfilter_ipv6/ip6t_HL.h b/include/linux/netfilter_ipv6/ip6t_HL.h new file mode 100644 index 000000000000..afb7813d45ab --- /dev/null +++ b/include/linux/netfilter_ipv6/ip6t_HL.h @@ -0,0 +1,22 @@ +/* Hop Limit modification module for ip6tables + * Maciej Soltysiak + * Based on HW's TTL module */ + +#ifndef _IP6T_HL_H +#define _IP6T_HL_H + +enum { + IP6T_HL_SET = 0, + IP6T_HL_INC, + IP6T_HL_DEC +}; + +#define IP6T_HL_MAXMODE IP6T_HL_DEC + +struct ip6t_HL_info { + u_int8_t mode; + u_int8_t hop_limit; +}; + + +#endif diff --git a/net/ipv6/netfilter/Kconfig b/net/ipv6/netfilter/Kconfig index 8a10c2d0d154..216fbe1ac65c 100644 --- a/net/ipv6/netfilter/Kconfig +++ b/net/ipv6/netfilter/Kconfig @@ -239,6 +239,22 @@ config IP6_NF_TARGET_MARK To compile it as a module, choose M here. If unsure, say N. 
+config IP6_NF_TARGET_HL + tristate 'HL (hoplimit) target support' + depends on IP6_NF_MANGLE + help + This option adds a `HL' target, which enables the user to decrement + the hoplimit value of the IPv6 header or set it to a given (lower) + value. + + While it is safe to decrement the hoplimit value, this option also + enables functionality to increment and set the hoplimit value of the + IPv6 header to arbitrary values. This is EXTREMELY DANGEROUS since + you can easily create immortal packets that loop forever on the + network. + + To compile it as a module, choose M here. If unsure, say N. + #dep_tristate ' LOG target support' CONFIG_IP6_NF_TARGET_LOG $CONFIG_IP6_NF_IPTABLES config IP6_NF_RAW tristate 'raw table support (required for TRACE)' diff --git a/net/ipv6/netfilter/Makefile b/net/ipv6/netfilter/Makefile index 70f6ba610102..bd9a16a5cbba 100644 --- a/net/ipv6/netfilter/Makefile +++ b/net/ipv6/netfilter/Makefile @@ -20,6 +20,7 @@ obj-$(CONFIG_IP6_NF_MATCH_PHYSDEV) += ip6t_physdev.o obj-$(CONFIG_IP6_NF_FILTER) += ip6table_filter.o obj-$(CONFIG_IP6_NF_MANGLE) += ip6table_mangle.o obj-$(CONFIG_IP6_NF_TARGET_MARK) += ip6t_MARK.o +obj-$(CONFIG_IP6_NF_TARGET_HL) += ip6t_HL.o obj-$(CONFIG_IP6_NF_QUEUE) += ip6_queue.o obj-$(CONFIG_IP6_NF_TARGET_LOG) += ip6t_LOG.o obj-$(CONFIG_IP6_NF_RAW) += ip6table_raw.o diff --git a/net/ipv6/netfilter/ip6t_HL.c b/net/ipv6/netfilter/ip6t_HL.c new file mode 100644 index 000000000000..8f5549b72720 --- /dev/null +++ b/net/ipv6/netfilter/ip6t_HL.c @@ -0,0 +1,118 @@ +/* + * Hop Limit modification target for ip6tables + * Maciej Soltysiak + * Based on HW's TTL module + * + * This software is distributed under the terms of GNU GPL + */ + +#include +#include +#include + +#include +#include + +MODULE_AUTHOR("Maciej Soltysiak "); +MODULE_DESCRIPTION("IP tables Hop Limit modification module"); +MODULE_LICENSE("GPL"); + +static unsigned int ip6t_hl_target(struct sk_buff **pskb, + const struct net_device *in, + const struct net_device *out, + 
unsigned int hooknum, + const void *targinfo, void *userinfo) +{ + struct ipv6hdr *ip6h; + const struct ip6t_HL_info *info = targinfo; + u_int16_t diffs[2]; + int new_hl; + + if (!skb_make_writable(pskb, (*pskb)->len)) + return NF_DROP; + + ip6h = (*pskb)->nh.ipv6h; + + switch (info->mode) { + case IP6T_HL_SET: + new_hl = info->hop_limit; + break; + case IP6T_HL_INC: + new_hl = ip6h->hop_limit + info->hop_limit; + if (new_hl > 255) + new_hl = 255; + break; + case IP6T_HL_DEC: + new_hl = ip6h->hop_limit - info->hop_limit; + if (new_hl < 0) + new_hl = 0; + break; + default: + new_hl = ip6h->hop_limit; + break; + } + + if (new_hl != ip6h->hop_limit) { + diffs[0] = htons(((unsigned)ip6h->hop_limit) << 8) ^ 0xFFFF; + ip6h->hop_limit = new_hl; + diffs[1] = htons(((unsigned)ip6h->hop_limit) << 8); + } + + return IP6T_CONTINUE; +} + +static int ip6t_hl_checkentry(const char *tablename, + const struct ip6t_entry *e, + void *targinfo, + unsigned int targinfosize, + unsigned int hook_mask) +{ + struct ip6t_HL_info *info = targinfo; + + if (targinfosize != IP6T_ALIGN(sizeof(struct ip6t_HL_info))) { + printk(KERN_WARNING "ip6t_HL: targinfosize %u != %Zu\n", + targinfosize, + IP6T_ALIGN(sizeof(struct ip6t_HL_info))); + return 0; + } + + if (strcmp(tablename, "mangle")) { + printk(KERN_WARNING "ip6t_HL: can only be called from " + "\"mangle\" table, not \"%s\"\n", tablename); + return 0; + } + + if (info->mode > IP6T_HL_MAXMODE) { + printk(KERN_WARNING "ip6t_HL: invalid or unknown Mode %u\n", + info->mode); + return 0; + } + + if ((info->mode != IP6T_HL_SET) && (info->hop_limit == 0)) { + printk(KERN_WARNING "ip6t_HL: increment/decrement doesn't " + "make sense with value 0\n"); + return 0; + } + + return 1; +} + +static struct ip6t_target ip6t_HL = { + .name = "HL", + .target = ip6t_hl_target, + .checkentry = ip6t_hl_checkentry, + .me = THIS_MODULE +}; + +static int __init init(void) +{ + return ip6t_register_target(&ip6t_HL); +} + +static void __exit fini(void) +{ + 
ip6t_unregister_target(&ip6t_HL); +} + +module_init(init); +module_exit(fini); From a84ffe430342db6ee585a5038f3242a6b4112d69 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Sun, 28 Aug 2005 04:51:32 -0300 Subject: [PATCH 488/584] [DCCP]: Introduce DCCP_SOCKOPT_PACKET_SIZE So that applications can set dccp_sock->dccps_pkt_size, that in turn is used in the CCID3 half connection init routines to set ccid3hc[tr]x_s and use it in its rate calculations. Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. Miller --- include/linux/dccp.h | 7 ++++-- net/dccp/ccids/ccid3.c | 12 +++++----- net/dccp/proto.c | 52 ++++++++++++++++++++++++++++++++++++++---- 3 files changed, 59 insertions(+), 12 deletions(-) diff --git a/include/linux/dccp.h b/include/linux/dccp.h index 9e3a1370b906..007c290f74d4 100644 --- a/include/linux/dccp.h +++ b/include/linux/dccp.h @@ -186,6 +186,9 @@ enum { DCCPF_MAX_CCID_SPECIFIC = 255, }; +/* DCCP socket options */ +#define DCCP_SOCKOPT_PACKET_SIZE 1 + #ifdef __KERNEL__ #include @@ -396,7 +399,7 @@ enum dccp_role { * @dccps_timestamp_echo - latest timestamp received on a TIMESTAMP option * @dccps_ext_header_len - network protocol overhead (IP/IPv6 options) * @dccps_pmtu_cookie - Last pmtu seen by socket - * @dccps_avg_packet_size - FIXME: has to be set by the app thru some setsockopt or ioctl, CCID3 uses it + * @dccps_packet_size - Set thru setsockopt * @dccps_role - Role of this sock, one of %dccp_role * @dccps_ndp_count - number of Non Data Packets since last data packet * @dccps_hc_rx_ackpkts - receiver half connection acked packets @@ -417,7 +420,7 @@ struct dccp_sock { unsigned long dccps_service; struct timeval dccps_timestamp_time; __u32 dccps_timestamp_echo; - __u32 dccps_avg_packet_size; + __u32 dccps_packet_size; unsigned long dccps_ndp_count; __u16 dccps_ext_header_len; __u32 dccps_pmtu_cookie; diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c index 4ff6ede0f07d..e22b0eefdbf9 100644 --- 
a/net/dccp/ccids/ccid3.c +++ b/net/dccp/ccids/ccid3.c @@ -672,9 +672,9 @@ static int ccid3_hc_tx_init(struct sock *sk) memset(hctx, 0, sizeof(*hctx)); - if (dp->dccps_avg_packet_size >= TFRC_MIN_PACKET_SIZE && - dp->dccps_avg_packet_size <= TFRC_MAX_PACKET_SIZE) - hctx->ccid3hctx_s = (u16)dp->dccps_avg_packet_size; + if (dp->dccps_packet_size >= TFRC_MIN_PACKET_SIZE && + dp->dccps_packet_size <= TFRC_MAX_PACKET_SIZE) + hctx->ccid3hctx_s = dp->dccps_packet_size; else hctx->ccid3hctx_s = TFRC_STD_PACKET_SIZE; @@ -1058,9 +1058,9 @@ static int ccid3_hc_rx_init(struct sock *sk) memset(hcrx, 0, sizeof(*hcrx)); - if (dp->dccps_avg_packet_size >= TFRC_MIN_PACKET_SIZE && - dp->dccps_avg_packet_size <= TFRC_MAX_PACKET_SIZE) - hcrx->ccid3hcrx_s = (u16)dp->dccps_avg_packet_size; + if (dp->dccps_packet_size >= TFRC_MIN_PACKET_SIZE && + dp->dccps_packet_size <= TFRC_MAX_PACKET_SIZE) + hcrx->ccid3hcrx_s = dp->dccps_packet_size; else hcrx->ccid3hcrx_s = TFRC_STD_PACKET_SIZE; diff --git a/net/dccp/proto.c b/net/dccp/proto.c index f4da6561e40c..18a0e69c9dc7 100644 --- a/net/dccp/proto.c +++ b/net/dccp/proto.c @@ -205,23 +205,67 @@ int dccp_ioctl(struct sock *sk, int cmd, unsigned long arg) int dccp_setsockopt(struct sock *sk, int level, int optname, char __user *optval, int optlen) { - dccp_pr_debug("entry\n"); + struct dccp_sock *dp; + int err; + int val; if (level != SOL_DCCP) return ip_setsockopt(sk, level, optname, optval, optlen); - return -EOPNOTSUPP; + if (optlen < sizeof(int)) + return -EINVAL; + + if (get_user(val, (int __user *)optval)) + return -EFAULT; + + lock_sock(sk); + + dp = dccp_sk(sk); + err = 0; + + switch (optname) { + case DCCP_SOCKOPT_PACKET_SIZE: + dp->dccps_packet_size = val; + break; + default: + err = -ENOPROTOOPT; + break; + } + + release_sock(sk); + return err; } int dccp_getsockopt(struct sock *sk, int level, int optname, char __user *optval, int __user *optlen) { - dccp_pr_debug("entry\n"); + struct dccp_sock *dp; + int val, len; if (level != SOL_DCCP) 
return ip_getsockopt(sk, level, optname, optval, optlen); - return -EOPNOTSUPP; + if (get_user(len, optlen)) + return -EFAULT; + + len = min_t(unsigned int, len, sizeof(int)); + if (len < 0) + return -EINVAL; + + dp = dccp_sk(sk); + + switch (optname) { + case DCCP_SOCKOPT_PACKET_SIZE: + val = dp->dccps_packet_size; + break; + default: + return -ENOPROTOOPT; + } + + if (put_user(len, optlen) || copy_to_user(optval, &val, len)) + return -EFAULT; + + return 0; } int dccp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, From c530cfb1ce1e8f230744c3f3bd86771f50725053 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 29 Aug 2005 02:15:54 -0300 Subject: [PATCH 489/584] [CCID3]: Call sk->sk_write_space(sk) when receiving a feedback packet This makes the send rate calculations behave way more closely to what is specified, with the jitter previously seen on x and x_recv disappearing completely on non lossy setups. This resembles the tcp_data_snd_check code, that possibly we'll end up using in DCCP as well, perhaps moving this code to inet_connection_sock. For now I'm doing the simplest implementation tho. Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. Miller --- net/dccp/ccids/ccid3.c | 5 +++++ net/dccp/dccp.h | 1 + net/dccp/ipv4.c | 1 + net/dccp/output.c | 13 +++++++++++++ 4 files changed, 20 insertions(+) diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c index e22b0eefdbf9..7bf3b3a91e97 100644 --- a/net/dccp/ccids/ccid3.c +++ b/net/dccp/ccids/ccid3.c @@ -556,6 +556,11 @@ static void ccid3_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb) /* remove all packets older than the one acked from history */ dccp_tx_hist_purge_older(ccid3_tx_hist, &hctx->ccid3hctx_hist, packet); + /* + * As we have calculated new ipi, delta, t_nom it is possible that + * we now can send a packet, so wake up dccp_wait_for_ccids. 
+ */ + sk->sk_write_space(sk); /* * Schedule no feedback timer to expire in diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h index 5cd9e794bbe2..33456c0d5937 100644 --- a/net/dccp/dccp.h +++ b/net/dccp/dccp.h @@ -127,6 +127,7 @@ extern void dccp_send_sync(struct sock *sk, const u64 seq, const enum dccp_pkt_type pkt_type); extern int dccp_write_xmit(struct sock *sk, struct sk_buff *skb, long *timeo); +extern void dccp_write_space(struct sock *sk); extern void dccp_init_xmit_timers(struct sock *sk); static inline void dccp_clear_xmit_timers(struct sock *sk) diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index 3cf2cbcdcafd..3fc75dbee4b8 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -1280,6 +1280,7 @@ static int dccp_v4_init_sock(struct sock *sk) dccp_init_xmit_timers(sk); inet_csk(sk)->icsk_rto = DCCP_TIMEOUT_INIT; sk->sk_state = DCCP_CLOSED; + sk->sk_write_space = dccp_write_space; dp->dccps_mss_cache = 536; dp->dccps_role = DCCP_ROLE_UNDEFINED; diff --git a/net/dccp/output.c b/net/dccp/output.c index 116f6db5678d..28de157a4326 100644 --- a/net/dccp/output.c +++ b/net/dccp/output.c @@ -150,6 +150,19 @@ unsigned int dccp_sync_mss(struct sock *sk, u32 pmtu) return mss_now; } +void dccp_write_space(struct sock *sk) +{ + read_lock(&sk->sk_callback_lock); + + if (sk->sk_sleep && waitqueue_active(sk->sk_sleep)) + wake_up_interruptible(sk->sk_sleep); + /* Should agree with poll, otherwise some programs break */ + if (sock_writeable(sk)) + sk_wake_async(sk, 2, POLL_OUT); + + read_unlock(&sk->sk_callback_lock); +} + /** * dccp_wait_for_ccid - Wait for ccid to tell us we can send a packet * @sk: socket to wait for From d992895ba2b27cf5adf1ba0ad6d27662adc54c5e Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Sun, 28 Aug 2005 16:49:11 +1000 Subject: [PATCH 490/584] [PATCH] Lazy page table copies in fork() Defer copying of ptes until fault time when it is possible to reconstruct the pte from backing store. Idea from Andi Kleen and Nick Piggin. 
Thanks to input from Rik van Riel and Linus and to Hugh for correcting my blundering. Ray Fucillo reports: "I applied this latest patch to a 2.6.12 kernel and found that it does resolve the problem. Prior to the patch on this machine, I was seeing about 23ms spent in fork for every 100MB of shared memory segment. After applying the patch, fork is taking about 1ms regardless of the shared memory size." Signed-off-by: Nick Piggin Signed-off-by: Linus Torvalds --- mm/memory.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/mm/memory.c b/mm/memory.c index e046b7e4b530..a596c1172248 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -498,6 +498,17 @@ int copy_page_range(struct mm_struct *dst_mm, struct mm_struct *src_mm, unsigned long addr = vma->vm_start; unsigned long end = vma->vm_end; + /* + * Don't copy ptes where a page fault will fill them correctly. + * Fork becomes much lighter when there are big shared or private + * readonly mappings. The tradeoff is that copy_page_range is more + * efficient than faulting. 
+ */ + if (!(vma->vm_flags & (VM_HUGETLB|VM_NONLINEAR|VM_RESERVED))) { + if (!vma->anon_vma) + return 0; + } + if (is_vm_hugetlb_page(vma)) return copy_hugetlb_page_range(dst_mm, src_mm, vma); From b74d0bd53406c23636707565d87ddaa55d315b26 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Wed, 10 Aug 2005 13:53:41 +1000 Subject: [PATCH 491/584] [PATCH] ppc64: four level pagetables fix With CONFIG_HUGETLB_PAGE=n: In file included from kernel/sysctl.c:37: include/linux/hugetlb.h:104:1: warning: "hugetlb_free_pgd_range" redefined In file included from include/linux/mm.h:36, from kernel/sysctl.c:23: include/asm/pgtable.h:492:1: warning: this is the location of the previous definition Signed-off-by: Andrew Morton Signed-off-by: David Gibson Signed-off-by: Paul Mackerras --- include/asm-ppc64/pgtable.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/include/asm-ppc64/pgtable.h b/include/asm-ppc64/pgtable.h index 5ea952ad7164..c83679c9d2b0 100644 --- a/include/asm-ppc64/pgtable.h +++ b/include/asm-ppc64/pgtable.h @@ -489,8 +489,10 @@ extern pgd_t swapper_pg_dir[]; extern void paging_init(void); +#ifdef CONFIG_HUGETLB_PAGE #define hugetlb_free_pgd_range(tlb, addr, end, floor, ceiling) \ free_pgd_range(tlb, addr, end, floor, ceiling) +#endif /* * This gets called at the end of handling a page fault, when From 2739e8cf113ce6e931608986a28bab5a42c0acd9 Mon Sep 17 00:00:00 2001 From: David Gibson Date: Tue, 30 Aug 2005 12:54:07 +1000 Subject: [PATCH 492/584] [PATCH] Restore lparmap.s include for iSeries A mistake rebasing the series of ppc64 head.S cleanup patches meant the #include of lparmap.s, needed for iSeries was lost. This patch puts it back again. 
Signed-off-by: David Gibson Signed-off-by: Paul Mackerras --- arch/ppc64/kernel/head.S | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/arch/ppc64/kernel/head.S b/arch/ppc64/kernel/head.S index cccec4902646..d98a9986c14f 100644 --- a/arch/ppc64/kernel/head.S +++ b/arch/ppc64/kernel/head.S @@ -1269,7 +1269,16 @@ initial_stab: .= 0x7000 .globl fwnmi_data_area fwnmi_data_area: - .space PAGE_SIZE + + /* iSeries does not use the FWNMI stuff, so it is safe to put + * this here, even if we later allow kernels that will boot on + * both pSeries and iSeries */ +#ifdef CONFIG_PPC_ISERIES + . = LPARMAP_PHYS +#include "lparmap.s" +#endif /* CONFIG_PPC_ISERIES */ + + . = 0x8000 /* * On pSeries, secondary processors spin in the following code. From 1e4a79e0458beca871c662028610ae3a88e3f1bf Mon Sep 17 00:00:00 2001 From: Stephen Rothwell Date: Mon, 29 Aug 2005 18:26:01 +1000 Subject: [PATCH 493/584] [PATCH] fix iSeries build for gcc-3.4 gcc 3.4 (at least the build we are using) puts the gcc generated .ident string into a .note section at the end of the files it compiles (gcc 3.3.3-hammer and gcc 4.0.2 Debian puts it in the .text section). This means that the lparmap.s file we produce in the iSeries build may end with a .note section. When we include it into head.S, the assembler can no longer resolve some of the conditional branches since the target label ends up too far away. This patch just forces us back to the .text section after including lparmap.s. The breakage was caused by my patch "iSeries build with newer assemblers and compilers" (sha1-id: 2ad56496627630ebc99f06af5f81ca23e17e014e). 
Signed-off-by: Stephen Rothwell Signed-off-by: Paul Mackerras --- arch/ppc64/kernel/head.S | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/arch/ppc64/kernel/head.S b/arch/ppc64/kernel/head.S index d98a9986c14f..036959775623 100644 --- a/arch/ppc64/kernel/head.S +++ b/arch/ppc64/kernel/head.S @@ -1276,6 +1276,11 @@ fwnmi_data_area: #ifdef CONFIG_PPC_ISERIES . = LPARMAP_PHYS #include "lparmap.s" +/* + * This ".text" is here for old compilers that generate a trailing + * .note section when compiling .c files to .s + */ + .text #endif /* CONFIG_PPC_ISERIES */ . = 0x8000 From 5c0b4b8759f78c31172088a91e10733fc014ccee Mon Sep 17 00:00:00 2001 From: Stephen Rothwell Date: Wed, 17 Aug 2005 16:37:35 +1000 Subject: [PATCH 494/584] [PATCH] Formatting changes to vio.c Formatting changes to vio.c to bring it closer to the kernel coding standard. Signed-off-by: Stephen Rothwell Signed-off-by: Paul Mackerras --- arch/ppc64/kernel/vio.c | 39 +++++++++++++++++---------------------- 1 file changed, 17 insertions(+), 22 deletions(-) diff --git a/arch/ppc64/kernel/vio.c b/arch/ppc64/kernel/vio.c index 3b790bafcaad..7798f01f77b4 100644 --- a/arch/ppc64/kernel/vio.c +++ b/arch/ppc64/kernel/vio.c @@ -37,9 +37,11 @@ static int (*is_match)(const struct vio_device_id *id, static void (*unregister_device_callback)(struct vio_dev *dev); static void (*release_device_callback)(struct device *dev); -/* convert from struct device to struct vio_dev and pass to driver. +/* + * Convert from struct device to struct vio_dev and pass to driver. * dev->driver has already been set by generic code because vio_bus_match - * succeeded. */ + * succeeded. 
+ */ static int vio_bus_probe(struct device *dev) { struct vio_dev *viodev = to_vio_dev(dev); @@ -51,9 +53,8 @@ static int vio_bus_probe(struct device *dev) return error; id = vio_match_device(viodrv->id_table, viodev); - if (id) { + if (id) error = viodrv->probe(viodev, id); - } return error; } @@ -64,9 +65,8 @@ static int vio_bus_remove(struct device *dev) struct vio_dev *viodev = to_vio_dev(dev); struct vio_driver *viodrv = to_vio_driver(dev->driver); - if (viodrv->remove) { + if (viodrv->remove) return viodrv->remove(viodev); - } /* driver can't remove */ return 1; @@ -102,16 +102,17 @@ void vio_unregister_driver(struct vio_driver *viodrv) EXPORT_SYMBOL(vio_unregister_driver); /** - * vio_match_device: - Tell if a VIO device has a matching VIO device id structure. - * @ids: array of VIO device id structures to search in - * @dev: the VIO device structure to match against + * vio_match_device: - Tell if a VIO device has a matching + * VIO device id structure. + * @ids: array of VIO device id structures to search in + * @dev: the VIO device structure to match against * * Used by a driver to check whether a VIO device present in the * system is in its list of supported devices. Returns the matching * vio_device_id structure or NULL if there is no match. 
*/ -static const struct vio_device_id * vio_match_device(const struct vio_device_id *ids, - const struct vio_dev *dev) +static const struct vio_device_id *vio_match_device( + const struct vio_device_id *ids, const struct vio_dev *dev) { while (ids->type) { if (is_match(ids, dev)) @@ -141,7 +142,8 @@ int __init vio_bus_init(int (*match_func)(const struct vio_device_id *id, return err; } - /* the fake parent of all vio devices, just to give us + /* + * The fake parent of all vio devices, just to give us * a nice directory */ err = device_register(&vio_bus_device.dev); @@ -162,7 +164,8 @@ static void __devinit vio_dev_release(struct device *dev) kfree(to_vio_dev(dev)); } -static ssize_t viodev_show_name(struct device *dev, struct device_attribute *attr, char *buf) +static ssize_t viodev_show_name(struct device *dev, + struct device_attribute *attr, char *buf) { return sprintf(buf, "%s\n", to_vio_dev(dev)->name); } @@ -262,16 +265,8 @@ static int vio_bus_match(struct device *dev, struct device_driver *drv) const struct vio_dev *vio_dev = to_vio_dev(dev); struct vio_driver *vio_drv = to_vio_driver(drv); const struct vio_device_id *ids = vio_drv->id_table; - const struct vio_device_id *found_id; - if (!ids) - return 0; - - found_id = vio_match_device(ids, vio_dev); - if (found_id) - return 1; - - return 0; + return (ids != NULL) && (vio_match_device(ids, vio_dev) != NULL); } struct bus_type vio_bus_type = { From b877b90f227fb9698d99fb70492d432362584082 Mon Sep 17 00:00:00 2001 From: Stephen Rothwell Date: Wed, 17 Aug 2005 16:40:12 +1000 Subject: [PATCH 495/584] [PATCH] Create vio_register_device Take some assignments out of vio_register_device_common and rename it to vio_register_device. 
Signed-off-by: Stephen Rothwell Signed-off-by: Paul Mackerras --- arch/ppc64/kernel/iSeries_vio.c | 15 +++++++++++---- arch/ppc64/kernel/pSeries_vio.c | 9 ++++++--- arch/ppc64/kernel/vio.c | 8 +------- include/asm-ppc64/vio.h | 4 +--- 4 files changed, 19 insertions(+), 17 deletions(-) diff --git a/arch/ppc64/kernel/iSeries_vio.c b/arch/ppc64/kernel/iSeries_vio.c index b4268cc4ba48..d0960a82708c 100644 --- a/arch/ppc64/kernel/iSeries_vio.c +++ b/arch/ppc64/kernel/iSeries_vio.c @@ -68,7 +68,7 @@ static void __init iommu_vio_init(void) } /** - * vio_register_device: - Register a new vio device. + * vio_register_device_iseries: - Register a new iSeries vio device. * @voidev: The device to register. */ static struct vio_dev *__init vio_register_device_iseries(char *type, @@ -76,7 +76,7 @@ static struct vio_dev *__init vio_register_device_iseries(char *type, { struct vio_dev *viodev; - /* allocate a vio_dev for this node */ + /* allocate a vio_dev for this device */ viodev = kmalloc(sizeof(struct vio_dev), GFP_KERNEL); if (!viodev) return NULL; @@ -84,8 +84,15 @@ static struct vio_dev *__init vio_register_device_iseries(char *type, snprintf(viodev->dev.bus_id, BUS_ID_SIZE, "%s%d", type, unit_num); - return vio_register_device_common(viodev, viodev->dev.bus_id, type, - unit_num, &vio_iommu_table); + viodev->name = viodev->dev.bus_id; + viodev->type = type; + viodev->unit_address = unit_num; + viodev->iommu_table = &vio_iommu_table; + if (vio_register_device(viodev) == NULL) { + kfree(viodev); + return NULL; + } + return viodev; } void __init probe_bus_iseries(void) diff --git a/arch/ppc64/kernel/pSeries_vio.c b/arch/ppc64/kernel/pSeries_vio.c index 338f9e1bdc09..81e94f8aa846 100644 --- a/arch/ppc64/kernel/pSeries_vio.c +++ b/arch/ppc64/kernel/pSeries_vio.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include @@ -181,11 +182,13 @@ struct vio_dev * __devinit vio_register_device_node(struct device_node *of_node) } snprintf(viodev->dev.bus_id, BUS_ID_SIZE, 
"%x", *unit_address); + viodev->name = of_node->name; + viodev->type = of_node->type; + viodev->unit_address = *unit_address; + viodev->iommu_table = vio_build_iommu_table(viodev); /* register with generic device framework */ - if (vio_register_device_common(viodev, of_node->name, of_node->type, - *unit_address, vio_build_iommu_table(viodev)) - == NULL) { + if (vio_register_device(viodev) == NULL) { /* XXX free TCE table */ kfree(viodev); return NULL; diff --git a/arch/ppc64/kernel/vio.c b/arch/ppc64/kernel/vio.c index 7798f01f77b4..3eab2290b12a 100644 --- a/arch/ppc64/kernel/vio.c +++ b/arch/ppc64/kernel/vio.c @@ -171,14 +171,8 @@ static ssize_t viodev_show_name(struct device *dev, } DEVICE_ATTR(name, S_IRUSR | S_IRGRP | S_IROTH, viodev_show_name, NULL); -struct vio_dev * __devinit vio_register_device_common( - struct vio_dev *viodev, char *name, char *type, - uint32_t unit_address, struct iommu_table *iommu_table) +struct vio_dev * __devinit vio_register_device(struct vio_dev *viodev) { - viodev->name = name; - viodev->type = type; - viodev->unit_address = unit_address; - viodev->iommu_table = iommu_table; /* init generic 'struct device' fields: */ viodev->dev.parent = &vio_bus_device.dev; viodev->dev.bus = &vio_bus_type; diff --git a/include/asm-ppc64/vio.h b/include/asm-ppc64/vio.h index a82e87c1c5fa..578e30193b7b 100644 --- a/include/asm-ppc64/vio.h +++ b/include/asm-ppc64/vio.h @@ -56,9 +56,7 @@ const void * vio_get_attribute(struct vio_dev *vdev, void* which, int* length); int vio_get_irq(struct vio_dev *dev); int vio_enable_interrupts(struct vio_dev *dev); int vio_disable_interrupts(struct vio_dev *dev); -extern struct vio_dev * __devinit vio_register_device_common( - struct vio_dev *viodev, char *name, char *type, - uint32_t unit_address, struct iommu_table *iommu_table); +extern struct vio_dev * __devinit vio_register_device(struct vio_dev *viodev); extern struct dma_mapping_ops vio_dma_ops; From 71d276d751ff5ddba28312aecefb174b20a5b970 Mon Sep 17 
00:00:00 2001 From: Stephen Rothwell Date: Wed, 17 Aug 2005 16:41:44 +1000 Subject: [PATCH 496/584] [PATCH] Create vio_bus_ops Create vio_bus_ops so that we just pass a structure to vio_bus_init instead of three separate function pointers. Rearrange vio.h to avoid forward references. vio.h only needs struct device_node from prom.h so remove the include and just declare it. Signed-off-by: Stephen Rothwell Signed-off-by: Paul Mackerras --- arch/ppc64/kernel/iSeries_vio.c | 6 +- arch/ppc64/kernel/pSeries_vio.c | 10 +++- arch/ppc64/kernel/vio.c | 24 +++----- drivers/scsi/ibmvscsi/rpa_vscsi.c | 1 + include/asm-ppc64/vio.h | 97 +++++++++++++++---------------- 5 files changed, 69 insertions(+), 69 deletions(-) diff --git a/arch/ppc64/kernel/iSeries_vio.c b/arch/ppc64/kernel/iSeries_vio.c index d0960a82708c..6b754b0c8344 100644 --- a/arch/ppc64/kernel/iSeries_vio.c +++ b/arch/ppc64/kernel/iSeries_vio.c @@ -131,6 +131,10 @@ static int vio_match_device_iseries(const struct vio_device_id *id, return strncmp(dev->type, id->type, strlen(id->type)) == 0; } +static struct vio_bus_ops vio_bus_ops_iseries = { + .match = vio_match_device_iseries, +}; + /** * vio_bus_init_iseries: - Initialize the iSeries virtual IO bus */ @@ -138,7 +142,7 @@ static int __init vio_bus_init_iseries(void) { int err; - err = vio_bus_init(vio_match_device_iseries, NULL, NULL); + err = vio_bus_init(&vio_bus_ops_iseries); if (err == 0) { iommu_vio_init(); vio_bus_device.iommu_table = &vio_iommu_table; diff --git a/arch/ppc64/kernel/pSeries_vio.c b/arch/ppc64/kernel/pSeries_vio.c index 81e94f8aa846..e0ae06f58f86 100644 --- a/arch/ppc64/kernel/pSeries_vio.c +++ b/arch/ppc64/kernel/pSeries_vio.c @@ -76,6 +76,12 @@ static void vio_unregister_device_pseries(struct vio_dev *viodev) device_remove_file(&viodev->dev, &dev_attr_devspec); } +static struct vio_bus_ops vio_bus_ops_pseries = { + .match = vio_match_device_pseries, + .unregister_device = vio_unregister_device_pseries, + .release_device = 
vio_release_device_pseries, +}; + /** * vio_bus_init_pseries: - Initialize the pSeries virtual IO bus */ @@ -83,9 +89,7 @@ static int __init vio_bus_init_pseries(void) { int err; - err = vio_bus_init(vio_match_device_pseries, - vio_unregister_device_pseries, - vio_release_device_pseries); + err = vio_bus_init(&vio_bus_ops_pseries); if (err == 0) probe_bus_pseries(); return err; diff --git a/arch/ppc64/kernel/vio.c b/arch/ppc64/kernel/vio.c index 3eab2290b12a..93c437a0911b 100644 --- a/arch/ppc64/kernel/vio.c +++ b/arch/ppc64/kernel/vio.c @@ -32,10 +32,7 @@ struct vio_dev vio_bus_device = { /* fake "parent" device */ .dev.bus = &vio_bus_type, }; -static int (*is_match)(const struct vio_device_id *id, - const struct vio_dev *dev); -static void (*unregister_device_callback)(struct vio_dev *dev); -static void (*release_device_callback)(struct device *dev); +static struct vio_bus_ops vio_bus_ops; /* * Convert from struct device to struct vio_dev and pass to driver. @@ -115,7 +112,7 @@ static const struct vio_device_id *vio_match_device( const struct vio_device_id *ids, const struct vio_dev *dev) { while (ids->type) { - if (is_match(ids, dev)) + if (vio_bus_ops.match(ids, dev)) return ids; ids++; } @@ -125,16 +122,11 @@ static const struct vio_device_id *vio_match_device( /** * vio_bus_init: - Initialize the virtual IO bus */ -int __init vio_bus_init(int (*match_func)(const struct vio_device_id *id, - const struct vio_dev *dev), - void (*unregister_dev)(struct vio_dev *), - void (*release_dev)(struct device *)) +int __init vio_bus_init(struct vio_bus_ops *ops) { int err; - is_match = match_func; - unregister_device_callback = unregister_dev; - release_device_callback = release_dev; + vio_bus_ops = *ops; err = bus_register(&vio_bus_type); if (err) { @@ -159,8 +151,8 @@ int __init vio_bus_init(int (*match_func)(const struct vio_device_id *id, /* vio_dev refcount hit 0 */ static void __devinit vio_dev_release(struct device *dev) { - if (release_device_callback) - 
release_device_callback(dev); + if (vio_bus_ops.release_device) + vio_bus_ops.release_device(dev); kfree(to_vio_dev(dev)); } @@ -191,8 +183,8 @@ struct vio_dev * __devinit vio_register_device(struct vio_dev *viodev) void __devinit vio_unregister_device(struct vio_dev *viodev) { - if (unregister_device_callback) - unregister_device_callback(viodev); + if (vio_bus_ops.unregister_device) + vio_bus_ops.unregister_device(viodev); device_remove_file(&viodev->dev, &dev_attr_name); device_unregister(&viodev->dev); } diff --git a/drivers/scsi/ibmvscsi/rpa_vscsi.c b/drivers/scsi/ibmvscsi/rpa_vscsi.c index 035f615817d7..8bf5652f1060 100644 --- a/drivers/scsi/ibmvscsi/rpa_vscsi.c +++ b/drivers/scsi/ibmvscsi/rpa_vscsi.c @@ -28,6 +28,7 @@ */ #include +#include #include #include #include diff --git a/include/asm-ppc64/vio.h b/include/asm-ppc64/vio.h index 578e30193b7b..85420bb37d58 100644 --- a/include/asm-ppc64/vio.h +++ b/include/asm-ppc64/vio.h @@ -19,13 +19,14 @@ #include #include #include + #include -#include #include -/* + +/* * Architecture-specific constants for drivers to * extract attributes of the device using vio_get_attribute() -*/ + */ #define VETH_MAC_ADDR "local-mac-address" #define VETH_MCAST_FILTER_SIZE "ibm,mac-address-filters" @@ -37,30 +38,19 @@ #define VIO_IRQ_DISABLE 0UL #define VIO_IRQ_ENABLE 1UL -struct vio_dev; -struct vio_driver; -struct vio_device_id; struct iommu_table; -int vio_register_driver(struct vio_driver *drv); -void vio_unregister_driver(struct vio_driver *drv); - -#ifdef CONFIG_PPC_PSERIES -struct vio_dev * __devinit vio_register_device_node( - struct device_node *node_vdev); -#endif -void __devinit vio_unregister_device(struct vio_dev *dev); -struct vio_dev *vio_find_node(struct device_node *vnode); - -const void * vio_get_attribute(struct vio_dev *vdev, void* which, int* length); -int vio_get_irq(struct vio_dev *dev); -int vio_enable_interrupts(struct vio_dev *dev); -int vio_disable_interrupts(struct vio_dev *dev); -extern struct vio_dev * 
__devinit vio_register_device(struct vio_dev *viodev); - -extern struct dma_mapping_ops vio_dma_ops; - -extern struct bus_type vio_bus_type; +/* + * The vio_dev structure is used to describe virtual I/O devices. + */ +struct vio_dev { + struct iommu_table *iommu_table; /* vio_map_* uses this */ + char *name; + char *type; + uint32_t unit_address; + unsigned int irq; + struct device dev; +}; struct vio_device_id { char *type; @@ -70,42 +60,51 @@ struct vio_device_id { struct vio_driver { struct list_head node; char *name; - const struct vio_device_id *id_table; /* NULL if wants all devices */ - int (*probe) (struct vio_dev *dev, const struct vio_device_id *id); /* New device inserted */ - int (*remove) (struct vio_dev *dev); /* Device removed (NULL if not a hot-plug capable driver) */ + const struct vio_device_id *id_table; + int (*probe)(struct vio_dev *dev, const struct vio_device_id *id); + int (*remove)(struct vio_dev *dev); unsigned long driver_data; - struct device_driver driver; }; +struct vio_bus_ops { + int (*match)(const struct vio_device_id *id, const struct vio_dev *dev); + void (*unregister_device)(struct vio_dev *); + void (*release_device)(struct device *); +}; + +extern struct dma_mapping_ops vio_dma_ops; +extern struct bus_type vio_bus_type; +extern struct vio_dev vio_bus_device; + +extern int vio_register_driver(struct vio_driver *drv); +extern void vio_unregister_driver(struct vio_driver *drv); + +extern struct vio_dev * __devinit vio_register_device(struct vio_dev *viodev); +extern void __devinit vio_unregister_device(struct vio_dev *dev); + +extern int vio_bus_init(struct vio_bus_ops *); + +#ifdef CONFIG_PPC_PSERIES +struct device_node; + +extern struct vio_dev * __devinit vio_register_device_node( + struct device_node *node_vdev); +extern struct vio_dev *vio_find_node(struct device_node *vnode); +extern const void *vio_get_attribute(struct vio_dev *vdev, void *which, + int *length); +extern int vio_enable_interrupts(struct vio_dev *dev); 
+extern int vio_disable_interrupts(struct vio_dev *dev); +#endif + static inline struct vio_driver *to_vio_driver(struct device_driver *drv) { return container_of(drv, struct vio_driver, driver); } -/* - * The vio_dev structure is used to describe virtual I/O devices. - */ -struct vio_dev { - struct iommu_table *iommu_table; /* vio_map_* uses this */ - char *name; - char *type; - uint32_t unit_address; - unsigned int irq; - - struct device dev; -}; - -extern struct vio_dev vio_bus_device; - static inline struct vio_dev *to_vio_dev(struct device *dev) { return container_of(dev, struct vio_dev, dev); } -extern int vio_bus_init(int (*is_match)(const struct vio_device_id *id, - const struct vio_dev *dev), - void (*)(struct vio_dev *), - void (*)(struct device *)); - #endif /* _ASM_VIO_H */ From fb120da678c517f72d4b39932062c2191827b331 Mon Sep 17 00:00:00 2001 From: Stephen Rothwell Date: Wed, 17 Aug 2005 16:42:59 +1000 Subject: [PATCH 497/584] [PATCH] Make MODULE_DEVICE_TABLE work for vio devices Make MODULE_DEVICE_TABLE work for vio devices. 
Signed-off-by: Stephen Rothwell Signed-off-by: Paul Mackerras --- arch/ppc64/kernel/vio.c | 2 +- drivers/block/viodasd.c | 2 +- drivers/cdrom/viocd.c | 2 +- drivers/char/hvc_vio.c | 2 +- drivers/char/hvcs.c | 2 +- drivers/char/viotape.c | 2 +- drivers/net/ibmveth.c | 2 +- drivers/net/iseries_veth.c | 2 +- drivers/scsi/ibmvscsi/ibmvscsi.c | 2 +- include/asm-ppc64/vio.h | 6 +----- include/linux/mod_devicetable.h | 7 ++++++- scripts/mod/file2alias.c | 19 +++++++++++++++++++ 12 files changed, 35 insertions(+), 15 deletions(-) diff --git a/arch/ppc64/kernel/vio.c b/arch/ppc64/kernel/vio.c index 93c437a0911b..c90e1dd875ce 100644 --- a/arch/ppc64/kernel/vio.c +++ b/arch/ppc64/kernel/vio.c @@ -111,7 +111,7 @@ EXPORT_SYMBOL(vio_unregister_driver); static const struct vio_device_id *vio_match_device( const struct vio_device_id *ids, const struct vio_dev *dev) { - while (ids->type) { + while (ids->type[0] != '\0') { if (vio_bus_ops.match(ids, dev)) return ids; ids++; diff --git a/drivers/block/viodasd.c b/drivers/block/viodasd.c index 46e56a25d2c8..e46ecd23b3ac 100644 --- a/drivers/block/viodasd.c +++ b/drivers/block/viodasd.c @@ -776,7 +776,7 @@ static int viodasd_remove(struct vio_dev *vdev) */ static struct vio_device_id viodasd_device_table[] __devinitdata = { { "viodasd", "" }, - { 0, } + { "", "" } }; MODULE_DEVICE_TABLE(vio, viodasd_device_table); diff --git a/drivers/cdrom/viocd.c b/drivers/cdrom/viocd.c index 38dd9ffbe8bc..0829db58462f 100644 --- a/drivers/cdrom/viocd.c +++ b/drivers/cdrom/viocd.c @@ -734,7 +734,7 @@ static int viocd_remove(struct vio_dev *vdev) */ static struct vio_device_id viocd_device_table[] __devinitdata = { { "viocd", "" }, - { 0, } + { "", "" } }; MODULE_DEVICE_TABLE(vio, viocd_device_table); diff --git a/drivers/char/hvc_vio.c b/drivers/char/hvc_vio.c index 60bb9152b832..78d681dc35a8 100644 --- a/drivers/char/hvc_vio.c +++ b/drivers/char/hvc_vio.c @@ -39,7 +39,7 @@ char hvc_driver_name[] = "hvc_console"; static struct vio_device_id 
hvc_driver_table[] __devinitdata = { {"serial", "hvterm1"}, - { NULL, } + { "", "" } }; MODULE_DEVICE_TABLE(vio, hvc_driver_table); diff --git a/drivers/char/hvcs.c b/drivers/char/hvcs.c index 3236d2404905..f47f009f9259 100644 --- a/drivers/char/hvcs.c +++ b/drivers/char/hvcs.c @@ -527,7 +527,7 @@ static int khvcsd(void *unused) static struct vio_device_id hvcs_driver_table[] __devinitdata= { {"serial-server", "hvterm2"}, - { NULL, } + { "", "" } }; MODULE_DEVICE_TABLE(vio, hvcs_driver_table); diff --git a/drivers/char/viotape.c b/drivers/char/viotape.c index 4764b4f9555d..0aff45fac2e6 100644 --- a/drivers/char/viotape.c +++ b/drivers/char/viotape.c @@ -991,7 +991,7 @@ static int viotape_remove(struct vio_dev *vdev) */ static struct vio_device_id viotape_device_table[] __devinitdata = { { "viotape", "" }, - { 0, } + { "", "" } }; MODULE_DEVICE_TABLE(vio, viotape_device_table); diff --git a/drivers/net/ibmveth.c b/drivers/net/ibmveth.c index c39b0609742a..32d5fabd4b10 100644 --- a/drivers/net/ibmveth.c +++ b/drivers/net/ibmveth.c @@ -1144,7 +1144,7 @@ static void ibmveth_proc_unregister_driver(void) static struct vio_device_id ibmveth_device_table[] __devinitdata= { { "network", "IBM,l-lan"}, - { 0,} + { "", "" } }; MODULE_DEVICE_TABLE(vio, ibmveth_device_table); diff --git a/drivers/net/iseries_veth.c b/drivers/net/iseries_veth.c index 55af32e9bf08..183ba97785b0 100644 --- a/drivers/net/iseries_veth.c +++ b/drivers/net/iseries_veth.c @@ -1370,7 +1370,7 @@ static int veth_probe(struct vio_dev *vdev, const struct vio_device_id *id) */ static struct vio_device_id veth_device_table[] __devinitdata = { { "vlan", "" }, - { NULL, NULL } + { "", "" } }; MODULE_DEVICE_TABLE(vio, veth_device_table); diff --git a/drivers/scsi/ibmvscsi/ibmvscsi.c b/drivers/scsi/ibmvscsi/ibmvscsi.c index fe09d145542a..2cb3c8340ca8 100644 --- a/drivers/scsi/ibmvscsi/ibmvscsi.c +++ b/drivers/scsi/ibmvscsi/ibmvscsi.c @@ -1442,7 +1442,7 @@ static int ibmvscsi_remove(struct vio_dev *vdev) */ static 
struct vio_device_id ibmvscsi_device_table[] __devinitdata = { {"vscsi", "IBM,v-scsi"}, - {0,} + { "", "" } }; MODULE_DEVICE_TABLE(vio, ibmvscsi_device_table); diff --git a/include/asm-ppc64/vio.h b/include/asm-ppc64/vio.h index 85420bb37d58..03f1b95f433b 100644 --- a/include/asm-ppc64/vio.h +++ b/include/asm-ppc64/vio.h @@ -19,6 +19,7 @@ #include #include #include +#include #include #include @@ -52,11 +53,6 @@ struct vio_dev { struct device dev; }; -struct vio_device_id { - char *type; - char *compat; -}; - struct vio_driver { struct list_head node; char *name; diff --git a/include/linux/mod_devicetable.h b/include/linux/mod_devicetable.h index 97bbccdbcca3..47da39ba3f03 100644 --- a/include/linux/mod_devicetable.h +++ b/include/linux/mod_devicetable.h @@ -1,6 +1,6 @@ /* * Device tables which are exported to userspace via - * scripts/table2alias.c. You must keep that file in sync with this + * scripts/mod/file2alias.c. You must keep that file in sync with this * header. */ @@ -190,6 +190,11 @@ struct of_device_id #endif }; +/* VIO */ +struct vio_device_id { + char type[32]; + char compat[32]; +}; /* PCMCIA */ diff --git a/scripts/mod/file2alias.c b/scripts/mod/file2alias.c index 5180405c1a84..d8ee38aede26 100644 --- a/scripts/mod/file2alias.c +++ b/scripts/mod/file2alias.c @@ -341,6 +341,22 @@ static int do_of_entry (const char *filename, struct of_device_id *of, char *ali return 1; } +static int do_vio_entry(const char *filename, struct vio_device_id *vio, + char *alias) +{ + char *tmp; + + sprintf(alias, "vio:T%sS%s", vio->type[0] ? vio->type : "*", + vio->compat[0] ? vio->compat : "*"); + + /* Replace all whitespace with underscores */ + for (tmp = alias; tmp && *tmp; tmp++) + if (isspace (*tmp)) + *tmp = '_'; + + return 1; +} + /* Ignore any prefix, eg. 
v850 prepends _ */ static inline int sym_is(const char *symbol, const char *name) { @@ -422,6 +438,9 @@ void handle_moddevtable(struct module *mod, struct elf_info *info, else if (sym_is(symname, "__mod_of_device_table")) do_table(symval, sym->st_size, sizeof(struct of_device_id), do_of_entry, mod); + else if (sym_is(symname, "__mod_vio_device_table")) + do_table(symval, sym->st_size, sizeof(struct vio_device_id), + do_vio_entry, mod); } From 45e2a6e4e5e22acd4321f69e84b726c2a568dacf Mon Sep 17 00:00:00 2001 From: Stephen Rothwell Date: Mon, 29 Aug 2005 13:15:50 +1000 Subject: [PATCH 498/584] [PATCH] Create include/asm-powerpc The ppc and ppc64 trees are hopefully going to merge over time, so this patch begins the process by creating a place for the merging of the header files. Create include/asm-powerpc (and move linkage.h into it from asm-{ppc,ppc64} since we don't like empty directories). Modify the ppc and ppc64 Makefiles to cope. Signed-off-by: Stephen Rothwell Signed-off-by: Paul Mackerras --- arch/ppc/Makefile | 11 ++++++++++- arch/ppc64/Makefile | 9 +++++++++ include/{asm-ppc => asm-powerpc}/linkage.h | 0 include/asm-ppc64/linkage.h | 6 ------ 4 files changed, 19 insertions(+), 7 deletions(-) rename include/{asm-ppc => asm-powerpc}/linkage.h (100%) delete mode 100644 include/asm-ppc64/linkage.h diff --git a/arch/ppc/Makefile b/arch/ppc/Makefile index f9b0d778dd82..d1b6e6dcb504 100644 --- a/arch/ppc/Makefile +++ b/arch/ppc/Makefile @@ -21,11 +21,13 @@ CC := $(CC) -m32 endif LDFLAGS_vmlinux := -Ttext $(KERNELLOAD) -Bstatic -CPPFLAGS += -Iarch/$(ARCH) +CPPFLAGS += -Iarch/$(ARCH) -Iinclude3 AFLAGS += -Iarch/$(ARCH) CFLAGS += -Iarch/$(ARCH) -msoft-float -pipe \ -ffixed-r2 -mmultiple CPP = $(CC) -E $(CFLAGS) +# Temporary hack until we have migrated to asm-powerpc +LINUXINCLUDE += -Iinclude3 CHECKFLAGS += -D__powerpc__ @@ -101,6 +103,7 @@ endef archclean: $(Q)$(MAKE) $(clean)=arch/ppc/boot + $(Q)rm -rf include3 prepare: include/asm-$(ARCH)/offsets.h checkbin @@ 
-110,6 +113,12 @@ arch/$(ARCH)/kernel/asm-offsets.s: include/asm include/linux/version.h \ include/asm-$(ARCH)/offsets.h: arch/$(ARCH)/kernel/asm-offsets.s $(call filechk,gen-asm-offsets) +# Temporary hack until we have migrated to asm-powerpc +include/asm: include3/asm +include3/asm: + $(Q)if [ ! -d include3 ]; then mkdir -p include3; fi + $(Q)ln -fsn $(srctree)/include/asm-powerpc include3/asm + # Use the file '.tmp_gas_check' for binutils tests, as gas won't output # to stdout and these checks are run even on install targets. TOUT := .tmp_gas_check diff --git a/arch/ppc64/Makefile b/arch/ppc64/Makefile index 731b84758331..6350cce82efb 100644 --- a/arch/ppc64/Makefile +++ b/arch/ppc64/Makefile @@ -55,6 +55,8 @@ LDFLAGS := -m elf64ppc LDFLAGS_vmlinux := -Bstatic -e $(KERNELLOAD) -Ttext $(KERNELLOAD) CFLAGS += -msoft-float -pipe -mminimal-toc -mtraceback=none \ -mcall-aixdesc +# Temporary hack until we have migrated to asm-powerpc +CPPFLAGS += -Iinclude3 GCC_VERSION := $(call cc-version) GCC_BROKEN_VEC := $(shell if [ $(GCC_VERSION) -lt 0400 ] ; then echo "y"; fi ;) @@ -112,6 +114,7 @@ all: $(KBUILD_IMAGE) archclean: $(Q)$(MAKE) $(clean)=$(boot) + $(Q)rm -rf include3 prepare: include/asm-ppc64/offsets.h @@ -121,6 +124,12 @@ arch/ppc64/kernel/asm-offsets.s: include/asm include/linux/version.h \ include/asm-ppc64/offsets.h: arch/ppc64/kernel/asm-offsets.s $(call filechk,gen-asm-offsets) +# Temporary hack until we have migrated to asm-powerpc +include/asm: include3/asm +include3/asm: + $(Q)if [ ! 
-d include3 ]; then mkdir -p include3; fi; + $(Q)ln -fsn $(srctree)/include/asm-powerpc include3/asm + define archhelp echo '* zImage - Compressed kernel image (arch/$(ARCH)/boot/zImage)' echo ' zImage.initrd- Compressed kernel image with initrd attached,' diff --git a/include/asm-ppc/linkage.h b/include/asm-powerpc/linkage.h similarity index 100% rename from include/asm-ppc/linkage.h rename to include/asm-powerpc/linkage.h diff --git a/include/asm-ppc64/linkage.h b/include/asm-ppc64/linkage.h deleted file mode 100644 index 291c2d01c44f..000000000000 --- a/include/asm-ppc64/linkage.h +++ /dev/null @@ -1,6 +0,0 @@ -#ifndef __ASM_LINKAGE_H -#define __ASM_LINKAGE_H - -/* Nothing to see here... */ - -#endif From 88999ceb55bf959e63df0c911915166b005977fc Mon Sep 17 00:00:00 2001 From: Stephen Rothwell Date: Mon, 29 Aug 2005 14:06:56 +1000 Subject: [PATCH 499/584] [PATCH] Move the identical files from include/asm-ppc{,64} Move the identical files from include/asm-ppc{,64}/ to include/asm-powerpc/. Remove hdreg.h completely as it is unused in the tree. 
Signed-off-by: Stephen Rothwell Signed-off-by: Paul Mackerras --- include/{asm-ppc => asm-powerpc}/8253pit.h | 2 +- include/{asm-ppc => asm-powerpc}/agp.h | 0 include/asm-powerpc/cputime.h | 1 + include/{asm-ppc => asm-powerpc}/div64.h | 0 include/asm-powerpc/emergency-restart.h | 1 + include/{asm-ppc => asm-powerpc}/ipc.h | 0 include/{asm-ppc => asm-powerpc}/xor.h | 0 include/asm-ppc/cputime.h | 6 ------ include/asm-ppc/emergency-restart.h | 6 ------ include/asm-ppc/hdreg.h | 1 - include/asm-ppc64/8253pit.h | 10 ---------- include/asm-ppc64/agp.h | 23 ---------------------- include/asm-ppc64/cputime.h | 6 ------ include/asm-ppc64/div64.h | 1 - include/asm-ppc64/emergency-restart.h | 6 ------ include/asm-ppc64/hdreg.h | 1 - include/asm-ppc64/ipc.h | 1 - include/asm-ppc64/xor.h | 1 - 18 files changed, 3 insertions(+), 63 deletions(-) rename include/{asm-ppc => asm-powerpc}/8253pit.h (74%) rename include/{asm-ppc => asm-powerpc}/agp.h (100%) create mode 100644 include/asm-powerpc/cputime.h rename include/{asm-ppc => asm-powerpc}/div64.h (100%) create mode 100644 include/asm-powerpc/emergency-restart.h rename include/{asm-ppc => asm-powerpc}/ipc.h (100%) rename include/{asm-ppc => asm-powerpc}/xor.h (100%) delete mode 100644 include/asm-ppc/cputime.h delete mode 100644 include/asm-ppc/emergency-restart.h delete mode 100644 include/asm-ppc/hdreg.h delete mode 100644 include/asm-ppc64/8253pit.h delete mode 100644 include/asm-ppc64/agp.h delete mode 100644 include/asm-ppc64/cputime.h delete mode 100644 include/asm-ppc64/div64.h delete mode 100644 include/asm-ppc64/emergency-restart.h delete mode 100644 include/asm-ppc64/hdreg.h delete mode 100644 include/asm-ppc64/ipc.h delete mode 100644 include/asm-ppc64/xor.h diff --git a/include/asm-ppc/8253pit.h b/include/asm-powerpc/8253pit.h similarity index 74% rename from include/asm-ppc/8253pit.h rename to include/asm-powerpc/8253pit.h index 285f78488ccb..862708a749b0 100644 --- a/include/asm-ppc/8253pit.h +++ 
b/include/asm-powerpc/8253pit.h @@ -5,6 +5,6 @@ #ifndef _8253PIT_H #define _8253PIT_H -#define PIT_TICK_RATE 1193182UL +#define PIT_TICK_RATE 1193182UL #endif diff --git a/include/asm-ppc/agp.h b/include/asm-powerpc/agp.h similarity index 100% rename from include/asm-ppc/agp.h rename to include/asm-powerpc/agp.h diff --git a/include/asm-powerpc/cputime.h b/include/asm-powerpc/cputime.h new file mode 100644 index 000000000000..6d68ad7e0ea3 --- /dev/null +++ b/include/asm-powerpc/cputime.h @@ -0,0 +1 @@ +#include diff --git a/include/asm-ppc/div64.h b/include/asm-powerpc/div64.h similarity index 100% rename from include/asm-ppc/div64.h rename to include/asm-powerpc/div64.h diff --git a/include/asm-powerpc/emergency-restart.h b/include/asm-powerpc/emergency-restart.h new file mode 100644 index 000000000000..3711bd9d50bd --- /dev/null +++ b/include/asm-powerpc/emergency-restart.h @@ -0,0 +1 @@ +#include diff --git a/include/asm-ppc/ipc.h b/include/asm-powerpc/ipc.h similarity index 100% rename from include/asm-ppc/ipc.h rename to include/asm-powerpc/ipc.h diff --git a/include/asm-ppc/xor.h b/include/asm-powerpc/xor.h similarity index 100% rename from include/asm-ppc/xor.h rename to include/asm-powerpc/xor.h diff --git a/include/asm-ppc/cputime.h b/include/asm-ppc/cputime.h deleted file mode 100644 index 8e9faf5ce720..000000000000 --- a/include/asm-ppc/cputime.h +++ /dev/null @@ -1,6 +0,0 @@ -#ifndef __PPC_CPUTIME_H -#define __PPC_CPUTIME_H - -#include - -#endif /* __PPC_CPUTIME_H */ diff --git a/include/asm-ppc/emergency-restart.h b/include/asm-ppc/emergency-restart.h deleted file mode 100644 index 108d8c48e42e..000000000000 --- a/include/asm-ppc/emergency-restart.h +++ /dev/null @@ -1,6 +0,0 @@ -#ifndef _ASM_EMERGENCY_RESTART_H -#define _ASM_EMERGENCY_RESTART_H - -#include - -#endif /* _ASM_EMERGENCY_RESTART_H */ diff --git a/include/asm-ppc/hdreg.h b/include/asm-ppc/hdreg.h deleted file mode 100644 index 7f7fd1af0af3..000000000000 --- a/include/asm-ppc/hdreg.h +++ 
/dev/null @@ -1 +0,0 @@ -#include diff --git a/include/asm-ppc64/8253pit.h b/include/asm-ppc64/8253pit.h deleted file mode 100644 index 285f78488ccb..000000000000 --- a/include/asm-ppc64/8253pit.h +++ /dev/null @@ -1,10 +0,0 @@ -/* - * 8253/8254 Programmable Interval Timer - */ - -#ifndef _8253PIT_H -#define _8253PIT_H - -#define PIT_TICK_RATE 1193182UL - -#endif diff --git a/include/asm-ppc64/agp.h b/include/asm-ppc64/agp.h deleted file mode 100644 index ca9e423307f4..000000000000 --- a/include/asm-ppc64/agp.h +++ /dev/null @@ -1,23 +0,0 @@ -#ifndef AGP_H -#define AGP_H 1 - -#include - -/* nothing much needed here */ - -#define map_page_into_agp(page) -#define unmap_page_from_agp(page) -#define flush_agp_mappings() -#define flush_agp_cache() mb() - -/* Convert a physical address to an address suitable for the GART. */ -#define phys_to_gart(x) (x) -#define gart_to_phys(x) (x) - -/* GATT allocation. Returns/accepts GATT kernel virtual address. */ -#define alloc_gatt_pages(order) \ - ((char *)__get_free_pages(GFP_KERNEL, (order))) -#define free_gatt_pages(table, order) \ - free_pages((unsigned long)(table), (order)) - -#endif diff --git a/include/asm-ppc64/cputime.h b/include/asm-ppc64/cputime.h deleted file mode 100644 index 8e9faf5ce720..000000000000 --- a/include/asm-ppc64/cputime.h +++ /dev/null @@ -1,6 +0,0 @@ -#ifndef __PPC_CPUTIME_H -#define __PPC_CPUTIME_H - -#include - -#endif /* __PPC_CPUTIME_H */ diff --git a/include/asm-ppc64/div64.h b/include/asm-ppc64/div64.h deleted file mode 100644 index 6cd978cefb28..000000000000 --- a/include/asm-ppc64/div64.h +++ /dev/null @@ -1 +0,0 @@ -#include diff --git a/include/asm-ppc64/emergency-restart.h b/include/asm-ppc64/emergency-restart.h deleted file mode 100644 index 108d8c48e42e..000000000000 --- a/include/asm-ppc64/emergency-restart.h +++ /dev/null @@ -1,6 +0,0 @@ -#ifndef _ASM_EMERGENCY_RESTART_H -#define _ASM_EMERGENCY_RESTART_H - -#include - -#endif /* _ASM_EMERGENCY_RESTART_H */ diff --git 
a/include/asm-ppc64/hdreg.h b/include/asm-ppc64/hdreg.h deleted file mode 100644 index 7f7fd1af0af3..000000000000 --- a/include/asm-ppc64/hdreg.h +++ /dev/null @@ -1 +0,0 @@ -#include diff --git a/include/asm-ppc64/ipc.h b/include/asm-ppc64/ipc.h deleted file mode 100644 index a46e3d9c2a3f..000000000000 --- a/include/asm-ppc64/ipc.h +++ /dev/null @@ -1 +0,0 @@ -#include diff --git a/include/asm-ppc64/xor.h b/include/asm-ppc64/xor.h deleted file mode 100644 index c82eb12a5b18..000000000000 --- a/include/asm-ppc64/xor.h +++ /dev/null @@ -1 +0,0 @@ -#include From 6f9aa727433fe7647869c9b64ce2f7b5feac0052 Mon Sep 17 00:00:00 2001 From: Stephen Rothwell Date: Mon, 29 Aug 2005 14:08:11 +1000 Subject: [PATCH 500/584] [PATCH] Move all the very similar files to asm-powerpc They differed in either simple comments or in the protecting ifdefs. Signed-off-by: Stephen Rothwell Signed-off-by: Paul Mackerras --- include/{asm-ppc => asm-powerpc}/errno.h | 0 include/{asm-ppc => asm-powerpc}/ioctl.h | 0 include/{asm-ppc => asm-powerpc}/ioctls.h | 0 include/{asm-ppc64 => asm-powerpc}/local.h | 0 include/{asm-ppc => asm-powerpc}/namei.h | 0 include/asm-powerpc/percpu.h | 1 + include/{asm-ppc => asm-powerpc}/poll.h | 0 include/asm-powerpc/resource.h | 1 + include/{asm-ppc => asm-powerpc}/shmparam.h | 0 include/{asm-ppc => asm-powerpc}/string.h | 0 include/{asm-ppc => asm-powerpc}/unaligned.h | 0 include/asm-ppc/local.h | 6 - include/asm-ppc/percpu.h | 6 - include/asm-ppc/resource.h | 6 - include/asm-ppc64/errno.h | 18 --- include/asm-ppc64/ioctl.h | 74 ------------ include/asm-ppc64/ioctls.h | 114 ------------------- include/asm-ppc64/namei.h | 23 ---- include/asm-ppc64/percpu.h | 6 - include/asm-ppc64/poll.h | 32 ------ include/asm-ppc64/resource.h | 6 - include/asm-ppc64/shmparam.h | 13 --- include/asm-ppc64/string.h | 35 ------ include/asm-ppc64/unaligned.h | 21 ---- 24 files changed, 2 insertions(+), 360 deletions(-) rename include/{asm-ppc => asm-powerpc}/errno.h (100%) rename 
include/{asm-ppc => asm-powerpc}/ioctl.h (100%) rename include/{asm-ppc => asm-powerpc}/ioctls.h (100%) rename include/{asm-ppc64 => asm-powerpc}/local.h (100%) rename include/{asm-ppc => asm-powerpc}/namei.h (100%) create mode 100644 include/asm-powerpc/percpu.h rename include/{asm-ppc => asm-powerpc}/poll.h (100%) create mode 100644 include/asm-powerpc/resource.h rename include/{asm-ppc => asm-powerpc}/shmparam.h (100%) rename include/{asm-ppc => asm-powerpc}/string.h (100%) rename include/{asm-ppc => asm-powerpc}/unaligned.h (100%) delete mode 100644 include/asm-ppc/local.h delete mode 100644 include/asm-ppc/percpu.h delete mode 100644 include/asm-ppc/resource.h delete mode 100644 include/asm-ppc64/errno.h delete mode 100644 include/asm-ppc64/ioctl.h delete mode 100644 include/asm-ppc64/ioctls.h delete mode 100644 include/asm-ppc64/namei.h delete mode 100644 include/asm-ppc64/percpu.h delete mode 100644 include/asm-ppc64/poll.h delete mode 100644 include/asm-ppc64/resource.h delete mode 100644 include/asm-ppc64/shmparam.h delete mode 100644 include/asm-ppc64/string.h delete mode 100644 include/asm-ppc64/unaligned.h diff --git a/include/asm-ppc/errno.h b/include/asm-powerpc/errno.h similarity index 100% rename from include/asm-ppc/errno.h rename to include/asm-powerpc/errno.h diff --git a/include/asm-ppc/ioctl.h b/include/asm-powerpc/ioctl.h similarity index 100% rename from include/asm-ppc/ioctl.h rename to include/asm-powerpc/ioctl.h diff --git a/include/asm-ppc/ioctls.h b/include/asm-powerpc/ioctls.h similarity index 100% rename from include/asm-ppc/ioctls.h rename to include/asm-powerpc/ioctls.h diff --git a/include/asm-ppc64/local.h b/include/asm-powerpc/local.h similarity index 100% rename from include/asm-ppc64/local.h rename to include/asm-powerpc/local.h diff --git a/include/asm-ppc/namei.h b/include/asm-powerpc/namei.h similarity index 100% rename from include/asm-ppc/namei.h rename to include/asm-powerpc/namei.h diff --git 
a/include/asm-powerpc/percpu.h b/include/asm-powerpc/percpu.h new file mode 100644 index 000000000000..06a959d67234 --- /dev/null +++ b/include/asm-powerpc/percpu.h @@ -0,0 +1 @@ +#include diff --git a/include/asm-ppc/poll.h b/include/asm-powerpc/poll.h similarity index 100% rename from include/asm-ppc/poll.h rename to include/asm-powerpc/poll.h diff --git a/include/asm-powerpc/resource.h b/include/asm-powerpc/resource.h new file mode 100644 index 000000000000..04bc4db8921b --- /dev/null +++ b/include/asm-powerpc/resource.h @@ -0,0 +1 @@ +#include diff --git a/include/asm-ppc/shmparam.h b/include/asm-powerpc/shmparam.h similarity index 100% rename from include/asm-ppc/shmparam.h rename to include/asm-powerpc/shmparam.h diff --git a/include/asm-ppc/string.h b/include/asm-powerpc/string.h similarity index 100% rename from include/asm-ppc/string.h rename to include/asm-powerpc/string.h diff --git a/include/asm-ppc/unaligned.h b/include/asm-powerpc/unaligned.h similarity index 100% rename from include/asm-ppc/unaligned.h rename to include/asm-powerpc/unaligned.h diff --git a/include/asm-ppc/local.h b/include/asm-ppc/local.h deleted file mode 100644 index b08e3eced10e..000000000000 --- a/include/asm-ppc/local.h +++ /dev/null @@ -1,6 +0,0 @@ -#ifndef __PPC_LOCAL_H -#define __PPC_LOCAL_H - -#include - -#endif /* __PPC_LOCAL_H */ diff --git a/include/asm-ppc/percpu.h b/include/asm-ppc/percpu.h deleted file mode 100644 index d66667cd5878..000000000000 --- a/include/asm-ppc/percpu.h +++ /dev/null @@ -1,6 +0,0 @@ -#ifndef __ARCH_PPC_PERCPU__ -#define __ARCH_PPC_PERCPU__ - -#include - -#endif /* __ARCH_PPC_PERCPU__ */ diff --git a/include/asm-ppc/resource.h b/include/asm-ppc/resource.h deleted file mode 100644 index 86a1ea23a6ed..000000000000 --- a/include/asm-ppc/resource.h +++ /dev/null @@ -1,6 +0,0 @@ -#ifndef _PPC_RESOURCE_H -#define _PPC_RESOURCE_H - -#include - -#endif diff --git a/include/asm-ppc64/errno.h b/include/asm-ppc64/errno.h deleted file mode 100644 index 
69bc3b0c6cbe..000000000000 --- a/include/asm-ppc64/errno.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef _PPC64_ERRNO_H -#define _PPC64_ERRNO_H - -/* - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - */ - -#include - -#undef EDEADLOCK -#define EDEADLOCK 58 /* File locking deadlock error */ - -#define _LAST_ERRNO 516 - -#endif diff --git a/include/asm-ppc64/ioctl.h b/include/asm-ppc64/ioctl.h deleted file mode 100644 index 42b8c5da7fbc..000000000000 --- a/include/asm-ppc64/ioctl.h +++ /dev/null @@ -1,74 +0,0 @@ -#ifndef _PPC64_IOCTL_H -#define _PPC64_IOCTL_H - - -/* - * This was copied from the alpha as it's a bit cleaner there. - * -- Cort - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - */ - -#define _IOC_NRBITS 8 -#define _IOC_TYPEBITS 8 -#define _IOC_SIZEBITS 13 -#define _IOC_DIRBITS 3 - -#define _IOC_NRMASK ((1 << _IOC_NRBITS)-1) -#define _IOC_TYPEMASK ((1 << _IOC_TYPEBITS)-1) -#define _IOC_SIZEMASK ((1 << _IOC_SIZEBITS)-1) -#define _IOC_DIRMASK ((1 << _IOC_DIRBITS)-1) - -#define _IOC_NRSHIFT 0 -#define _IOC_TYPESHIFT (_IOC_NRSHIFT+_IOC_NRBITS) -#define _IOC_SIZESHIFT (_IOC_TYPESHIFT+_IOC_TYPEBITS) -#define _IOC_DIRSHIFT (_IOC_SIZESHIFT+_IOC_SIZEBITS) - -/* - * Direction bits _IOC_NONE could be 0, but OSF/1 gives it a bit. - * And this turns out useful to catch old ioctl numbers in header - * files for us. 
- */ -#define _IOC_NONE 1U -#define _IOC_READ 2U -#define _IOC_WRITE 4U - -#define _IOC(dir,type,nr,size) \ - (((dir) << _IOC_DIRSHIFT) | \ - ((type) << _IOC_TYPESHIFT) | \ - ((nr) << _IOC_NRSHIFT) | \ - ((size) << _IOC_SIZESHIFT)) - -/* provoke compile error for invalid uses of size argument */ -extern unsigned int __invalid_size_argument_for_IOC; -#define _IOC_TYPECHECK(t) \ - ((sizeof(t) == sizeof(t[1]) && \ - sizeof(t) < (1 << _IOC_SIZEBITS)) ? \ - sizeof(t) : __invalid_size_argument_for_IOC) - -/* used to create numbers */ -#define _IO(type,nr) _IOC(_IOC_NONE,(type),(nr),0) -#define _IOR(type,nr,size) _IOC(_IOC_READ,(type),(nr),(_IOC_TYPECHECK(size))) -#define _IOW(type,nr,size) _IOC(_IOC_WRITE,(type),(nr),(_IOC_TYPECHECK(size))) -#define _IOWR(type,nr,size) _IOC(_IOC_READ|_IOC_WRITE,(type),(nr),(_IOC_TYPECHECK(size))) -#define _IOR_BAD(type,nr,size) _IOC(_IOC_READ,(type),(nr),sizeof(size)) -#define _IOW_BAD(type,nr,size) _IOC(_IOC_WRITE,(type),(nr),sizeof(size)) -#define _IOWR_BAD(type,nr,size) _IOC(_IOC_READ|_IOC_WRITE,(type),(nr),sizeof(size)) - -/* used to decode them.. */ -#define _IOC_DIR(nr) (((nr) >> _IOC_DIRSHIFT) & _IOC_DIRMASK) -#define _IOC_TYPE(nr) (((nr) >> _IOC_TYPESHIFT) & _IOC_TYPEMASK) -#define _IOC_NR(nr) (((nr) >> _IOC_NRSHIFT) & _IOC_NRMASK) -#define _IOC_SIZE(nr) (((nr) >> _IOC_SIZESHIFT) & _IOC_SIZEMASK) - -/* various drivers, such as the pcmcia stuff, need these... 
*/ -#define IOC_IN (_IOC_WRITE << _IOC_DIRSHIFT) -#define IOC_OUT (_IOC_READ << _IOC_DIRSHIFT) -#define IOC_INOUT ((_IOC_WRITE|_IOC_READ) << _IOC_DIRSHIFT) -#define IOCSIZE_MASK (_IOC_SIZEMASK << _IOC_SIZESHIFT) -#define IOCSIZE_SHIFT (_IOC_SIZESHIFT) - -#endif /* _PPC64_IOCTL_H */ diff --git a/include/asm-ppc64/ioctls.h b/include/asm-ppc64/ioctls.h deleted file mode 100644 index 48796bf3e4fc..000000000000 --- a/include/asm-ppc64/ioctls.h +++ /dev/null @@ -1,114 +0,0 @@ -#ifndef _ASM_PPC64_IOCTLS_H -#define _ASM_PPC64_IOCTLS_H - -/* - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - */ - -#include - -#define FIOCLEX _IO('f', 1) -#define FIONCLEX _IO('f', 2) -#define FIOASYNC _IOW('f', 125, int) -#define FIONBIO _IOW('f', 126, int) -#define FIONREAD _IOR('f', 127, int) -#define TIOCINQ FIONREAD -#define FIOQSIZE _IOR('f', 128, loff_t) - -#define TIOCGETP _IOR('t', 8, struct sgttyb) -#define TIOCSETP _IOW('t', 9, struct sgttyb) -#define TIOCSETN _IOW('t', 10, struct sgttyb) /* TIOCSETP wo flush */ - -#define TIOCSETC _IOW('t', 17, struct tchars) -#define TIOCGETC _IOR('t', 18, struct tchars) -#define TCGETS _IOR('t', 19, struct termios) -#define TCSETS _IOW('t', 20, struct termios) -#define TCSETSW _IOW('t', 21, struct termios) -#define TCSETSF _IOW('t', 22, struct termios) - -#define TCGETA _IOR('t', 23, struct termio) -#define TCSETA _IOW('t', 24, struct termio) -#define TCSETAW _IOW('t', 25, struct termio) -#define TCSETAF _IOW('t', 28, struct termio) - -#define TCSBRK _IO('t', 29) -#define TCXONC _IO('t', 30) -#define TCFLSH _IO('t', 31) - -#define TIOCSWINSZ _IOW('t', 103, struct winsize) -#define TIOCGWINSZ _IOR('t', 104, struct winsize) -#define TIOCSTART _IO('t', 110) /* start output, like ^Q */ -#define TIOCSTOP _IO('t', 111) /* stop output, like ^S */ 
-#define TIOCOUTQ _IOR('t', 115, int) /* output queue size */ - -#define TIOCGLTC _IOR('t', 116, struct ltchars) -#define TIOCSLTC _IOW('t', 117, struct ltchars) -#define TIOCSPGRP _IOW('t', 118, int) -#define TIOCGPGRP _IOR('t', 119, int) - -#define TIOCEXCL 0x540C -#define TIOCNXCL 0x540D -#define TIOCSCTTY 0x540E - -#define TIOCSTI 0x5412 -#define TIOCMGET 0x5415 -#define TIOCMBIS 0x5416 -#define TIOCMBIC 0x5417 -#define TIOCMSET 0x5418 -# define TIOCM_LE 0x001 -# define TIOCM_DTR 0x002 -# define TIOCM_RTS 0x004 -# define TIOCM_ST 0x008 -# define TIOCM_SR 0x010 -# define TIOCM_CTS 0x020 -# define TIOCM_CAR 0x040 -# define TIOCM_RNG 0x080 -# define TIOCM_DSR 0x100 -# define TIOCM_CD TIOCM_CAR -# define TIOCM_RI TIOCM_RNG - -#define TIOCGSOFTCAR 0x5419 -#define TIOCSSOFTCAR 0x541A -#define TIOCLINUX 0x541C -#define TIOCCONS 0x541D -#define TIOCGSERIAL 0x541E -#define TIOCSSERIAL 0x541F -#define TIOCPKT 0x5420 -# define TIOCPKT_DATA 0 -# define TIOCPKT_FLUSHREAD 1 -# define TIOCPKT_FLUSHWRITE 2 -# define TIOCPKT_STOP 4 -# define TIOCPKT_START 8 -# define TIOCPKT_NOSTOP 16 -# define TIOCPKT_DOSTOP 32 - - -#define TIOCNOTTY 0x5422 -#define TIOCSETD 0x5423 -#define TIOCGETD 0x5424 -#define TCSBRKP 0x5425 /* Needed for POSIX tcsendbreak() */ -#define TIOCSBRK 0x5427 /* BSD compatibility */ -#define TIOCCBRK 0x5428 /* BSD compatibility */ -#define TIOCGSID 0x5429 /* Return the session ID of FD */ -#define TIOCGPTN _IOR('T',0x30, unsigned int) /* Get Pty Number (of pty-mux device) */ -#define TIOCSPTLCK _IOW('T',0x31, int) /* Lock/unlock Pty */ - -#define TIOCSERCONFIG 0x5453 -#define TIOCSERGWILD 0x5454 -#define TIOCSERSWILD 0x5455 -#define TIOCGLCKTRMIOS 0x5456 -#define TIOCSLCKTRMIOS 0x5457 -#define TIOCSERGSTRUCT 0x5458 /* For debugging only */ -#define TIOCSERGETLSR 0x5459 /* Get line status register */ - /* ioctl (fd, TIOCSERGETLSR, &result) where result may be as below */ -# define TIOCSER_TEMT 0x01 /* Transmitter physically empty */ -#define TIOCSERGETMULTI 
0x545A /* Get multiport config */ -#define TIOCSERSETMULTI 0x545B /* Set multiport config */ - -#define TIOCMIWAIT 0x545C /* wait for a change on serial input line(s) */ -#define TIOCGICOUNT 0x545D /* read serial port inline interrupt counts */ - -#endif /* _ASM_PPC64_IOCTLS_H */ diff --git a/include/asm-ppc64/namei.h b/include/asm-ppc64/namei.h deleted file mode 100644 index a1412a2d102a..000000000000 --- a/include/asm-ppc64/namei.h +++ /dev/null @@ -1,23 +0,0 @@ -/* - * linux/include/asm-ppc/namei.h - * Adapted from linux/include/asm-alpha/namei.h - * - * Included from linux/fs/namei.c - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - */ - -#ifndef __PPC64_NAMEI_H -#define __PPC64_NAMEI_H - -/* This dummy routine maybe changed to something useful - * for /usr/gnemul/ emulation stuff. - * Look at asm-sparc/namei.h for details. - */ - -#define __emul_prefix() NULL - -#endif /* __PPC64_NAMEI_H */ diff --git a/include/asm-ppc64/percpu.h b/include/asm-ppc64/percpu.h deleted file mode 100644 index 60a659a4ce1f..000000000000 --- a/include/asm-ppc64/percpu.h +++ /dev/null @@ -1,6 +0,0 @@ -#ifndef __ARCH_PPC64_PERCPU__ -#define __ARCH_PPC64_PERCPU__ - -#include - -#endif /* __ARCH_PPC64_PERCPU__ */ diff --git a/include/asm-ppc64/poll.h b/include/asm-ppc64/poll.h deleted file mode 100644 index 370fa3ba0db4..000000000000 --- a/include/asm-ppc64/poll.h +++ /dev/null @@ -1,32 +0,0 @@ -#ifndef __PPC64_POLL_H -#define __PPC64_POLL_H - -/* - * Copyright (C) 2001 PPC64 Team, IBM Corp - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. 
- */ - -#define POLLIN 0x0001 -#define POLLPRI 0x0002 -#define POLLOUT 0x0004 -#define POLLERR 0x0008 -#define POLLHUP 0x0010 -#define POLLNVAL 0x0020 -#define POLLRDNORM 0x0040 -#define POLLRDBAND 0x0080 -#define POLLWRNORM 0x0100 -#define POLLWRBAND 0x0200 -#define POLLMSG 0x0400 -#define POLLREMOVE 0x1000 - -struct pollfd { - int fd; - short events; - short revents; -}; - -#endif /* __PPC64_POLL_H */ diff --git a/include/asm-ppc64/resource.h b/include/asm-ppc64/resource.h deleted file mode 100644 index add031b9dfd4..000000000000 --- a/include/asm-ppc64/resource.h +++ /dev/null @@ -1,6 +0,0 @@ -#ifndef _PPC64_RESOURCE_H -#define _PPC64_RESOURCE_H - -#include - -#endif /* _PPC64_RESOURCE_H */ diff --git a/include/asm-ppc64/shmparam.h b/include/asm-ppc64/shmparam.h deleted file mode 100644 index b2825ceff05e..000000000000 --- a/include/asm-ppc64/shmparam.h +++ /dev/null @@ -1,13 +0,0 @@ -#ifndef _PPC64_SHMPARAM_H -#define _PPC64_SHMPARAM_H - -/* - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - */ - -#define SHMLBA PAGE_SIZE /* attach addr a multiple of this */ - -#endif /* _PPC64_SHMPARAM_H */ diff --git a/include/asm-ppc64/string.h b/include/asm-ppc64/string.h deleted file mode 100644 index eeca68ef1e91..000000000000 --- a/include/asm-ppc64/string.h +++ /dev/null @@ -1,35 +0,0 @@ -#ifndef _PPC64_STRING_H_ -#define _PPC64_STRING_H_ - -/* - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. 
- */ - -#define __HAVE_ARCH_STRCPY -#define __HAVE_ARCH_STRNCPY -#define __HAVE_ARCH_STRLEN -#define __HAVE_ARCH_STRCMP -#define __HAVE_ARCH_STRCAT -#define __HAVE_ARCH_MEMSET -#define __HAVE_ARCH_MEMCPY -#define __HAVE_ARCH_MEMMOVE -#define __HAVE_ARCH_MEMCMP -#define __HAVE_ARCH_MEMCHR - -extern int strcasecmp(const char *, const char *); -extern int strncasecmp(const char *, const char *, int); -extern char * strcpy(char *,const char *); -extern char * strncpy(char *,const char *, __kernel_size_t); -extern __kernel_size_t strlen(const char *); -extern int strcmp(const char *,const char *); -extern char * strcat(char *, const char *); -extern void * memset(void *,int,__kernel_size_t); -extern void * memcpy(void *,const void *,__kernel_size_t); -extern void * memmove(void *,const void *,__kernel_size_t); -extern int memcmp(const void *,const void *,__kernel_size_t); -extern void * memchr(const void *,int,__kernel_size_t); - -#endif /* _PPC64_STRING_H_ */ diff --git a/include/asm-ppc64/unaligned.h b/include/asm-ppc64/unaligned.h deleted file mode 100644 index 636e93c4f379..000000000000 --- a/include/asm-ppc64/unaligned.h +++ /dev/null @@ -1,21 +0,0 @@ -#ifndef __PPC64_UNALIGNED_H -#define __PPC64_UNALIGNED_H - -/* - * The PowerPC can do unaligned accesses itself in big endian mode. - * - * The strange macros are there to make sure these can't - * be misused in a way that makes them not work on other - * architectures where unaligned accesses aren't as simple. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. 
- */ - -#define get_unaligned(ptr) (*(ptr)) - -#define put_unaligned(val, ptr) ((void)( *(ptr) = (val) )) - -#endif /* __PPC64_UNALIGNED_H */ From 7fea82ab1a74030f79a2adfac1af3d93b8638fc3 Mon Sep 17 00:00:00 2001 From: Olof Johansson Date: Sun, 28 Aug 2005 21:42:10 -0500 Subject: [PATCH 501/584] [PATCH] PPC64: Don't try to claim memory from OF at 1GB mark Some RS64-based machines (p620, F80, others) have problems with firmware returning 0xdeadbeef instead of a failure for allocations that end at the 1GB mark. We have two options: 1. Detect the undocumented 0xdeadbeef return value and interpret it as a failure. 2. Avoid allocating that high. (2) is really the cleaner solution here. 768MB is plenty of room so use that as the max alloc_top instead of 1GB. Signed-off-by: Olof Johansson Signed-off-by: Paul Mackerras --- arch/ppc64/kernel/prom_init.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/arch/ppc64/kernel/prom_init.c b/arch/ppc64/kernel/prom_init.c index adcf972711fc..122283a1d39a 100644 --- a/arch/ppc64/kernel/prom_init.c +++ b/arch/ppc64/kernel/prom_init.c @@ -892,7 +892,10 @@ static void __init prom_init_mem(void) if ( RELOC(of_platform) == PLATFORM_PSERIES_LPAR ) RELOC(alloc_top) = RELOC(rmo_top); else - RELOC(alloc_top) = RELOC(rmo_top) = min(0x40000000ul, RELOC(ram_top)); + /* Some RS64 machines have buggy firmware where claims up at 1GB + * fails. Cap at 768MB as a workaround. Still plenty of room. + */ + RELOC(alloc_top) = RELOC(rmo_top) = min(0x30000000ul, RELOC(ram_top)); prom_printf("memory layout at init:\n"); prom_printf(" memory_limit : %x (16 MB aligned)\n", RELOC(prom_memory_limit)); From 04ed65190a5d1562220dd3a7fc9eac2402c7104c Mon Sep 17 00:00:00 2001 From: Jake Moilanen Date: Wed, 24 Aug 2005 15:22:12 -0500 Subject: [PATCH 502/584] [PATCH] oprofile PVR 970MP Here's the 970MP's PVR (processor version register) entry for oprofile.
Signed-off-by: Jake Moilanen Signed-off-by: Paul Mackerras --- arch/ppc64/oprofile/common.c | 1 + include/asm-ppc64/processor.h | 1 + 2 files changed, 2 insertions(+) diff --git a/arch/ppc64/oprofile/common.c b/arch/ppc64/oprofile/common.c index b28bfda23d94..4acd1a424933 100644 --- a/arch/ppc64/oprofile/common.c +++ b/arch/ppc64/oprofile/common.c @@ -153,6 +153,7 @@ int __init oprofile_arch_init(struct oprofile_operations *ops) case PV_970: case PV_970FX: + case PV_970MP: model = &op_model_power4; model->num_counters = 8; ops->cpu_type = "ppc64/970"; diff --git a/include/asm-ppc64/processor.h b/include/asm-ppc64/processor.h index 50b14c0ddb87..7bd4796f1236 100644 --- a/include/asm-ppc64/processor.h +++ b/include/asm-ppc64/processor.h @@ -268,6 +268,7 @@ #define PV_970FX 0x003C #define PV_630 0x0040 #define PV_630p 0x0041 +#define PV_970MP 0x0044 #define PV_BE 0x0070 /* Platforms supported by PPC64 */ From 717522ff44f1fbee5ea09e83d7cd4b5c956e30f9 Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Wed, 24 Aug 2005 08:53:03 +1000 Subject: [PATCH 503/584] [PATCH] ppc64: Add CONFIG_HZ While ppc64 has the CONFIG_HZ Kconfig option, it wasn't actually being used. Connect it up and set all platforms to 250Hz.
Signed-off-by: Anton Blanchard Signed-off-by: Paul Mackerras --- arch/ppc64/configs/g5_defconfig | 6 +++--- arch/ppc64/configs/iSeries_defconfig | 6 +++--- arch/ppc64/configs/maple_defconfig | 6 +++--- arch/ppc64/configs/pSeries_defconfig | 6 +++--- arch/ppc64/defconfig | 6 +++--- include/asm-ppc64/param.h | 4 +++- 6 files changed, 18 insertions(+), 16 deletions(-) diff --git a/arch/ppc64/configs/g5_defconfig b/arch/ppc64/configs/g5_defconfig index ab567741e80e..fc83d9330282 100644 --- a/arch/ppc64/configs/g5_defconfig +++ b/arch/ppc64/configs/g5_defconfig @@ -103,10 +103,10 @@ CONFIG_PREEMPT_NONE=y # CONFIG_PREEMPT_VOLUNTARY is not set # CONFIG_PREEMPT is not set # CONFIG_PREEMPT_BKL is not set -CONFIG_HZ_100=y -# CONFIG_HZ_250 is not set +# CONFIG_HZ_100 is not set +CONFIG_HZ_250=y # CONFIG_HZ_1000 is not set -CONFIG_HZ=100 +CONFIG_HZ=250 CONFIG_GENERIC_HARDIRQS=y CONFIG_SECCOMP=y CONFIG_ISA_DMA_API=y diff --git a/arch/ppc64/configs/iSeries_defconfig b/arch/ppc64/configs/iSeries_defconfig index 219c6677abcc..013d4e0e4003 100644 --- a/arch/ppc64/configs/iSeries_defconfig +++ b/arch/ppc64/configs/iSeries_defconfig @@ -94,10 +94,10 @@ CONFIG_PREEMPT_NONE=y # CONFIG_PREEMPT_VOLUNTARY is not set # CONFIG_PREEMPT is not set # CONFIG_PREEMPT_BKL is not set -CONFIG_HZ_100=y -# CONFIG_HZ_250 is not set +# CONFIG_HZ_100 is not set +CONFIG_HZ_250=y # CONFIG_HZ_1000 is not set -CONFIG_HZ=100 +CONFIG_HZ=250 CONFIG_GENERIC_HARDIRQS=y CONFIG_LPARCFG=y CONFIG_SECCOMP=y diff --git a/arch/ppc64/configs/maple_defconfig b/arch/ppc64/configs/maple_defconfig index 2033fe663dbe..dd42892cd873 100644 --- a/arch/ppc64/configs/maple_defconfig +++ b/arch/ppc64/configs/maple_defconfig @@ -103,10 +103,10 @@ CONFIG_PREEMPT_NONE=y # CONFIG_PREEMPT_VOLUNTARY is not set # CONFIG_PREEMPT is not set # CONFIG_PREEMPT_BKL is not set -CONFIG_HZ_100=y -# CONFIG_HZ_250 is not set +# CONFIG_HZ_100 is not set +CONFIG_HZ_250=y # CONFIG_HZ_1000 is not set -CONFIG_HZ=100 +CONFIG_HZ=250 
CONFIG_GENERIC_HARDIRQS=y CONFIG_SECCOMP=y CONFIG_ISA_DMA_API=y diff --git a/arch/ppc64/configs/pSeries_defconfig b/arch/ppc64/configs/pSeries_defconfig index 297fd5229487..29f7b80b0efc 100644 --- a/arch/ppc64/configs/pSeries_defconfig +++ b/arch/ppc64/configs/pSeries_defconfig @@ -112,10 +112,10 @@ CONFIG_PREEMPT_NONE=y # CONFIG_PREEMPT_VOLUNTARY is not set # CONFIG_PREEMPT is not set # CONFIG_PREEMPT_BKL is not set -CONFIG_HZ_100=y -# CONFIG_HZ_250 is not set +# CONFIG_HZ_100 is not set +CONFIG_HZ_250=y # CONFIG_HZ_1000 is not set -CONFIG_HZ=100 +CONFIG_HZ=250 CONFIG_EEH=y CONFIG_GENERIC_HARDIRQS=y CONFIG_PPC_RTAS=y diff --git a/arch/ppc64/defconfig b/arch/ppc64/defconfig index c361e7727b7a..7cb4750bb7a9 100644 --- a/arch/ppc64/defconfig +++ b/arch/ppc64/defconfig @@ -114,10 +114,10 @@ CONFIG_PREEMPT_NONE=y # CONFIG_PREEMPT_VOLUNTARY is not set # CONFIG_PREEMPT is not set # CONFIG_PREEMPT_BKL is not set -CONFIG_HZ_100=y -# CONFIG_HZ_250 is not set +# CONFIG_HZ_100 is not set +CONFIG_HZ_250=y # CONFIG_HZ_1000 is not set -CONFIG_HZ=100 +CONFIG_HZ=250 CONFIG_EEH=y CONFIG_GENERIC_HARDIRQS=y CONFIG_PPC_RTAS=y diff --git a/include/asm-ppc64/param.h b/include/asm-ppc64/param.h index 1fad38dcf707..76c212d475b3 100644 --- a/include/asm-ppc64/param.h +++ b/include/asm-ppc64/param.h @@ -1,6 +1,8 @@ #ifndef _ASM_PPC64_PARAM_H #define _ASM_PPC64_PARAM_H +#include + /* * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License @@ -9,7 +11,7 @@ */ #ifdef __KERNEL__ -# define HZ 1000 /* Internal kernel timer frequency */ +# define HZ CONFIG_HZ /* Internal kernel timer frequency */ # define USER_HZ 100 /* .. 
some user interfaces are in "ticks" */ # define CLOCKS_PER_SEC (USER_HZ) /* like times() */ #endif From 5ff98ae18bec792d77bfea801aa4b3385b98b355 Mon Sep 17 00:00:00 2001 From: Joel Schopp Date: Thu, 11 Aug 2005 17:39:28 -0500 Subject: [PATCH 504/584] [PATCH] ppc64: of_device.c remove useless code Coverity found more unused code. Signed-off-by: Joel Schopp Signed-off-by: Paul Mackerras --- arch/ppc64/kernel/of_device.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/arch/ppc64/kernel/of_device.c b/arch/ppc64/kernel/of_device.c index b80e81984ba8..da580812ddfe 100644 --- a/arch/ppc64/kernel/of_device.c +++ b/arch/ppc64/kernel/of_device.c @@ -236,7 +236,6 @@ void of_device_unregister(struct of_device *ofdev) struct of_device* of_platform_device_create(struct device_node *np, const char *bus_id) { struct of_device *dev; - u32 *reg; dev = kmalloc(sizeof(*dev), GFP_KERNEL); if (!dev) @@ -250,7 +249,6 @@ struct of_device* of_platform_device_create(struct device_node *np, const char * dev->dev.bus = &of_platform_bus_type; dev->dev.release = of_release_dev; - reg = (u32 *)get_property(np, "reg", NULL); strlcpy(dev->dev.bus_id, bus_id, BUS_ID_SIZE); if (of_device_register(dev) != 0) { From 597f95e2bfbe9b83ed8b0761ebf4e7d55fd4df17 Mon Sep 17 00:00:00 2001 From: Joel Schopp Date: Fri, 12 Aug 2005 14:34:58 -0500 Subject: [PATCH 505/584] [PATCH] ppc64: lparconfig.c memory leak This patch fixes a rare memory leak found by Coverity. 
Signed-off-by: Joel Schopp Signed-off-by: Paul Mackerras --- arch/ppc64/kernel/lparcfg.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/ppc64/kernel/lparcfg.c b/arch/ppc64/kernel/lparcfg.c index 9d034ff062b1..edad361a8db0 100644 --- a/arch/ppc64/kernel/lparcfg.c +++ b/arch/ppc64/kernel/lparcfg.c @@ -273,6 +273,7 @@ static void parse_system_parameter_string(struct seq_file *m) if (!workbuffer) { printk(KERN_ERR "%s %s kmalloc failure at line %d \n", __FILE__, __FUNCTION__, __LINE__); + kfree(local_buffer); return; } #ifdef LPARCFG_DEBUG From 8913ca1c9ccb5eb6471afd419159729eef6e2730 Mon Sep 17 00:00:00 2001 From: David Gibson Date: Wed, 27 Jul 2005 15:47:23 +1000 Subject: [PATCH 506/584] [PATCH] Remove nested feature sections The {BEGIN,END}_FTR_SECTION asm macros used in ppc64 to nop out sections of code at runtime cannot be nested. However, we do nest them in hash_low.S. We get away with it there, because there is nothing between the BEGIN markers for each section. However, that's confusing to someone reading the code. This patch removes the nested ifset and ifclr feature sections, replacing them with a single feature section in the full mask/value form. Signed-off-by: David Gibson Signed-off-by: Paul Mackerras --- arch/ppc64/mm/hash_low.S | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/arch/ppc64/mm/hash_low.S b/arch/ppc64/mm/hash_low.S index fbff24827ae7..35eb49e1b890 100644 --- a/arch/ppc64/mm/hash_low.S +++ b/arch/ppc64/mm/hash_low.S @@ -128,13 +128,11 @@ _GLOBAL(__hash_page) /* We eventually do the icache sync here (maybe inline that * code rather than call a C function...) 
*/ -BEGIN_FTR_SECTION BEGIN_FTR_SECTION mr r4,r30 mr r5,r7 bl .hash_page_do_lazy_icache -END_FTR_SECTION_IFSET(CPU_FTR_NOEXECUTE) -END_FTR_SECTION_IFCLR(CPU_FTR_COHERENT_ICACHE) +END_FTR_SECTION(CPU_FTR_NOEXECUTE|CPU_FTR_COHERENT_ICACHE, CPU_FTR_NOEXECUTE) /* At this point, r3 contains new PP bits, save them in * place of "access" in the param area (sic) From 7ef24b69f9ff4858d7242059fbb19477c10e6dd7 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Thu, 25 Aug 2005 17:14:46 -0700 Subject: [PATCH 507/584] [PATCH] s2io build fix Damir Perisa reports: drivers/net/s2io.h:765: error: invalid lvalue in assignment drivers/net/s2io.h:766: error: invalid lvalue in assignment That's a gcc4 error. I don't see why the casts are there anyway.. Cc: Jeff Garzik Signed-off-by: Andrew Morton Signed-off-by: Jeff Garzik --- drivers/net/s2io.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/s2io.h b/drivers/net/s2io.h index 5d9270730ca2..bc64d967f080 100644 --- a/drivers/net/s2io.h +++ b/drivers/net/s2io.h @@ -762,8 +762,8 @@ static inline u64 readq(void __iomem *addr) { u64 ret = 0; ret = readl(addr + 4); - (u64) ret <<= 32; - (u64) ret |= readl(addr); + ret <<= 32; + ret |= readl(addr); return ret; } From d8971fcb702e24d1e22c77fd1772f182ffee87e3 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Mon, 29 Aug 2005 22:51:28 -0700 Subject: [PATCH 508/584] [INET]: compile errors when DEBUG is defined Fix build problem found by compiling driver with DEBUG defined that used tcp.h. Since pr_debug(arg) expands to printk("<7>" arg) the argument needs to be string that can be concatenated. Signed-off-by: Stephen Hemminger Signed-off-by: David S. 
Miller --- include/net/inet_connection_sock.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h index 8a87a3a4f107..651f824c1008 100644 --- a/include/net/inet_connection_sock.h +++ b/include/net/inet_connection_sock.h @@ -147,7 +147,7 @@ static inline void inet_csk_clear_xmit_timer(struct sock *sk, const int what) } #ifdef INET_CSK_DEBUG else { - pr_debug(inet_csk_timer_bug_msg); + pr_debug("%s", inet_csk_timer_bug_msg); } #endif } @@ -180,7 +180,7 @@ static inline void inet_csk_reset_xmit_timer(struct sock *sk, const int what, } #ifdef INET_CSK_DEBUG else { - pr_debug(inet_csk_timer_bug_msg); + pr_debug("%s", inet_csk_timer_bug_msg); } #endif } From 3998b70fd0ab40a276147a0f55816d383fcbeb54 Mon Sep 17 00:00:00 2001 From: Liam Girdwood Date: Fri, 29 Jul 2005 11:41:55 +0200 Subject: [PATCH 509/584] [ALSA] WM97xx AC97 codec controls AC97 Codec o Enhanced current WM97xx support to provide additional controls and use the kcontrol suffix naming convention. o Added AC97_HAS_NO_MIC, AC97_HAS_NO_TONE and AC97_HAS_NO_STD_PCM. o Cleaned up WM97xx related comments. o Removed some wm9713 double mono controls and replaced with stereo controls. 
Signed-off-by: Liam Girdwood Signed-off-by: Takashi Iwai --- include/sound/ac97_codec.h | 3 + sound/pci/ac97/ac97_codec.c | 44 ++-- sound/pci/ac97/ac97_patch.c | 431 ++++++++++++++++++++++++++++-------- 3 files changed, 368 insertions(+), 110 deletions(-) diff --git a/include/sound/ac97_codec.h b/include/sound/ac97_codec.h index 1309c12b8f71..cbe72e06c469 100644 --- a/include/sound/ac97_codec.h +++ b/include/sound/ac97_codec.h @@ -374,6 +374,9 @@ #define AC97_HAS_NO_PC_BEEP (1<<12) /* no PC Beep volume */ #define AC97_HAS_NO_VIDEO (1<<13) /* no Video volume */ #define AC97_HAS_NO_CD (1<<14) /* no CD volume */ +#define AC97_HAS_NO_MIC (1<<15) /* no MIC volume */ +#define AC97_HAS_NO_TONE (1<<16) /* no Tone volume */ +#define AC97_HAS_NO_STD_PCM (1<<17) /* no standard AC97 PCM volume and mute */ /* rates indexes */ #define AC97_RATES_FRONT_DAC 0 diff --git a/sound/pci/ac97/ac97_codec.c b/sound/pci/ac97/ac97_codec.c index 6983eea226da..cbf790270c30 100644 --- a/sound/pci/ac97/ac97_codec.c +++ b/sound/pci/ac97/ac97_codec.c @@ -1307,16 +1307,18 @@ static int snd_ac97_mixer_build(ac97_t * ac97) } /* build master tone controls */ - if (snd_ac97_try_volume_mix(ac97, AC97_MASTER_TONE)) { - for (idx = 0; idx < 2; idx++) { - if ((err = snd_ctl_add(card, kctl = snd_ac97_cnew(&snd_ac97_controls_tone[idx], ac97))) < 0) - return err; - if (ac97->id == AC97_ID_YMF753) { - kctl->private_value &= ~(0xff << 16); - kctl->private_value |= 7 << 16; + if (!(ac97->flags & AC97_HAS_NO_TONE)) { + if (snd_ac97_try_volume_mix(ac97, AC97_MASTER_TONE)) { + for (idx = 0; idx < 2; idx++) { + if ((err = snd_ctl_add(card, kctl = snd_ac97_cnew(&snd_ac97_controls_tone[idx], ac97))) < 0) + return err; + if (ac97->id == AC97_ID_YMF753) { + kctl->private_value &= ~(0xff << 16); + kctl->private_value |= 7 << 16; + } } + snd_ac97_write_cache(ac97, AC97_MASTER_TONE, 0x0f0f); } - snd_ac97_write_cache(ac97, AC97_MASTER_TONE, 0x0f0f); } /* build PC Speaker controls */ @@ -1339,11 +1341,13 @@ static int 
snd_ac97_mixer_build(ac97_t * ac97) } /* build MIC controls */ - if (snd_ac97_try_volume_mix(ac97, AC97_MIC)) { - if ((err = snd_ac97_cmix_new(card, "Mic Playback", AC97_MIC, ac97)) < 0) - return err; - if ((err = snd_ctl_add(card, snd_ac97_cnew(&snd_ac97_controls_mic_boost, ac97))) < 0) - return err; + if (!(ac97->flags & AC97_HAS_NO_MIC)) { + if (snd_ac97_try_volume_mix(ac97, AC97_MIC)) { + if ((err = snd_ac97_cmix_new(card, "Mic Playback", AC97_MIC, ac97)) < 0) + return err; + if ((err = snd_ctl_add(card, snd_ac97_cnew(&snd_ac97_controls_mic_boost, ac97))) < 0) + return err; + } } /* build Line controls */ @@ -1402,12 +1406,14 @@ static int snd_ac97_mixer_build(ac97_t * ac97) } snd_ac97_write_cache(ac97, AC97_PCM, init_val); } else { - if (ac97->flags & AC97_HAS_NO_PCM_VOL) - err = snd_ac97_cmute_new(card, "PCM Playback Switch", AC97_PCM, ac97); - else - err = snd_ac97_cmix_new(card, "PCM Playback", AC97_PCM, ac97); - if (err < 0) - return err; + if (!(ac97->flags & AC97_HAS_NO_STD_PCM)) { + if (ac97->flags & AC97_HAS_NO_PCM_VOL) + err = snd_ac97_cmute_new(card, "PCM Playback Switch", AC97_PCM, ac97); + else + err = snd_ac97_cmix_new(card, "PCM Playback", AC97_PCM, ac97); + if (err < 0) + return err; + } } /* build Capture controls */ diff --git a/sound/pci/ac97/ac97_patch.c b/sound/pci/ac97/ac97_patch.c index 66edc857d3e6..d4bb99fc896c 100644 --- a/sound/pci/ac97/ac97_patch.c +++ b/sound/pci/ac97/ac97_patch.c @@ -370,141 +370,387 @@ int patch_yamaha_ymf753(ac97_t * ac97) * added support for WM9705,WM9708,WM9709,WM9710,WM9711,WM9712 and WM9717. 
*/ -int patch_wolfson03(ac97_t * ac97) +static const snd_kcontrol_new_t wm97xx_snd_ac97_controls[] = { +AC97_DOUBLE("Front Playback Volume", AC97_WM97XX_FMIXER_VOL, 8, 0, 31, 1), +AC97_SINGLE("Front Playback Switch", AC97_WM97XX_FMIXER_VOL, 15, 1, 1), +}; + +int patch_wolfson_wm9703_specific(ac97_t * ac97) { /* This is known to work for the ViewSonic ViewPad 1000 - Randolph Bentson */ - - // WM9703/9707/9708/9717 - snd_ac97_write_cache(ac97, AC97_WM97XX_FMIXER_VOL, 0x0808); - snd_ac97_write_cache(ac97, AC97_GENERAL_PURPOSE, 0x8000); + * Randolph Bentson + * WM9703/9707/9708/9717 + */ + int err, i; + + for (i = 0; i < ARRAY_SIZE(wm97xx_snd_ac97_controls); i++) { + if ((err = snd_ctl_add(ac97->bus->card, snd_ac97_cnew(&wm97xx_snd_ac97_controls[i], ac97))) < 0) + return err; + } + snd_ac97_write_cache(ac97, AC97_WM97XX_FMIXER_VOL, 0x0808); return 0; } - + +static struct snd_ac97_build_ops patch_wolfson_wm9703_ops = { + .build_specific = patch_wolfson_wm9703_specific, +}; + +int patch_wolfson03(ac97_t * ac97) +{ + ac97->build_ops = &patch_wolfson_wm9703_ops; + return 0; +} + +static const snd_kcontrol_new_t wm9704_snd_ac97_controls[] = { +AC97_DOUBLE("Front Playback Volume", AC97_WM97XX_FMIXER_VOL, 8, 0, 31, 1), +AC97_SINGLE("Front Playback Switch", AC97_WM97XX_FMIXER_VOL, 15, 1, 1), +AC97_DOUBLE("Rear Playback Volume", AC97_WM9704_RMIXER_VOL, 8, 0, 31, 1), +AC97_SINGLE("Rear Playback Switch", AC97_WM9704_RMIXER_VOL, 15, 1, 1), +AC97_DOUBLE("Rear DAC Volume", AC97_WM9704_RPCM_VOL, 8, 0, 31, 1), +AC97_DOUBLE("Surround Volume", AC97_SURROUND_MASTER, 8, 0, 31, 1), +}; + +int patch_wolfson_wm9704_specific(ac97_t * ac97) +{ + int err, i; + for (i = 0; i < ARRAY_SIZE(wm9704_snd_ac97_controls); i++) { + if ((err = snd_ctl_add(ac97->bus->card, snd_ac97_cnew(&wm9704_snd_ac97_controls[i], ac97))) < 0) + return err; + } + /* patch for DVD noise */ + snd_ac97_write_cache(ac97, AC97_WM9704_TEST, 0x0200); + return 0; +} + +static struct snd_ac97_build_ops patch_wolfson_wm9704_ops = 
{ + .build_specific = patch_wolfson_wm9704_specific, +}; + int patch_wolfson04(ac97_t * ac97) { - // WM9704M/9704Q - // set front and rear mixer volume - snd_ac97_write_cache(ac97, AC97_WM97XX_FMIXER_VOL, 0x0808); - snd_ac97_write_cache(ac97, AC97_WM9704_RMIXER_VOL, 0x0808); - - // patch for DVD noise - snd_ac97_write_cache(ac97, AC97_WM9704_TEST, 0x0200); - - // init vol - snd_ac97_write_cache(ac97, AC97_WM9704_RPCM_VOL, 0x0808); - - // set rear surround volume - snd_ac97_write_cache(ac97, AC97_SURROUND_MASTER, 0x0000); + /* WM9704M/9704Q */ + ac97->build_ops = &patch_wolfson_wm9704_ops; return 0; } - + +int patch_wolfson_wm9705_specific(ac97_t * ac97) +{ + int err, i; + for (i = 0; i < ARRAY_SIZE(wm97xx_snd_ac97_controls); i++) { + if ((err = snd_ctl_add(ac97->bus->card, snd_ac97_cnew(&wm97xx_snd_ac97_controls[i], ac97))) < 0) + return err; + } + snd_ac97_write_cache(ac97, 0x72, 0x0808); + return 0; +} + +static struct snd_ac97_build_ops patch_wolfson_wm9705_ops = { + .build_specific = patch_wolfson_wm9705_specific, +}; + int patch_wolfson05(ac97_t * ac97) { - // WM9705, WM9710 - // set front mixer volume - snd_ac97_write_cache(ac97, AC97_WM97XX_FMIXER_VOL, 0x0808); + /* WM9705, WM9710 */ + ac97->build_ops = &patch_wolfson_wm9705_ops; return 0; } +static const char* wm9711_alc_select[] = {"None", "Left", "Right", "Stereo"}; +static const char* wm9711_alc_mix[] = {"Stereo", "Right", "Left", "None"}; +static const char* wm9711_out3_src[] = {"Left", "VREF", "Left + Right", "Mono"}; +static const char* wm9711_out3_lrsrc[] = {"Master Mix", "Headphone Mix"}; +static const char* wm9711_rec_adc[] = {"Stereo", "Left", "Right", "Mute"}; +static const char* wm9711_base[] = {"Linear Control", "Adaptive Boost"}; +static const char* wm9711_rec_gain[] = {"+1.5dB Steps", "+0.75dB Steps"}; +static const char* wm9711_mic[] = {"Mic 1", "Differential", "Mic 2", "Stereo"}; +static const char* wm9711_rec_sel[] = + {"Mic 1", "NC", "NC", "Master Mix", "Line", "Headphone Mix", "Phone 
Mix", "Phone"}; +static const char* wm9711_ng_type[] = {"Constant Gain", "Mute"}; + +static const struct ac97_enum wm9711_enum[] = { +AC97_ENUM_SINGLE(AC97_PCI_SVID, 14, 4, wm9711_alc_select), +AC97_ENUM_SINGLE(AC97_VIDEO, 10, 4, wm9711_alc_mix), +AC97_ENUM_SINGLE(AC97_AUX, 9, 4, wm9711_out3_src), +AC97_ENUM_SINGLE(AC97_AUX, 8, 2, wm9711_out3_lrsrc), +AC97_ENUM_SINGLE(AC97_REC_SEL, 12, 4, wm9711_rec_adc), +AC97_ENUM_SINGLE(AC97_MASTER_TONE, 15, 2, wm9711_base), +AC97_ENUM_DOUBLE(AC97_REC_GAIN, 14, 6, 2, wm9711_rec_gain), +AC97_ENUM_SINGLE(AC97_MIC, 5, 4, wm9711_mic), +AC97_ENUM_DOUBLE(AC97_REC_SEL, 8, 0, 8, wm9711_rec_sel), +AC97_ENUM_SINGLE(AC97_PCI_SVID, 5, 2, wm9711_ng_type), +}; + +static const snd_kcontrol_new_t wm9711_snd_ac97_controls[] = { +AC97_SINGLE("ALC Target Volume", AC97_CODEC_CLASS_REV, 12, 15, 0), +AC97_SINGLE("ALC Hold Time", AC97_CODEC_CLASS_REV, 8, 15, 0), +AC97_SINGLE("ALC Decay Time", AC97_CODEC_CLASS_REV, 4, 15, 0), +AC97_SINGLE("ALC Attack Time", AC97_CODEC_CLASS_REV, 0, 15, 0), +AC97_ENUM("ALC Function", wm9711_enum[0]), +AC97_SINGLE("ALC Max Volume", AC97_PCI_SVID, 11, 7, 1), +AC97_SINGLE("ALC ZC Timeout", AC97_PCI_SVID, 9, 3, 1), +AC97_SINGLE("ALC ZC Switch", AC97_PCI_SVID, 8, 1, 0), +AC97_SINGLE("ALC NG Switch", AC97_PCI_SVID, 7, 1, 0), +AC97_ENUM("ALC NG Type", wm9711_enum[9]), +AC97_SINGLE("ALC NG Threshold", AC97_PCI_SVID, 0, 31, 1), + +AC97_SINGLE("Side Tone Switch", AC97_VIDEO, 15, 1, 1), +AC97_SINGLE("Side Tone Volume", AC97_VIDEO, 12, 7, 1), +AC97_ENUM("ALC Headphone Mux", wm9711_enum[1]), +AC97_SINGLE("ALC Headphone Volume", AC97_VIDEO, 7, 7, 1), + +AC97_SINGLE("Out3 Switch", AC97_AUX, 15, 1, 1), +AC97_SINGLE("Out3 ZC Switch", AC97_AUX, 7, 1, 1), +AC97_ENUM("Out3 Mux", wm9711_enum[2]), +AC97_ENUM("Out3 LR Mux", wm9711_enum[3]), +AC97_SINGLE("Out3 Volume", AC97_AUX, 0, 31, 1), + +AC97_SINGLE("Beep to Headphone Switch", AC97_PC_BEEP, 15, 1, 1), +AC97_SINGLE("Beep to Headphone Volume", AC97_PC_BEEP, 12, 7, 1), +AC97_SINGLE("Beep to 
Side Tone Switch", AC97_PC_BEEP, 11, 1, 1), +AC97_SINGLE("Beep to Side Tone Volume", AC97_PC_BEEP, 8, 7, 1), +AC97_SINGLE("Beep to Phone Switch", AC97_PC_BEEP, 7, 1, 1), +AC97_SINGLE("Beep to Phone Volume", AC97_PC_BEEP, 4, 7, 1), + +AC97_SINGLE("Aux to Headphone Switch", AC97_CD, 15, 1, 1), +AC97_SINGLE("Aux to Headphone Volume", AC97_CD, 12, 7, 1), +AC97_SINGLE("Aux to Side Tone Switch", AC97_CD, 11, 1, 1), +AC97_SINGLE("Aux to Side Tone Volume", AC97_CD, 8, 7, 1), +AC97_SINGLE("Aux to Phone Switch", AC97_CD, 7, 1, 1), +AC97_SINGLE("Aux to Phone Volume", AC97_CD, 4, 7, 1), + +AC97_SINGLE("Phone to Headphone Switch", AC97_PHONE, 15, 1, 1), +AC97_SINGLE("Phone to Master Switch", AC97_PHONE, 14, 1, 1), + +AC97_SINGLE("Line to Headphone Switch", AC97_LINE, 15, 1, 1), +AC97_SINGLE("Line to Master Switch", AC97_LINE, 14, 1, 1), +AC97_SINGLE("Line to Phone Switch", AC97_LINE, 13, 1, 1), + +AC97_SINGLE("PCM Playback to Headphone Switch", AC97_PCM, 15, 1, 1), +AC97_SINGLE("PCM Playback to Master Switch", AC97_PCM, 14, 1, 1), +AC97_SINGLE("PCM Playback to Phone Switch", AC97_PCM, 13, 1, 1), + +AC97_SINGLE("Capture 20dB Boost Switch", AC97_REC_SEL, 14, 1, 0), +AC97_ENUM("Capture to Phone Mux", wm9711_enum[4]), +AC97_SINGLE("Capture to Phone 20dB Boost Switch", AC97_REC_SEL, 11, 1, 1), +AC97_ENUM("Capture Select", wm9711_enum[8]), + +AC97_SINGLE("3D Upper Cut-off Switch", AC97_3D_CONTROL, 5, 1, 1), +AC97_SINGLE("3D Lower Cut-off Switch", AC97_3D_CONTROL, 4, 1, 1), + +AC97_ENUM("Bass Control", wm9711_enum[5]), +AC97_SINGLE("Bass Cut-off Switch", AC97_MASTER_TONE, 12, 1, 1), +AC97_SINGLE("Tone Cut-off Switch", AC97_MASTER_TONE, 4, 1, 1), +AC97_SINGLE("Playback Attenuate (-6dB) Switch", AC97_MASTER_TONE, 6, 1, 0), + +AC97_SINGLE("ADC Switch", AC97_REC_GAIN, 15, 1, 1), +AC97_ENUM("Capture Volume Steps", wm9711_enum[6]), +AC97_DOUBLE("Capture Volume", AC97_REC_GAIN, 8, 0, 15, 1), +AC97_SINGLE("Capture ZC Switch", AC97_REC_GAIN, 7, 1, 0), + +AC97_SINGLE("Mic 1 to Phone Switch", 
AC97_MIC, 14, 1, 1), +AC97_SINGLE("Mic 2 to Phone Switch", AC97_MIC, 13, 1, 1), +AC97_ENUM("Mic Select Source", wm9711_enum[7]), +AC97_SINGLE("Mic 1 Volume", AC97_MIC, 8, 32, 1), +AC97_SINGLE("Mic 20dB Boost Switch", AC97_MIC, 7, 1, 0), + +AC97_SINGLE("Master ZC Switch", AC97_MASTER, 7, 1, 0), +AC97_SINGLE("Headphone ZC Switch", AC97_HEADPHONE, 7, 1, 0), +AC97_SINGLE("Mono ZC Switch", AC97_MASTER_MONO, 7, 1, 0), +}; + +int patch_wolfson_wm9711_specific(ac97_t * ac97) +{ + int err, i; + + for (i = 0; i < ARRAY_SIZE(wm9711_snd_ac97_controls); i++) { + if ((err = snd_ctl_add(ac97->bus->card, snd_ac97_cnew(&wm9711_snd_ac97_controls[i], ac97))) < 0) + return err; + } + snd_ac97_write_cache(ac97, AC97_CODEC_CLASS_REV, 0x0808); + snd_ac97_write_cache(ac97, AC97_PCI_SVID, 0x0808); + snd_ac97_write_cache(ac97, AC97_VIDEO, 0x0808); + snd_ac97_write_cache(ac97, AC97_AUX, 0x0808); + snd_ac97_write_cache(ac97, AC97_PC_BEEP, 0x0808); + snd_ac97_write_cache(ac97, AC97_CD, 0x0000); + return 0; +} + +static struct snd_ac97_build_ops patch_wolfson_wm9711_ops = { + .build_specific = patch_wolfson_wm9711_specific, +}; + int patch_wolfson11(ac97_t * ac97) { - // WM9711, WM9712 - // set out3 volume - snd_ac97_write_cache(ac97, AC97_WM9711_OUT3VOL, 0x0808); + /* WM9711, WM9712 */ + ac97->build_ops = &patch_wolfson_wm9711_ops; + + ac97->flags |= AC97_HAS_NO_REC_GAIN | AC97_STEREO_MUTES | AC97_HAS_NO_MIC | + AC97_HAS_NO_PC_BEEP | AC97_HAS_NO_VIDEO | AC97_HAS_NO_CD; + return 0; } -static const char* wm9713_mic_mixer[] = {"Stereo", "Mic1", "Mic2", "Mute"}; +static const char* wm9713_mic_mixer[] = {"Stereo", "Mic 1", "Mic 2", "Mute"}; static const char* wm9713_rec_mux[] = {"Stereo", "Left", "Right", "Mute"}; -static const char* wm9713_rec_src_l[] = {"Mic1", "Mic2", "Line L", "Mono In", "HP Mix L", "Spk Mix", "Mono Mix", "Zh"}; -static const char* wm9713_rec_src_r[] = {"Mic1", "Mic2", "Line R", "Mono In", "HP Mix R", "Spk Mix", "Mono Mix", "Zh"}; +static const char* wm9713_rec_src[] = + {"Mic 
1", "Mic 2", "Line", "Mono In", "Headphone Mix", "Master Mix", + "Mono Mix", "Zh"}; +static const char* wm9713_rec_gain[] = {"+1.5dB Steps", "+0.75dB Steps"}; +static const char* wm9713_alc_select[] = {"None", "Left", "Right", "Stereo"}; +static const char* wm9713_mono_pga[] = {"Vmid", "Zh", "Mono Mix", "Inv 1"}; +static const char* wm9713_spk_pga[] = + {"Vmid", "Zh", "Headphone Mix", "Master Mix", "Inv", "NC", "NC", "NC"}; +static const char* wm9713_hp_pga[] = {"Vmid", "Zh", "Headphone Mix", "NC"}; +static const char* wm9713_out3_pga[] = {"Vmid", "Zh", "Inv 1", "NC"}; +static const char* wm9713_out4_pga[] = {"Vmid", "Zh", "Inv 2", "NC"}; +static const char* wm9713_dac_inv[] = + {"Off", "Mono Mix", "Master Mix", "Headphone Mix L", "Headphone Mix R", + "Headphone Mix Mono", "NC", "Vmid"}; +static const char* wm9713_base[] = {"Linear Control", "Adaptive Boost"}; +static const char* wm9713_ng_type[] = {"Constant Gain", "Mute"}; static const struct ac97_enum wm9713_enum[] = { AC97_ENUM_SINGLE(AC97_LINE, 3, 4, wm9713_mic_mixer), AC97_ENUM_SINGLE(AC97_VIDEO, 14, 4, wm9713_rec_mux), AC97_ENUM_SINGLE(AC97_VIDEO, 9, 4, wm9713_rec_mux), -AC97_ENUM_SINGLE(AC97_VIDEO, 3, 8, wm9713_rec_src_l), -AC97_ENUM_SINGLE(AC97_VIDEO, 0, 8, wm9713_rec_src_r), +AC97_ENUM_DOUBLE(AC97_VIDEO, 3, 0, 8, wm9713_rec_src), +AC97_ENUM_DOUBLE(AC97_CD, 14, 6, 2, wm9713_rec_gain), +AC97_ENUM_SINGLE(AC97_PCI_SVID, 14, 4, wm9713_alc_select), +AC97_ENUM_SINGLE(AC97_REC_GAIN, 14, 4, wm9713_mono_pga), +AC97_ENUM_DOUBLE(AC97_REC_GAIN, 11, 8, 8, wm9713_spk_pga), +AC97_ENUM_DOUBLE(AC97_REC_GAIN, 6, 4, 4, wm9713_hp_pga), +AC97_ENUM_SINGLE(AC97_REC_GAIN, 2, 4, wm9713_out3_pga), +AC97_ENUM_SINGLE(AC97_REC_GAIN, 0, 4, wm9713_out4_pga), +AC97_ENUM_DOUBLE(AC97_REC_GAIN_MIC, 13, 10, 8, wm9713_dac_inv), +AC97_ENUM_SINGLE(AC97_GENERAL_PURPOSE, 15, 2, wm9713_base), +AC97_ENUM_SINGLE(AC97_PCI_SVID, 5, 2, wm9713_ng_type), }; -static const snd_kcontrol_new_t wm13_snd_ac97_controls_line_in[] = { +static const 
snd_kcontrol_new_t wm13_snd_ac97_controls[] = { AC97_DOUBLE("Line In Volume", AC97_PC_BEEP, 8, 0, 31, 1), -AC97_SINGLE("Line In to Headphone Mute", AC97_PC_BEEP, 15, 1, 1), -AC97_SINGLE("Line In to Speaker Mute", AC97_PC_BEEP, 14, 1, 1), -AC97_SINGLE("Line In to Mono Mute", AC97_PC_BEEP, 13, 1, 1), +AC97_SINGLE("Line In to Headphone Switch", AC97_PC_BEEP, 15, 1, 1), +AC97_SINGLE("Line In to Master Switch", AC97_PC_BEEP, 14, 1, 1), +AC97_SINGLE("Line In to Mono Switch", AC97_PC_BEEP, 13, 1, 1), + +AC97_DOUBLE("PCM Playback Volume", AC97_PHONE, 8, 0, 31, 1), +AC97_SINGLE("PCM Playback to Headphone Switch", AC97_PHONE, 15, 1, 1), +AC97_SINGLE("PCM Playback to Master Switch", AC97_PHONE, 14, 1, 1), +AC97_SINGLE("PCM Playback to Mono Switch", AC97_PHONE, 13, 1, 1), + +AC97_SINGLE("Mic 1 Volume", AC97_MIC, 8, 31, 1), +AC97_SINGLE("Mic 2 Volume", AC97_MIC, 0, 31, 1), +AC97_SINGLE("Mic 1 to Mono Switch", AC97_LINE, 7, 1, 1), +AC97_SINGLE("Mic 2 to Mono Switch", AC97_LINE, 6, 1, 1), +AC97_SINGLE("Mic Boost (+20dB) Switch", AC97_LINE, 5, 1, 0), +AC97_ENUM("Mic to Headphone Mux", wm9713_enum[0]), +AC97_SINGLE("Mic Headphone Mixer Volume", AC97_LINE, 0, 7, 1), + +AC97_SINGLE("Capture Switch", AC97_CD, 15, 1, 1), +AC97_ENUM("Capture Volume Steps", wm9713_enum[4]), +AC97_DOUBLE("Capture Volume", AC97_CD, 8, 0, 15, 0), +AC97_SINGLE("Capture ZC Switch", AC97_CD, 7, 1, 0), + +AC97_ENUM("Capture to Headphone Mux", wm9713_enum[1]), +AC97_SINGLE("Capture to Headphone Volume", AC97_VIDEO, 11, 7, 1), +AC97_ENUM("Capture to Mono Mux", wm9713_enum[2]), +AC97_SINGLE("Capture to Mono Boost (+20dB) Switch", AC97_VIDEO, 8, 1, 0), +AC97_SINGLE("Capture ADC Boost (+20dB) Switch", AC97_VIDEO, 6, 1, 0), +AC97_ENUM("Capture Select", wm9713_enum[3]), + +AC97_SINGLE("ALC Target Volume", AC97_CODEC_CLASS_REV, 12, 15, 0), +AC97_SINGLE("ALC Hold Time", AC97_CODEC_CLASS_REV, 8, 15, 0), +AC97_SINGLE("ALC Decay Time ", AC97_CODEC_CLASS_REV, 4, 15, 0), +AC97_SINGLE("ALC Attack Time", AC97_CODEC_CLASS_REV, 
0, 15, 0), +AC97_ENUM("ALC Function", wm9713_enum[5]), +AC97_SINGLE("ALC Max Volume", AC97_PCI_SVID, 11, 7, 0), +AC97_SINGLE("ALC ZC Timeout", AC97_PCI_SVID, 9, 3, 0), +AC97_SINGLE("ALC ZC Switch", AC97_PCI_SVID, 8, 1, 0), +AC97_SINGLE("ALC NG Switch", AC97_PCI_SVID, 7, 1, 0), +AC97_ENUM("ALC NG Type", wm9713_enum[13]), +AC97_SINGLE("ALC NG Threshold", AC97_PCI_SVID, 0, 31, 0), + +AC97_DOUBLE("Master ZC Switch", AC97_MASTER, 14, 6, 1, 0), +AC97_DOUBLE("Headphone ZC Switch", AC97_HEADPHONE, 14, 6, 1, 0), +AC97_DOUBLE("Out3/4 ZC Switch", AC97_MASTER_MONO, 14, 6, 1, 0), +AC97_SINGLE("Master Right Switch", AC97_MASTER, 7, 1, 1), +AC97_SINGLE("Headphone Right Switch", AC97_HEADPHONE, 7, 1, 1), +AC97_SINGLE("Out3/4 Right Switch", AC97_MASTER_MONO, 7, 1, 1), + +AC97_SINGLE("Mono In to Headphone Switch", AC97_MASTER_TONE, 15, 1, 1), +AC97_SINGLE("Mono In to Master Switch", AC97_MASTER_TONE, 14, 1, 1), +AC97_SINGLE("Mono In Volume", AC97_MASTER_TONE, 8, 31, 1), +AC97_SINGLE("Mono Switch", AC97_MASTER_TONE, 7, 1, 1), +AC97_SINGLE("Mono ZC Switch", AC97_MASTER_TONE, 6, 1, 0), +AC97_SINGLE("Mono Volume", AC97_MASTER_TONE, 0, 31, 1), + +AC97_SINGLE("PC Beep to Headphone Switch", AC97_AUX, 15, 1, 1), +AC97_SINGLE("PC Beep to Headphone Volume", AC97_AUX, 12, 7, 1), +AC97_SINGLE("PC Beep to Master Switch", AC97_AUX, 11, 1, 1), +AC97_SINGLE("PC Beep to Master Volume", AC97_AUX, 8, 7, 1), +AC97_SINGLE("PC Beep to Mono Switch", AC97_AUX, 7, 1, 1), +AC97_SINGLE("PC Beep to Mono Volume", AC97_AUX, 4, 7, 1), + +AC97_SINGLE("Voice to Headphone Switch", AC97_PCM, 15, 1, 1), +AC97_SINGLE("Voice to Headphone Volume", AC97_PCM, 12, 7, 1), +AC97_SINGLE("Voice to Master Switch", AC97_PCM, 11, 1, 1), +AC97_SINGLE("Voice to Master Volume", AC97_PCM, 8, 7, 1), +AC97_SINGLE("Voice to Mono Switch", AC97_PCM, 7, 1, 1), +AC97_SINGLE("Voice to Mono Volume", AC97_PCM, 4, 7, 1), + +AC97_SINGLE("Aux to Headphone Switch", AC97_REC_SEL, 15, 1, 1), +AC97_SINGLE("Aux to Headphone Volume", AC97_REC_SEL, 12, 
7, 1), +AC97_SINGLE("Aux to Master Switch", AC97_REC_SEL, 11, 1, 1), +AC97_SINGLE("Aux to Master Volume", AC97_REC_SEL, 8, 7, 1), +AC97_SINGLE("Aux to Mono Switch", AC97_REC_SEL, 7, 1, 1), +AC97_SINGLE("Aux to Mono Volume", AC97_REC_SEL, 4, 7, 1), + +AC97_ENUM("Mono Input Mux", wm9713_enum[6]), +AC97_ENUM("Master Input Mux", wm9713_enum[7]), +AC97_ENUM("Headphone Input Mux", wm9713_enum[8]), +AC97_ENUM("Out 3 Input Mux", wm9713_enum[9]), +AC97_ENUM("Out 4 Input Mux", wm9713_enum[10]), + +AC97_ENUM("Bass Control", wm9713_enum[12]), +AC97_SINGLE("Bass Cut-off Switch", AC97_GENERAL_PURPOSE, 12, 1, 1), +AC97_SINGLE("Tone Cut-off Switch", AC97_GENERAL_PURPOSE, 4, 1, 1), +AC97_SINGLE("Playback Attenuate (-6dB) Switch", AC97_GENERAL_PURPOSE, 6, 1, 0), +AC97_SINGLE("Bass Volume", AC97_GENERAL_PURPOSE, 8, 15, 1), +AC97_SINGLE("Tone Volume", AC97_GENERAL_PURPOSE, 0, 15, 1), }; -static const snd_kcontrol_new_t wm13_snd_ac97_controls_dac[] = { -AC97_DOUBLE("DAC Volume", AC97_PHONE, 8, 0, 31, 1), -AC97_SINGLE("DAC to Headphone Mute", AC97_PHONE, 15, 1, 1), -AC97_SINGLE("DAC to Speaker Mute", AC97_PHONE, 14, 1, 1), -AC97_SINGLE("DAC to Mono Mute", AC97_PHONE, 13, 1, 1), +static const snd_kcontrol_new_t wm13_snd_ac97_controls_3d[] = { +AC97_ENUM("Inv Input Mux", wm9713_enum[11]), +AC97_SINGLE("3D Upper Cut-off Switch", AC97_REC_GAIN_MIC, 5, 1, 0), +AC97_SINGLE("3D Lower Cut-off Switch", AC97_REC_GAIN_MIC, 4, 1, 0), +AC97_SINGLE("3D Depth", AC97_REC_GAIN_MIC, 0, 15, 1), }; -static const snd_kcontrol_new_t wm13_snd_ac97_controls_mic[] = { -AC97_SINGLE("MICA Volume", AC97_MIC, 8, 31, 1), -AC97_SINGLE("MICB Volume", AC97_MIC, 0, 31, 1), -AC97_SINGLE("MICA to Mono Mute", AC97_LINE, 7, 1, 1), -AC97_SINGLE("MICB to Mono Mute", AC97_LINE, 6, 1, 1), -AC97_SINGLE("MIC Boost (+20dB)", AC97_LINE, 5, 1, 1), -AC97_ENUM("MIC Headphone Routing", wm9713_enum[0]), -AC97_SINGLE("MIC Headphone Mixer Volume", AC97_LINE, 0, 7, 1) -}; - -static const snd_kcontrol_new_t wm13_snd_ac97_controls_adc[] = { 
-AC97_SINGLE("ADC Mute", AC97_CD, 15, 1, 1), -AC97_DOUBLE("Gain Step Size (1.5dB/0.75dB)", AC97_CD, 14, 6, 1, 1), -AC97_DOUBLE("ADC Volume",AC97_CD, 8, 0, 15, 0), -AC97_SINGLE("ADC Zero Cross", AC97_CD, 7, 1, 1), -}; - -static const snd_kcontrol_new_t wm13_snd_ac97_controls_recsel[] = { -AC97_ENUM("Record to Headphone Path", wm9713_enum[1]), -AC97_SINGLE("Record to Headphone Volume", AC97_VIDEO, 11, 7, 0), -AC97_ENUM("Record to Mono Path", wm9713_enum[2]), -AC97_SINGLE("Record to Mono Boost (+20dB)", AC97_VIDEO, 8, 1, 0), -AC97_SINGLE("Record ADC Boost (+20dB)", AC97_VIDEO, 6, 1, 0), -AC97_ENUM("Record Select Left", wm9713_enum[3]), -AC97_ENUM("Record Select Right", wm9713_enum[4]), -}; +static int patch_wolfson_wm9713_3d (ac97_t * ac97) +{ + int err, i; + + for (i = 0; i < ARRAY_SIZE(wm13_snd_ac97_controls_3d); i++) { + if ((err = snd_ctl_add(ac97->bus->card, snd_ac97_cnew(&wm13_snd_ac97_controls_3d[i], ac97))) < 0) + return err; + } + return 0; +} static int patch_wolfson_wm9713_specific(ac97_t * ac97) { int err, i; - for (i = 0; i < ARRAY_SIZE(wm13_snd_ac97_controls_line_in); i++) { - if ((err = snd_ctl_add(ac97->bus->card, snd_ac97_cnew(&wm13_snd_ac97_controls_line_in[i], ac97))) < 0) + for (i = 0; i < ARRAY_SIZE(wm13_snd_ac97_controls); i++) { + if ((err = snd_ctl_add(ac97->bus->card, snd_ac97_cnew(&wm13_snd_ac97_controls[i], ac97))) < 0) return err; } snd_ac97_write_cache(ac97, AC97_PC_BEEP, 0x0808); - - for (i = 0; i < ARRAY_SIZE(wm13_snd_ac97_controls_dac); i++) { - if ((err = snd_ctl_add(ac97->bus->card, snd_ac97_cnew(&wm13_snd_ac97_controls_dac[i], ac97))) < 0) - return err; - } snd_ac97_write_cache(ac97, AC97_PHONE, 0x0808); - - for (i = 0; i < ARRAY_SIZE(wm13_snd_ac97_controls_mic); i++) { - if ((err = snd_ctl_add(ac97->bus->card, snd_ac97_cnew(&wm13_snd_ac97_controls_mic[i], ac97))) < 0) - return err; - } snd_ac97_write_cache(ac97, AC97_MIC, 0x0808); snd_ac97_write_cache(ac97, AC97_LINE, 0x00da); - - for (i = 0; i < 
ARRAY_SIZE(wm13_snd_ac97_controls_adc); i++) { - if ((err = snd_ctl_add(ac97->bus->card, snd_ac97_cnew(&wm13_snd_ac97_controls_adc[i], ac97))) < 0) - return err; - } snd_ac97_write_cache(ac97, AC97_CD, 0x0808); - - for (i = 0; i < ARRAY_SIZE(wm13_snd_ac97_controls_recsel); i++) { - if ((err = snd_ctl_add(ac97->bus->card, snd_ac97_cnew(&wm13_snd_ac97_controls_recsel[i], ac97))) < 0) - return err; - } snd_ac97_write_cache(ac97, AC97_VIDEO, 0xd612); snd_ac97_write_cache(ac97, AC97_REC_GAIN, 0x1ba0); - return 0; } @@ -525,6 +771,7 @@ static void patch_wolfson_wm9713_resume (ac97_t * ac97) static struct snd_ac97_build_ops patch_wolfson_wm9713_ops = { .build_specific = patch_wolfson_wm9713_specific, + .build_3d = patch_wolfson_wm9713_3d, #ifdef CONFIG_PM .suspend = patch_wolfson_wm9713_suspend, .resume = patch_wolfson_wm9713_resume @@ -533,10 +780,12 @@ static struct snd_ac97_build_ops patch_wolfson_wm9713_ops = { int patch_wolfson13(ac97_t * ac97) { + /* WM9713, WM9714 */ ac97->build_ops = &patch_wolfson_wm9713_ops; ac97->flags |= AC97_HAS_NO_REC_GAIN | AC97_STEREO_MUTES | AC97_HAS_NO_PHONE | - AC97_HAS_NO_PC_BEEP | AC97_HAS_NO_VIDEO | AC97_HAS_NO_CD; + AC97_HAS_NO_PC_BEEP | AC97_HAS_NO_VIDEO | AC97_HAS_NO_CD | AC97_HAS_NO_TONE | + AC97_HAS_NO_STD_PCM; snd_ac97_write_cache(ac97, AC97_EXTENDED_MID, 0xda00); snd_ac97_write_cache(ac97, AC97_EXTENDED_MSTATUS, 0x3810); From cb8e2f83851ff17b1b361644e82420a923fbc318 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Fri, 29 Jul 2005 11:54:32 +0200 Subject: [PATCH 510/584] [ALSA] hda-codec - Fix reordering of surround channels HDA Codec driver - Fixed the reordering of surround channels. Originally reported by Nicolas GRAZIANO . - Show the selected ssid when debug option is set. 
Signed-off-by: Takashi Iwai --- sound/pci/hda/hda_codec.c | 23 ++++++++++++++++++++--- 1 file changed, 20 insertions(+), 3 deletions(-) diff --git a/sound/pci/hda/hda_codec.c b/sound/pci/hda/hda_codec.c index e2cf02387289..d3ac7530ec89 100644 --- a/sound/pci/hda/hda_codec.c +++ b/sound/pci/hda/hda_codec.c @@ -1541,8 +1541,11 @@ int snd_hda_check_board_config(struct hda_codec *codec, const struct hda_board_c for (c = tbl; c->modelname || c->pci_subvendor; c++) { if (c->pci_subvendor == subsystem_vendor && (! c->pci_subdevice /* all match */|| - (c->pci_subdevice == subsystem_device))) + (c->pci_subdevice == subsystem_device))) { + snd_printdd(KERN_INFO "hda_codec: PCI %x:%x, codec config %d is selected\n", + subsystem_vendor, subsystem_device, c->config); return c->config; + } } } return -1; @@ -1803,11 +1806,25 @@ int snd_hda_parse_pin_def_config(struct hda_codec *codec, struct auto_pin_cfg *c cfg->line_out_pins[j] = nid; } - /* Swap surround and CLFE: the association order is front/CLFE/surr/back */ - if (cfg->line_outs >= 3) { + /* Reorder the surround channels + * ALSA sequence is front/surr/clfe/side + * HDA sequence is: + * 4-ch: front/surr => OK as it is + * 6-ch: front/clfe/surr + * 8-ch: front/clfe/side/surr + */ + switch (cfg->line_outs) { + case 3: nid = cfg->line_out_pins[1]; cfg->line_out_pins[1] = cfg->line_out_pins[2]; cfg->line_out_pins[2] = nid; + break; + case 4: + nid = cfg->line_out_pins[1]; + cfg->line_out_pins[1] = cfg->line_out_pins[3]; + cfg->line_out_pins[3] = cfg->line_out_pins[2]; + cfg->line_out_pins[2] = nid; + break; } return 0; From d827560c96346e0b8b02c1000cc347d73488c818 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Fri, 29 Jul 2005 11:56:41 +0200 Subject: [PATCH 511/584] [ALSA] hda-codec - Add default config for ASUS P5AD2 HDA Codec driver Added the default config for ASUS P5AD2. 
Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_cmedia.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/patch_cmedia.c b/sound/pci/hda/patch_cmedia.c index 86f195f19eef..07fb4f5a54b3 100644 --- a/sound/pci/hda/patch_cmedia.c +++ b/sound/pci/hda/patch_cmedia.c @@ -647,6 +647,7 @@ static struct hda_board_config cmi9880_cfg_tbl[] = { { .modelname = "min_fp", .config = CMI_MIN_FP }, { .modelname = "full", .config = CMI_FULL }, { .modelname = "full_dig", .config = CMI_FULL_DIG }, + { .pci_subvendor = 0x1043, .pci_subdevice = 0x813d, .config = CMI_FULL_DIG }, /* ASUS P5AD2 */ { .modelname = "allout", .config = CMI_ALLOUT }, { .modelname = "auto", .config = CMI_AUTO }, {} /* terminator */ From b9f5a89c74e541533766dcda55d34a06253f60f3 Mon Sep 17 00:00:00 2001 From: Nicolas Graziano Date: Fri, 29 Jul 2005 12:17:20 +0200 Subject: [PATCH 512/584] [ALSA] hda-codec - Fix LFE volume/switch HDA Codec driver Fixed LFE volume/switch control. Signed-off-by: Nicolas Graziano Signed-off-by: Takashi Iwai --- sound/pci/hda/hda_codec.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/sound/pci/hda/hda_codec.c b/sound/pci/hda/hda_codec.c index d3ac7530ec89..026ae726d875 100644 --- a/sound/pci/hda/hda_codec.c +++ b/sound/pci/hda/hda_codec.c @@ -749,12 +749,14 @@ int snd_hda_mixer_amp_volume_put(snd_kcontrol_t *kcontrol, snd_ctl_elem_value_t long *valp = ucontrol->value.integer.value; int change = 0; - if (chs & 1) + if (chs & 1) { change = snd_hda_codec_amp_update(codec, nid, 0, dir, idx, 0x7f, *valp); + valp++; + } if (chs & 2) change |= snd_hda_codec_amp_update(codec, nid, 1, dir, idx, - 0x7f, valp[1]); + 0x7f, *valp); return change; } @@ -796,12 +798,15 @@ int snd_hda_mixer_amp_switch_put(snd_kcontrol_t *kcontrol, snd_ctl_elem_value_t long *valp = ucontrol->value.integer.value; int change = 0; - if (chs & 1) + if (chs & 1) { change = snd_hda_codec_amp_update(codec, nid, 0, dir, idx, 0x80, *valp ? 
0 : 0x80); + valp++; + } if (chs & 2) change |= snd_hda_codec_amp_update(codec, nid, 1, dir, idx, - 0x80, valp[1] ? 0 : 0x80); + 0x80, *valp ? 0 : 0x80); + return change; } From 42dfd0adab83c95890e562623264999914771763 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Fri, 29 Jul 2005 14:18:39 +0200 Subject: [PATCH 513/584] [ALSA] via82xx - Add dxs entry for Acer Aspire 1524 WMLi VIA82xx driver Added the dxs entry for Acer Aspire 1524 WMLi. Signed-off-by: Takashi Iwai --- sound/pci/via82xx.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/via82xx.c b/sound/pci/via82xx.c index 4889600387c8..6e1c29f5799a 100644 --- a/sound/pci/via82xx.c +++ b/sound/pci/via82xx.c @@ -2153,6 +2153,7 @@ static int __devinit check_dxs_list(struct pci_dev *pci) { .subvendor = 0x1019, .subdevice = 0x0a81, .action = VIA_DXS_NO_VRA }, /* ECS K7VTA3 v8.0 */ { .subvendor = 0x1019, .subdevice = 0x0a85, .action = VIA_DXS_NO_VRA }, /* ECS L7VMM2 */ { .subvendor = 0x1025, .subdevice = 0x0033, .action = VIA_DXS_NO_VRA }, /* Acer Inspire 1353LM */ + { .subvendor = 0x1025, .subdevice = 0x0046, .action = VIA_DXS_SRC }, /* Acer Aspire 1524 WMLi */ { .subvendor = 0x1043, .subdevice = 0x8095, .action = VIA_DXS_NO_VRA }, /* ASUS A7V8X (FIXME: possibly VIA_DXS_ENABLE?)*/ { .subvendor = 0x1043, .subdevice = 0x80a1, .action = VIA_DXS_NO_VRA }, /* ASUS A7V8-X */ { .subvendor = 0x1043, .subdevice = 0x80b0, .action = VIA_DXS_NO_VRA }, /* ASUS A7V600 & K8V*/ From 67ed4161f60d76b3b27efc085b36357a6a90768c Mon Sep 17 00:00:00 2001 From: Clemens Ladisch Date: Fri, 29 Jul 2005 15:32:58 +0200 Subject: [PATCH 514/584] [ALSA] sound - fix .iface field of mixer control elements Documentation,CS46xx driver,EMU10K1/EMU10K2 driver,AD1848 driver SB16/AWE driver,CMIPCI driver,ENS1370/1+ driver,RME32 driver RME96 driver,ICE1712 driver,ICE1724 driver,KORG1212 driver RME HDSP driver,RME9652 driver This patch changes .iface to SNDRV_CTL_ELEM_IFACE_MIXER where _PCM or _HWDEP was used in controls that are not associated
with a specific PCM (sub)stream or hwdep device, and changes some controls that got inconsistent .iface values due to copy+paste errors. Furthermore, it makes sure that all controls that do use _PCM or _HWDEP use the correct number in the .device field. Signed-off-by: Clemens Ladisch --- .../alsa/DocBook/writing-an-alsa-driver.tmpl | 15 +++-- include/sound/cs46xx.h | 2 +- include/sound/emu10k1.h | 2 +- sound/isa/ad1848/ad1848_lib.c | 1 + sound/isa/sb/sb16_main.c | 2 +- sound/pci/cmipci.c | 2 +- sound/pci/cs46xx/cs46xx.c | 2 +- sound/pci/cs46xx/cs46xx_lib.c | 4 +- sound/pci/emu10k1/emu10k1.c | 2 +- sound/pci/emu10k1/emumixer.c | 15 +++-- sound/pci/emu10k1/emupcm.c | 7 ++- sound/pci/ens1370.c | 6 +- sound/pci/ice1712/delta.c | 10 ++-- sound/pci/ice1712/ice1712.c | 4 +- sound/pci/ice1712/ice1724.c | 4 +- sound/pci/korg1212/korg1212.c | 4 +- sound/pci/rme32.c | 4 +- sound/pci/rme96.c | 4 +- sound/pci/rme9652/hdsp.c | 55 +++++++++---------- sound/pci/rme9652/hdspm.c | 27 ++++----- sound/pci/rme9652/rme9652.c | 24 ++++---- 21 files changed, 109 insertions(+), 87 deletions(-) diff --git a/Documentation/sound/alsa/DocBook/writing-an-alsa-driver.tmpl b/Documentation/sound/alsa/DocBook/writing-an-alsa-driver.tmpl index db0b7d2dc477..0475478c2484 100644 --- a/Documentation/sound/alsa/DocBook/writing-an-alsa-driver.tmpl +++ b/Documentation/sound/alsa/DocBook/writing-an-alsa-driver.tmpl @@ -3422,10 +3422,17 @@ struct _snd_pcm_runtime { The iface field specifies the type of - the control, - SNDRV_CTL_ELEM_IFACE_XXX. There are - MIXER, PCM, - CARD, etc. + the control, SNDRV_CTL_ELEM_IFACE_XXX, which + is usually MIXER. + Use CARD for global controls that are not + logically part of the mixer. + If the control is closely associated with some specific device on + the sound card, use HWDEP, + PCM, RAWMIDI, + TIMER, or SEQUENCER, and + specify the device number with the + device and + subdevice fields.
diff --git a/include/sound/cs46xx.h b/include/sound/cs46xx.h index 182dd276ee74..9b94510eda60 100644 --- a/include/sound/cs46xx.h +++ b/include/sound/cs46xx.h @@ -1748,7 +1748,7 @@ int snd_cs46xx_pcm(cs46xx_t *chip, int device, snd_pcm_t **rpcm); int snd_cs46xx_pcm_rear(cs46xx_t *chip, int device, snd_pcm_t **rpcm); int snd_cs46xx_pcm_iec958(cs46xx_t *chip, int device, snd_pcm_t **rpcm); int snd_cs46xx_pcm_center_lfe(cs46xx_t *chip, int device, snd_pcm_t **rpcm); -int snd_cs46xx_mixer(cs46xx_t *chip); +int snd_cs46xx_mixer(cs46xx_t *chip, int spdif_device); int snd_cs46xx_midi(cs46xx_t *chip, int device, snd_rawmidi_t **rmidi); int snd_cs46xx_start_dsp(cs46xx_t *chip); int snd_cs46xx_gameport(cs46xx_t *chip); diff --git a/include/sound/emu10k1.h b/include/sound/emu10k1.h index c2ef3f023687..4e3993dfcefe 100644 --- a/include/sound/emu10k1.h +++ b/include/sound/emu10k1.h @@ -1178,7 +1178,7 @@ int snd_p16v_free(emu10k1_t * emu); int snd_p16v_mixer(emu10k1_t * emu); int snd_emu10k1_pcm_multi(emu10k1_t * emu, int device, snd_pcm_t ** rpcm); int snd_emu10k1_fx8010_pcm(emu10k1_t * emu, int device, snd_pcm_t ** rpcm); -int snd_emu10k1_mixer(emu10k1_t * emu); +int snd_emu10k1_mixer(emu10k1_t * emu, int pcm_device, int multi_device); int snd_emu10k1_timer(emu10k1_t * emu, int device); int snd_emu10k1_fx8010_new(emu10k1_t *emu, int device, snd_hwdep_t ** rhwdep); diff --git a/sound/isa/ad1848/ad1848_lib.c b/sound/isa/ad1848/ad1848_lib.c index 8fb3db103e48..bc642dc94547 100644 --- a/sound/isa/ad1848/ad1848_lib.c +++ b/sound/isa/ad1848/ad1848_lib.c @@ -1196,6 +1196,7 @@ int snd_ad1848_add_ctl(ad1848_t *chip, const char *name, int index, int type, un .put = snd_ad1848_put_double, }, [AD1848_MIX_CAPTURE] = { + .iface = SNDRV_CTL_ELEM_IFACE_MIXER, .info = snd_ad1848_info_mux, .get = snd_ad1848_get_mux, .put = snd_ad1848_put_mux, diff --git a/sound/isa/sb/sb16_main.c b/sound/isa/sb/sb16_main.c index a6a0fa516268..a99e642a68b5 100644 --- a/sound/isa/sb/sb16_main.c +++ 
b/sound/isa/sb/sb16_main.c @@ -729,7 +729,7 @@ static int snd_sb16_dma_control_put(snd_kcontrol_t * kcontrol, snd_ctl_elem_valu } static snd_kcontrol_new_t snd_sb16_dma_control = { - .iface = SNDRV_CTL_ELEM_IFACE_PCM, + .iface = SNDRV_CTL_ELEM_IFACE_CARD, .name = "16-bit DMA Allocation", .info = snd_sb16_dma_control_info, .get = snd_sb16_dma_control_get, diff --git a/sound/pci/cmipci.c b/sound/pci/cmipci.c index f5a4ac1ceef9..b098b51099c2 100644 --- a/sound/pci/cmipci.c +++ b/sound/pci/cmipci.c @@ -1029,7 +1029,7 @@ static int snd_cmipci_spdif_mask_get(snd_kcontrol_t * kcontrol, static snd_kcontrol_new_t snd_cmipci_spdif_mask __devinitdata = { .access = SNDRV_CTL_ELEM_ACCESS_READ, - .iface = SNDRV_CTL_ELEM_IFACE_MIXER, + .iface = SNDRV_CTL_ELEM_IFACE_PCM, .name = SNDRV_CTL_NAME_IEC958("",PLAYBACK,CON_MASK), .info = snd_cmipci_spdif_mask_info, .get = snd_cmipci_spdif_mask_get, diff --git a/sound/pci/cs46xx/cs46xx.c b/sound/pci/cs46xx/cs46xx.c index db212ecd792a..b9fff4ee6f9d 100644 --- a/sound/pci/cs46xx/cs46xx.c +++ b/sound/pci/cs46xx/cs46xx.c @@ -113,7 +113,7 @@ static int __devinit snd_card_cs46xx_probe(struct pci_dev *pci, return err; } #endif - if ((err = snd_cs46xx_mixer(chip)) < 0) { + if ((err = snd_cs46xx_mixer(chip, 2)) < 0) { snd_card_free(card); return err; } diff --git a/sound/pci/cs46xx/cs46xx_lib.c b/sound/pci/cs46xx/cs46xx_lib.c index ff28af1f658e..1546aec02ea7 100644 --- a/sound/pci/cs46xx/cs46xx_lib.c +++ b/sound/pci/cs46xx/cs46xx_lib.c @@ -2440,7 +2440,7 @@ static int __devinit cs46xx_detect_codec(cs46xx_t *chip, int codec) return -ENXIO; } -int __devinit snd_cs46xx_mixer(cs46xx_t *chip) +int __devinit snd_cs46xx_mixer(cs46xx_t *chip, int spdif_device) { snd_card_t *card = chip->card; snd_ctl_elem_id_t id; @@ -2476,6 +2476,8 @@ int __devinit snd_cs46xx_mixer(cs46xx_t *chip) for (idx = 0; idx < ARRAY_SIZE(snd_cs46xx_controls); idx++) { snd_kcontrol_t *kctl; kctl = snd_ctl_new1(&snd_cs46xx_controls[idx], chip); + if (kctl && kctl->id.iface == 
SNDRV_CTL_ELEM_IFACE_PCM) + kctl->id.device = spdif_device; if ((err = snd_ctl_add(card, kctl)) < 0) return err; } diff --git a/sound/pci/emu10k1/emu10k1.c b/sound/pci/emu10k1/emu10k1.c index b17142cabead..fc377c4b666c 100644 --- a/sound/pci/emu10k1/emu10k1.c +++ b/sound/pci/emu10k1/emu10k1.c @@ -149,7 +149,7 @@ static int __devinit snd_card_emu10k1_probe(struct pci_dev *pci, } } - if ((err = snd_emu10k1_mixer(emu)) < 0) { + if ((err = snd_emu10k1_mixer(emu, 0, 3)) < 0) { snd_card_free(card); return err; } diff --git a/sound/pci/emu10k1/emumixer.c b/sound/pci/emu10k1/emumixer.c index 6be82c5fe138..279edaeb0663 100644 --- a/sound/pci/emu10k1/emumixer.c +++ b/sound/pci/emu10k1/emumixer.c @@ -295,7 +295,7 @@ static int snd_emu10k1_send_routing_put(snd_kcontrol_t * kcontrol, static snd_kcontrol_new_t snd_emu10k1_send_routing_control = { .access = SNDRV_CTL_ELEM_ACCESS_READWRITE | SNDRV_CTL_ELEM_ACCESS_INACTIVE, - .iface = SNDRV_CTL_ELEM_IFACE_MIXER, + .iface = SNDRV_CTL_ELEM_IFACE_PCM, .name = "EMU10K1 PCM Send Routing", .count = 32, .info = snd_emu10k1_send_routing_info, @@ -364,7 +364,7 @@ static int snd_emu10k1_send_volume_put(snd_kcontrol_t * kcontrol, static snd_kcontrol_new_t snd_emu10k1_send_volume_control = { .access = SNDRV_CTL_ELEM_ACCESS_READWRITE | SNDRV_CTL_ELEM_ACCESS_INACTIVE, - .iface = SNDRV_CTL_ELEM_IFACE_MIXER, + .iface = SNDRV_CTL_ELEM_IFACE_PCM, .name = "EMU10K1 PCM Send Volume", .count = 32, .info = snd_emu10k1_send_volume_info, @@ -427,7 +427,7 @@ static int snd_emu10k1_attn_put(snd_kcontrol_t * kcontrol, static snd_kcontrol_new_t snd_emu10k1_attn_control = { .access = SNDRV_CTL_ELEM_ACCESS_READWRITE | SNDRV_CTL_ELEM_ACCESS_INACTIVE, - .iface = SNDRV_CTL_ELEM_IFACE_MIXER, + .iface = SNDRV_CTL_ELEM_IFACE_PCM, .name = "EMU10K1 PCM Volume", .count = 32, .info = snd_emu10k1_attn_info, @@ -737,7 +737,8 @@ static int rename_ctl(snd_card_t *card, const char *src, const char *dst) return -ENOENT; } -int __devinit snd_emu10k1_mixer(emu10k1_t *emu) +int 
__devinit snd_emu10k1_mixer(emu10k1_t *emu, + int pcm_device, int multi_device) { int err, pcm; snd_kcontrol_t *kctl; @@ -852,29 +853,35 @@ int __devinit snd_emu10k1_mixer(emu10k1_t *emu) if ((kctl = emu->ctl_send_routing = snd_ctl_new1(&snd_emu10k1_send_routing_control, emu)) == NULL) return -ENOMEM; + kctl->id.device = pcm_device; if ((err = snd_ctl_add(card, kctl))) return err; if ((kctl = emu->ctl_send_volume = snd_ctl_new1(&snd_emu10k1_send_volume_control, emu)) == NULL) return -ENOMEM; + kctl->id.device = pcm_device; if ((err = snd_ctl_add(card, kctl))) return err; if ((kctl = emu->ctl_attn = snd_ctl_new1(&snd_emu10k1_attn_control, emu)) == NULL) return -ENOMEM; + kctl->id.device = pcm_device; if ((err = snd_ctl_add(card, kctl))) return err; if ((kctl = emu->ctl_efx_send_routing = snd_ctl_new1(&snd_emu10k1_efx_send_routing_control, emu)) == NULL) return -ENOMEM; + kctl->id.device = multi_device; if ((err = snd_ctl_add(card, kctl))) return err; if ((kctl = emu->ctl_efx_send_volume = snd_ctl_new1(&snd_emu10k1_efx_send_volume_control, emu)) == NULL) return -ENOMEM; + kctl->id.device = multi_device; if ((err = snd_ctl_add(card, kctl))) return err; if ((kctl = emu->ctl_efx_attn = snd_ctl_new1(&snd_emu10k1_efx_attn_control, emu)) == NULL) return -ENOMEM; + kctl->id.device = multi_device; if ((err = snd_ctl_add(card, kctl))) return err; diff --git a/sound/pci/emu10k1/emupcm.c b/sound/pci/emu10k1/emupcm.c index 520b99af5f55..9c35f6dde1b5 100644 --- a/sound/pci/emu10k1/emupcm.c +++ b/sound/pci/emu10k1/emupcm.c @@ -1682,6 +1682,7 @@ static void snd_emu10k1_pcm_efx_free(snd_pcm_t *pcm) int __devinit snd_emu10k1_pcm_efx(emu10k1_t * emu, int device, snd_pcm_t ** rpcm) { snd_pcm_t *pcm; + snd_kcontrol_t *kctl; int err; if (rpcm) @@ -1714,7 +1715,11 @@ int __devinit snd_emu10k1_pcm_efx(emu10k1_t * emu, int device, snd_pcm_t ** rpcm emu->efx_voices_mask[0] = 0xffff0000; emu->efx_voices_mask[1] = 0; } - snd_ctl_add(emu->card, snd_ctl_new1(&snd_emu10k1_pcm_efx_voices_mask, 
emu)); + kctl = snd_ctl_new1(&snd_emu10k1_pcm_efx_voices_mask, emu); + if (!kctl) + return -ENOMEM; + kctl->id.device = device; + snd_ctl_add(emu->card, kctl); snd_pcm_lib_preallocate_pages_for_all(pcm, SNDRV_DMA_TYPE_DEV, snd_dma_pci_data(emu->pci), 64*1024, 64*1024); diff --git a/sound/pci/ens1370.c b/sound/pci/ens1370.c index 78a81f3912a1..0c64e65bee97 100644 --- a/sound/pci/ens1370.c +++ b/sound/pci/ens1370.c @@ -1446,7 +1446,7 @@ static int snd_es1371_spdif_put(snd_kcontrol_t * kcontrol, snd_ctl_elem_value_t static snd_kcontrol_new_t snd_es1371_mixer_spdif[] __devinitdata = { ES1371_SPDIF("IEC958 Playback Switch"), { - .iface = SNDRV_CTL_ELEM_IFACE_PCM, + .iface = SNDRV_CTL_ELEM_IFACE_MIXER, .name = SNDRV_CTL_NAME_IEC958("",PLAYBACK,DEFAULT), .info = snd_ens1373_spdif_info, .get = snd_ens1373_spdif_default_get, @@ -1454,13 +1454,13 @@ static snd_kcontrol_new_t snd_es1371_mixer_spdif[] __devinitdata = { }, { .access = SNDRV_CTL_ELEM_ACCESS_READ, - .iface = SNDRV_CTL_ELEM_IFACE_PCM, + .iface = SNDRV_CTL_ELEM_IFACE_MIXER, .name = SNDRV_CTL_NAME_IEC958("",PLAYBACK,MASK), .info = snd_ens1373_spdif_info, .get = snd_ens1373_spdif_mask_get }, { - .iface = SNDRV_CTL_ELEM_IFACE_PCM, + .iface = SNDRV_CTL_ELEM_IFACE_MIXER, .name = SNDRV_CTL_NAME_IEC958("",PLAYBACK,PCM_STREAM), .info = snd_ens1373_spdif_info, .get = snd_ens1373_spdif_stream_get, diff --git a/sound/pci/ice1712/delta.c b/sound/pci/ice1712/delta.c index eb20f73be61a..39fbe662965d 100644 --- a/sound/pci/ice1712/delta.c +++ b/sound/pci/ice1712/delta.c @@ -618,15 +618,15 @@ static int __devinit snd_ice1712_delta_init(ice1712_t *ice) */ static snd_kcontrol_new_t snd_ice1712_delta1010_wordclock_select __devinitdata = -ICE1712_GPIO(SNDRV_CTL_ELEM_IFACE_PCM, "Word Clock Sync", 0, ICE1712_DELTA_WORD_CLOCK_SELECT, 1, 0); +ICE1712_GPIO(SNDRV_CTL_ELEM_IFACE_MIXER, "Word Clock Sync", 0, ICE1712_DELTA_WORD_CLOCK_SELECT, 1, 0); static snd_kcontrol_new_t snd_ice1712_delta1010lt_wordclock_select __devinitdata = 
-ICE1712_GPIO(SNDRV_CTL_ELEM_IFACE_PCM, "Word Clock Sync", 0, ICE1712_DELTA_1010LT_WORDCLOCK, 1, 0); +ICE1712_GPIO(SNDRV_CTL_ELEM_IFACE_MIXER, "Word Clock Sync", 0, ICE1712_DELTA_1010LT_WORDCLOCK, 1, 0); static snd_kcontrol_new_t snd_ice1712_delta1010_wordclock_status __devinitdata = -ICE1712_GPIO(SNDRV_CTL_ELEM_IFACE_PCM, "Word Clock Status", 0, ICE1712_DELTA_WORD_CLOCK_STATUS, 1, SNDRV_CTL_ELEM_ACCESS_READ | SNDRV_CTL_ELEM_ACCESS_VOLATILE); +ICE1712_GPIO(SNDRV_CTL_ELEM_IFACE_MIXER, "Word Clock Status", 0, ICE1712_DELTA_WORD_CLOCK_STATUS, 1, SNDRV_CTL_ELEM_ACCESS_READ | SNDRV_CTL_ELEM_ACCESS_VOLATILE); static snd_kcontrol_new_t snd_ice1712_deltadio2496_spdif_in_select __devinitdata = -ICE1712_GPIO(SNDRV_CTL_ELEM_IFACE_PCM, "IEC958 Input Optical", 0, ICE1712_DELTA_SPDIF_INPUT_SELECT, 0, 0); +ICE1712_GPIO(SNDRV_CTL_ELEM_IFACE_MIXER, "IEC958 Input Optical", 0, ICE1712_DELTA_SPDIF_INPUT_SELECT, 0, 0); static snd_kcontrol_new_t snd_ice1712_delta_spdif_in_status __devinitdata = -ICE1712_GPIO(SNDRV_CTL_ELEM_IFACE_PCM, "Delta IEC958 Input Status", 0, ICE1712_DELTA_SPDIF_IN_STAT, 1, SNDRV_CTL_ELEM_ACCESS_READ | SNDRV_CTL_ELEM_ACCESS_VOLATILE); +ICE1712_GPIO(SNDRV_CTL_ELEM_IFACE_MIXER, "Delta IEC958 Input Status", 0, ICE1712_DELTA_SPDIF_IN_STAT, 1, SNDRV_CTL_ELEM_ACCESS_READ | SNDRV_CTL_ELEM_ACCESS_VOLATILE); static int __devinit snd_ice1712_delta_add_controls(ice1712_t *ice) diff --git a/sound/pci/ice1712/ice1712.c b/sound/pci/ice1712/ice1712.c index a2545a5b26c4..d51c72b721e0 100644 --- a/sound/pci/ice1712/ice1712.c +++ b/sound/pci/ice1712/ice1712.c @@ -1715,7 +1715,7 @@ static int snd_ice1712_spdif_maskp_get(snd_kcontrol_t * kcontrol, static snd_kcontrol_new_t snd_ice1712_spdif_maskc __devinitdata = { .access = SNDRV_CTL_ELEM_ACCESS_READ, - .iface = SNDRV_CTL_ELEM_IFACE_MIXER, + .iface = SNDRV_CTL_ELEM_IFACE_PCM, .name = SNDRV_CTL_NAME_IEC958("",PLAYBACK,CON_MASK), .info = snd_ice1712_spdif_info, .get = snd_ice1712_spdif_maskc_get, @@ -1724,7 +1724,7 @@ static 
snd_kcontrol_new_t snd_ice1712_spdif_maskc __devinitdata = static snd_kcontrol_new_t snd_ice1712_spdif_maskp __devinitdata = { .access = SNDRV_CTL_ELEM_ACCESS_READ, - .iface = SNDRV_CTL_ELEM_IFACE_MIXER, + .iface = SNDRV_CTL_ELEM_IFACE_PCM, .name = SNDRV_CTL_NAME_IEC958("",PLAYBACK,PRO_MASK), .info = snd_ice1712_spdif_info, .get = snd_ice1712_spdif_maskp_get, diff --git a/sound/pci/ice1712/ice1724.c b/sound/pci/ice1712/ice1724.c index 79b5f12e06fc..e819b9ffc266 100644 --- a/sound/pci/ice1712/ice1724.c +++ b/sound/pci/ice1712/ice1724.c @@ -1414,7 +1414,7 @@ static int snd_vt1724_spdif_maskp_get(snd_kcontrol_t * kcontrol, static snd_kcontrol_new_t snd_vt1724_spdif_maskc __devinitdata = { .access = SNDRV_CTL_ELEM_ACCESS_READ, - .iface = SNDRV_CTL_ELEM_IFACE_MIXER, + .iface = SNDRV_CTL_ELEM_IFACE_PCM, .name = SNDRV_CTL_NAME_IEC958("",PLAYBACK,CON_MASK), .info = snd_vt1724_spdif_info, .get = snd_vt1724_spdif_maskc_get, @@ -1423,7 +1423,7 @@ static snd_kcontrol_new_t snd_vt1724_spdif_maskc __devinitdata = static snd_kcontrol_new_t snd_vt1724_spdif_maskp __devinitdata = { .access = SNDRV_CTL_ELEM_ACCESS_READ, - .iface = SNDRV_CTL_ELEM_IFACE_MIXER, + .iface = SNDRV_CTL_ELEM_IFACE_PCM, .name = SNDRV_CTL_NAME_IEC958("",PLAYBACK,PRO_MASK), .info = snd_vt1724_spdif_info, .get = snd_vt1724_spdif_maskp_get, diff --git a/sound/pci/korg1212/korg1212.c b/sound/pci/korg1212/korg1212.c index 79d8eda54f0d..d2aa9c82d41e 100644 --- a/sound/pci/korg1212/korg1212.c +++ b/sound/pci/korg1212/korg1212.c @@ -2067,7 +2067,7 @@ static int snd_korg1212_control_sync_put(snd_kcontrol_t * kcontrol, snd_ctl_elem }, \ { \ .access = SNDRV_CTL_ELEM_ACCESS_READ | SNDRV_CTL_ELEM_ACCESS_WRITE, \ - .iface = SNDRV_CTL_ELEM_IFACE_PCM, \ + .iface = SNDRV_CTL_ELEM_IFACE_MIXER, \ .name = c_name " Monitor Phase Invert", \ .info = snd_korg1212_control_phase_info, \ .get = snd_korg1212_control_phase_get, \ @@ -2082,7 +2082,7 @@ static snd_kcontrol_new_t snd_korg1212_controls[] = { MON_MIXER(4, "ADAT-5"), 
MON_MIXER(5, "ADAT-6"), MON_MIXER(6, "ADAT-7"), MON_MIXER(7, "ADAT-8"), { .access = SNDRV_CTL_ELEM_ACCESS_READ | SNDRV_CTL_ELEM_ACCESS_WRITE, - .iface = SNDRV_CTL_ELEM_IFACE_PCM, + .iface = SNDRV_CTL_ELEM_IFACE_MIXER, .name = "Sync Source", .info = snd_korg1212_control_sync_info, .get = snd_korg1212_control_sync_get, diff --git a/sound/pci/rme32.c b/sound/pci/rme32.c index b7b554df6705..456be39e8e4a 100644 --- a/sound/pci/rme32.c +++ b/sound/pci/rme32.c @@ -1900,7 +1900,7 @@ static snd_kcontrol_new_t snd_rme32_controls[] = { }, { .access = SNDRV_CTL_ELEM_ACCESS_READ, - .iface = SNDRV_CTL_ELEM_IFACE_MIXER, + .iface = SNDRV_CTL_ELEM_IFACE_PCM, .name = SNDRV_CTL_NAME_IEC958("", PLAYBACK, CON_MASK), .info = snd_rme32_control_spdif_mask_info, .get = snd_rme32_control_spdif_mask_get, @@ -1908,7 +1908,7 @@ static snd_kcontrol_new_t snd_rme32_controls[] = { }, { .access = SNDRV_CTL_ELEM_ACCESS_READ, - .iface = SNDRV_CTL_ELEM_IFACE_MIXER, + .iface = SNDRV_CTL_ELEM_IFACE_PCM, .name = SNDRV_CTL_NAME_IEC958("", PLAYBACK, PRO_MASK), .info = snd_rme32_control_spdif_mask_info, .get = snd_rme32_control_spdif_mask_get, diff --git a/sound/pci/rme96.c b/sound/pci/rme96.c index 10c4f45a913c..9645e9004a48 100644 --- a/sound/pci/rme96.c +++ b/sound/pci/rme96.c @@ -2266,7 +2266,7 @@ static snd_kcontrol_new_t snd_rme96_controls[] = { }, { .access = SNDRV_CTL_ELEM_ACCESS_READ, - .iface = SNDRV_CTL_ELEM_IFACE_MIXER, + .iface = SNDRV_CTL_ELEM_IFACE_PCM, .name = SNDRV_CTL_NAME_IEC958("",PLAYBACK,CON_MASK), .info = snd_rme96_control_spdif_mask_info, .get = snd_rme96_control_spdif_mask_get, @@ -2276,7 +2276,7 @@ static snd_kcontrol_new_t snd_rme96_controls[] = { }, { .access = SNDRV_CTL_ELEM_ACCESS_READ, - .iface = SNDRV_CTL_ELEM_IFACE_MIXER, + .iface = SNDRV_CTL_ELEM_IFACE_PCM, .name = SNDRV_CTL_NAME_IEC958("",PLAYBACK,PRO_MASK), .info = snd_rme96_control_spdif_mask_info, .get = snd_rme96_control_spdif_mask_get, diff --git a/sound/pci/rme9652/hdsp.c b/sound/pci/rme9652/hdsp.c index 
796621de5009..60eef845a074 100644 --- a/sound/pci/rme9652/hdsp.c +++ b/sound/pci/rme9652/hdsp.c @@ -1524,7 +1524,7 @@ static int snd_hdsp_control_spdif_mask_get(snd_kcontrol_t * kcontrol, snd_ctl_el } #define HDSP_SPDIF_IN(xname, xindex) \ -{ .iface = SNDRV_CTL_ELEM_IFACE_PCM, \ +{ .iface = SNDRV_CTL_ELEM_IFACE_MIXER, \ .name = xname, \ .index = xindex, \ .info = snd_hdsp_info_spdif_in, \ @@ -1584,7 +1584,7 @@ static int snd_hdsp_put_spdif_in(snd_kcontrol_t * kcontrol, snd_ctl_elem_value_t } #define HDSP_SPDIF_OUT(xname, xindex) \ -{ .iface = SNDRV_CTL_ELEM_IFACE_HWDEP, .name = xname, .index = xindex, \ +{ .iface = SNDRV_CTL_ELEM_IFACE_MIXER, .name = xname, .index = xindex, \ .info = snd_hdsp_info_spdif_bits, \ .get = snd_hdsp_get_spdif_out, .put = snd_hdsp_put_spdif_out } @@ -1638,7 +1638,7 @@ static int snd_hdsp_put_spdif_out(snd_kcontrol_t * kcontrol, snd_ctl_elem_value_ } #define HDSP_SPDIF_PROFESSIONAL(xname, xindex) \ -{ .iface = SNDRV_CTL_ELEM_IFACE_HWDEP, .name = xname, .index = xindex, \ +{ .iface = SNDRV_CTL_ELEM_IFACE_MIXER, .name = xname, .index = xindex, \ .info = snd_hdsp_info_spdif_bits, \ .get = snd_hdsp_get_spdif_professional, .put = snd_hdsp_put_spdif_professional } @@ -1683,7 +1683,7 @@ static int snd_hdsp_put_spdif_professional(snd_kcontrol_t * kcontrol, snd_ctl_el } #define HDSP_SPDIF_EMPHASIS(xname, xindex) \ -{ .iface = SNDRV_CTL_ELEM_IFACE_HWDEP, .name = xname, .index = xindex, \ +{ .iface = SNDRV_CTL_ELEM_IFACE_MIXER, .name = xname, .index = xindex, \ .info = snd_hdsp_info_spdif_bits, \ .get = snd_hdsp_get_spdif_emphasis, .put = snd_hdsp_put_spdif_emphasis } @@ -1728,7 +1728,7 @@ static int snd_hdsp_put_spdif_emphasis(snd_kcontrol_t * kcontrol, snd_ctl_elem_v } #define HDSP_SPDIF_NON_AUDIO(xname, xindex) \ -{ .iface = SNDRV_CTL_ELEM_IFACE_HWDEP, .name = xname, .index = xindex, \ +{ .iface = SNDRV_CTL_ELEM_IFACE_MIXER, .name = xname, .index = xindex, \ .info = snd_hdsp_info_spdif_bits, \ .get = snd_hdsp_get_spdif_nonaudio, .put = 
snd_hdsp_put_spdif_nonaudio } @@ -1773,7 +1773,7 @@ static int snd_hdsp_put_spdif_nonaudio(snd_kcontrol_t * kcontrol, snd_ctl_elem_v } #define HDSP_SPDIF_SAMPLE_RATE(xname, xindex) \ -{ .iface = SNDRV_CTL_ELEM_IFACE_HWDEP, \ +{ .iface = SNDRV_CTL_ELEM_IFACE_MIXER, \ .name = xname, \ .index = xindex, \ .access = SNDRV_CTL_ELEM_ACCESS_READ, \ @@ -1834,7 +1834,7 @@ static int snd_hdsp_get_spdif_sample_rate(snd_kcontrol_t * kcontrol, snd_ctl_ele } #define HDSP_SYSTEM_SAMPLE_RATE(xname, xindex) \ -{ .iface = SNDRV_CTL_ELEM_IFACE_HWDEP, \ +{ .iface = SNDRV_CTL_ELEM_IFACE_MIXER, \ .name = xname, \ .index = xindex, \ .access = SNDRV_CTL_ELEM_ACCESS_READ, \ @@ -1858,7 +1858,7 @@ static int snd_hdsp_get_system_sample_rate(snd_kcontrol_t * kcontrol, snd_ctl_el } #define HDSP_AUTOSYNC_SAMPLE_RATE(xname, xindex) \ -{ .iface = SNDRV_CTL_ELEM_IFACE_PCM, \ +{ .iface = SNDRV_CTL_ELEM_IFACE_MIXER, \ .name = xname, \ .index = xindex, \ .access = SNDRV_CTL_ELEM_ACCESS_READ, \ @@ -1918,7 +1918,7 @@ static int snd_hdsp_get_autosync_sample_rate(snd_kcontrol_t * kcontrol, snd_ctl_ } #define HDSP_SYSTEM_CLOCK_MODE(xname, xindex) \ -{ .iface = SNDRV_CTL_ELEM_IFACE_HWDEP, \ +{ .iface = SNDRV_CTL_ELEM_IFACE_MIXER, \ .name = xname, \ .index = xindex, \ .access = SNDRV_CTL_ELEM_ACCESS_READ, \ @@ -1958,7 +1958,7 @@ static int snd_hdsp_get_system_clock_mode(snd_kcontrol_t * kcontrol, snd_ctl_ele } #define HDSP_CLOCK_SOURCE(xname, xindex) \ -{ .iface = SNDRV_CTL_ELEM_IFACE_PCM, \ +{ .iface = SNDRV_CTL_ELEM_IFACE_MIXER, \ .name = xname, \ .index = xindex, \ .info = snd_hdsp_info_clock_source, \ @@ -2124,7 +2124,7 @@ static int snd_hdsp_put_clock_source_lock(snd_kcontrol_t * kcontrol, snd_ctl_ele } #define HDSP_DA_GAIN(xname, xindex) \ -{ .iface = SNDRV_CTL_ELEM_IFACE_HWDEP, \ +{ .iface = SNDRV_CTL_ELEM_IFACE_MIXER, \ .name = xname, \ .index = xindex, \ .info = snd_hdsp_info_da_gain, \ @@ -2210,7 +2210,7 @@ static int snd_hdsp_put_da_gain(snd_kcontrol_t * kcontrol, snd_ctl_elem_value_t } #define 
HDSP_AD_GAIN(xname, xindex) \ -{ .iface = SNDRV_CTL_ELEM_IFACE_HWDEP, \ +{ .iface = SNDRV_CTL_ELEM_IFACE_MIXER, \ .name = xname, \ .index = xindex, \ .info = snd_hdsp_info_ad_gain, \ @@ -2296,7 +2296,7 @@ static int snd_hdsp_put_ad_gain(snd_kcontrol_t * kcontrol, snd_ctl_elem_value_t } #define HDSP_PHONE_GAIN(xname, xindex) \ -{ .iface = SNDRV_CTL_ELEM_IFACE_HWDEP, \ +{ .iface = SNDRV_CTL_ELEM_IFACE_MIXER, \ .name = xname, \ .index = xindex, \ .info = snd_hdsp_info_phone_gain, \ @@ -2382,7 +2382,7 @@ static int snd_hdsp_put_phone_gain(snd_kcontrol_t * kcontrol, snd_ctl_elem_value } #define HDSP_XLR_BREAKOUT_CABLE(xname, xindex) \ -{ .iface = SNDRV_CTL_ELEM_IFACE_HWDEP, \ +{ .iface = SNDRV_CTL_ELEM_IFACE_MIXER, \ .name = xname, \ .index = xindex, \ .info = snd_hdsp_info_xlr_breakout_cable, \ @@ -2447,7 +2447,7 @@ static int snd_hdsp_put_xlr_breakout_cable(snd_kcontrol_t * kcontrol, snd_ctl_el Switching this on desactivates external ADAT */ #define HDSP_AEB(xname, xindex) \ -{ .iface = SNDRV_CTL_ELEM_IFACE_HWDEP, \ +{ .iface = SNDRV_CTL_ELEM_IFACE_MIXER, \ .name = xname, \ .index = xindex, \ .info = snd_hdsp_info_aeb, \ @@ -2508,7 +2508,7 @@ static int snd_hdsp_put_aeb(snd_kcontrol_t * kcontrol, snd_ctl_elem_value_t * uc } #define HDSP_PREF_SYNC_REF(xname, xindex) \ -{ .iface = SNDRV_CTL_ELEM_IFACE_HWDEP, \ +{ .iface = SNDRV_CTL_ELEM_IFACE_MIXER, \ .name = xname, \ .index = xindex, \ .info = snd_hdsp_info_pref_sync_ref, \ @@ -2641,7 +2641,7 @@ static int snd_hdsp_put_pref_sync_ref(snd_kcontrol_t * kcontrol, snd_ctl_elem_va } #define HDSP_AUTOSYNC_REF(xname, xindex) \ -{ .iface = SNDRV_CTL_ELEM_IFACE_HWDEP, \ +{ .iface = SNDRV_CTL_ELEM_IFACE_MIXER, \ .name = xname, \ .index = xindex, \ .access = SNDRV_CTL_ELEM_ACCESS_READ, \ @@ -2697,7 +2697,7 @@ static int snd_hdsp_get_autosync_ref(snd_kcontrol_t * kcontrol, snd_ctl_elem_val } #define HDSP_LINE_OUT(xname, xindex) \ -{ .iface = SNDRV_CTL_ELEM_IFACE_HWDEP, \ +{ .iface = SNDRV_CTL_ELEM_IFACE_MIXER, \ .name = xname, \ 
.index = xindex, \ .info = snd_hdsp_info_line_out, \ @@ -2757,7 +2757,7 @@ static int snd_hdsp_put_line_out(snd_kcontrol_t * kcontrol, snd_ctl_elem_value_t } #define HDSP_PRECISE_POINTER(xname, xindex) \ -{ .iface = SNDRV_CTL_ELEM_IFACE_HWDEP, \ +{ .iface = SNDRV_CTL_ELEM_IFACE_CARD, \ .name = xname, \ .index = xindex, \ .info = snd_hdsp_info_precise_pointer, \ @@ -2811,7 +2811,7 @@ static int snd_hdsp_put_precise_pointer(snd_kcontrol_t * kcontrol, snd_ctl_elem_ } #define HDSP_USE_MIDI_TASKLET(xname, xindex) \ -{ .iface = SNDRV_CTL_ELEM_IFACE_HWDEP, \ +{ .iface = SNDRV_CTL_ELEM_IFACE_CARD, \ .name = xname, \ .index = xindex, \ .info = snd_hdsp_info_use_midi_tasklet, \ @@ -2868,6 +2868,7 @@ static int snd_hdsp_put_use_midi_tasklet(snd_kcontrol_t * kcontrol, snd_ctl_elem { .iface = SNDRV_CTL_ELEM_IFACE_HWDEP, \ .name = xname, \ .index = xindex, \ + .device = 0, \ .access = SNDRV_CTL_ELEM_ACCESS_READWRITE | \ SNDRV_CTL_ELEM_ACCESS_VOLATILE, \ .info = snd_hdsp_info_mixer, \ @@ -2939,7 +2940,7 @@ static int snd_hdsp_put_mixer(snd_kcontrol_t * kcontrol, snd_ctl_elem_value_t * } #define HDSP_WC_SYNC_CHECK(xname, xindex) \ -{ .iface = SNDRV_CTL_ELEM_IFACE_HWDEP, \ +{ .iface = SNDRV_CTL_ELEM_IFACE_MIXER, \ .name = xname, \ .index = xindex, \ .access = SNDRV_CTL_ELEM_ACCESS_READ | SNDRV_CTL_ELEM_ACCESS_VOLATILE, \ @@ -2983,7 +2984,7 @@ static int snd_hdsp_get_wc_sync_check(snd_kcontrol_t * kcontrol, snd_ctl_elem_va } #define HDSP_SPDIF_SYNC_CHECK(xname, xindex) \ -{ .iface = SNDRV_CTL_ELEM_IFACE_HWDEP, \ +{ .iface = SNDRV_CTL_ELEM_IFACE_MIXER, \ .name = xname, \ .index = xindex, \ .access = SNDRV_CTL_ELEM_ACCESS_READ | SNDRV_CTL_ELEM_ACCESS_VOLATILE, \ @@ -3015,7 +3016,7 @@ static int snd_hdsp_get_spdif_sync_check(snd_kcontrol_t * kcontrol, snd_ctl_elem } #define HDSP_ADATSYNC_SYNC_CHECK(xname, xindex) \ -{ .iface = SNDRV_CTL_ELEM_IFACE_HWDEP, \ +{ .iface = SNDRV_CTL_ELEM_IFACE_MIXER, \ .name = xname, \ .index = xindex, \ .access = SNDRV_CTL_ELEM_ACCESS_READ | 
SNDRV_CTL_ELEM_ACCESS_VOLATILE, \ @@ -3046,7 +3047,7 @@ static int snd_hdsp_get_adatsync_sync_check(snd_kcontrol_t * kcontrol, snd_ctl_e } #define HDSP_ADAT_SYNC_CHECK \ -{ .iface = SNDRV_CTL_ELEM_IFACE_HWDEP, \ +{ .iface = SNDRV_CTL_ELEM_IFACE_MIXER, \ .access = SNDRV_CTL_ELEM_ACCESS_READ | SNDRV_CTL_ELEM_ACCESS_VOLATILE, \ .info = snd_hdsp_info_sync_check, \ .get = snd_hdsp_get_adat_sync_check \ @@ -3103,7 +3104,7 @@ HDSP_XLR_BREAKOUT_CABLE("XLR Breakout Cable", 0) static snd_kcontrol_new_t snd_hdsp_controls[] = { { - .iface = SNDRV_CTL_ELEM_IFACE_PCM, + .iface = SNDRV_CTL_ELEM_IFACE_MIXER, .name = SNDRV_CTL_NAME_IEC958("",PLAYBACK,DEFAULT), .info = snd_hdsp_control_spdif_info, .get = snd_hdsp_control_spdif_get, @@ -3111,7 +3112,7 @@ static snd_kcontrol_new_t snd_hdsp_controls[] = { }, { .access = SNDRV_CTL_ELEM_ACCESS_READWRITE | SNDRV_CTL_ELEM_ACCESS_INACTIVE, - .iface = SNDRV_CTL_ELEM_IFACE_PCM, + .iface = SNDRV_CTL_ELEM_IFACE_MIXER, .name = SNDRV_CTL_NAME_IEC958("",PLAYBACK,PCM_STREAM), .info = snd_hdsp_control_spdif_stream_info, .get = snd_hdsp_control_spdif_stream_get, @@ -3146,8 +3147,6 @@ HDSP_SPDIF_NON_AUDIO("IEC958 Non-audio Bit", 0), /* 'Sample Clock Source' complies with the alsa control naming scheme */ HDSP_CLOCK_SOURCE("Sample Clock Source", 0), { - /* FIXME: should be PCM or MIXER? 
*/ - /* .iface = SNDRV_CTL_ELEM_IFACE_PCM, */ .iface = SNDRV_CTL_ELEM_IFACE_MIXER, .name = "Sample Clock Source Locking", .info = snd_hdsp_info_clock_source_lock, diff --git a/sound/pci/rme9652/hdspm.c b/sound/pci/rme9652/hdspm.c index 9e86d0eb41ce..f6daec4e4d97 100644 --- a/sound/pci/rme9652/hdspm.c +++ b/sound/pci/rme9652/hdspm.c @@ -1168,7 +1168,7 @@ static void hdspm_midi_tasklet(unsigned long arg) /* get the system sample rate which is set */ #define HDSPM_SYSTEM_SAMPLE_RATE(xname, xindex) \ -{ .iface = SNDRV_CTL_ELEM_IFACE_HWDEP, \ +{ .iface = SNDRV_CTL_ELEM_IFACE_MIXER, \ .name = xname, \ .index = xindex, \ .access = SNDRV_CTL_ELEM_ACCESS_READ, \ @@ -1195,7 +1195,7 @@ static int snd_hdspm_get_system_sample_rate(snd_kcontrol_t * kcontrol, } #define HDSPM_AUTOSYNC_SAMPLE_RATE(xname, xindex) \ -{ .iface = SNDRV_CTL_ELEM_IFACE_PCM, \ +{ .iface = SNDRV_CTL_ELEM_IFACE_MIXER, \ .name = xname, \ .index = xindex, \ .access = SNDRV_CTL_ELEM_ACCESS_READ, \ @@ -1264,7 +1264,7 @@ static int snd_hdspm_get_autosync_sample_rate(snd_kcontrol_t * kcontrol, } #define HDSPM_SYSTEM_CLOCK_MODE(xname, xindex) \ -{ .iface = SNDRV_CTL_ELEM_IFACE_HWDEP, \ +{ .iface = SNDRV_CTL_ELEM_IFACE_MIXER, \ .name = xname, \ .index = xindex, \ .access = SNDRV_CTL_ELEM_ACCESS_READ, \ @@ -1310,7 +1310,7 @@ static int snd_hdspm_get_system_clock_mode(snd_kcontrol_t * kcontrol, } #define HDSPM_CLOCK_SOURCE(xname, xindex) \ -{ .iface = SNDRV_CTL_ELEM_IFACE_PCM, \ +{ .iface = SNDRV_CTL_ELEM_IFACE_MIXER, \ .name = xname, \ .index = xindex, \ .info = snd_hdspm_info_clock_source, \ @@ -1457,7 +1457,7 @@ static int snd_hdspm_put_clock_source(snd_kcontrol_t * kcontrol, } #define HDSPM_PREF_SYNC_REF(xname, xindex) \ -{ .iface = SNDRV_CTL_ELEM_IFACE_HWDEP, \ +{ .iface = SNDRV_CTL_ELEM_IFACE_MIXER, \ .name = xname, \ .index = xindex, \ .info = snd_hdspm_info_pref_sync_ref, \ @@ -1547,7 +1547,7 @@ static int snd_hdspm_put_pref_sync_ref(snd_kcontrol_t * kcontrol, } #define HDSPM_AUTOSYNC_REF(xname, xindex) \ -{ 
.iface = SNDRV_CTL_ELEM_IFACE_HWDEP, \ +{ .iface = SNDRV_CTL_ELEM_IFACE_MIXER, \ .name = xname, \ .index = xindex, \ .access = SNDRV_CTL_ELEM_ACCESS_READ, \ @@ -1604,7 +1604,7 @@ static int snd_hdspm_get_autosync_ref(snd_kcontrol_t * kcontrol, } #define HDSPM_LINE_OUT(xname, xindex) \ -{ .iface = SNDRV_CTL_ELEM_IFACE_HWDEP, \ +{ .iface = SNDRV_CTL_ELEM_IFACE_MIXER, \ .name = xname, \ .index = xindex, \ .info = snd_hdspm_info_line_out, \ @@ -1668,7 +1668,7 @@ static int snd_hdspm_put_line_out(snd_kcontrol_t * kcontrol, } #define HDSPM_TX_64(xname, xindex) \ -{ .iface = SNDRV_CTL_ELEM_IFACE_HWDEP, \ +{ .iface = SNDRV_CTL_ELEM_IFACE_MIXER, \ .name = xname, \ .index = xindex, \ .info = snd_hdspm_info_tx_64, \ @@ -1731,7 +1731,7 @@ static int snd_hdspm_put_tx_64(snd_kcontrol_t * kcontrol, } #define HDSPM_C_TMS(xname, xindex) \ -{ .iface = SNDRV_CTL_ELEM_IFACE_HWDEP, \ +{ .iface = SNDRV_CTL_ELEM_IFACE_MIXER, \ .name = xname, \ .index = xindex, \ .info = snd_hdspm_info_c_tms, \ @@ -1794,7 +1794,7 @@ static int snd_hdspm_put_c_tms(snd_kcontrol_t * kcontrol, } #define HDSPM_SAFE_MODE(xname, xindex) \ -{ .iface = SNDRV_CTL_ELEM_IFACE_HWDEP, \ +{ .iface = SNDRV_CTL_ELEM_IFACE_MIXER, \ .name = xname, \ .index = xindex, \ .info = snd_hdspm_info_safe_mode, \ @@ -1857,7 +1857,7 @@ static int snd_hdspm_put_safe_mode(snd_kcontrol_t * kcontrol, } #define HDSPM_INPUT_SELECT(xname, xindex) \ -{ .iface = SNDRV_CTL_ELEM_IFACE_HWDEP, \ +{ .iface = SNDRV_CTL_ELEM_IFACE_MIXER, \ .name = xname, \ .index = xindex, \ .info = snd_hdspm_info_input_select, \ @@ -1941,6 +1941,7 @@ static int snd_hdspm_put_input_select(snd_kcontrol_t * kcontrol, { .iface = SNDRV_CTL_ELEM_IFACE_HWDEP, \ .name = xname, \ .index = xindex, \ + .device = 0, \ .access = SNDRV_CTL_ELEM_ACCESS_READWRITE | \ SNDRV_CTL_ELEM_ACCESS_VOLATILE, \ .info = snd_hdspm_info_mixer, \ @@ -2124,7 +2125,7 @@ static int snd_hdspm_put_playback_mixer(snd_kcontrol_t * kcontrol, } #define HDSPM_WC_SYNC_CHECK(xname, xindex) \ -{ .iface = 
SNDRV_CTL_ELEM_IFACE_HWDEP, \ +{ .iface = SNDRV_CTL_ELEM_IFACE_MIXER, \ .name = xname, \ .index = xindex, \ .access = SNDRV_CTL_ELEM_ACCESS_READ | SNDRV_CTL_ELEM_ACCESS_VOLATILE, \ @@ -2170,7 +2171,7 @@ static int snd_hdspm_get_wc_sync_check(snd_kcontrol_t * kcontrol, #define HDSPM_MADI_SYNC_CHECK(xname, xindex) \ -{ .iface = SNDRV_CTL_ELEM_IFACE_HWDEP, \ +{ .iface = SNDRV_CTL_ELEM_IFACE_MIXER, \ .name = xname, \ .index = xindex, \ .access = SNDRV_CTL_ELEM_ACCESS_READ | SNDRV_CTL_ELEM_ACCESS_VOLATILE, \ diff --git a/sound/pci/rme9652/rme9652.c b/sound/pci/rme9652/rme9652.c index 1bc9d0df8516..6e3a91dba3eb 100644 --- a/sound/pci/rme9652/rme9652.c +++ b/sound/pci/rme9652/rme9652.c @@ -893,7 +893,7 @@ static int snd_rme9652_control_spdif_mask_get(snd_kcontrol_t * kcontrol, snd_ctl } #define RME9652_ADAT1_IN(xname, xindex) \ -{ .iface = SNDRV_CTL_ELEM_IFACE_PCM, .name = xname, .index = xindex, \ +{ .iface = SNDRV_CTL_ELEM_IFACE_MIXER, .name = xname, .index = xindex, \ .info = snd_rme9652_info_adat1_in, \ .get = snd_rme9652_get_adat1_in, \ .put = snd_rme9652_put_adat1_in } @@ -971,7 +971,7 @@ static int snd_rme9652_put_adat1_in(snd_kcontrol_t * kcontrol, snd_ctl_elem_valu } #define RME9652_SPDIF_IN(xname, xindex) \ -{ .iface = SNDRV_CTL_ELEM_IFACE_PCM, .name = xname, .index = xindex, \ +{ .iface = SNDRV_CTL_ELEM_IFACE_MIXER, .name = xname, .index = xindex, \ .info = snd_rme9652_info_spdif_in, \ .get = snd_rme9652_get_spdif_in, .put = snd_rme9652_put_spdif_in } @@ -1042,7 +1042,7 @@ static int snd_rme9652_put_spdif_in(snd_kcontrol_t * kcontrol, snd_ctl_elem_valu } #define RME9652_SPDIF_OUT(xname, xindex) \ -{ .iface = SNDRV_CTL_ELEM_IFACE_PCM, .name = xname, .index = xindex, \ +{ .iface = SNDRV_CTL_ELEM_IFACE_MIXER, .name = xname, .index = xindex, \ .info = snd_rme9652_info_spdif_out, \ .get = snd_rme9652_get_spdif_out, .put = snd_rme9652_put_spdif_out } @@ -1110,7 +1110,7 @@ static int snd_rme9652_put_spdif_out(snd_kcontrol_t * kcontrol, snd_ctl_elem_val } #define 
RME9652_SYNC_MODE(xname, xindex) \ -{ .iface = SNDRV_CTL_ELEM_IFACE_PCM, .name = xname, .index = xindex, \ +{ .iface = SNDRV_CTL_ELEM_IFACE_MIXER, .name = xname, .index = xindex, \ .info = snd_rme9652_info_sync_mode, \ .get = snd_rme9652_get_sync_mode, .put = snd_rme9652_put_sync_mode } @@ -1195,7 +1195,7 @@ static int snd_rme9652_put_sync_mode(snd_kcontrol_t * kcontrol, snd_ctl_elem_val } #define RME9652_SYNC_PREF(xname, xindex) \ -{ .iface = SNDRV_CTL_ELEM_IFACE_PCM, .name = xname, .index = xindex, \ +{ .iface = SNDRV_CTL_ELEM_IFACE_MIXER, .name = xname, .index = xindex, \ .info = snd_rme9652_info_sync_pref, \ .get = snd_rme9652_get_sync_pref, .put = snd_rme9652_put_sync_pref } @@ -1340,7 +1340,7 @@ static int snd_rme9652_put_thru(snd_kcontrol_t * kcontrol, snd_ctl_elem_value_t } #define RME9652_PASSTHRU(xname, xindex) \ -{ .iface = SNDRV_CTL_ELEM_IFACE_PCM, .name = xname, .index = xindex, \ +{ .iface = SNDRV_CTL_ELEM_IFACE_MIXER, .name = xname, .index = xindex, \ .info = snd_rme9652_info_passthru, \ .put = snd_rme9652_put_passthru, \ .get = snd_rme9652_get_passthru } @@ -1386,7 +1386,7 @@ static int snd_rme9652_put_passthru(snd_kcontrol_t * kcontrol, snd_ctl_elem_valu /* Read-only switches */ #define RME9652_SPDIF_RATE(xname, xindex) \ -{ .iface = SNDRV_CTL_ELEM_IFACE_PCM, .name = xname, .index = xindex, \ +{ .iface = SNDRV_CTL_ELEM_IFACE_MIXER, .name = xname, .index = xindex, \ .access = SNDRV_CTL_ELEM_ACCESS_READ | SNDRV_CTL_ELEM_ACCESS_VOLATILE, \ .info = snd_rme9652_info_spdif_rate, \ .get = snd_rme9652_get_spdif_rate } @@ -1411,7 +1411,7 @@ static int snd_rme9652_get_spdif_rate(snd_kcontrol_t * kcontrol, snd_ctl_elem_va } #define RME9652_ADAT_SYNC(xname, xindex, xidx) \ -{ .iface = SNDRV_CTL_ELEM_IFACE_PCM, .name = xname, .index = xindex, \ +{ .iface = SNDRV_CTL_ELEM_IFACE_MIXER, .name = xname, .index = xindex, \ .access = SNDRV_CTL_ELEM_ACCESS_READ | SNDRV_CTL_ELEM_ACCESS_VOLATILE, \ .info = snd_rme9652_info_adat_sync, \ .get = snd_rme9652_get_adat_sync, 
.private_value = xidx } @@ -1447,7 +1447,7 @@ static int snd_rme9652_get_adat_sync(snd_kcontrol_t * kcontrol, snd_ctl_elem_val } #define RME9652_TC_VALID(xname, xindex) \ -{ .iface = SNDRV_CTL_ELEM_IFACE_PCM, .name = xname, .index = xindex, \ +{ .iface = SNDRV_CTL_ELEM_IFACE_MIXER, .name = xname, .index = xindex, \ .access = SNDRV_CTL_ELEM_ACCESS_READ | SNDRV_CTL_ELEM_ACCESS_VOLATILE, \ .info = snd_rme9652_info_tc_valid, \ .get = snd_rme9652_get_tc_valid } @@ -1529,7 +1529,7 @@ static int snd_rme9652_get_tc_value(void *private_data, static snd_kcontrol_new_t snd_rme9652_controls[] = { { - .iface = SNDRV_CTL_ELEM_IFACE_PCM, + .iface = SNDRV_CTL_ELEM_IFACE_MIXER, .name = SNDRV_CTL_NAME_IEC958("",PLAYBACK,DEFAULT), .info = snd_rme9652_control_spdif_info, .get = snd_rme9652_control_spdif_get, @@ -1537,7 +1537,7 @@ static snd_kcontrol_new_t snd_rme9652_controls[] = { }, { .access = SNDRV_CTL_ELEM_ACCESS_READWRITE | SNDRV_CTL_ELEM_ACCESS_INACTIVE, - .iface = SNDRV_CTL_ELEM_IFACE_PCM, + .iface = SNDRV_CTL_ELEM_IFACE_MIXER, .name = SNDRV_CTL_NAME_IEC958("",PLAYBACK,PCM_STREAM), .info = snd_rme9652_control_spdif_stream_info, .get = snd_rme9652_control_spdif_stream_get, @@ -1568,7 +1568,7 @@ RME9652_SPDIF_OUT("IEC958 Output also on ADAT1", 0), RME9652_SYNC_MODE("Sync Mode", 0), RME9652_SYNC_PREF("Preferred Sync Source", 0), { - .iface = SNDRV_CTL_ELEM_IFACE_PCM, + .iface = SNDRV_CTL_ELEM_IFACE_MIXER, .name = "Channels Thru", .index = 0, .info = snd_rme9652_info_thru, From 0ca06a00e206b963587ac471e6d1c52bf33b9a18 Mon Sep 17 00:00:00 2001 From: Liam Girdwood Date: Fri, 29 Jul 2005 16:13:36 +0200 Subject: [PATCH 515/584] [ALSA] AC97 bus interface for ad-hoc drivers AC97 Codec,PCI drivers I've made the review changes and as requested I've pasted the RFC by Nicolas below:- 'I would like to know what people think of the following patch. It allows for a codec on an AC97 bus to be shared with other drivers which are completely unrelated to audio. 
It registers a new bus type, and whenever a codec instance is created then a device for it is also registered with the driver model using that bus type. This allows, for example, to use the extra features of the UCB1400 like the touchscreen interface and the additional GPIOs and ADCs available on that chip for battery monitoring. I have a working UCB1400 touchscreen driver here that simply registers with the driver model happily working alongside with audio features using this.' Changes over RFC:- o Now matches codec name within codec group. o Added ac97_dev_release() to stop kernel complaining about no release method for device. o Added 'config SND_AC97_BUS' to sound/pci/Kconfig and moved 'config SND_AC97_CODEC' out with the PCI=n statement. o module is now called snd-ac97-bus Signed-off-by: Liam Girdwood Signed-off-by: Nicolas Pitre Signed-off-by: Takashi Iwai --- include/sound/ac97_codec.h | 6 +++ sound/pci/Kconfig | 10 +++-- sound/pci/ac97/Makefile | 2 + sound/pci/ac97/ac97_bus.c | 79 +++++++++++++++++++++++++++++++++++++ sound/pci/ac97/ac97_codec.c | 35 ++++++++++++++++ 5 files changed, 129 insertions(+), 3 deletions(-) create mode 100644 sound/pci/ac97/ac97_bus.c diff --git a/include/sound/ac97_codec.h b/include/sound/ac97_codec.h index cbe72e06c469..2857cf0472df 100644 --- a/include/sound/ac97_codec.h +++ b/include/sound/ac97_codec.h @@ -26,6 +26,7 @@ */ #include +#include #include "pcm.h" #include "control.h" #include "info.h" @@ -523,6 +524,7 @@ struct _snd_ac97 { /* jack-sharing info */ unsigned char indep_surround; unsigned char channel_mode; + struct device dev; }; /* conditions */ @@ -602,4 +604,8 @@ struct ac97_enum { unsigned short mask; const char **texts; }; + +/* ad hoc AC97 device driver access */ +extern struct bus_type ac97_bus_type; + #endif /* __SOUND_AC97_CODEC_H */ diff --git a/sound/pci/Kconfig b/sound/pci/Kconfig index 26b42bb20a0a..1e458919cce6 100644 --- a/sound/pci/Kconfig +++ b/sound/pci/Kconfig @@ -1,11 +1,15 @@ # ALSA PCI drivers 
-menu "PCI devices" - depends on SND!=n && PCI - config SND_AC97_CODEC tristate select SND_PCM + select SND_AC97_BUS + +config SND_AC97_BUS + tristate + +menu "PCI devices" + depends on SND!=n && PCI config SND_ALI5451 tristate "ALi M5451 PCI Audio Controller" diff --git a/sound/pci/ac97/Makefile b/sound/pci/ac97/Makefile index 3c3222122d8b..77b3482cb133 100644 --- a/sound/pci/ac97/Makefile +++ b/sound/pci/ac97/Makefile @@ -10,9 +10,11 @@ snd-ac97-codec-objs += ac97_proc.o endif snd-ak4531-codec-objs := ak4531_codec.o +snd-ac97-bus-objs := ac97_bus.o # Toplevel Module Dependency obj-$(CONFIG_SND_AC97_CODEC) += snd-ac97-codec.o obj-$(CONFIG_SND_ENS1370) += snd-ak4531-codec.o +obj-$(CONFIG_SND_AC97_BUS) += snd-ac97-bus.o obj-m := $(sort $(obj-m)) diff --git a/sound/pci/ac97/ac97_bus.c b/sound/pci/ac97/ac97_bus.c new file mode 100644 index 000000000000..227f8b9f67ce --- /dev/null +++ b/sound/pci/ac97/ac97_bus.c @@ -0,0 +1,79 @@ +/* + * Linux driver model AC97 bus interface + * + * Author: Nicolas Pitre + * Created: Jan 14, 2005 + * Copyright: (C) MontaVista Software Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + */ + +#include +#include +#include +#include + +/* + * Codec families have names separated by commas, so we search for an + * individual codec name within the family string.
+ */ +static int ac97_bus_match(struct device *dev, struct device_driver *drv) +{ + return (strstr(dev->bus_id, drv->name) != NULL); +} + +static int ac97_bus_suspend(struct device *dev, pm_message_t state) +{ + int ret = 0; + + if (dev->driver && dev->driver->suspend) { + ret = dev->driver->suspend(dev, state, SUSPEND_DISABLE); + if (ret == 0) + ret = dev->driver->suspend(dev, state, SUSPEND_SAVE_STATE); + if (ret == 0) + ret = dev->driver->suspend(dev, state, SUSPEND_POWER_DOWN); + } + return ret; +} + +static int ac97_bus_resume(struct device *dev) +{ + int ret = 0; + + if (dev->driver && dev->driver->resume) { + ret = dev->driver->resume(dev, RESUME_POWER_ON); + if (ret == 0) + ret = dev->driver->resume(dev, RESUME_RESTORE_STATE); + if (ret == 0) + ret = dev->driver->resume(dev, RESUME_ENABLE); + } + return ret; +} + +struct bus_type ac97_bus_type = { + .name = "ac97", + .match = ac97_bus_match, + .suspend = ac97_bus_suspend, + .resume = ac97_bus_resume, +}; + +static int __init ac97_bus_init(void) +{ + return bus_register(&ac97_bus_type); +} + +subsys_initcall(ac97_bus_init); + +static void __exit ac97_bus_exit(void) +{ + bus_unregister(&ac97_bus_type); +} + +module_exit(ac97_bus_exit); + +EXPORT_SYMBOL(ac97_bus_type); + +MODULE_LICENSE("GPL"); diff --git a/sound/pci/ac97/ac97_codec.c b/sound/pci/ac97/ac97_codec.c index cbf790270c30..33dba10f03e8 100644 --- a/sound/pci/ac97/ac97_codec.c +++ b/sound/pci/ac97/ac97_codec.c @@ -1813,6 +1813,39 @@ int snd_ac97_bus(snd_card_t *card, int num, ac97_bus_ops_t *ops, return 0; } +/* stop no dev release warning */ +static void ac97_device_release(struct device * dev) +{ +} + +/* register ac97 codec to bus */ +static int snd_ac97_dev_register(snd_device_t *device) +{ + ac97_t *ac97 = device->device_data; + int err; + + ac97->dev.bus = &ac97_bus_type; + ac97->dev.parent = ac97->bus->card->dev; + ac97->dev.platform_data = ac97; + ac97->dev.release = ac97_device_release; + strncpy(ac97->dev.bus_id, 
snd_ac97_get_short_name(ac97), BUS_ID_SIZE); + if ((err = device_register(&ac97->dev)) < 0) { + snd_printk(KERN_ERR "Can't register ac97 bus\n"); + ac97->dev.bus = NULL; + return err; + } + return 0; +} + +/* unregister ac97 codec */ +static int snd_ac97_dev_unregister(snd_device_t *device) +{ + ac97_t *ac97 = device->device_data; + if (ac97->dev.bus) + device_unregister(&ac97->dev); + return snd_ac97_free(ac97); +} + /* build_ops to do nothing */ static struct snd_ac97_build_ops null_build_ops; @@ -1846,6 +1879,8 @@ int snd_ac97_mixer(ac97_bus_t *bus, ac97_template_t *template, ac97_t **rac97) const ac97_codec_id_t *pid; static snd_device_ops_t ops = { .dev_free = snd_ac97_dev_free, + .dev_register = snd_ac97_dev_register, + .dev_unregister = snd_ac97_dev_unregister, }; snd_assert(rac97 != NULL, return -EINVAL); From 267d4d6475cf591a5f838449b40cbacfc2915e8b Mon Sep 17 00:00:00 2001 From: Henrik Kretzschmar Date: Fri, 29 Jul 2005 16:59:31 +0200 Subject: [PATCH 516/584] [ALSA] adds __init and in snd_memory_init() ALSA Core This patch adds the __init macro to snd_memory_init(). Signed-off-by: Henrik Kretzschmar Signed-off-by: Takashi Iwai --- sound/core/memory.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/core/memory.c b/sound/core/memory.c index f6895577bf86..1622893d00a2 100644 --- a/sound/core/memory.c +++ b/sound/core/memory.c @@ -56,7 +56,7 @@ static DEFINE_SPINLOCK(snd_alloc_vmalloc_lock); #define VMALLOC_MAGIC 0x87654320 static snd_info_entry_t *snd_memory_info_entry; -void snd_memory_init(void) +void __init snd_memory_init(void) { snd_alloc_kmalloc = 0; snd_alloc_vmalloc = 0; From 506d4c6a5f909d78c3d624693f10d8cc0eafda48 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Wed, 3 Aug 2005 14:02:47 +0200 Subject: [PATCH 517/584] [ALSA] ad1816a - Add clockfreq module option Documentation Added clockfreq module option for the card with a different clock frequency than 33kHz. 
Signed-off-by: Takashi Iwai --- Documentation/sound/alsa/ALSA-Configuration.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/Documentation/sound/alsa/ALSA-Configuration.txt b/Documentation/sound/alsa/ALSA-Configuration.txt index a18ecb92b356..5c49ba07e709 100644 --- a/Documentation/sound/alsa/ALSA-Configuration.txt +++ b/Documentation/sound/alsa/ALSA-Configuration.txt @@ -132,6 +132,7 @@ Prior to version 0.9.0rc4 options had a 'snd_' prefix. This was removed. mpu_irq - IRQ # for MPU-401 UART (PnP setup) dma1 - first DMA # for AD1816A chip (PnP setup) dma2 - second DMA # for AD1816A chip (PnP setup) + clockfreq - Clock frequency for AD1816A chip (default = 0, 33000Hz) Module supports up to 8 cards, autoprobe and PnP. From 10e8d78a94fc57f1bf11d50b97ff85b005e46d0b Mon Sep 17 00:00:00 2001 From: Clemens Ladisch Date: Wed, 3 Aug 2005 13:40:08 +0200 Subject: [PATCH 518/584] [ALSA] use SNDRV_CTL_NAME_IEC958 macro CMI8330 driver,ENS1370/1+ driver,FM801 driver,VIA82xx driver,AC97 Codec ALI5451 driver,CS46xx driver,EMU10K1/EMU10K2 driver,HDA Codec driver ICE1712 driver,ICE1724 driver Where appropriate, use the SNDRV_CTL_NAME_IEC958 instead of a literal string. 
Signed-off-by: Clemens Ladisch --- sound/isa/cmi8330.c | 4 ++-- sound/pci/ac97/ac97_patch.c | 8 ++++---- sound/pci/ali5451/ali5451.c | 4 ++-- sound/pci/cs46xx/cs46xx_lib.c | 6 +++--- sound/pci/emu10k1/emufx.c | 26 +++++++++++++------------- sound/pci/ens1370.c | 2 +- sound/pci/fm801.c | 8 ++++---- sound/pci/hda/patch_analog.c | 4 ++-- sound/pci/ice1712/ice1712.c | 6 +++--- sound/pci/ice1712/ice1724.c | 2 +- sound/pci/via82xx.c | 2 +- 11 files changed, 36 insertions(+), 36 deletions(-) diff --git a/sound/isa/cmi8330.c b/sound/isa/cmi8330.c index 46776cc0c157..1fce8b9f37cf 100644 --- a/sound/isa/cmi8330.c +++ b/sound/isa/cmi8330.c @@ -194,8 +194,8 @@ AD1848_DOUBLE("Wavetable Capture Volume", 0, CMI8330_WAVGAIN, CMI8330_WAVGAIN, 4 AD1848_SINGLE("3D Control - Switch", 0, CMI8330_RMUX3D, 5, 1, 1), AD1848_SINGLE("PC Speaker Playback Volume", 0, CMI8330_OUTPUTVOL, 3, 3, 0), AD1848_SINGLE("FM Playback Switch", 0, CMI8330_RECMUX, 3, 1, 1), -AD1848_SINGLE("IEC958 Input Capture Switch", 0, CMI8330_RMUX3D, 7, 1, 1), -AD1848_SINGLE("IEC958 Input Playback Switch", 0, CMI8330_MUTEMUX, 7, 1, 1), +AD1848_SINGLE(SNDRV_CTL_NAME_IEC958("Input ",CAPTURE,SWITCH), 0, CMI8330_RMUX3D, 7, 1, 1), +AD1848_SINGLE(SNDRV_CTL_NAME_IEC958("Input ",PLAYBACK,SWITCH), 0, CMI8330_MUTEMUX, 7, 1, 1), }; #ifdef ENABLE_SB_MIXER diff --git a/sound/pci/ac97/ac97_patch.c b/sound/pci/ac97/ac97_patch.c index d4bb99fc896c..00fb51992460 100644 --- a/sound/pci/ac97/ac97_patch.c +++ b/sound/pci/ac97/ac97_patch.c @@ -1955,7 +1955,7 @@ static const snd_kcontrol_new_t snd_ac97_controls_alc650[] = { }; static const snd_kcontrol_new_t snd_ac97_spdif_controls_alc650[] = { - AC97_SINGLE("IEC958 Capture Switch", AC97_ALC650_MULTICH, 11, 1, 0), + AC97_SINGLE(SNDRV_CTL_NAME_IEC958("",CAPTURE,SWITCH), AC97_ALC650_MULTICH, 11, 1, 0), AC97_SINGLE("Analog to IEC958 Output", AC97_ALC650_MULTICH, 12, 1, 0), /* disable this controls since it doesn't work as expected */ /* AC97_SINGLE("IEC958 Input Monitor", AC97_ALC650_MULTICH, 
13, 1, 0), */ @@ -2098,12 +2098,12 @@ static int alc655_iec958_route_put(snd_kcontrol_t *kcontrol, snd_ctl_elem_value_ } static const snd_kcontrol_new_t snd_ac97_spdif_controls_alc655[] = { - AC97_PAGE_SINGLE("IEC958 Capture Switch", AC97_ALC650_MULTICH, 11, 1, 0, 0), + AC97_PAGE_SINGLE(SNDRV_CTL_NAME_IEC958("",CAPTURE,SWITCH), AC97_ALC650_MULTICH, 11, 1, 0, 0), /* disable this controls since it doesn't work as expected */ /* AC97_PAGE_SINGLE("IEC958 Input Monitor", AC97_ALC650_MULTICH, 14, 1, 0, 0), */ { .iface = SNDRV_CTL_ELEM_IFACE_MIXER, - .name = "IEC958 Playback Route", + .name = SNDRV_CTL_NAME_IEC958("",PLAYBACK,NONE) "Route", .info = alc655_iec958_route_info, .get = alc655_iec958_route_get, .put = alc655_iec958_route_put, @@ -2682,7 +2682,7 @@ static const snd_kcontrol_new_t snd_ac97_controls_it2646[] = { }; static const snd_kcontrol_new_t snd_ac97_spdif_controls_it2646[] = { - AC97_SINGLE("IEC958 Capture Switch", 0x76, 11, 1, 0), + AC97_SINGLE(SNDRV_CTL_NAME_IEC958("",CAPTURE,SWITCH), 0x76, 11, 1, 0), AC97_SINGLE("Analog to IEC958 Output", 0x76, 12, 1, 0), AC97_SINGLE("IEC958 Input Monitor", 0x76, 13, 1, 0), }; diff --git a/sound/pci/ali5451/ali5451.c b/sound/pci/ali5451/ali5451.c index f08ae71f902d..cb12d780a6c6 100644 --- a/sound/pci/ali5451/ali5451.c +++ b/sound/pci/ali5451/ali5451.c @@ -1959,9 +1959,9 @@ static int snd_ali5451_spdif_put(snd_kcontrol_t * kcontrol, snd_ctl_elem_value_t static snd_kcontrol_new_t snd_ali5451_mixer_spdif[] __devinitdata = { /* spdif aplayback switch */ /* FIXME: "IEC958 Playback Switch" may conflict with one on ac97_codec */ - ALI5451_SPDIF("IEC958 Output switch", 0, 0), + ALI5451_SPDIF(SNDRV_CTL_NAME_IEC958("Output ",NONE,SWITCH), 0, 0), /* spdif out to spdif channel */ - ALI5451_SPDIF("IEC958 Channel Output Switch", 0, 1), + ALI5451_SPDIF(SNDRV_CTL_NAME_IEC958("Channel Output ",NONE,SWITCH), 0, 1), /* spdif in from spdif channel */ ALI5451_SPDIF(SNDRV_CTL_NAME_IEC958("",CAPTURE,SWITCH), 0, 2) }; diff --git 
a/sound/pci/cs46xx/cs46xx_lib.c b/sound/pci/cs46xx/cs46xx_lib.c index 1546aec02ea7..5eb9e0bb9100 100644 --- a/sound/pci/cs46xx/cs46xx_lib.c +++ b/sound/pci/cs46xx/cs46xx_lib.c @@ -2231,7 +2231,7 @@ static snd_kcontrol_new_t snd_cs46xx_controls[] __devinitdata = { }, { .iface = SNDRV_CTL_ELEM_IFACE_MIXER, - .name = "IEC958 Output Switch", + .name = SNDRV_CTL_NAME_IEC958("Output ",NONE,SWITCH), .info = snd_mixer_boolean_info, .get = snd_cs46xx_iec958_get, .put = snd_cs46xx_iec958_put, @@ -2239,7 +2239,7 @@ static snd_kcontrol_new_t snd_cs46xx_controls[] __devinitdata = { }, { .iface = SNDRV_CTL_ELEM_IFACE_MIXER, - .name = "IEC958 Input Switch", + .name = SNDRV_CTL_NAME_IEC958("Input ",NONE,SWITCH), .info = snd_mixer_boolean_info, .get = snd_cs46xx_iec958_get, .put = snd_cs46xx_iec958_put, @@ -2249,7 +2249,7 @@ static snd_kcontrol_new_t snd_cs46xx_controls[] __devinitdata = { /* Input IEC958 volume does not work for the moment. (Benny) */ { .iface = SNDRV_CTL_ELEM_IFACE_MIXER, - .name = "IEC958 Input Volume", + .name = SNDRV_CTL_NAME_IEC958("Input ",NONE,VOLUME), .info = snd_cs46xx_vol_info, .get = snd_cs46xx_vol_iec958_get, .put = snd_cs46xx_vol_iec958_put, diff --git a/sound/pci/emu10k1/emufx.c b/sound/pci/emu10k1/emufx.c index 0529fb281125..637c555cfdb1 100644 --- a/sound/pci/emu10k1/emufx.c +++ b/sound/pci/emu10k1/emufx.c @@ -1159,12 +1159,12 @@ A_OP(icode, &ptr, iMAC0, A_GPR(var), A_GPR(var), A_GPR(vol), A_EXTIN(input)) /* Optical SPDIF Playback Volume */ A_ADD_VOLUME_IN(stereo_mix, gpr, A_EXTIN_OPT_SPDIF_L); A_ADD_VOLUME_IN(stereo_mix+1, gpr+1, A_EXTIN_OPT_SPDIF_R); - snd_emu10k1_init_stereo_control(&controls[nctl++], "IEC958 Optical Playback Volume", gpr, 0); + snd_emu10k1_init_stereo_control(&controls[nctl++], SNDRV_CTL_NAME_IEC958("Optical ",PLAYBACK,VOLUME), gpr, 0); gpr += 2; /* Optical SPDIF Capture Volume */ A_ADD_VOLUME_IN(capture, gpr, A_EXTIN_OPT_SPDIF_L); A_ADD_VOLUME_IN(capture+1, gpr+1, A_EXTIN_OPT_SPDIF_R); - 
snd_emu10k1_init_stereo_control(&controls[nctl++], "IEC958 Optical Capture Volume", gpr, 0); + snd_emu10k1_init_stereo_control(&controls[nctl++], SNDRV_CTL_NAME_IEC958("Optical ",CAPTURE,VOLUME), gpr, 0); gpr += 2; /* Line2 Playback Volume */ @@ -1389,7 +1389,7 @@ A_OP(icode, &ptr, iMAC0, A_GPR(var), A_GPR(var), A_GPR(vol), A_EXTIN(input)) A_OP(icode, &ptr, iACC3, A_EXTOUT(A_EXTOUT_FRONT_L + z), A_GPR(tmp + 0), A_GPR(tmp + 1), A_C_00000000); } } - snd_emu10k1_init_stereo_onoff_control(controls + nctl++, "IEC958 Optical Raw Playback Switch", gpr, 0); + snd_emu10k1_init_stereo_onoff_control(controls + nctl++, SNDRV_CTL_NAME_IEC958("Optical Raw ",PLAYBACK,SWITCH), gpr, 0); gpr += 2; A_PUT_STEREO_OUTPUT(A_EXTOUT_REAR_L, A_EXTOUT_REAR_R, playback+2 + SND_EMU10K1_PLAYBACK_CHANNELS); @@ -1716,7 +1716,7 @@ static int __devinit _snd_emu10k1_init_efx(emu10k1_t *emu) /* IEC958 TTL Playback Volume */ for (z = 0; z < 2; z++) VOLUME_ADDIN(icode, &ptr, playback + z, EXTIN_SPDIF_CD_L + z, gpr + z); - snd_emu10k1_init_stereo_control(controls + i++, "IEC958 TTL Playback Volume", gpr, 0); + snd_emu10k1_init_stereo_control(controls + i++, SNDRV_CTL_NAME_IEC958("TTL ",PLAYBACK,VOLUME), gpr, 0); gpr += 2; /* IEC958 TTL Capture Volume + Switch */ @@ -1724,8 +1724,8 @@ static int __devinit _snd_emu10k1_init_efx(emu10k1_t *emu) SWITCH_IN(icode, &ptr, tmp + 0, EXTIN_SPDIF_CD_L + z, gpr + 2 + z); VOLUME_ADD(icode, &ptr, capture + z, tmp + 0, gpr + z); } - snd_emu10k1_init_stereo_control(controls + i++, "IEC958 TTL Capture Volume", gpr, 0); - snd_emu10k1_init_stereo_onoff_control(controls + i++, "IEC958 TTL Capture Switch", gpr + 2, 0); + snd_emu10k1_init_stereo_control(controls + i++, SNDRV_CTL_NAME_IEC958("TTL ",CAPTURE,VOLUME), gpr, 0); + snd_emu10k1_init_stereo_onoff_control(controls + i++, SNDRV_CTL_NAME_IEC958("TTL ",CAPTURE,SWITCH), gpr + 2, 0); gpr += 4; } @@ -1750,7 +1750,7 @@ static int __devinit _snd_emu10k1_init_efx(emu10k1_t *emu) /* IEC958 Optical Playback Volume */ for (z = 0; 
z < 2; z++) VOLUME_ADDIN(icode, &ptr, playback + z, EXTIN_TOSLINK_L + z, gpr + z); - snd_emu10k1_init_stereo_control(controls + i++, "IEC958 LiveDrive Playback Volume", gpr, 0); + snd_emu10k1_init_stereo_control(controls + i++, SNDRV_CTL_NAME_IEC958("LiveDrive ",PLAYBACK,VOLUME), gpr, 0); gpr += 2; /* IEC958 Optical Capture Volume */ @@ -1758,8 +1758,8 @@ static int __devinit _snd_emu10k1_init_efx(emu10k1_t *emu) SWITCH_IN(icode, &ptr, tmp + 0, EXTIN_TOSLINK_L + z, gpr + 2 + z); VOLUME_ADD(icode, &ptr, capture + z, tmp + 0, gpr + z); } - snd_emu10k1_init_stereo_control(controls + i++, "IEC958 LiveDrive Capture Volume", gpr, 0); - snd_emu10k1_init_stereo_onoff_control(controls + i++, "IEC958 LiveDrive Capture Switch", gpr + 2, 0); + snd_emu10k1_init_stereo_control(controls + i++, SNDRV_CTL_NAME_IEC958("LiveDrive ",CAPTURE,VOLUME), gpr, 0); + snd_emu10k1_init_stereo_onoff_control(controls + i++, SNDRV_CTL_NAME_IEC958("LiveDrive ",CAPTURE,SWITCH), gpr + 2, 0); gpr += 4; } @@ -1784,7 +1784,7 @@ static int __devinit _snd_emu10k1_init_efx(emu10k1_t *emu) /* IEC958 Coax Playback Volume */ for (z = 0; z < 2; z++) VOLUME_ADDIN(icode, &ptr, playback + z, EXTIN_COAX_SPDIF_L + z, gpr + z); - snd_emu10k1_init_stereo_control(controls + i++, "IEC958 Coaxial Playback Volume", gpr, 0); + snd_emu10k1_init_stereo_control(controls + i++, SNDRV_CTL_NAME_IEC958("Coaxial ",PLAYBACK,VOLUME), gpr, 0); gpr += 2; /* IEC958 Coax Capture Volume + Switch */ @@ -1792,8 +1792,8 @@ static int __devinit _snd_emu10k1_init_efx(emu10k1_t *emu) SWITCH_IN(icode, &ptr, tmp + 0, EXTIN_COAX_SPDIF_L + z, gpr + 2 + z); VOLUME_ADD(icode, &ptr, capture + z, tmp + 0, gpr + z); } - snd_emu10k1_init_stereo_control(controls + i++, "IEC958 Coaxial Capture Volume", gpr, 0); - snd_emu10k1_init_stereo_onoff_control(controls + i++, "IEC958 Coaxial Capture Switch", gpr + 2, 0); + snd_emu10k1_init_stereo_control(controls + i++, SNDRV_CTL_NAME_IEC958("Coaxial ",CAPTURE,VOLUME), gpr, 0); + 
snd_emu10k1_init_stereo_onoff_control(controls + i++, SNDRV_CTL_NAME_IEC958("Coaxial ",CAPTURE,SWITCH), gpr + 2, 0); gpr += 4; } @@ -1920,7 +1920,7 @@ static int __devinit _snd_emu10k1_init_efx(emu10k1_t *emu) #endif } - snd_emu10k1_init_stereo_onoff_control(controls + i++, "IEC958 Optical Raw Playback Switch", gpr, 0); + snd_emu10k1_init_stereo_onoff_control(controls + i++, SNDRV_CTL_NAME_IEC958("Optical Raw ",PLAYBACK,SWITCH), gpr, 0); gpr += 2; } diff --git a/sound/pci/ens1370.c b/sound/pci/ens1370.c index 0c64e65bee97..bc8272be0031 100644 --- a/sound/pci/ens1370.c +++ b/sound/pci/ens1370.c @@ -1444,7 +1444,7 @@ static int snd_es1371_spdif_put(snd_kcontrol_t * kcontrol, snd_ctl_elem_value_t /* spdif controls */ static snd_kcontrol_new_t snd_es1371_mixer_spdif[] __devinitdata = { - ES1371_SPDIF("IEC958 Playback Switch"), + ES1371_SPDIF(SNDRV_CTL_NAME_IEC958("",PLAYBACK,SWITCH)), { .iface = SNDRV_CTL_ELEM_IFACE_MIXER, .name = SNDRV_CTL_NAME_IEC958("",PLAYBACK,DEFAULT), diff --git a/sound/pci/fm801.c b/sound/pci/fm801.c index ff10e637a95e..36b2f62e8573 100644 --- a/sound/pci/fm801.c +++ b/sound/pci/fm801.c @@ -1155,10 +1155,10 @@ FM801_SINGLE("FM Playback Switch", FM801_FM_VOL, 15, 1, 1), static snd_kcontrol_new_t snd_fm801_controls_multi[] __devinitdata = { FM801_SINGLE("AC97 2ch->4ch Copy Switch", FM801_CODEC_CTRL, 7, 1, 0), FM801_SINGLE("AC97 18-bit Switch", FM801_CODEC_CTRL, 10, 1, 0), -FM801_SINGLE("IEC958 Capture Switch", FM801_I2S_MODE, 8, 1, 0), -FM801_SINGLE("IEC958 Raw Data Playback Switch", FM801_I2S_MODE, 9, 1, 0), -FM801_SINGLE("IEC958 Raw Data Capture Switch", FM801_I2S_MODE, 10, 1, 0), -FM801_SINGLE("IEC958 Playback Switch", FM801_GEN_CTRL, 2, 1, 0), +FM801_SINGLE(SNDRV_CTL_NAME_IEC958("",CAPTURE,SWITCH), FM801_I2S_MODE, 8, 1, 0), +FM801_SINGLE(SNDRV_CTL_NAME_IEC958("Raw Data ",PLAYBACK,SWITCH), FM801_I2S_MODE, 9, 1, 0), +FM801_SINGLE(SNDRV_CTL_NAME_IEC958("Raw Data ",CAPTURE,SWITCH), FM801_I2S_MODE, 10, 1, 0), 
+FM801_SINGLE(SNDRV_CTL_NAME_IEC958("",PLAYBACK,SWITCH), FM801_GEN_CTRL, 2, 1, 0), }; static void snd_fm801_mixer_free_ac97_bus(ac97_bus_t *bus) diff --git a/sound/pci/hda/patch_analog.c b/sound/pci/hda/patch_analog.c index 2fd05bb84136..bceb83a42a38 100644 --- a/sound/pci/hda/patch_analog.c +++ b/sound/pci/hda/patch_analog.c @@ -572,7 +572,7 @@ static snd_kcontrol_new_t ad1983_mixers[] = { }, { .iface = SNDRV_CTL_ELEM_IFACE_MIXER, - .name = "IEC958 Playback Route", + .name = SNDRV_CTL_NAME_IEC958("",PLAYBACK,NONE) "Route", .info = ad1983_spdif_route_info, .get = ad1983_spdif_route_get, .put = ad1983_spdif_route_put, @@ -705,7 +705,7 @@ static snd_kcontrol_new_t ad1981_mixers[] = { /* identical with AD1983 */ { .iface = SNDRV_CTL_ELEM_IFACE_MIXER, - .name = "IEC958 Playback Route", + .name = SNDRV_CTL_NAME_IEC958("",PLAYBACK,NONE) "Route", .info = ad1983_spdif_route_info, .get = ad1983_spdif_route_get, .put = ad1983_spdif_route_put, diff --git a/sound/pci/ice1712/ice1712.c b/sound/pci/ice1712/ice1712.c index d51c72b721e0..b97f50d10ba3 100644 --- a/sound/pci/ice1712/ice1712.c +++ b/sound/pci/ice1712/ice1712.c @@ -1422,7 +1422,7 @@ static snd_kcontrol_new_t snd_ice1712_multi_capture_analog_switch __devinitdata static snd_kcontrol_new_t snd_ice1712_multi_capture_spdif_switch __devinitdata = { .iface = SNDRV_CTL_ELEM_IFACE_MIXER, - .name = "IEC958 Multi Capture Switch", + .name = SNDRV_CTL_NAME_IEC958("Multi ",CAPTURE,SWITCH), .info = snd_ice1712_pro_mixer_switch_info, .get = snd_ice1712_pro_mixer_switch_get, .put = snd_ice1712_pro_mixer_switch_put, @@ -1441,7 +1441,7 @@ static snd_kcontrol_new_t snd_ice1712_multi_capture_analog_volume __devinitdata static snd_kcontrol_new_t snd_ice1712_multi_capture_spdif_volume __devinitdata = { .iface = SNDRV_CTL_ELEM_IFACE_MIXER, - .name = "IEC958 Multi Capture Volume", + .name = SNDRV_CTL_NAME_IEC958("Multi ",CAPTURE,VOLUME), .info = snd_ice1712_pro_mixer_volume_info, .get = snd_ice1712_pro_mixer_volume_get, .put = 
snd_ice1712_pro_mixer_volume_put, @@ -2203,7 +2203,7 @@ static snd_kcontrol_new_t snd_ice1712_mixer_pro_analog_route __devinitdata = { static snd_kcontrol_new_t snd_ice1712_mixer_pro_spdif_route __devinitdata = { .iface = SNDRV_CTL_ELEM_IFACE_MIXER, - .name = "IEC958 Playback Route", + .name = SNDRV_CTL_NAME_IEC958("",PLAYBACK,NONE) "Route", .info = snd_ice1712_pro_route_info, .get = snd_ice1712_pro_route_spdif_get, .put = snd_ice1712_pro_route_spdif_put, diff --git a/sound/pci/ice1712/ice1724.c b/sound/pci/ice1712/ice1724.c index e819b9ffc266..c7af5e5fee13 100644 --- a/sound/pci/ice1712/ice1724.c +++ b/sound/pci/ice1712/ice1724.c @@ -1466,7 +1466,7 @@ static snd_kcontrol_new_t snd_vt1724_spdif_switch __devinitdata = .iface = SNDRV_CTL_ELEM_IFACE_MIXER, /* FIXME: the following conflict with IEC958 Playback Route */ // .name = SNDRV_CTL_NAME_IEC958("",PLAYBACK,SWITCH), - .name = "IEC958 Output Switch", + .name = SNDRV_CTL_NAME_IEC958("Output ",NONE,SWITCH), .info = snd_vt1724_spdif_sw_info, .get = snd_vt1724_spdif_sw_get, .put = snd_vt1724_spdif_sw_put diff --git a/sound/pci/via82xx.c b/sound/pci/via82xx.c index 6e1c29f5799a..8ddc023a5b7f 100644 --- a/sound/pci/via82xx.c +++ b/sound/pci/via82xx.c @@ -1484,7 +1484,7 @@ static int snd_via8233_dxs3_spdif_put(snd_kcontrol_t *kcontrol, snd_ctl_elem_val } static snd_kcontrol_new_t snd_via8233_dxs3_spdif_control __devinitdata = { - .name = "IEC958 Output Switch", + .name = SNDRV_CTL_NAME_IEC958("Output ",NONE,SWITCH), .iface = SNDRV_CTL_ELEM_IFACE_MIXER, .info = snd_via8233_dxs3_spdif_info, .get = snd_via8233_dxs3_spdif_get, From 5549d54992391d81a8cbfbaac45a958876fbc9cb Mon Sep 17 00:00:00 2001 From: Clemens Ladisch Date: Wed, 3 Aug 2005 13:50:30 +0200 Subject: [PATCH 519/584] [ALSA] use PCM interface for IEC958 controls Digigram VX core,ENS1370/1+ driver,CA0106 driver,EMU10K1/EMU10K2 driver RME HDSP driver,RME9652 driver For consistency, use the PCM interface instead of MIXER for IEC958 default/mask/stream mixer controls. 
Signed-off-by: Clemens Ladisch --- sound/drivers/vx/vx_mixer.c | 4 ++-- sound/pci/ca0106/ca0106_mixer.c | 4 ++-- sound/pci/emu10k1/emu10k1x.c | 4 ++-- sound/pci/emu10k1/emumixer.c | 8 ++++++-- sound/pci/ens1370.c | 6 +++--- sound/pci/rme9652/hdsp.c | 8 ++++---- sound/pci/rme9652/rme9652.c | 8 ++++---- 7 files changed, 23 insertions(+), 19 deletions(-) diff --git a/sound/drivers/vx/vx_mixer.c b/sound/drivers/vx/vx_mixer.c index f00c88886460..19fc68c23378 100644 --- a/sound/drivers/vx/vx_mixer.c +++ b/sound/drivers/vx/vx_mixer.c @@ -796,14 +796,14 @@ static int vx_iec958_put(snd_kcontrol_t *kcontrol, snd_ctl_elem_value_t *ucontro static snd_kcontrol_new_t vx_control_iec958_mask = { .access = SNDRV_CTL_ELEM_ACCESS_READ, - .iface = SNDRV_CTL_ELEM_IFACE_MIXER, + .iface = SNDRV_CTL_ELEM_IFACE_PCM, .name = SNDRV_CTL_NAME_IEC958("",PLAYBACK,MASK), .info = vx_iec958_info, /* shared */ .get = vx_iec958_mask_get, }; static snd_kcontrol_new_t vx_control_iec958 = { - .iface = SNDRV_CTL_ELEM_IFACE_MIXER, + .iface = SNDRV_CTL_ELEM_IFACE_PCM, .name = SNDRV_CTL_NAME_IEC958("",PLAYBACK,DEFAULT), .info = vx_iec958_info, .get = vx_iec958_get, diff --git a/sound/pci/ca0106/ca0106_mixer.c b/sound/pci/ca0106/ca0106_mixer.c index 0e5e9ce0ff28..b6b8882ce704 100644 --- a/sound/pci/ca0106/ca0106_mixer.c +++ b/sound/pci/ca0106/ca0106_mixer.c @@ -297,7 +297,7 @@ static int snd_ca0106_spdif_put(snd_kcontrol_t * kcontrol, static snd_kcontrol_new_t snd_ca0106_spdif_mask_control = { .access = SNDRV_CTL_ELEM_ACCESS_READ, - .iface = SNDRV_CTL_ELEM_IFACE_MIXER, + .iface = SNDRV_CTL_ELEM_IFACE_PCM, .name = SNDRV_CTL_NAME_IEC958("",PLAYBACK,MASK), .count = 4, .info = snd_ca0106_spdif_info, @@ -306,7 +306,7 @@ static snd_kcontrol_new_t snd_ca0106_spdif_mask_control = static snd_kcontrol_new_t snd_ca0106_spdif_control = { - .iface = SNDRV_CTL_ELEM_IFACE_MIXER, + .iface = SNDRV_CTL_ELEM_IFACE_PCM, .name = SNDRV_CTL_NAME_IEC958("",PLAYBACK,DEFAULT), .count = 4, .info = snd_ca0106_spdif_info, diff --git 
a/sound/pci/emu10k1/emu10k1x.c b/sound/pci/emu10k1/emu10k1x.c index e90c5ddd1d17..52c7826df440 100644 --- a/sound/pci/emu10k1/emu10k1x.c +++ b/sound/pci/emu10k1/emu10k1x.c @@ -1183,7 +1183,7 @@ static int snd_emu10k1x_spdif_put(snd_kcontrol_t * kcontrol, static snd_kcontrol_new_t snd_emu10k1x_spdif_mask_control = { .access = SNDRV_CTL_ELEM_ACCESS_READ, - .iface = SNDRV_CTL_ELEM_IFACE_MIXER, + .iface = SNDRV_CTL_ELEM_IFACE_PCM, .name = SNDRV_CTL_NAME_IEC958("",PLAYBACK,MASK), .count = 3, .info = snd_emu10k1x_spdif_info, @@ -1192,7 +1192,7 @@ static snd_kcontrol_new_t snd_emu10k1x_spdif_mask_control = static snd_kcontrol_new_t snd_emu10k1x_spdif_control = { - .iface = SNDRV_CTL_ELEM_IFACE_MIXER, + .iface = SNDRV_CTL_ELEM_IFACE_PCM, .name = SNDRV_CTL_NAME_IEC958("",PLAYBACK,DEFAULT), .count = 3, .info = snd_emu10k1x_spdif_info, diff --git a/sound/pci/emu10k1/emumixer.c b/sound/pci/emu10k1/emumixer.c index 279edaeb0663..d71a72e84bcc 100644 --- a/sound/pci/emu10k1/emumixer.c +++ b/sound/pci/emu10k1/emumixer.c @@ -181,7 +181,7 @@ static int snd_emu10k1_spdif_put(snd_kcontrol_t * kcontrol, static snd_kcontrol_new_t snd_emu10k1_spdif_mask_control = { .access = SNDRV_CTL_ELEM_ACCESS_READ, - .iface = SNDRV_CTL_ELEM_IFACE_MIXER, + .iface = SNDRV_CTL_ELEM_IFACE_PCM, .name = SNDRV_CTL_NAME_IEC958("",PLAYBACK,MASK), .count = 4, .info = snd_emu10k1_spdif_info, @@ -190,7 +190,7 @@ static snd_kcontrol_new_t snd_emu10k1_spdif_mask_control = static snd_kcontrol_new_t snd_emu10k1_spdif_control = { - .iface = SNDRV_CTL_ELEM_IFACE_MIXER, + .iface = SNDRV_CTL_ELEM_IFACE_PCM, .name = SNDRV_CTL_NAME_IEC958("",PLAYBACK,DEFAULT), .count = 4, .info = snd_emu10k1_spdif_info, @@ -931,10 +931,14 @@ int __devinit snd_emu10k1_mixer(emu10k1_t *emu, /* sb live! 
and audigy */ if ((kctl = snd_ctl_new1(&snd_emu10k1_spdif_mask_control, emu)) == NULL) return -ENOMEM; + if (!emu->audigy) + kctl->id.device = emu->pcm_efx->device; if ((err = snd_ctl_add(card, kctl))) return err; if ((kctl = snd_ctl_new1(&snd_emu10k1_spdif_control, emu)) == NULL) return -ENOMEM; + if (!emu->audigy) + kctl->id.device = emu->pcm_efx->device; if ((err = snd_ctl_add(card, kctl))) return err; } diff --git a/sound/pci/ens1370.c b/sound/pci/ens1370.c index bc8272be0031..f06b95f41a1d 100644 --- a/sound/pci/ens1370.c +++ b/sound/pci/ens1370.c @@ -1446,7 +1446,7 @@ static int snd_es1371_spdif_put(snd_kcontrol_t * kcontrol, snd_ctl_elem_value_t static snd_kcontrol_new_t snd_es1371_mixer_spdif[] __devinitdata = { ES1371_SPDIF(SNDRV_CTL_NAME_IEC958("",PLAYBACK,SWITCH)), { - .iface = SNDRV_CTL_ELEM_IFACE_MIXER, + .iface = SNDRV_CTL_ELEM_IFACE_PCM, .name = SNDRV_CTL_NAME_IEC958("",PLAYBACK,DEFAULT), .info = snd_ens1373_spdif_info, .get = snd_ens1373_spdif_default_get, @@ -1454,13 +1454,13 @@ static snd_kcontrol_new_t snd_es1371_mixer_spdif[] __devinitdata = { }, { .access = SNDRV_CTL_ELEM_ACCESS_READ, - .iface = SNDRV_CTL_ELEM_IFACE_MIXER, + .iface = SNDRV_CTL_ELEM_IFACE_PCM, .name = SNDRV_CTL_NAME_IEC958("",PLAYBACK,MASK), .info = snd_ens1373_spdif_info, .get = snd_ens1373_spdif_mask_get }, { - .iface = SNDRV_CTL_ELEM_IFACE_MIXER, + .iface = SNDRV_CTL_ELEM_IFACE_PCM, .name = SNDRV_CTL_NAME_IEC958("",PLAYBACK,PCM_STREAM), .info = snd_ens1373_spdif_info, .get = snd_ens1373_spdif_stream_get, diff --git a/sound/pci/rme9652/hdsp.c b/sound/pci/rme9652/hdsp.c index 60eef845a074..6694866089b5 100644 --- a/sound/pci/rme9652/hdsp.c +++ b/sound/pci/rme9652/hdsp.c @@ -3104,7 +3104,7 @@ HDSP_XLR_BREAKOUT_CABLE("XLR Breakout Cable", 0) static snd_kcontrol_new_t snd_hdsp_controls[] = { { - .iface = SNDRV_CTL_ELEM_IFACE_MIXER, + .iface = SNDRV_CTL_ELEM_IFACE_PCM, .name = SNDRV_CTL_NAME_IEC958("",PLAYBACK,DEFAULT), .info = snd_hdsp_control_spdif_info, .get = 
snd_hdsp_control_spdif_get, @@ -3112,7 +3112,7 @@ static snd_kcontrol_new_t snd_hdsp_controls[] = { }, { .access = SNDRV_CTL_ELEM_ACCESS_READWRITE | SNDRV_CTL_ELEM_ACCESS_INACTIVE, - .iface = SNDRV_CTL_ELEM_IFACE_MIXER, + .iface = SNDRV_CTL_ELEM_IFACE_PCM, .name = SNDRV_CTL_NAME_IEC958("",PLAYBACK,PCM_STREAM), .info = snd_hdsp_control_spdif_stream_info, .get = snd_hdsp_control_spdif_stream_get, @@ -3120,7 +3120,7 @@ static snd_kcontrol_new_t snd_hdsp_controls[] = { }, { .access = SNDRV_CTL_ELEM_ACCESS_READ, - .iface = SNDRV_CTL_ELEM_IFACE_MIXER, + .iface = SNDRV_CTL_ELEM_IFACE_PCM, .name = SNDRV_CTL_NAME_IEC958("",PLAYBACK,CON_MASK), .info = snd_hdsp_control_spdif_mask_info, .get = snd_hdsp_control_spdif_mask_get, @@ -3130,7 +3130,7 @@ static snd_kcontrol_new_t snd_hdsp_controls[] = { }, { .access = SNDRV_CTL_ELEM_ACCESS_READ, - .iface = SNDRV_CTL_ELEM_IFACE_MIXER, + .iface = SNDRV_CTL_ELEM_IFACE_PCM, .name = SNDRV_CTL_NAME_IEC958("",PLAYBACK,PRO_MASK), .info = snd_hdsp_control_spdif_mask_info, .get = snd_hdsp_control_spdif_mask_get, diff --git a/sound/pci/rme9652/rme9652.c b/sound/pci/rme9652/rme9652.c index 6e3a91dba3eb..8ee4d6fd6ea7 100644 --- a/sound/pci/rme9652/rme9652.c +++ b/sound/pci/rme9652/rme9652.c @@ -1529,7 +1529,7 @@ static int snd_rme9652_get_tc_value(void *private_data, static snd_kcontrol_new_t snd_rme9652_controls[] = { { - .iface = SNDRV_CTL_ELEM_IFACE_MIXER, + .iface = SNDRV_CTL_ELEM_IFACE_PCM, .name = SNDRV_CTL_NAME_IEC958("",PLAYBACK,DEFAULT), .info = snd_rme9652_control_spdif_info, .get = snd_rme9652_control_spdif_get, @@ -1537,7 +1537,7 @@ static snd_kcontrol_new_t snd_rme9652_controls[] = { }, { .access = SNDRV_CTL_ELEM_ACCESS_READWRITE | SNDRV_CTL_ELEM_ACCESS_INACTIVE, - .iface = SNDRV_CTL_ELEM_IFACE_MIXER, + .iface = SNDRV_CTL_ELEM_IFACE_PCM, .name = SNDRV_CTL_NAME_IEC958("",PLAYBACK,PCM_STREAM), .info = snd_rme9652_control_spdif_stream_info, .get = snd_rme9652_control_spdif_stream_get, @@ -1545,7 +1545,7 @@ static snd_kcontrol_new_t 
snd_rme9652_controls[] = { }, { .access = SNDRV_CTL_ELEM_ACCESS_READ, - .iface = SNDRV_CTL_ELEM_IFACE_MIXER, + .iface = SNDRV_CTL_ELEM_IFACE_PCM, .name = SNDRV_CTL_NAME_IEC958("",PLAYBACK,CON_MASK), .info = snd_rme9652_control_spdif_mask_info, .get = snd_rme9652_control_spdif_mask_get, @@ -1555,7 +1555,7 @@ static snd_kcontrol_new_t snd_rme9652_controls[] = { }, { .access = SNDRV_CTL_ELEM_ACCESS_READ, - .iface = SNDRV_CTL_ELEM_IFACE_MIXER, + .iface = SNDRV_CTL_ELEM_IFACE_PCM, .name = SNDRV_CTL_NAME_IEC958("",PLAYBACK,PRO_MASK), .info = snd_rme9652_control_spdif_mask_info, .get = snd_rme9652_control_spdif_mask_get, From 5b8f7f7329289fa3b74ef8cdbaa9d2929787d3e6 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Wed, 3 Aug 2005 14:02:47 +0200 Subject: [PATCH 520/584] [ALSA] ad1816a - Add clockfreq module option Documentation,AD1816A driver Added clockfreq module option for the card with a different clock frequency than 33kHz. Signed-off-by: Takashi Iwai --- include/sound/ad1816a.h | 1 + sound/isa/ad1816a/ad1816a.c | 5 +++++ sound/isa/ad1816a/ad1816a_lib.c | 14 ++++++++++---- 3 files changed, 16 insertions(+), 4 deletions(-) diff --git a/include/sound/ad1816a.h b/include/sound/ad1816a.h index 395978e375cf..ca2e0e4fa937 100644 --- a/include/sound/ad1816a.h +++ b/include/sound/ad1816a.h @@ -138,6 +138,7 @@ struct _snd_ad1816a { spinlock_t lock; unsigned short mode; + unsigned int clock_freq; snd_card_t *card; snd_pcm_t *pcm; diff --git a/sound/isa/ad1816a/ad1816a.c b/sound/isa/ad1816a/ad1816a.c index 563296d02894..0eb442ca23d6 100644 --- a/sound/isa/ad1816a/ad1816a.c +++ b/sound/isa/ad1816a/ad1816a.c @@ -53,6 +53,7 @@ static int irq[SNDRV_CARDS] = SNDRV_DEFAULT_IRQ; /* Pnp setup */ static int mpu_irq[SNDRV_CARDS] = SNDRV_DEFAULT_IRQ; /* Pnp setup */ static int dma1[SNDRV_CARDS] = SNDRV_DEFAULT_DMA; /* PnP setup */ static int dma2[SNDRV_CARDS] = SNDRV_DEFAULT_DMA; /* PnP setup */ +static int clockfreq[SNDRV_CARDS]; module_param_array(index, int, NULL, 0444); 
MODULE_PARM_DESC(index, "Index value for ad1816a based soundcard."); @@ -74,6 +75,8 @@ module_param_array(dma1, int, NULL, 0444); MODULE_PARM_DESC(dma1, "1st DMA # for ad1816a driver."); module_param_array(dma2, int, NULL, 0444); MODULE_PARM_DESC(dma2, "2nd DMA # for ad1816a driver."); +module_param_array(clockfreq, int, NULL, 0444); +MODULE_PARM_DESC(clockfreq, "Clock frequency for ad1816a driver (default = 0)."); struct snd_card_ad1816a { struct pnp_dev *dev; @@ -209,6 +212,8 @@ static int __devinit snd_card_ad1816a_probe(int dev, struct pnp_card_link *pcard snd_card_free(card); return error; } + if (clockfreq[dev] >= 5000 && clockfreq[dev] <= 100000) + chip->clock_freq = clockfreq[dev]; strcpy(card->driver, "AD1816A"); strcpy(card->shortname, "ADI SoundPort AD1816A"); diff --git a/sound/isa/ad1816a/ad1816a_lib.c b/sound/isa/ad1816a/ad1816a_lib.c index 625b2eff14a1..ae860360ecf9 100644 --- a/sound/isa/ad1816a/ad1816a_lib.c +++ b/sound/isa/ad1816a/ad1816a_lib.c @@ -234,7 +234,7 @@ static int snd_ad1816a_playback_prepare(snd_pcm_substream_t *substream) ad1816a_t *chip = snd_pcm_substream_chip(substream); unsigned long flags; snd_pcm_runtime_t *runtime = substream->runtime; - unsigned int size; + unsigned int size, rate; spin_lock_irqsave(&chip->lock, flags); @@ -245,7 +245,10 @@ static int snd_ad1816a_playback_prepare(snd_pcm_substream_t *substream) snd_dma_program(chip->dma1, runtime->dma_addr, size, DMA_MODE_WRITE | DMA_AUTOINIT); - snd_ad1816a_write(chip, AD1816A_PLAYBACK_SAMPLE_RATE, runtime->rate); + rate = runtime->rate; + if (chip->clock_freq) + rate = (rate * 33000) / chip->clock_freq; + snd_ad1816a_write(chip, AD1816A_PLAYBACK_SAMPLE_RATE, rate); snd_ad1816a_out_mask(chip, AD1816A_PLAYBACK_CONFIG, AD1816A_FMT_ALL | AD1816A_FMT_STEREO, snd_ad1816a_get_format(chip, runtime->format, @@ -263,7 +266,7 @@ static int snd_ad1816a_capture_prepare(snd_pcm_substream_t *substream) ad1816a_t *chip = snd_pcm_substream_chip(substream); unsigned long flags; 
snd_pcm_runtime_t *runtime = substream->runtime; - unsigned int size; + unsigned int size, rate; spin_lock_irqsave(&chip->lock, flags); @@ -274,7 +277,10 @@ static int snd_ad1816a_capture_prepare(snd_pcm_substream_t *substream) snd_dma_program(chip->dma2, runtime->dma_addr, size, DMA_MODE_READ | DMA_AUTOINIT); - snd_ad1816a_write(chip, AD1816A_CAPTURE_SAMPLE_RATE, runtime->rate); + rate = runtime->rate; + if (chip->clock_freq) + rate = (rate * 33000) / chip->clock_freq; + snd_ad1816a_write(chip, AD1816A_CAPTURE_SAMPLE_RATE, rate); snd_ad1816a_out_mask(chip, AD1816A_CAPTURE_CONFIG, AD1816A_FMT_ALL | AD1816A_FMT_STEREO, snd_ad1816a_get_format(chip, runtime->format, From 4a677ac5d0a4fad286cdfb4890a5e2de61534b1c Mon Sep 17 00:00:00 2001 From: Liam Girdwood Date: Fri, 5 Aug 2005 10:24:36 +0200 Subject: [PATCH 521/584] [ALSA] PXA27x AC97 warm reset ARM PXA2XX driver This patch extends the period of an AC97 warm reset on the PXA27x from 50uS to 500uS. The shorter reset didn't always guarantee that the codec would wake up. Changes:- o Change pxa27x warm reset period to 500uS o Removed double semi-colon. 
Signed-off-by: Liam Girdwood Signed-off-by: Takashi Iwai --- sound/arm/pxa2xx-ac97.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sound/arm/pxa2xx-ac97.c b/sound/arm/pxa2xx-ac97.c index 46052304e230..b605a24946a3 100644 --- a/sound/arm/pxa2xx-ac97.c +++ b/sound/arm/pxa2xx-ac97.c @@ -132,9 +132,9 @@ static void pxa2xx_ac97_reset(ac97_t *ac97) udelay(10); GCR |= GCR_WARM_RST; pxa_gpio_mode(113 | GPIO_ALT_FN_2_OUT); - udelay(50); + udelay(500); #else - GCR |= GCR_WARM_RST|GCR_PRIRDY_IEN|GCR_SECRDY_IEN;; + GCR |= GCR_WARM_RST|GCR_PRIRDY_IEN|GCR_SECRDY_IEN; wait_event_timeout(gsr_wq, gsr_bits & (GSR_PCR | GSR_SCR), 1); #endif From 064d2112ff24937f9aabb6baae8de88b6e5ef453 Mon Sep 17 00:00:00 2001 From: Liam Girdwood Date: Fri, 5 Aug 2005 10:25:08 +0200 Subject: [PATCH 522/584] [ALSA] WM9713 modem detection AC97 Codec This patch fixes a problem whereby the WM9713 has modem functionality incorrectly detected after an AC97 cold reset. Changes:- o Cleared AC97_SCAP_MODEM in wm9713 scaps Signed-off-by: Liam Girdwood Signed-off-by: Takashi Iwai --- sound/pci/ac97/ac97_patch.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/ac97/ac97_patch.c b/sound/pci/ac97/ac97_patch.c index 00fb51992460..a51b61d5066b 100644 --- a/sound/pci/ac97/ac97_patch.c +++ b/sound/pci/ac97/ac97_patch.c @@ -786,6 +786,7 @@ int patch_wolfson13(ac97_t * ac97) ac97->flags |= AC97_HAS_NO_REC_GAIN | AC97_STEREO_MUTES | AC97_HAS_NO_PHONE | AC97_HAS_NO_PC_BEEP | AC97_HAS_NO_VIDEO | AC97_HAS_NO_CD | AC97_HAS_NO_TONE | AC97_HAS_NO_STD_PCM; + ac97->scaps &= ~AC97_SCAP_MODEM; snd_ac97_write_cache(ac97, AC97_EXTENDED_MID, 0xda00); snd_ac97_write_cache(ac97, AC97_EXTENDED_MSTATUS, 0x3810); From a55bfdc5821df787068da15a6864f2c669d7d22c Mon Sep 17 00:00:00 2001 From: Dirk Opfer Date: Mon, 8 Aug 2005 16:29:43 +0200 Subject: [PATCH 523/584] [ALSA] Fix compiler warnings in PXA2XX-AC97 ARM PXA2XX driver - change pxa2xx_ac97_do_suspend and pxa2xx_ac97_do_resume to use the expected 
arguments Signed-off-by: Dirk Opfer Signed-off-by: Takashi Iwai --- sound/arm/pxa2xx-ac97.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/sound/arm/pxa2xx-ac97.c b/sound/arm/pxa2xx-ac97.c index b605a24946a3..29450befb5da 100644 --- a/sound/arm/pxa2xx-ac97.c +++ b/sound/arm/pxa2xx-ac97.c @@ -261,7 +261,7 @@ static int pxa2xx_ac97_do_suspend(snd_card_t *card, unsigned int state) return 0; } -static int pxa2xx_ac97_do_resume(snd_card_t *card, unsigned int state) +static int pxa2xx_ac97_do_resume(snd_card_t *card) { if (card->power_state != SNDRV_CTL_POWER_D0) { pxa2xx_audio_ops_t *platform_ops = card->dev->platform_data; @@ -275,13 +275,13 @@ static int pxa2xx_ac97_do_resume(snd_card_t *card, unsigned int state) return 0; } -static int pxa2xx_ac97_suspend(struct device *_dev, u32 state, u32 level) +static int pxa2xx_ac97_suspend(struct device *_dev, pm_message_t state, u32 level) { snd_card_t *card = dev_get_drvdata(_dev); int ret = 0; if (card && level == SUSPEND_DISABLE) - ret = pxa2xx_ac97_do_suspend(card, SNDRV_CTL_POWER_D3cold); + ret = pxa2xx_ac97_do_suspend(card, PMSG_SUSPEND); return ret; } @@ -292,7 +292,7 @@ static int pxa2xx_ac97_resume(struct device *_dev, u32 level) int ret = 0; if (card && level == RESUME_ENABLE) - ret = pxa2xx_ac97_do_resume(card, SNDRV_CTL_POWER_D0); + ret = pxa2xx_ac97_do_resume(card); return ret; } From 9bcf655109ae06a8e652671a0de6fe2da5c213c2 Mon Sep 17 00:00:00 2001 From: Clemens Ladisch Date: Wed, 10 Aug 2005 10:21:43 +0200 Subject: [PATCH 524/584] [ALSA] ymfpci: add per-voice volume controls YMFPCI driver Implements mixer controls for the volume of each playback substream of the main PCM device. 
Signed-off-by: Clemens Ladisch --- include/sound/ymfpci.h | 6 + sound/pci/ymfpci/ymfpci_main.c | 230 +++++++++++++++++++++++---------- 2 files changed, 165 insertions(+), 71 deletions(-) diff --git a/include/sound/ymfpci.h b/include/sound/ymfpci.h index 4b570684a6aa..9a3c1e6c820a 100644 --- a/include/sound/ymfpci.h +++ b/include/sound/ymfpci.h @@ -295,6 +295,7 @@ struct _snd_ymfpci_pcm { unsigned int running: 1; unsigned int output_front: 1; unsigned int output_rear: 1; + unsigned int update_pcm_vol; u32 period_size; /* cached from runtime->period_size */ u32 buffer_size; /* cached from runtime->buffer_size */ u32 period_pos; @@ -367,6 +368,11 @@ struct _snd_ymfpci { int mode_dup4ch; int rear_opened; int spdif_opened; + struct { + u16 left; + u16 right; + snd_kcontrol_t *ctl; + } pcm_mixer[32]; spinlock_t reg_lock; spinlock_t voice_lock; diff --git a/sound/pci/ymfpci/ymfpci_main.c b/sound/pci/ymfpci/ymfpci_main.c index d54f88a1b525..054836412dc4 100644 --- a/sound/pci/ymfpci/ymfpci_main.c +++ b/sound/pci/ymfpci/ymfpci_main.c @@ -321,6 +321,26 @@ static void snd_ymfpci_pcm_interrupt(ymfpci_t *chip, ymfpci_voice_t *voice) snd_pcm_period_elapsed(ypcm->substream); spin_lock(&chip->reg_lock); } + + if (unlikely(ypcm->update_pcm_vol)) { + unsigned int subs = ypcm->substream->number; + unsigned int next_bank = 1 - chip->active_bank; + snd_ymfpci_playback_bank_t *bank; + u32 volume; + + bank = &voice->bank[next_bank]; + volume = cpu_to_le32(chip->pcm_mixer[subs].left << 15); + bank->left_gain_end = volume; + if (ypcm->output_rear) + bank->eff2_gain_end = volume; + if (ypcm->voices[1]) + bank = &ypcm->voices[1]->bank[next_bank]; + volume = cpu_to_le32(chip->pcm_mixer[subs].right << 15); + bank->right_gain_end = volume; + if (ypcm->output_rear) + bank->eff3_gain_end = volume; + ypcm->update_pcm_vol--; + } } spin_unlock(&chip->reg_lock); } @@ -451,87 +471,74 @@ static int snd_ymfpci_pcm_voice_alloc(ymfpci_pcm_t *ypcm, int voices) return 0; } -static void 
snd_ymfpci_pcm_init_voice(ymfpci_voice_t *voice, int stereo, - int rate, int w_16, unsigned long addr, - unsigned int end, - int output_front, int output_rear) +static void snd_ymfpci_pcm_init_voice(ymfpci_pcm_t *ypcm, unsigned int voiceidx, + snd_pcm_runtime_t *runtime, + int has_pcm_volume) { + ymfpci_voice_t *voice = ypcm->voices[voiceidx]; u32 format; - u32 delta = snd_ymfpci_calc_delta(rate); - u32 lpfQ = snd_ymfpci_calc_lpfQ(rate); - u32 lpfK = snd_ymfpci_calc_lpfK(rate); + u32 delta = snd_ymfpci_calc_delta(runtime->rate); + u32 lpfQ = snd_ymfpci_calc_lpfQ(runtime->rate); + u32 lpfK = snd_ymfpci_calc_lpfK(runtime->rate); snd_ymfpci_playback_bank_t *bank; unsigned int nbank; + u32 vol_left, vol_right; + u8 use_left, use_right; snd_assert(voice != NULL, return); - format = (stereo ? 0x00010000 : 0) | (w_16 ? 0 : 0x80000000); + if (runtime->channels == 1) { + use_left = 1; + use_right = 1; + } else { + use_left = (voiceidx & 1) == 0; + use_right = !use_left; + } + if (has_pcm_volume) { + vol_left = cpu_to_le32(ypcm->chip->pcm_mixer + [ypcm->substream->number].left << 15); + vol_right = cpu_to_le32(ypcm->chip->pcm_mixer + [ypcm->substream->number].right << 15); + } else { + vol_left = cpu_to_le32(0x40000000); + vol_right = cpu_to_le32(0x40000000); + } + format = runtime->channels == 2 ? 
0x00010000 : 0; + if (snd_pcm_format_width(runtime->format) == 8) + format |= 0x80000000; + if (runtime->channels == 2 && (voiceidx & 1) != 0) + format |= 1; for (nbank = 0; nbank < 2; nbank++) { bank = &voice->bank[nbank]; + memset(bank, 0, sizeof(*bank)); bank->format = cpu_to_le32(format); - bank->loop_default = 0; - bank->base = cpu_to_le32(addr); - bank->loop_start = 0; - bank->loop_end = cpu_to_le32(end); - bank->loop_frac = 0; - bank->eg_gain_end = cpu_to_le32(0x40000000); + bank->base = cpu_to_le32(runtime->dma_addr); + bank->loop_end = cpu_to_le32(ypcm->buffer_size); bank->lpfQ = cpu_to_le32(lpfQ); - bank->status = 0; - bank->num_of_frames = 0; - bank->loop_count = 0; - bank->start = 0; - bank->start_frac = 0; bank->delta = bank->delta_end = cpu_to_le32(delta); bank->lpfK = bank->lpfK_end = cpu_to_le32(lpfK); - bank->eg_gain = cpu_to_le32(0x40000000); - bank->lpfD1 = - bank->lpfD2 = 0; + bank->eg_gain = + bank->eg_gain_end = cpu_to_le32(0x40000000); - bank->left_gain = - bank->right_gain = - bank->left_gain_end = - bank->right_gain_end = - bank->eff1_gain = - bank->eff2_gain = - bank->eff3_gain = - bank->eff1_gain_end = - bank->eff2_gain_end = - bank->eff3_gain_end = 0; - - if (!stereo) { - if (output_front) { - bank->left_gain = + if (ypcm->output_front) { + if (use_left) { + bank->left_gain = + bank->left_gain_end = vol_left; + } + if (use_right) { bank->right_gain = - bank->left_gain_end = - bank->right_gain_end = cpu_to_le32(0x40000000); + bank->right_gain_end = vol_right; } - if (output_rear) { + } + if (ypcm->output_rear) { + if (use_left) { bank->eff2_gain = - bank->eff2_gain_end = + bank->eff2_gain_end = vol_left; + } + if (use_right) { bank->eff3_gain = - bank->eff3_gain_end = cpu_to_le32(0x40000000); - } - } else { - if (output_front) { - if ((voice->number & 1) == 0) { - bank->left_gain = - bank->left_gain_end = cpu_to_le32(0x40000000); - } else { - bank->format |= cpu_to_le32(1); - bank->right_gain = - bank->right_gain_end = 
cpu_to_le32(0x40000000); - } - } - if (output_rear) { - if ((voice->number & 1) == 0) { - bank->eff3_gain = - bank->eff3_gain_end = cpu_to_le32(0x40000000); - } else { - bank->format |= cpu_to_le32(1); - bank->eff2_gain = - bank->eff2_gain_end = cpu_to_le32(0x40000000); - } + bank->eff3_gain_end = vol_right; } } } @@ -613,7 +620,7 @@ static int snd_ymfpci_playback_hw_free(snd_pcm_substream_t * substream) static int snd_ymfpci_playback_prepare(snd_pcm_substream_t * substream) { - // ymfpci_t *chip = snd_pcm_substream_chip(substream); + ymfpci_t *chip = snd_pcm_substream_chip(substream); snd_pcm_runtime_t *runtime = substream->runtime; ymfpci_pcm_t *ypcm = runtime->private_data; unsigned int nvoice; @@ -623,14 +630,8 @@ static int snd_ymfpci_playback_prepare(snd_pcm_substream_t * substream) ypcm->period_pos = 0; ypcm->last_pos = 0; for (nvoice = 0; nvoice < runtime->channels; nvoice++) - snd_ymfpci_pcm_init_voice(ypcm->voices[nvoice], - runtime->channels == 2, - runtime->rate, - snd_pcm_format_width(runtime->format) == 16, - runtime->dma_addr, - ypcm->buffer_size, - ypcm->output_front, - ypcm->output_rear); + snd_ymfpci_pcm_init_voice(ypcm, nvoice, runtime, + substream->pcm == chip->pcm); return 0; } @@ -882,6 +883,7 @@ static int snd_ymfpci_playback_open(snd_pcm_substream_t * substream) ymfpci_t *chip = snd_pcm_substream_chip(substream); snd_pcm_runtime_t *runtime = substream->runtime; ymfpci_pcm_t *ypcm; + snd_kcontrol_t *kctl; int err; if ((err = snd_ymfpci_playback_open_1(substream)) < 0) @@ -895,6 +897,10 @@ static int snd_ymfpci_playback_open(snd_pcm_substream_t * substream) chip->rear_opened++; } spin_unlock_irq(&chip->reg_lock); + + kctl = chip->pcm_mixer[substream->number].ctl; + kctl->vd[0].access &= ~SNDRV_CTL_ELEM_ACCESS_INACTIVE; + snd_ctl_notify(chip->card, SNDRV_CTL_EVENT_MASK_INFO, &kctl->id); return 0; } @@ -987,6 +993,7 @@ static int snd_ymfpci_playback_close(snd_pcm_substream_t * substream) { ymfpci_t *chip = snd_pcm_substream_chip(substream); 
ymfpci_pcm_t *ypcm = substream->runtime->private_data; + snd_kcontrol_t *kctl; spin_lock_irq(&chip->reg_lock); if (ypcm->output_rear && chip->rear_opened > 0) { @@ -994,6 +1001,9 @@ static int snd_ymfpci_playback_close(snd_pcm_substream_t * substream) ymfpci_close_extension(chip); } spin_unlock_irq(&chip->reg_lock); + kctl = chip->pcm_mixer[substream->number].ctl; + kctl->vd[0].access |= SNDRV_CTL_ELEM_ACCESS_INACTIVE; + snd_ctl_notify(chip->card, SNDRV_CTL_EVENT_MASK_INFO, &kctl->id); return snd_ymfpci_playback_close_1(substream); } @@ -1665,6 +1675,66 @@ static snd_kcontrol_new_t snd_ymfpci_rear_shared __devinitdata = { .private_value = 2, }; +/* + * PCM voice volume + */ + +static int snd_ymfpci_pcm_vol_info(snd_kcontrol_t *kcontrol, + snd_ctl_elem_info_t *uinfo) +{ + uinfo->type = SNDRV_CTL_ELEM_TYPE_INTEGER; + uinfo->count = 2; + uinfo->value.integer.min = 0; + uinfo->value.integer.max = 0x8000; + return 0; +} + +static int snd_ymfpci_pcm_vol_get(snd_kcontrol_t *kcontrol, + snd_ctl_elem_value_t *ucontrol) +{ + ymfpci_t *chip = snd_kcontrol_chip(kcontrol); + unsigned int subs = kcontrol->id.subdevice; + + ucontrol->value.integer.value[0] = chip->pcm_mixer[subs].left; + ucontrol->value.integer.value[1] = chip->pcm_mixer[subs].right; + return 0; +} + +static int snd_ymfpci_pcm_vol_put(snd_kcontrol_t *kcontrol, + snd_ctl_elem_value_t *ucontrol) +{ + ymfpci_t *chip = snd_kcontrol_chip(kcontrol); + unsigned int subs = kcontrol->id.subdevice; + snd_pcm_substream_t *substream; + unsigned long flags; + + if (ucontrol->value.integer.value[0] != chip->pcm_mixer[subs].left || + ucontrol->value.integer.value[1] != chip->pcm_mixer[subs].right) { + chip->pcm_mixer[subs].left = ucontrol->value.integer.value[0]; + chip->pcm_mixer[subs].right = ucontrol->value.integer.value[1]; + + substream = (snd_pcm_substream_t *)kcontrol->private_value; + spin_lock_irqsave(&chip->voice_lock, flags); + if (substream->runtime && substream->runtime->private_data) { + ymfpci_pcm_t *ypcm = 
substream->runtime->private_data; + ypcm->update_pcm_vol = 2; + } + spin_unlock_irqrestore(&chip->voice_lock, flags); + return 1; + } + return 0; +} + +static snd_kcontrol_new_t snd_ymfpci_pcm_volume __devinitdata = { + .iface = SNDRV_CTL_ELEM_IFACE_PCM, + .name = "PCM Playback Volume", + .access = SNDRV_CTL_ELEM_ACCESS_READWRITE | + SNDRV_CTL_ELEM_ACCESS_INACTIVE, + .info = snd_ymfpci_pcm_vol_info, + .get = snd_ymfpci_pcm_vol_get, + .put = snd_ymfpci_pcm_vol_put, +}; + /* * Mixer routines @@ -1686,6 +1756,7 @@ int __devinit snd_ymfpci_mixer(ymfpci_t *chip, int rear_switch) { ac97_template_t ac97; snd_kcontrol_t *kctl; + snd_pcm_substream_t *substream; unsigned int idx; int err; static ac97_bus_ops_t ops = { @@ -1739,6 +1810,23 @@ int __devinit snd_ymfpci_mixer(ymfpci_t *chip, int rear_switch) return err; } + /* per-voice volume */ + substream = chip->pcm->streams[SNDRV_PCM_STREAM_PLAYBACK].substream; + for (idx = 0; idx < 32; ++idx) { + kctl = snd_ctl_new1(&snd_ymfpci_pcm_volume, chip); + if (!kctl) + return -ENOMEM; + kctl->id.device = chip->pcm->device; + kctl->id.subdevice = idx; + kctl->private_value = (unsigned long)substream; + if ((err = snd_ctl_add(chip->card, kctl)) < 0) + return err; + chip->pcm_mixer[idx].left = 0x8000; + chip->pcm_mixer[idx].right = 0x8000; + chip->pcm_mixer[idx].ctl = kctl; + substream = substream->next; + } + return 0; } From 443feb882679e21ba5d1e0ff9eff067ac26d9461 Mon Sep 17 00:00:00 2001 From: Karsten Wiese Date: Wed, 10 Aug 2005 11:18:19 +0200 Subject: [PATCH 525/584] [ALSA] ALSA's struct _snd_pcm_substream: Obsolete open_flag PCM Midlevel,ALSA<-OSS emulation,USB USX2Y This patch removes open_flag from struct _snd_pcm_substream. All of its uses are substituted by querying struct _snd_pcm_substream's member ffile instead. 
Signed-off-by: Karsten Wiese Signed-off-by: Takashi Iwai --- include/sound/pcm.h | 1 - sound/core/oss/pcm_oss.c | 11 ++++------- sound/core/pcm_lib.c | 16 ++++++++++++++-- sound/core/pcm_native.c | 10 +++------- sound/usb/usx2y/usx2yhwdeppcm.c | 2 +- 5 files changed, 22 insertions(+), 18 deletions(-) diff --git a/include/sound/pcm.h b/include/sound/pcm.h index d935417575b5..fa23ebfb857a 100644 --- a/include/sound/pcm.h +++ b/include/sound/pcm.h @@ -379,7 +379,6 @@ struct _snd_pcm_substream { unsigned int dma_buf_id; size_t dma_max; /* -- hardware operations -- */ - unsigned int open_flag: 1; /* lowlevel device has been opened */ snd_pcm_ops_t *ops; /* -- runtime information -- */ snd_pcm_runtime_t *runtime; diff --git a/sound/core/oss/pcm_oss.c b/sound/core/oss/pcm_oss.c index de7444c586f9..a13bd7bb4c9f 100644 --- a/sound/core/oss/pcm_oss.c +++ b/sound/core/oss/pcm_oss.c @@ -1705,13 +1705,12 @@ static int snd_pcm_oss_release_file(snd_pcm_oss_file_t *pcm_oss_file) if (snd_pcm_running(substream)) snd_pcm_stop(substream, SNDRV_PCM_STATE_SETUP); snd_pcm_stream_unlock_irq(substream); - if (substream->open_flag) { + if (substream->ffile != NULL) { if (substream->ops->hw_free != NULL) substream->ops->hw_free(substream); substream->ops->close(substream); - substream->open_flag = 0; + substream->ffile = NULL; } - substream->ffile = NULL; snd_pcm_oss_release_substream(substream); snd_pcm_release_substream(substream); } @@ -1778,14 +1777,13 @@ static int snd_pcm_oss_open_file(struct file *file, snd_pcm_oss_release_file(pcm_oss_file); return err; } - psubstream->open_flag = 1; + psubstream->ffile = file; err = snd_pcm_hw_constraints_complete(psubstream); if (err < 0) { snd_printd("snd_pcm_hw_constraint_complete failed\n"); snd_pcm_oss_release_file(pcm_oss_file); return err; } - psubstream->ffile = file; snd_pcm_oss_init_substream(psubstream, psetup, minor); } if (csubstream != NULL) { @@ -1800,14 +1798,13 @@ static int snd_pcm_oss_open_file(struct file *file, 
snd_pcm_oss_release_file(pcm_oss_file); return err; } - csubstream->open_flag = 1; + csubstream->ffile = file; err = snd_pcm_hw_constraints_complete(csubstream); if (err < 0) { snd_printd("snd_pcm_hw_constraint_complete failed\n"); snd_pcm_oss_release_file(pcm_oss_file); return err; } - csubstream->ffile = file; snd_pcm_oss_init_substream(csubstream, csetup, minor); } diff --git a/sound/core/pcm_lib.c b/sound/core/pcm_lib.c index c5bfd0918cff..c41ec2e9f201 100644 --- a/sound/core/pcm_lib.c +++ b/sound/core/pcm_lib.c @@ -2063,7 +2063,7 @@ static snd_pcm_sframes_t snd_pcm_lib_write1(snd_pcm_substream_t *substream, if (((avail < runtime->control->avail_min && size > avail) || (size >= runtime->xfer_align && avail < runtime->xfer_align))) { wait_queue_t wait; - enum { READY, SIGNALED, ERROR, SUSPENDED, EXPIRED } state; + enum { READY, SIGNALED, ERROR, SUSPENDED, EXPIRED, DROPPED } state; long tout; if (nonblock) { @@ -2097,6 +2097,9 @@ static snd_pcm_sframes_t snd_pcm_lib_write1(snd_pcm_substream_t *substream, case SNDRV_PCM_STATE_SUSPENDED: state = SUSPENDED; goto _end_loop; + case SNDRV_PCM_STATE_SETUP: + state = DROPPED; + goto _end_loop; default: break; } @@ -2123,6 +2126,9 @@ static snd_pcm_sframes_t snd_pcm_lib_write1(snd_pcm_substream_t *substream, snd_printd("playback write error (DMA or IRQ trouble?)\n"); err = -EIO; goto _end_unlock; + case DROPPED: + err = -EBADFD; + goto _end_unlock; default: break; } @@ -2359,7 +2365,7 @@ static snd_pcm_sframes_t snd_pcm_lib_read1(snd_pcm_substream_t *substream, } else if ((avail < runtime->control->avail_min && size > avail) || (size >= runtime->xfer_align && avail < runtime->xfer_align)) { wait_queue_t wait; - enum { READY, SIGNALED, ERROR, SUSPENDED, EXPIRED } state; + enum { READY, SIGNALED, ERROR, SUSPENDED, EXPIRED, DROPPED } state; long tout; if (nonblock) { @@ -2394,6 +2400,9 @@ static snd_pcm_sframes_t snd_pcm_lib_read1(snd_pcm_substream_t *substream, goto _end_loop; case SNDRV_PCM_STATE_DRAINING: goto __draining; 
+ case SNDRV_PCM_STATE_SETUP: + state = DROPPED; + goto _end_loop; default: break; } @@ -2420,6 +2429,9 @@ static snd_pcm_sframes_t snd_pcm_lib_read1(snd_pcm_substream_t *substream, snd_printd("capture read error (DMA or IRQ trouble?)\n"); err = -EIO; goto _end_unlock; + case DROPPED: + err = -EBADFD; + goto _end_unlock; default: break; } diff --git a/sound/core/pcm_native.c b/sound/core/pcm_native.c index 10c2c9832649..5041be25d75b 100644 --- a/sound/core/pcm_native.c +++ b/sound/core/pcm_native.c @@ -1967,13 +1967,12 @@ static int snd_pcm_release_file(snd_pcm_file_t * pcm_file) runtime = substream->runtime; str = substream->pstr; snd_pcm_unlink(substream); - if (substream->open_flag) { + if (substream->ffile != NULL) { if (substream->ops->hw_free != NULL) substream->ops->hw_free(substream); substream->ops->close(substream); - substream->open_flag = 0; + substream->ffile = NULL; } - substream->ffile = NULL; snd_pcm_remove_file(str, pcm_file); snd_pcm_release_substream(substream); kfree(pcm_file); @@ -2022,18 +2021,15 @@ static int snd_pcm_open_file(struct file *file, snd_pcm_release_file(pcm_file); return err; } - substream->open_flag = 1; + substream->ffile = file; err = snd_pcm_hw_constraints_complete(substream); if (err < 0) { snd_printd("snd_pcm_hw_constraints_complete failed\n"); - substream->ops->close(substream); snd_pcm_release_file(pcm_file); return err; } - substream->ffile = file; - file->private_data = pcm_file; *rpcm_file = pcm_file; return 0; diff --git a/sound/usb/usx2y/usx2yhwdeppcm.c b/sound/usb/usx2y/usx2yhwdeppcm.c index ef28061287f2..d0199c4e5551 100644 --- a/sound/usb/usx2y/usx2yhwdeppcm.c +++ b/sound/usb/usx2y/usx2yhwdeppcm.c @@ -624,7 +624,7 @@ static int usX2Y_pcms_lock_check(snd_card_t *card) for (s = 0; s < 2; ++s) { snd_pcm_substream_t *substream; substream = pcm->streams[s].substream; - if (substream && substream->open_flag) + if (substream && substream->ffile != NULL) err = -EBUSY; } } From 673b683a07272bdc1f757aa32784b9fcc4b3a014 Mon 
Sep 17 00:00:00 2001 From: Sasha Khapyorsky Date: Thu, 11 Aug 2005 11:00:16 +0200 Subject: [PATCH 526/584] ALSA CVS update HDA Codec driver,HDA generic driver Summary: hda-codec - MFG support This adds Modem Functional Group (MFG) support and option for 9600 sample rate. Signed-off-by: Sasha Khapyorsky Signed-off-by: Takashi Iwai --- sound/pci/hda/hda_codec.c | 28 ++++++++++++++++------------ sound/pci/hda/hda_codec.h | 1 + sound/pci/hda/hda_generic.c | 5 +++++ 3 files changed, 22 insertions(+), 12 deletions(-) diff --git a/sound/pci/hda/hda_codec.c b/sound/pci/hda/hda_codec.c index 026ae726d875..6bfb081d12dd 100644 --- a/sound/pci/hda/hda_codec.c +++ b/sound/pci/hda/hda_codec.c @@ -432,22 +432,26 @@ void snd_hda_get_codec_name(struct hda_codec *codec, } /* - * look for an AFG node - * - * return 0 if not found + * look for an AFG and MFG nodes */ -static int look_for_afg_node(struct hda_codec *codec) +static void setup_fg_nodes(struct hda_codec *codec) { int i, total_nodes; hda_nid_t nid; total_nodes = snd_hda_get_sub_nodes(codec, AC_NODE_ROOT, &nid); for (i = 0; i < total_nodes; i++, nid++) { - if ((snd_hda_param_read(codec, nid, AC_PAR_FUNCTION_TYPE) & 0xff) == - AC_GRP_AUDIO_FUNCTION) - return nid; + switch((snd_hda_param_read(codec, nid, AC_PAR_FUNCTION_TYPE) & 0xff)) { + case AC_GRP_AUDIO_FUNCTION: + codec->afg = nid; + break; + case AC_GRP_MODEM_FUNCTION: + codec->mfg = nid; + break; + default: + break; + } } - return 0; } /* @@ -507,10 +511,9 @@ int snd_hda_codec_new(struct hda_bus *bus, unsigned int codec_addr, codec->subsystem_id = snd_hda_param_read(codec, AC_NODE_ROOT, AC_PAR_SUBSYSTEM_ID); codec->revision_id = snd_hda_param_read(codec, AC_NODE_ROOT, AC_PAR_REV_ID); - /* FIXME: support for multiple AFGs? */ - codec->afg = look_for_afg_node(codec); - if (! codec->afg) { - snd_printdd("hda_codec: no AFG node found\n"); + setup_fg_nodes(codec); + if (! codec->afg && !
codec->mfg) { + snd_printdd("hda_codec: no AFG or MFG node found\n"); snd_hda_codec_free(codec); return -ENODEV; } @@ -1163,6 +1166,7 @@ int snd_hda_build_controls(struct hda_bus *bus) static unsigned int rate_bits[][3] = { /* rate in Hz, ALSA rate bitmask, HDA format value */ { 8000, SNDRV_PCM_RATE_8000, 0x0500 }, /* 1/6 x 48 */ + { 9600, SNDRV_PCM_RATE_KNOT, 0x0400 }, /* 1/5 x 48 */ { 11025, SNDRV_PCM_RATE_11025, 0x4300 }, /* 1/4 x 44 */ { 16000, SNDRV_PCM_RATE_16000, 0x0200 }, /* 1/3 x 48 */ { 22050, SNDRV_PCM_RATE_22050, 0x4100 }, /* 1/2 x 44 */ diff --git a/sound/pci/hda/hda_codec.h b/sound/pci/hda/hda_codec.h index dd0d99d2ad27..63a29a8a2860 100644 --- a/sound/pci/hda/hda_codec.h +++ b/sound/pci/hda/hda_codec.h @@ -514,6 +514,7 @@ struct hda_codec { struct list_head list; /* list point */ hda_nid_t afg; /* AFG node id */ + hda_nid_t mfg; /* MFG node id */ /* ids */ u32 vendor_id; diff --git a/sound/pci/hda/hda_generic.c b/sound/pci/hda/hda_generic.c index 2d046abb5911..1229227af5b5 100644 --- a/sound/pci/hda/hda_generic.c +++ b/sound/pci/hda/hda_generic.c @@ -881,6 +881,11 @@ int snd_hda_parse_generic_codec(struct hda_codec *codec) struct hda_gspec *spec; int err; + if(!codec->afg) { + snd_printdd("hda_generic: no generic modem yet\n"); + return -ENODEV; + } + spec = kcalloc(1, sizeof(*spec), GFP_KERNEL); if (spec == NULL) { printk(KERN_ERR "hda_generic: can't allocate spec\n"); From b65f824c1ea954ea2b974e42c064f72bfbfe3dd2 Mon Sep 17 00:00:00 2001 From: Sasha Khapyorsky Date: Thu, 11 Aug 2005 11:18:38 +0200 Subject: [PATCH 527/584] [ALSA] hda-codec - support for Si3054/5 HDA modems HDA generic driver,HDA Codec driver Support for Si3054/5 HDA modem codecs. 
Signed-off-by: Sasha Khapyorsky Signed-off-by: Takashi Iwai --- sound/pci/hda/Makefile | 2 +- sound/pci/hda/hda_patch.h | 3 + sound/pci/hda/patch_si3054.c | 300 +++++++++++++++++++++++++++++++++++ 3 files changed, 304 insertions(+), 1 deletion(-) create mode 100644 sound/pci/hda/patch_si3054.c diff --git a/sound/pci/hda/Makefile b/sound/pci/hda/Makefile index bd8cb33c4fb4..ddfb5ff7fb8f 100644 --- a/sound/pci/hda/Makefile +++ b/sound/pci/hda/Makefile @@ -1,5 +1,5 @@ snd-hda-intel-objs := hda_intel.o -snd-hda-codec-objs := hda_codec.o hda_generic.o patch_realtek.o patch_cmedia.o patch_analog.o patch_sigmatel.o +snd-hda-codec-objs := hda_codec.o hda_generic.o patch_realtek.o patch_cmedia.o patch_analog.o patch_sigmatel.o patch_si3054.o ifdef CONFIG_PROC_FS snd-hda-codec-objs += hda_proc.o endif diff --git a/sound/pci/hda/hda_patch.h b/sound/pci/hda/hda_patch.h index a5de684b6944..acaef3c811b8 100644 --- a/sound/pci/hda/hda_patch.h +++ b/sound/pci/hda/hda_patch.h @@ -10,11 +10,14 @@ extern struct hda_codec_preset snd_hda_preset_cmedia[]; extern struct hda_codec_preset snd_hda_preset_analog[]; /* SigmaTel codecs */ extern struct hda_codec_preset snd_hda_preset_sigmatel[]; +/* SiLabs 3054/3055 modem codecs */ +extern struct hda_codec_preset snd_hda_preset_si3054[]; static const struct hda_codec_preset *hda_preset_tables[] = { snd_hda_preset_realtek, snd_hda_preset_cmedia, snd_hda_preset_analog, snd_hda_preset_sigmatel, + snd_hda_preset_si3054, NULL }; diff --git a/sound/pci/hda/patch_si3054.c b/sound/pci/hda/patch_si3054.c new file mode 100644 index 000000000000..b0270d1b64ce --- /dev/null +++ b/sound/pci/hda/patch_si3054.c @@ -0,0 +1,300 @@ +/* + * Universal Interface for Intel High Definition Audio Codec + * + * HD audio interface patch for Silicon Labs 3054/5 modem codec + * + * Copyright (c) 2005 Sasha Khapyorsky + * Takashi Iwai + * + * + * This driver is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as 
published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This driver is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include +#include +#include +#include +#include +#include +#include "hda_codec.h" +#include "hda_local.h" + + +/* si3054 verbs */ +#define SI3054_VERB_READ_NODE 0x900 +#define SI3054_VERB_WRITE_NODE 0x100 + +/* si3054 nodes (registers) */ +#define SI3054_EXTENDED_MID 2 +#define SI3054_LINE_RATE 3 +#define SI3054_LINE_LEVEL 4 +#define SI3054_GPIO_CFG 5 +#define SI3054_GPIO_POLARITY 6 +#define SI3054_GPIO_STICKY 7 +#define SI3054_GPIO_WAKEUP 8 +#define SI3054_GPIO_STATUS 9 +#define SI3054_GPIO_CONTROL 10 +#define SI3054_MISC_AFE 11 +#define SI3054_CHIPID 12 +#define SI3054_LINE_CFG1 13 +#define SI3054_LINE_STATUS 14 +#define SI3054_DC_TERMINATION 15 +#define SI3054_LINE_CONFIG 16 +#define SI3054_CALLPROG_ATT 17 +#define SI3054_SQ_CONTROL 18 +#define SI3054_MISC_CONTROL 19 +#define SI3054_RING_CTRL1 20 +#define SI3054_RING_CTRL2 21 + +/* extended MID */ +#define SI3054_MEI_READY 0xf + +/* line level */ +#define SI3054_ATAG_MASK 0x00f0 +#define SI3054_DTAG_MASK 0xf000 + +/* GPIO bits */ +#define SI3054_GPIO_OH 0x0001 +#define SI3054_GPIO_CID 0x0002 + +/* chipid and revisions */ +#define SI3054_CHIPID_CODEC_REV_MASK 0x000f +#define SI3054_CHIPID_DAA_REV_MASK 0x00f0 +#define SI3054_CHIPID_INTERNATIONAL 0x0100 +#define SI3054_CHIPID_DAA_ID 0x0f00 +#define SI3054_CHIPID_CODEC_ID (1<<12) + +/* si3054 codec registers (nodes) access macros */ +#define GET_REG(codec,reg) 
(snd_hda_codec_read(codec,reg,0,SI3054_VERB_READ_NODE,0)) +#define SET_REG(codec,reg,val) (snd_hda_codec_write(codec,reg,0,SI3054_VERB_WRITE_NODE,val)) + + +struct si3054_spec { + unsigned international; + struct hda_pcm pcm; +}; + + +/* + * Modem mixer + */ + +#define PRIVATE_VALUE(reg,mask) ((reg<<16)|(mask&0xffff)) +#define PRIVATE_REG(val) ((val>>16)&0xffff) +#define PRIVATE_MASK(val) (val&0xffff) + +static int si3054_switch_info(snd_kcontrol_t *kcontrol, + snd_ctl_elem_info_t *uinfo) +{ + uinfo->type = SNDRV_CTL_ELEM_TYPE_BOOLEAN; + uinfo->count = 1; + uinfo->value.integer.min = 0; + uinfo->value.integer.max = 1; + return 0; +} + +static int si3054_switch_get(snd_kcontrol_t *kcontrol, + snd_ctl_elem_value_t *uvalue) +{ + struct hda_codec *codec = snd_kcontrol_chip(kcontrol); + u16 reg = PRIVATE_REG(kcontrol->private_value); + u16 mask = PRIVATE_MASK(kcontrol->private_value); + uvalue->value.integer.value[0] = (GET_REG(codec, reg)) & mask ? 1 : 0 ; + return 0; +} + +static int si3054_switch_put(snd_kcontrol_t *kcontrol, + snd_ctl_elem_value_t *uvalue) +{ + struct hda_codec *codec = snd_kcontrol_chip(kcontrol); + u16 reg = PRIVATE_REG(kcontrol->private_value); + u16 mask = PRIVATE_MASK(kcontrol->private_value); + if (uvalue->value.integer.value[0]) + SET_REG(codec, reg, (GET_REG(codec, reg)) | mask); + else + SET_REG(codec, reg, (GET_REG(codec, reg)) & ~mask); + return 0; +} + +#define SI3054_KCONTROL(kname,reg,mask) { \ + .iface = SNDRV_CTL_ELEM_IFACE_MIXER, \ + .name = kname, \ + .info = si3054_switch_info, \ + .get = si3054_switch_get, \ + .put = si3054_switch_put, \ + .private_value = PRIVATE_VALUE(reg,mask), \ +} + + +static snd_kcontrol_new_t si3054_modem_mixer[] = { + SI3054_KCONTROL("Off-hook Switch", SI3054_GPIO_CONTROL, SI3054_GPIO_OH), + SI3054_KCONTROL("Caller ID Switch", SI3054_GPIO_CONTROL, SI3054_GPIO_CID), + {} +}; + +static int si3054_build_controls(struct hda_codec *codec) +{ + return snd_hda_add_new_ctls(codec, si3054_modem_mixer); +} + + +/* 
+ * PCM callbacks + */ + +static int si3054_pcm_prepare(struct hda_pcm_stream *hinfo, + struct hda_codec *codec, + unsigned int stream_tag, + unsigned int format, + snd_pcm_substream_t *substream) +{ + u16 val; + + SET_REG(codec, SI3054_LINE_RATE, substream->runtime->rate); + val = GET_REG(codec, SI3054_LINE_LEVEL); + val &= 0xff << (8 * (substream->stream != SNDRV_PCM_STREAM_PLAYBACK)); + val |= ((stream_tag & 0xf) << 4) << (8 * (substream->stream == SNDRV_PCM_STREAM_PLAYBACK)); + SET_REG(codec, SI3054_LINE_LEVEL, val); + + snd_hda_codec_setup_stream(codec, hinfo->nid, + stream_tag, 0, format); + return 0; +} + +static int si3054_pcm_open(struct hda_pcm_stream *hinfo, + struct hda_codec *codec, + snd_pcm_substream_t *substream) +{ + static unsigned int rates[] = { 8000, 9600, 16000 }; + static snd_pcm_hw_constraint_list_t hw_constraints_rates = { + .count = ARRAY_SIZE(rates), + .list = rates, + .mask = 0, + }; + substream->runtime->hw.period_bytes_min = 80; + return snd_pcm_hw_constraint_list(substream->runtime, 0, + SNDRV_PCM_HW_PARAM_RATE, &hw_constraints_rates); +} + + +static struct hda_pcm_stream si3054_pcm = { + .substreams = 1, + .channels_min = 1, + .channels_max = 1, + .nid = 0x1, + .rates = SNDRV_PCM_RATE_8000|SNDRV_PCM_RATE_16000|SNDRV_PCM_RATE_KNOT, + .formats = SNDRV_PCM_FMTBIT_S16_LE, + .maxbps = 16, + .ops = { + .open = si3054_pcm_open, + .prepare = si3054_pcm_prepare, + }, +}; + + +static int si3054_build_pcms(struct hda_codec *codec) +{ + struct si3054_spec *spec = codec->spec; + struct hda_pcm *info = &spec->pcm; + si3054_pcm.nid = codec->mfg; + codec->num_pcms = 1; + codec->pcm_info = info; + info->name = "Si3054 Modem"; + info->stream[SNDRV_PCM_STREAM_PLAYBACK] = si3054_pcm; + info->stream[SNDRV_PCM_STREAM_CAPTURE] = si3054_pcm; + return 0; +} + + +/* + * Init part + */ + +static int si3054_init(struct hda_codec *codec) +{ + struct si3054_spec *spec = codec->spec; + unsigned wait_count; + u16 val; + + snd_hda_codec_write(codec, AC_NODE_ROOT, 0, 
AC_VERB_SET_CODEC_RESET, 0); + snd_hda_codec_write(codec, codec->mfg, 0, AC_VERB_SET_STREAM_FORMAT, 0); + SET_REG(codec, SI3054_LINE_RATE, 9600); + SET_REG(codec, SI3054_LINE_LEVEL, SI3054_DTAG_MASK|SI3054_ATAG_MASK); + SET_REG(codec, SI3054_EXTENDED_MID, 0); + + wait_count = 10; + do { + msleep(2); + val = GET_REG(codec, SI3054_EXTENDED_MID); + } while ((val & SI3054_MEI_READY) != SI3054_MEI_READY && wait_count--); + + if((val&SI3054_MEI_READY) != SI3054_MEI_READY) { + snd_printk(KERN_ERR "si3054: cannot initialize. EXT MID = %04x\n", val); + return -EACCES; + } + + SET_REG(codec, SI3054_GPIO_POLARITY, 0xffff); + SET_REG(codec, SI3054_GPIO_CFG, 0x0); + SET_REG(codec, SI3054_MISC_AFE, 0); + SET_REG(codec, SI3054_LINE_CFG1,0x200); + + if((GET_REG(codec,SI3054_LINE_STATUS) & (1<<6)) == 0) { + snd_printd("Link Frame Detect(FDT) is not ready (line status: %04x)\n", + GET_REG(codec,SI3054_LINE_STATUS)); + } + + spec->international = GET_REG(codec, SI3054_CHIPID) & SI3054_CHIPID_INTERNATIONAL; + + return 0; +} + +static void si3054_free(struct hda_codec *codec) +{ + kfree(codec->spec); +} + + +/* + */ + +static struct hda_codec_ops si3054_patch_ops = { + .build_controls = si3054_build_controls, + .build_pcms = si3054_build_pcms, + .init = si3054_init, + .free = si3054_free, +#ifdef CONFIG_PM + //.suspend = si3054_suspend, + .resume = si3054_init, +#endif +}; + +static int patch_si3054(struct hda_codec *codec) +{ + struct si3054_spec *spec = kcalloc(1, sizeof(*spec), GFP_KERNEL); + if (spec == NULL) + return -ENOMEM; + codec->spec = spec; + codec->patch_ops = si3054_patch_ops; + return 0; +} + +/* + * patch entries + */ +struct hda_codec_preset snd_hda_preset_si3054[] = { + { .id = 0x163c3155, .name = "Si3054", .patch = patch_si3054 }, + {} +}; + From a53fc188ec6fc406276799da465fe789c40d96b2 Mon Sep 17 00:00:00 2001 From: Clemens Ladisch Date: Thu, 11 Aug 2005 15:59:17 +0200 Subject: [PATCH 528/584] [ALSA] make local objects static Memalloc module,PCM Midlevel,Timer 
Midlevel,GUS Library,AC97 Codec ALI5451 driver,RME9652 driver Make some functions/variables that are used in only one file static. Signed-off-by: Clemens Ladisch --- sound/core/memalloc.c | 2 +- sound/core/pcm_lib.c | 4 ++-- sound/core/timer.c | 4 ++-- sound/isa/gus/gus_io.c | 6 ++++-- sound/pci/ac97/ac97_patch.c | 8 ++++---- sound/pci/ali5451/ali5451.c | 2 +- sound/pci/rme9652/hdspm.c | 4 ++-- 7 files changed, 16 insertions(+), 14 deletions(-) diff --git a/sound/core/memalloc.c b/sound/core/memalloc.c index 02132561c3f8..371215cd9e8f 100644 --- a/sound/core/memalloc.c +++ b/sound/core/memalloc.c @@ -512,7 +512,7 @@ static void free_all_reserved_pages(void) * proc file interface */ #define SND_MEM_PROC_FILE "driver/snd-page-alloc" -struct proc_dir_entry *snd_mem_proc; +static struct proc_dir_entry *snd_mem_proc; static int snd_mem_proc_read(char *page, char **start, off_t off, int count, int *eof, void *data) diff --git a/sound/core/pcm_lib.c b/sound/core/pcm_lib.c index c41ec2e9f201..0082914a7e33 100644 --- a/sound/core/pcm_lib.c +++ b/sound/core/pcm_lib.c @@ -1584,8 +1584,8 @@ int snd_pcm_hw_param_set(snd_pcm_t *pcm, snd_pcm_hw_params_t *params, return snd_pcm_hw_param_value(params, var, NULL); } -int _snd_pcm_hw_param_mask(snd_pcm_hw_params_t *params, - snd_pcm_hw_param_t var, const snd_mask_t *val) +static int _snd_pcm_hw_param_mask(snd_pcm_hw_params_t *params, + snd_pcm_hw_param_t var, const snd_mask_t *val) { int changed; assert(hw_is_mask(var)); diff --git a/sound/core/timer.c b/sound/core/timer.c index cfaccd415b3b..c3997e047214 100644 --- a/sound/core/timer.c +++ b/sound/core/timer.c @@ -799,13 +799,13 @@ static int snd_timer_free(snd_timer_t *timer) return 0; } -int snd_timer_dev_free(snd_device_t *device) +static int snd_timer_dev_free(snd_device_t *device) { snd_timer_t *timer = device->device_data; return snd_timer_free(timer); } -int snd_timer_dev_register(snd_device_t *dev) +static int snd_timer_dev_register(snd_device_t *dev) { snd_timer_t *timer = 
dev->device_data; snd_timer_t *timer1; diff --git a/sound/isa/gus/gus_io.c b/sound/isa/gus/gus_io.c index 337b0e2a8a36..23e1b5f19e1a 100644 --- a/sound/isa/gus/gus_io.c +++ b/sound/isa/gus/gus_io.c @@ -269,8 +269,9 @@ void snd_gf1_i_write_addr(snd_gus_card_t * gus, unsigned char reg, #endif /* 0 */ -unsigned int snd_gf1_i_read_addr(snd_gus_card_t * gus, - unsigned char reg, short w_16bit) +#ifdef CONFIG_SND_DEBUG +static unsigned int snd_gf1_i_read_addr(snd_gus_card_t * gus, + unsigned char reg, short w_16bit) { unsigned int res; unsigned long flags; @@ -280,6 +281,7 @@ unsigned int snd_gf1_i_read_addr(snd_gus_card_t * gus, spin_unlock_irqrestore(&gus->reg_lock, flags); return res; } +#endif /* diff --git a/sound/pci/ac97/ac97_patch.c b/sound/pci/ac97/ac97_patch.c index a51b61d5066b..b0b31f4015aa 100644 --- a/sound/pci/ac97/ac97_patch.c +++ b/sound/pci/ac97/ac97_patch.c @@ -375,7 +375,7 @@ AC97_DOUBLE("Front Playback Volume", AC97_WM97XX_FMIXER_VOL, 8, 0, 31, 1), AC97_SINGLE("Front Playback Switch", AC97_WM97XX_FMIXER_VOL, 15, 1, 1), }; -int patch_wolfson_wm9703_specific(ac97_t * ac97) +static int patch_wolfson_wm9703_specific(ac97_t * ac97) { /* This is known to work for the ViewSonic ViewPad 1000 * Randolph Bentson @@ -410,7 +410,7 @@ AC97_DOUBLE("Rear DAC Volume", AC97_WM9704_RPCM_VOL, 8, 0, 31, 1), AC97_DOUBLE("Surround Volume", AC97_SURROUND_MASTER, 8, 0, 31, 1), }; -int patch_wolfson_wm9704_specific(ac97_t * ac97) +static int patch_wolfson_wm9704_specific(ac97_t * ac97) { int err, i; for (i = 0; i < ARRAY_SIZE(wm9704_snd_ac97_controls); i++) { @@ -433,7 +433,7 @@ int patch_wolfson04(ac97_t * ac97) return 0; } -int patch_wolfson_wm9705_specific(ac97_t * ac97) +static int patch_wolfson_wm9705_specific(ac97_t * ac97) { int err, i; for (i = 0; i < ARRAY_SIZE(wm97xx_snd_ac97_controls); i++) { @@ -558,7 +558,7 @@ AC97_SINGLE("Headphone ZC Switch", AC97_HEADPHONE, 7, 1, 0), AC97_SINGLE("Mono ZC Switch", AC97_MASTER_MONO, 7, 1, 0), }; -int 
patch_wolfson_wm9711_specific(ac97_t * ac97) +static int patch_wolfson_wm9711_specific(ac97_t * ac97) { int err, i; diff --git a/sound/pci/ali5451/ali5451.c b/sound/pci/ali5451/ali5451.c index cb12d780a6c6..ce6c9fadb594 100644 --- a/sound/pci/ali5451/ali5451.c +++ b/sound/pci/ali5451/ali5451.c @@ -1842,7 +1842,7 @@ static int __devinit snd_ali_pcm(ali_t * codec, int device, struct ali_pcm_descr return 0; } -struct ali_pcm_description ali_pcms[] = { +static struct ali_pcm_description ali_pcms[] = { { "ALI 5451", ALI_CHANNELS, 1, &snd_ali_playback_ops, &snd_ali_capture_ops }, { "ALI 5451 modem", 1, 1, &snd_ali_modem_playback_ops, &snd_ali_modem_capture_ops } }; diff --git a/sound/pci/rme9652/hdspm.c b/sound/pci/rme9652/hdspm.c index f6daec4e4d97..d4a0c2c56cdb 100644 --- a/sound/pci/rme9652/hdspm.c +++ b/sound/pci/rme9652/hdspm.c @@ -1104,14 +1104,14 @@ static int snd_hdspm_midi_output_close(snd_rawmidi_substream_t * substream) return 0; } -snd_rawmidi_ops_t snd_hdspm_midi_output = +static snd_rawmidi_ops_t snd_hdspm_midi_output = { .open = snd_hdspm_midi_output_open, .close = snd_hdspm_midi_output_close, .trigger = snd_hdspm_midi_output_trigger, }; -snd_rawmidi_ops_t snd_hdspm_midi_input = +static snd_rawmidi_ops_t snd_hdspm_midi_input = { .open = snd_hdspm_midi_input_open, .close = snd_hdspm_midi_input_close, From 15a24c0778e9bdd48d8e1cf60a263837b5c30ed5 Mon Sep 17 00:00:00 2001 From: Clemens Ladisch Date: Fri, 12 Aug 2005 08:25:26 +0200 Subject: [PATCH 529/584] [ALSA] usb-audio: use 1 ms URBs when capturing USB generic driver When capturing audio data, we do not know beforehand how many samples the device sends per frame, so we have to use URBs that are as short as possible to make sure that we can handle period boundaries without any additional latencies. Furthermore, the total count of URBs submitted doesn't matter when capturing, so we can just use the maximum number. 
Signed-off-by: Clemens Ladisch --- sound/usb/usbaudio.c | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/sound/usb/usbaudio.c b/sound/usb/usbaudio.c index 8298c462c291..9e38d3d1322a 100644 --- a/sound/usb/usbaudio.c +++ b/sound/usb/usbaudio.c @@ -97,7 +97,7 @@ MODULE_PARM_DESC(async_unlink, "Use async unlink mode."); #define MAX_PACKS 10 #define MAX_PACKS_HS (MAX_PACKS * 8) /* in high speed mode */ -#define MAX_URBS 5 /* max. 20ms long packets */ +#define MAX_URBS 8 #define SYNC_URBS 4 /* always four urbs for sync */ #define MIN_PACKS_URB 1 /* minimum 1 packet per urb */ @@ -920,10 +920,12 @@ static int init_substream_urbs(snd_usb_substream_t *subs, unsigned int period_by else subs->curpacksize = maxsize; - if (snd_usb_get_speed(subs->dev) == USB_SPEED_FULL) + if (is_playback) urb_packs = nrpacks; else - urb_packs = (nrpacks * 8) >> subs->datainterval; + urb_packs = 1; + if (snd_usb_get_speed(subs->dev) == USB_SPEED_HIGH) + urb_packs = (urb_packs * 8) >> subs->datainterval; /* allocate a temporary buffer for playback */ if (is_playback) { @@ -935,9 +937,13 @@ static int init_substream_urbs(snd_usb_substream_t *subs, unsigned int period_by } /* decide how many packets to be used */ - total_packs = (period_bytes + maxsize - 1) / maxsize; - if (total_packs < 2 * MIN_PACKS_URB) - total_packs = 2 * MIN_PACKS_URB; + if (is_playback) { + total_packs = (period_bytes + maxsize - 1) / maxsize; + if (total_packs < 2 * MIN_PACKS_URB) + total_packs = 2 * MIN_PACKS_URB; + } else { + total_packs = MAX_URBS * urb_packs; + } subs->nurbs = (total_packs + urb_packs - 1) / urb_packs; if (subs->nurbs > MAX_URBS) { /* too much... 
*/ From d6db392e9235c48bb945624798e9beede7b85b12 Mon Sep 17 00:00:00 2001 From: Clemens Ladisch Date: Fri, 12 Aug 2005 08:28:27 +0200 Subject: [PATCH 530/584] [ALSA] usb-audio: fix packets per URB calculation for playback USB generic driver When determining how many packets are needed for one period, we cannot assume that all packets have their maximum size -- we always use the nominal sample rate when sending data, and could use an even lower rate when the endpoint uses frequency feedback. Signed-off-by: Clemens Ladisch --- sound/usb/usbaudio.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/sound/usb/usbaudio.c b/sound/usb/usbaudio.c index 9e38d3d1322a..d28106e390c4 100644 --- a/sound/usb/usbaudio.c +++ b/sound/usb/usbaudio.c @@ -938,7 +938,15 @@ static int init_substream_urbs(snd_usb_substream_t *subs, unsigned int period_by /* decide how many packets to be used */ if (is_playback) { - total_packs = (period_bytes + maxsize - 1) / maxsize; + unsigned int minsize; + /* determine how small a packet can be */ + minsize = (subs->freqn >> (16 - subs->datainterval)) + * (frame_bits >> 3); + /* with sync from device, assume it can be 25% lower */ + if (subs->syncpipe) + minsize -= minsize >> 2; + minsize = max(minsize, 1u); + total_packs = (period_bytes + minsize - 1) / minsize; if (total_packs < 2 * MIN_PACKS_URB) total_packs = 2 * MIN_PACKS_URB; } else { From 71d848ca00a16179b17e58e5f51c2d9a6c4f97a2 Mon Sep 17 00:00:00 2001 From: Clemens Ladisch Date: Fri, 12 Aug 2005 15:18:00 +0200 Subject: [PATCH 531/584] [ALSA] usb-audio: make nrpacks parameter writeable USB generic driver The nrpacks module parameter is used only when initializing a playback stream, so it doesn't hurt to make it writeable. 
Signed-off-by: Clemens Ladisch --- sound/usb/usbaudio.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/sound/usb/usbaudio.c b/sound/usb/usbaudio.c index d28106e390c4..49075f06f846 100644 --- a/sound/usb/usbaudio.c +++ b/sound/usb/usbaudio.c @@ -79,7 +79,7 @@ module_param_array(vid, int, NULL, 0444); MODULE_PARM_DESC(vid, "Vendor ID for the USB audio device."); module_param_array(pid, int, NULL, 0444); MODULE_PARM_DESC(pid, "Product ID for the USB audio device."); -module_param(nrpacks, int, 0444); +module_param(nrpacks, int, 0644); MODULE_PARM_DESC(nrpacks, "Max. number of packets per URB."); module_param(async_unlink, bool, 0444); MODULE_PARM_DESC(async_unlink, "Use async unlink mode."); @@ -920,9 +920,11 @@ static int init_substream_urbs(snd_usb_substream_t *subs, unsigned int period_by else subs->curpacksize = maxsize; - if (is_playback) + if (is_playback) { urb_packs = nrpacks; - else + urb_packs = max(urb_packs, (unsigned int)MIN_PACKS_URB); + urb_packs = min(urb_packs, (unsigned int)MAX_PACKS); + } else urb_packs = 1; if (snd_usb_get_speed(subs->dev) == USB_SPEED_HIGH) urb_packs = (urb_packs * 8) >> subs->datainterval; From a93bf99077886d209f8e72bc134e1ceb36e76aa2 Mon Sep 17 00:00:00 2001 From: Clemens Ladisch Date: Fri, 12 Aug 2005 15:19:39 +0200 Subject: [PATCH 532/584] [ALSA] usb-audio: schedule high speed URBs with 1 ms alignment USB generic driver The EHCI driver doesn't interrupt more than once per millisecond, and organizes all iso transfers with frame-sized ITDs, so we can (try to) be more efficient by aligning all URBs on frame boundaries. 
Signed-off-by: Clemens Ladisch --- sound/usb/usbaudio.c | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/sound/usb/usbaudio.c b/sound/usb/usbaudio.c index 49075f06f846..a62d1313da17 100644 --- a/sound/usb/usbaudio.c +++ b/sound/usb/usbaudio.c @@ -893,7 +893,7 @@ static int init_substream_urbs(snd_usb_substream_t *subs, unsigned int period_by { unsigned int maxsize, n, i; int is_playback = subs->direction == SNDRV_PCM_STREAM_PLAYBACK; - unsigned int npacks[MAX_URBS], urb_packs, total_packs; + unsigned int npacks[MAX_URBS], urb_packs, total_packs, packs_per_ms; /* calculate the frequency in 16.16 format */ if (snd_usb_get_speed(subs->dev) == USB_SPEED_FULL) @@ -920,14 +920,18 @@ static int init_substream_urbs(snd_usb_substream_t *subs, unsigned int period_by else subs->curpacksize = maxsize; + if (snd_usb_get_speed(subs->dev) == USB_SPEED_HIGH) + packs_per_ms = 8 >> subs->datainterval; + else + packs_per_ms = 1; + if (is_playback) { urb_packs = nrpacks; urb_packs = max(urb_packs, (unsigned int)MIN_PACKS_URB); urb_packs = min(urb_packs, (unsigned int)MAX_PACKS); } else urb_packs = 1; - if (snd_usb_get_speed(subs->dev) == USB_SPEED_HIGH) - urb_packs = (urb_packs * 8) >> subs->datainterval; + urb_packs *= packs_per_ms; /* allocate a temporary buffer for playback */ if (is_playback) { @@ -949,8 +953,12 @@ static int init_substream_urbs(snd_usb_substream_t *subs, unsigned int period_by minsize -= minsize >> 2; minsize = max(minsize, 1u); total_packs = (period_bytes + minsize - 1) / minsize; - if (total_packs < 2 * MIN_PACKS_URB) - total_packs = 2 * MIN_PACKS_URB; + /* round up to multiple of packs_per_ms */ + total_packs = (total_packs + packs_per_ms - 1) + & ~(packs_per_ms - 1); + /* we need at least two URBs for queueing */ + if (total_packs < 2 * MIN_PACKS_URB * packs_per_ms) + total_packs = 2 * MIN_PACKS_URB * packs_per_ms; } else { total_packs = MAX_URBS * urb_packs; } @@ -972,7 +980,7 @@ static int 
init_substream_urbs(snd_usb_substream_t *subs, unsigned int period_by subs->nurbs = 2; npacks[0] = (total_packs + 1) / 2; npacks[1] = total_packs - npacks[0]; - } else if (npacks[subs->nurbs-1] < MIN_PACKS_URB) { + } else if (npacks[subs->nurbs-1] < MIN_PACKS_URB * packs_per_ms) { /* the last packet is too small.. */ if (subs->nurbs > 2) { /* merge to the first one */ From 47672310789431c16de22f16934febf4d55b4bb0 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Fri, 12 Aug 2005 16:44:04 +0200 Subject: [PATCH 533/584] [ALSA] hda-intel - Add SiS966 support HDA Intel driver Added SiS966 pci id to snd-hda-intel driver. Signed-off-by: Takashi Iwai --- sound/pci/hda/hda_intel.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/sound/pci/hda/hda_intel.c b/sound/pci/hda/hda_intel.c index 288ab0764830..2b6bd3139b86 100644 --- a/sound/pci/hda/hda_intel.c +++ b/sound/pci/hda/hda_intel.c @@ -71,7 +71,8 @@ MODULE_SUPPORTED_DEVICE("{{Intel, ICH6}," "{Intel, ESB2}," "{ATI, SB450}," "{VIA, VT8251}," - "{VIA, VT8237A}}"); + "{VIA, VT8237A}," + "{SiS, SIS966}}"); MODULE_DESCRIPTION("Intel HDA driver"); #define SFX "hda-intel: " @@ -1464,6 +1465,7 @@ static struct pci_device_id azx_ids[] = { { 0x8086, 0x269a, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0 }, /* ESB2 */ { 0x1002, 0x437b, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0 }, /* ATI SB450 */ { 0x1106, 0x3288, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0 }, /* VIA VT8251/VT8237A */ + { 0x1039, 0x7502, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0 }, /* SIS966 */ { 0x10b9, 0x5461, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0 }, /* ALI 5461? */ { 0, } }; From 4b4994869d68328276f6a3cc9df90cce8c82b43d Mon Sep 17 00:00:00 2001 From: Philip Prindeville Date: Fri, 12 Aug 2005 16:46:17 +0200 Subject: [PATCH 534/584] [ALSA] Add VT1617A codec support AC97 Codec Added (minimal) support of VT1617A codec with SPDIF. 
Signed-off-by: Takashi Iwai --- sound/pci/ac97/ac97_codec.c | 6 ++++-- sound/pci/ac97/ac97_patch.c | 18 ++++++++++++++---- sound/pci/ac97/ac97_patch.h | 1 + 3 files changed, 19 insertions(+), 6 deletions(-) diff --git a/sound/pci/ac97/ac97_codec.c b/sound/pci/ac97/ac97_codec.c index 33dba10f03e8..600e053dfd35 100644 --- a/sound/pci/ac97/ac97_codec.c +++ b/sound/pci/ac97/ac97_codec.c @@ -157,6 +157,7 @@ static const ac97_codec_id_t snd_ac97_codec_ids[] = { { 0x54524123, 0xffffffff, "TR28602", NULL, NULL }, // only guess --jk [TR28023 = eMicro EM28023 (new CT1297)] { 0x54584e20, 0xffffffff, "TLC320AD9xC", NULL, NULL }, { 0x56494161, 0xffffffff, "VIA1612A", NULL, NULL }, // modified ICE1232 with S/PDIF +{ 0x56494170, 0xffffffff, "VIA1617A", patch_vt1617a, NULL }, // modified VT1616 with S/PDIF { 0x57454301, 0xffffffff, "W83971D", NULL, NULL }, { 0x574d4c00, 0xffffffff, "WM9701A", NULL, NULL }, { 0x574d4C03, 0xffffffff, "WM9703,WM9707,WM9708,WM9717", patch_wolfson03, NULL}, @@ -2580,8 +2581,6 @@ int snd_ac97_tune_hardware(ac97_t *ac97, struct ac97_quirk *quirk, const char *o { int result; - snd_assert(quirk, return -EINVAL); - /* quirk overriden? */ if (override && strcmp(override, "-1") && strcmp(override, "default")) { result = apply_quirk_str(ac97, override); @@ -2590,6 +2589,9 @@ int snd_ac97_tune_hardware(ac97_t *ac97, struct ac97_quirk *quirk, const char *o return result; } + if (! 
quirk) + return -EINVAL; + for (; quirk->subvendor; quirk++) { if (quirk->subvendor != ac97->subsystem_vendor) continue; diff --git a/sound/pci/ac97/ac97_patch.c b/sound/pci/ac97/ac97_patch.c index b0b31f4015aa..a0d515536177 100644 --- a/sound/pci/ac97/ac97_patch.c +++ b/sound/pci/ac97/ac97_patch.c @@ -375,7 +375,7 @@ AC97_DOUBLE("Front Playback Volume", AC97_WM97XX_FMIXER_VOL, 8, 0, 31, 1), AC97_SINGLE("Front Playback Switch", AC97_WM97XX_FMIXER_VOL, 15, 1, 1), }; -static int patch_wolfson_wm9703_specific(ac97_t * ac97) +int patch_wolfson_wm9703_specific(ac97_t * ac97) { /* This is known to work for the ViewSonic ViewPad 1000 * Randolph Bentson @@ -410,7 +410,7 @@ AC97_DOUBLE("Rear DAC Volume", AC97_WM9704_RPCM_VOL, 8, 0, 31, 1), AC97_DOUBLE("Surround Volume", AC97_SURROUND_MASTER, 8, 0, 31, 1), }; -static int patch_wolfson_wm9704_specific(ac97_t * ac97) +int patch_wolfson_wm9704_specific(ac97_t * ac97) { int err, i; for (i = 0; i < ARRAY_SIZE(wm9704_snd_ac97_controls); i++) { @@ -433,7 +433,7 @@ int patch_wolfson04(ac97_t * ac97) return 0; } -static int patch_wolfson_wm9705_specific(ac97_t * ac97) +int patch_wolfson_wm9705_specific(ac97_t * ac97) { int err, i; for (i = 0; i < ARRAY_SIZE(wm97xx_snd_ac97_controls); i++) { @@ -558,7 +558,7 @@ AC97_SINGLE("Headphone ZC Switch", AC97_HEADPHONE, 7, 1, 0), AC97_SINGLE("Mono ZC Switch", AC97_MASTER_MONO, 7, 1, 0), }; -static int patch_wolfson_wm9711_specific(ac97_t * ac97) +int patch_wolfson_wm9711_specific(ac97_t * ac97) { int err, i; @@ -2665,6 +2665,16 @@ int patch_vt1616(ac97_t * ac97) return 0; } +/* + * VT1617A codec + */ +int patch_vt1617a(ac97_t * ac97) +{ + ac97->ext_id |= AC97_EI_SPDIF; /* force the detection of spdif */ + ac97->rates[AC97_RATES_SPDIF] = SNDRV_PCM_RATE_44100 | SNDRV_PCM_RATE_48000; + return 0; +} + /* */ static void it2646_update_jacks(ac97_t *ac97) diff --git a/sound/pci/ac97/ac97_patch.h b/sound/pci/ac97/ac97_patch.h index 7b7377d0f2ae..ec1811320106 100644 --- a/sound/pci/ac97/ac97_patch.h 
+++ b/sound/pci/ac97/ac97_patch.h @@ -56,5 +56,6 @@ int patch_cm9739(ac97_t * ac97); int patch_cm9761(ac97_t * ac97); int patch_cm9780(ac97_t * ac97); int patch_vt1616(ac97_t * ac97); +int patch_vt1617a(ac97_t * ac97); int patch_it2646(ac97_t * ac97); int mpatch_si3036(ac97_t * ac97); From f347c774c37c83e7b2d2b75d962ca40b9a283b2b Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Fri, 12 Aug 2005 16:47:49 +0200 Subject: [PATCH 535/584] [ALSA] via82xx - Add DXS entry for MSI K8MM-V VIA82xx driver Added the DXS entry for MSI K8MM-V. Also fixed a typo in last patch for Acer Aspire 1524 WLMi. Signed-off-by: Takashi Iwai --- sound/pci/via82xx.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sound/pci/via82xx.c b/sound/pci/via82xx.c index 8ddc023a5b7f..38b96eabea60 100644 --- a/sound/pci/via82xx.c +++ b/sound/pci/via82xx.c @@ -2153,7 +2153,7 @@ static int __devinit check_dxs_list(struct pci_dev *pci) { .subvendor = 0x1019, .subdevice = 0x0a81, .action = VIA_DXS_NO_VRA }, /* ECS K7VTA3 v8.0 */ { .subvendor = 0x1019, .subdevice = 0x0a85, .action = VIA_DXS_NO_VRA }, /* ECS L7VMM2 */ { .subvendor = 0x1025, .subdevice = 0x0033, .action = VIA_DXS_NO_VRA }, /* Acer Inspire 1353LM */ - { .subvendor = 0x1025, .subdevice = 0x0046, .action = VIA_DXS_SRC }, /* Acer Aspire 1524 WMLi */ + { .subvendor = 0x1025, .subdevice = 0x0046, .action = VIA_DXS_SRC }, /* Acer Aspire 1524 WLMi */ { .subvendor = 0x1043, .subdevice = 0x8095, .action = VIA_DXS_NO_VRA }, /* ASUS A7V8X (FIXME: possibly VIA_DXS_ENABLE?)*/ { .subvendor = 0x1043, .subdevice = 0x80a1, .action = VIA_DXS_NO_VRA }, /* ASUS A7V8-X */ { .subvendor = 0x1043, .subdevice = 0x80b0, .action = VIA_DXS_NO_VRA }, /* ASUS A7V600 & K8V*/ @@ -2173,6 +2173,7 @@ static int __devinit check_dxs_list(struct pci_dev *pci) { .subvendor = 0x1462, .subdevice = 0x5901, .action = VIA_DXS_NO_VRA }, /* MSI KT6 Delta-SR */ { .subvendor = 0x1462, .subdevice = 0x7023, .action = VIA_DXS_NO_VRA }, /* MSI K8T Neo2-FI */ { .subvendor = 
0x1462, .subdevice = 0x7120, .action = VIA_DXS_ENABLE }, /* MSI KT4V */ + { .subvendor = 0x1462, .subdevice = 0x7142, .action = VIA_DXS_ENABLE }, /* MSI K8MM-V */ { .subvendor = 0x147b, .subdevice = 0x1401, .action = VIA_DXS_ENABLE }, /* ABIT KD7(-RAID) */ { .subvendor = 0x147b, .subdevice = 0x1411, .action = VIA_DXS_ENABLE }, /* ABIT VA-20 */ { .subvendor = 0x147b, .subdevice = 0x1413, .action = VIA_DXS_ENABLE }, /* ABIT KV8 Pro */ From 1265509cb0f00206c2ed93f120121dd818a3902d Mon Sep 17 00:00:00 2001 From: Jaroslav Kysela Date: Fri, 12 Aug 2005 17:24:24 +0200 Subject: [PATCH 536/584] [ALSA] opl3sa2 driver - added support for PnP BIOS devices OPL3SA2 driver The driver has been extended to support also PnP BIOS devices. The detection path is: 1) detect PnP BIOS devices 2) detect ISA PnP devices Signed-off-by: Jaroslav Kysela --- sound/isa/opl3sa2.c | 112 +++++++++++++++++++++++++++++++++++--------- 1 file changed, 89 insertions(+), 23 deletions(-) diff --git a/sound/isa/opl3sa2.c b/sound/isa/opl3sa2.c index 95c7b3e53407..c4ebf57b9fc3 100644 --- a/sound/isa/opl3sa2.c +++ b/sound/isa/opl3sa2.c @@ -145,6 +145,14 @@ static snd_card_t *snd_opl3sa2_legacy[SNDRV_CARDS] = SNDRV_DEFAULT_PTR; #ifdef CONFIG_PNP +static struct pnp_device_id snd_opl3sa2_pnpbiosids[] = { + { .id = "YMF0021" }, + { .id = "NMX2210" }, /* Gateway Solo 2500 */ + { .id = "" } /* end */ +}; + +MODULE_DEVICE_TABLE(pnp, snd_opl3sa2_pnpbiosids); + static struct pnp_card_device_id snd_opl3sa2_pnpids[] = { /* Yamaha YMF719E-S (Genius Sound Maker 3DX) */ { .id = "YMH0020", .devs = { { "YMH0021" } } }, @@ -568,20 +576,18 @@ static int snd_opl3sa2_resume(snd_card_t *card) #ifdef CONFIG_PNP static int __init snd_opl3sa2_pnp(int dev, opl3sa2_t *chip, - struct pnp_card_link *card, - const struct pnp_card_device_id *id) + struct pnp_dev *pdev, + int isapnp) { - struct pnp_dev *pdev; - struct pnp_resource_table * cfg = kmalloc(sizeof(struct pnp_resource_table), GFP_KERNEL); + struct pnp_resource_table * cfg; int 
err; + if (!isapnp && pnp_device_is_isapnp(pdev)) + return -ENOENT; /* we have another procedure - card */ + + cfg = kmalloc(sizeof(struct pnp_resource_table), GFP_KERNEL); if (!cfg) return -ENOMEM; - pdev = chip->dev = pnp_request_card_device(card, id->devs[0].id, NULL); - if (chip->dev == NULL) { - kfree(cfg); - return -EBUSY; - } /* PnP initialization */ pnp_init_resource_table(cfg); if (sb_port[dev] != SNDRV_AUTO_PORT) @@ -617,13 +623,31 @@ static int __init snd_opl3sa2_pnp(int dev, opl3sa2_t *chip, dma1[dev] = pnp_dma(pdev, 0); dma2[dev] = pnp_dma(pdev, 1); irq[dev] = pnp_irq(pdev, 0); - snd_printdd("PnP OPL3-SA: sb port=0x%lx, wss port=0x%lx, fm port=0x%lx, midi port=0x%lx\n", - sb_port[dev], wss_port[dev], fm_port[dev], midi_port[dev]); - snd_printdd("PnP OPL3-SA: control port=0x%lx, dma1=%i, dma2=%i, irq=%i\n", - port[dev], dma1[dev], dma2[dev], irq[dev]); + snd_printdd("%sPnP OPL3-SA: sb port=0x%lx, wss port=0x%lx, fm port=0x%lx, midi port=0x%lx\n", + pnp_device_is_pnpbios(pdev) ? "BIOS" : "ISA", sb_port[dev], wss_port[dev], fm_port[dev], midi_port[dev]); + snd_printdd("%sPnP OPL3-SA: control port=0x%lx, dma1=%i, dma2=%i, irq=%i\n", + pnp_device_is_pnpbios(pdev) ? 
"BIOS" : "ISA", port[dev], dma1[dev], dma2[dev], irq[dev]); kfree(cfg); + chip->dev = pdev; return 0; } + +static int __init snd_opl3sa2_cpnp(int dev, opl3sa2_t *chip, + struct pnp_card_link *card, + const struct pnp_card_device_id *id) +{ + struct pnp_dev *pdev; + struct pnp_resource_table * cfg = kmalloc(sizeof(struct pnp_resource_table), GFP_KERNEL); + + if (!cfg) + return -ENOMEM; + pdev = pnp_request_card_device(card, id->devs[0].id, NULL); + if (pdev == NULL) { + kfree(cfg); + return -EBUSY; + } + return snd_opl3sa2_pnp(dev, chip, pdev, 1); +} #endif /* CONFIG_PNP */ static int snd_opl3sa2_free(opl3sa2_t *chip) @@ -645,6 +669,7 @@ static int snd_opl3sa2_dev_free(snd_device_t *device) } static int __devinit snd_opl3sa2_probe(int dev, + struct pnp_dev *pdev, struct pnp_card_link *pcard, const struct pnp_card_device_id *pid) { @@ -695,8 +720,13 @@ static int __devinit snd_opl3sa2_probe(int dev, if ((err = snd_device_new(card, SNDRV_DEV_LOWLEVEL, chip, &ops)) < 0) goto __error; #ifdef CONFIG_PNP - if (isapnp[dev]) { - if ((err = snd_opl3sa2_pnp(dev, chip, pcard, pid)) < 0) + if (pdev) { + if ((err = snd_opl3sa2_pnp(dev, chip, pdev, 0)) < 0) + goto __error; + snd_card_set_dev(card, &pdev->dev); + } + if (pcard) { + if ((err = snd_opl3sa2_cpnp(dev, chip, pcard, pid)) < 0) goto __error; snd_card_set_dev(card, &pcard->card->dev); } @@ -768,7 +798,9 @@ static int __devinit snd_opl3sa2_probe(int dev, if ((err = snd_card_register(card)) < 0) goto __error; - if (pcard) + if (pdev) + pnp_set_drvdata(pdev, card); + else if (pcard) pnp_set_card_drvdata(pcard, card); else snd_opl3sa2_legacy[dev] = card; @@ -780,8 +812,8 @@ static int __devinit snd_opl3sa2_probe(int dev, } #ifdef CONFIG_PNP -static int __devinit snd_opl3sa2_pnp_detect(struct pnp_card_link *card, - const struct pnp_card_device_id *id) +static int __devinit snd_opl3sa2_pnp_detect(struct pnp_dev *pdev, + const struct pnp_device_id *id) { static int dev; int res; @@ -789,7 +821,7 @@ static int __devinit 
snd_opl3sa2_pnp_detect(struct pnp_card_link *card, for ( ; dev < SNDRV_CARDS; dev++) { if (!enable[dev] || !isapnp[dev]) continue; - res = snd_opl3sa2_probe(dev, card, id); + res = snd_opl3sa2_probe(dev, pdev, NULL, NULL); if (res < 0) return res; dev++; @@ -798,7 +830,40 @@ static int __devinit snd_opl3sa2_pnp_detect(struct pnp_card_link *card, return -ENODEV; } -static void __devexit snd_opl3sa2_pnp_remove(struct pnp_card_link * pcard) +static void __devexit snd_opl3sa2_pnp_remove(struct pnp_dev * pdev) +{ + snd_card_t *card = (snd_card_t *) pnp_get_drvdata(pdev); + + snd_card_disconnect(card); + snd_card_free_in_thread(card); +} + +static struct pnp_driver opl3sa2_pnp_driver = { + .name = "opl3sa2", + .id_table = snd_opl3sa2_pnpbiosids, + .probe = snd_opl3sa2_pnp_detect, + .remove = __devexit_p(snd_opl3sa2_pnp_remove), +}; + +static int __devinit snd_opl3sa2_pnp_cdetect(struct pnp_card_link *card, + const struct pnp_card_device_id *id) +{ + static int dev; + int res; + + for ( ; dev < SNDRV_CARDS; dev++) { + if (!enable[dev] || !isapnp[dev]) + continue; + res = snd_opl3sa2_probe(dev, NULL, card, id); + if (res < 0) + return res; + dev++; + return 0; + } + return -ENODEV; +} + +static void __devexit snd_opl3sa2_pnp_cremove(struct pnp_card_link * pcard) { snd_card_t *card = (snd_card_t *) pnp_get_card_drvdata(pcard); @@ -810,8 +875,8 @@ static struct pnp_card_driver opl3sa2_pnpc_driver = { .flags = PNP_DRIVER_RES_DISABLE, .name = "opl3sa2", .id_table = snd_opl3sa2_pnpids, - .probe = snd_opl3sa2_pnp_detect, - .remove = __devexit_p(snd_opl3sa2_pnp_remove), + .probe = snd_opl3sa2_pnp_cdetect, + .remove = __devexit_p(snd_opl3sa2_pnp_cremove), }; #endif /* CONFIG_PNP */ @@ -826,10 +891,11 @@ static int __init alsa_card_opl3sa2_init(void) if (isapnp[dev]) continue; #endif - if (snd_opl3sa2_probe(dev, NULL, NULL) >= 0) + if (snd_opl3sa2_probe(dev, NULL, NULL, NULL) >= 0) cards++; } #ifdef CONFIG_PNP + cards += pnp_register_driver(&opl3sa2_pnp_driver); cards += 
pnp_register_card_driver(&opl3sa2_pnpc_driver); #endif if (!cards) { From 53b266632b79e2adc48754d5a6e9e5ebbde08c12 Mon Sep 17 00:00:00 2001 From: Jaroslav Kysela Date: Fri, 12 Aug 2005 17:35:09 +0200 Subject: [PATCH 537/584] [ALSA] opl3sa2 - fixed typo in PnP BIOS IDs (YMF0021 -> YMH0021) OPL3SA2 driver Signed-off-by: Jaroslav Kysela --- sound/isa/opl3sa2.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/isa/opl3sa2.c b/sound/isa/opl3sa2.c index c4ebf57b9fc3..bb5406cf0a19 100644 --- a/sound/isa/opl3sa2.c +++ b/sound/isa/opl3sa2.c @@ -146,7 +146,7 @@ static snd_card_t *snd_opl3sa2_legacy[SNDRV_CARDS] = SNDRV_DEFAULT_PTR; #ifdef CONFIG_PNP static struct pnp_device_id snd_opl3sa2_pnpbiosids[] = { - { .id = "YMF0021" }, + { .id = "YMH0021" }, { .id = "NMX2210" }, /* Gateway Solo 2500 */ { .id = "" } /* end */ }; From 1b05962e8b2d8a1b1f5934087a4a00f7532fa2d1 Mon Sep 17 00:00:00 2001 From: James Courtier-Dutton Date: Fri, 12 Aug 2005 23:50:13 +0200 Subject: [PATCH 538/584] [ALSA] Add new ID. Fixes ALSA bug #1298 CA0106 driver Signed-off-by: James Courtier-Dutton --- sound/pci/ca0106/ca0106_main.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/sound/pci/ca0106/ca0106_main.c b/sound/pci/ca0106/ca0106_main.c index 95c289284267..7e27bfc37439 100644 --- a/sound/pci/ca0106/ca0106_main.c +++ b/sound/pci/ca0106/ca0106_main.c @@ -188,6 +188,14 @@ static ca0106_details_t ca0106_chip_details[] = { .name = "MSI K8N Diamond MB [SB0438]", .gpio_type = 1, .i2c_adc = 1 } , + /* Shuttle XPC SD31P which has an onboard Creative Labs Sound Blaster Live! 24-bit EAX + * high-definition 7.1 audio processor". 
+ * Added using info from andrewvegan in alsa bug #1298 + */ + { .serial = 0x30381297, + .name = "Shuttle XPC SD31P [SD31P]", + .gpio_type = 1, + .i2c_adc = 1 } , { .serial = 0, .name = "AudigyLS [Unknown]" } }; From 99250872fc619bb5b5ddddcf1c58714a774526fc Mon Sep 17 00:00:00 2001 From: James Courtier-Dutton Date: Fri, 12 Aug 2005 23:54:58 +0200 Subject: [PATCH 539/584] [ALSA] Add new card ID. Fixes ALSA bug #1297 EMU10K1/EMU10K2 driver Signed-off-by: James Courtier-Dutton --- sound/pci/emu10k1/emu10k1_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/pci/emu10k1/emu10k1_main.c b/sound/pci/emu10k1/emu10k1_main.c index 746b51ef3966..c0b67b70e345 100644 --- a/sound/pci/emu10k1/emu10k1_main.c +++ b/sound/pci/emu10k1/emu10k1_main.c @@ -747,11 +747,11 @@ static emu_chip_details_t emu_chip_details[] = { .emu10k1_chip = 1, .ac97_chip = 1, .sblive51 = 1} , + /* Tested by alsa bugtrack user "hus" 12th Sept 2005 */ {.vendor = 0x1102, .device = 0x0002, .subsystem = 0x80611102, .driver = "EMU10K1", .name = "SBLive! Player 5.1 [SB0060]", .id = "Live", .emu10k1_chip = 1, - .ac97_chip = 1, .sblive51 = 1} , {.vendor = 0x1102, .device = 0x0002, .subsystem = 0x80511102, .driver = "EMU10K1", .name = "SBLive! Value [CT4850]", From b263a9bdf9394062a4fc4272ebed60de331c5490 Mon Sep 17 00:00:00 2001 From: Clemens Ladisch Date: Mon, 15 Aug 2005 08:22:39 +0200 Subject: [PATCH 540/584] [ALSA] usb-audio: optimize handling of capture URBs USB generic driver When preparing capture URBs, we don't need to stop when we cross a period boundary because we now never handle more than one millisecond of data per URB anyway. When handling captured data, use an extra flag to call snd_pcm_period_elapsed() no more than once. This allows us to move the period boundary checking code before the copying of the data which avoids a second locking of the substream's lock. 
Signed-off-by: Clemens Ladisch --- sound/usb/usbaudio.c | 26 ++++++++------------------ 1 file changed, 8 insertions(+), 18 deletions(-) diff --git a/sound/usb/usbaudio.c b/sound/usb/usbaudio.c index a62d1313da17..a703d96bfcb4 100644 --- a/sound/usb/usbaudio.c +++ b/sound/usb/usbaudio.c @@ -311,27 +311,18 @@ static int prepare_capture_urb(snd_usb_substream_t *subs, struct urb *urb) { int i, offs; - unsigned long flags; snd_urb_ctx_t *ctx = (snd_urb_ctx_t *)urb->context; offs = 0; urb->dev = ctx->subs->dev; /* we need to set this at each time */ - urb->number_of_packets = 0; - spin_lock_irqsave(&subs->lock, flags); for (i = 0; i < ctx->packets; i++) { urb->iso_frame_desc[i].offset = offs; urb->iso_frame_desc[i].length = subs->curpacksize; offs += subs->curpacksize; - urb->number_of_packets++; - subs->transfer_sched += subs->curframesize; - if (subs->transfer_sched >= runtime->period_size) { - subs->transfer_sched -= runtime->period_size; - break; - } } - spin_unlock_irqrestore(&subs->lock, flags); urb->transfer_buffer = ctx->buf; urb->transfer_buffer_length = offs; + urb->number_of_packets = ctx->packets; #if 0 // for check if (! 
urb->bandwidth) { int bustime; @@ -359,6 +350,7 @@ static int retire_capture_urb(snd_usb_substream_t *subs, unsigned char *cp; int i; unsigned int stride, len, oldptr; + int period_elapsed = 0; stride = runtime->frame_bits >> 3; @@ -378,6 +370,10 @@ static int retire_capture_urb(snd_usb_substream_t *subs, if (subs->hwptr_done >= runtime->buffer_size) subs->hwptr_done -= runtime->buffer_size; subs->transfer_done += len; + if (subs->transfer_done >= runtime->period_size) { + subs->transfer_done -= runtime->period_size; + period_elapsed = 1; + } spin_unlock_irqrestore(&subs->lock, flags); /* copy a data chunk */ if (oldptr + len > runtime->buffer_size) { @@ -388,15 +384,9 @@ static int retire_capture_urb(snd_usb_substream_t *subs, } else { memcpy(runtime->dma_area + oldptr * stride, cp, len * stride); } - /* update the pointer, call callback if necessary */ - spin_lock_irqsave(&subs->lock, flags); - if (subs->transfer_done >= runtime->period_size) { - subs->transfer_done -= runtime->period_size; - spin_unlock_irqrestore(&subs->lock, flags); - snd_pcm_period_elapsed(subs->pcm_substream); - } else - spin_unlock_irqrestore(&subs->lock, flags); } + if (period_elapsed) + snd_pcm_period_elapsed(subs->pcm_substream); return 0; } From 7efd8bc800324a967a37e8a425433468b7f06adb Mon Sep 17 00:00:00 2001 From: Clemens Ladisch Date: Mon, 15 Aug 2005 08:24:44 +0200 Subject: [PATCH 541/584] [ALSA] usb-audio: double-buffer all playback data USB generic driver We always had to use double buffering when capturing, and when playback data for one URB crosses a buffer boundary. The latter would make hwptr updates less precise because the double-buffered data is read from the buffer much earlier than the other data is read by the host controller. Double-buffering all data allows hwptr to be updated immediately after the data was copied to the USB buffer(s), which has the additional benefit of avoiding the latency imposed by the host controller's delay of up to one frame when interrupting.
Signed-off-by: Clemens Ladisch --- sound/usb/usbaudio.c | 151 +++++++++++++++++++------------------------ 1 file changed, 65 insertions(+), 86 deletions(-) diff --git a/sound/usb/usbaudio.c b/sound/usb/usbaudio.c index a703d96bfcb4..2b4f916a0a9a 100644 --- a/sound/usb/usbaudio.c +++ b/sound/usb/usbaudio.c @@ -41,6 +41,7 @@ #include #include #include +#include #include #include #include @@ -129,8 +130,6 @@ struct snd_urb_ctx { snd_usb_substream_t *subs; int index; /* index for urb array */ int packets; /* number of packets per urb */ - int transfer; /* transferred size */ - char *buf; /* buffer for capture */ }; struct snd_urb_ops { @@ -168,9 +167,7 @@ struct snd_usb_substream { unsigned int running: 1; /* running status */ - unsigned int hwptr; /* free frame position in the buffer (only for playback) */ unsigned int hwptr_done; /* processed frame position in the buffer */ - unsigned int transfer_sched; /* scheduled frames since last period (for playback) */ unsigned int transfer_done; /* processed frames since last period update */ unsigned long active_mask; /* bitmask of active urbs */ unsigned long unlink_mask; /* bitmask of unlinked urbs */ @@ -179,12 +176,12 @@ struct snd_usb_substream { snd_urb_ctx_t dataurb[MAX_URBS]; /* data urb table */ snd_urb_ctx_t syncurb[SYNC_URBS]; /* sync urb table */ char syncbuf[SYNC_URBS * 4]; /* sync buffer; it's so small - let's get static */ - char *tmpbuf; /* temporary buffer for playback */ u64 formats; /* format bitmasks (all or'ed) */ unsigned int num_formats; /* number of supported audio formats (list) */ struct list_head fmt_list; /* format list */ spinlock_t lock; + struct tasklet_struct start_period_elapsed; /* for start trigger */ struct snd_urb_ops ops; /* callbacks (must be filled at init) */ }; @@ -320,7 +317,6 @@ static int prepare_capture_urb(snd_usb_substream_t *subs, urb->iso_frame_desc[i].length = subs->curpacksize; offs += subs->curpacksize; } - urb->transfer_buffer = ctx->buf; urb->transfer_buffer_length = 
offs; urb->number_of_packets = ctx->packets; #if 0 // for check @@ -482,12 +478,10 @@ static int retire_playback_sync_urb_hs(snd_usb_substream_t *subs, /* * prepare urb for playback data pipe * - * we copy the data directly from the pcm buffer. - * the current position to be copied is held in hwptr field. - * since a urb can handle only a single linear buffer, if the total - * transferred area overflows the buffer boundary, we cannot send - * it directly from the buffer. thus the data is once copied to - * a temporary buffer and urb points to that. + * Since a URB can handle only a single linear buffer, we must use double + * buffering when the data to be transferred overflows the buffer boundary. + * To avoid inconsistencies when updating hwptr_done, we use double buffering + * for all URBs. */ static int prepare_playback_urb(snd_usb_substream_t *subs, snd_pcm_runtime_t *runtime, @@ -496,6 +490,7 @@ static int prepare_playback_urb(snd_usb_substream_t *subs, int i, stride, offs; unsigned int counts; unsigned long flags; + int period_elapsed = 0; snd_urb_ctx_t *ctx = (snd_urb_ctx_t *)urb->context; stride = runtime->frame_bits >> 3; @@ -520,21 +515,25 @@ static int prepare_playback_urb(snd_usb_substream_t *subs, urb->iso_frame_desc[i].length = counts * stride; offs += counts; urb->number_of_packets++; - subs->transfer_sched += counts; - if (subs->transfer_sched >= runtime->period_size) { - subs->transfer_sched -= runtime->period_size; + subs->transfer_done += counts; + if (subs->transfer_done >= runtime->period_size) { + subs->transfer_done -= runtime->period_size; + period_elapsed = 1; if (subs->fmt_type == USB_FORMAT_TYPE_II) { - if (subs->transfer_sched > 0) { - /* FIXME: fill-max mode is not supported yet */ - offs -= subs->transfer_sched; - counts -= subs->transfer_sched; - urb->iso_frame_desc[i].length = counts * stride; - subs->transfer_sched = 0; + if (subs->transfer_done > 0) { + /* FIXME: fill-max mode is not + * supported yet */ + offs -= 
subs->transfer_done; + counts -= subs->transfer_done; + urb->iso_frame_desc[i].length = + counts * stride; + subs->transfer_done = 0; } i++; if (i < ctx->packets) { /* add a transfer delimiter */ - urb->iso_frame_desc[i].offset = offs * stride; + urb->iso_frame_desc[i].offset = + offs * stride; urb->iso_frame_desc[i].length = 0; urb->number_of_packets++; } @@ -542,58 +541,55 @@ static int prepare_playback_urb(snd_usb_substream_t *subs, break; } } - if (subs->hwptr + offs > runtime->buffer_size) { - /* err, the transferred area goes over buffer boundary. - * copy the data to the temp buffer. - */ - int len; - len = runtime->buffer_size - subs->hwptr; - urb->transfer_buffer = subs->tmpbuf; - memcpy(subs->tmpbuf, runtime->dma_area + subs->hwptr * stride, len * stride); - memcpy(subs->tmpbuf + len * stride, runtime->dma_area, (offs - len) * stride); - subs->hwptr += offs; - subs->hwptr -= runtime->buffer_size; + if (subs->hwptr_done + offs > runtime->buffer_size) { + /* err, the transferred area goes over buffer boundary. 
*/ + unsigned int len = runtime->buffer_size - subs->hwptr_done; + memcpy(urb->transfer_buffer, + runtime->dma_area + subs->hwptr_done * stride, + len * stride); + memcpy(urb->transfer_buffer + len * stride, + runtime->dma_area, + (offs - len) * stride); } else { - /* set the buffer pointer */ - urb->transfer_buffer = runtime->dma_area + subs->hwptr * stride; - subs->hwptr += offs; - if (subs->hwptr == runtime->buffer_size) - subs->hwptr = 0; + memcpy(urb->transfer_buffer, + runtime->dma_area + subs->hwptr_done * stride, + offs * stride); } + subs->hwptr_done += offs; + if (subs->hwptr_done >= runtime->buffer_size) + subs->hwptr_done -= runtime->buffer_size; spin_unlock_irqrestore(&subs->lock, flags); urb->transfer_buffer_length = offs * stride; - ctx->transfer = offs; - + if (period_elapsed) { + if (likely(subs->running)) + snd_pcm_period_elapsed(subs->pcm_substream); + else + tasklet_hi_schedule(&subs->start_period_elapsed); + } return 0; } /* * process after playback data complete - * - * update the current position and call callback if a period is processed. + * - nothing to do */ static int retire_playback_urb(snd_usb_substream_t *subs, snd_pcm_runtime_t *runtime, struct urb *urb) { - unsigned long flags; - snd_urb_ctx_t *ctx = (snd_urb_ctx_t *)urb->context; - - spin_lock_irqsave(&subs->lock, flags); - subs->transfer_done += ctx->transfer; - subs->hwptr_done += ctx->transfer; - ctx->transfer = 0; - if (subs->hwptr_done >= runtime->buffer_size) - subs->hwptr_done -= runtime->buffer_size; - if (subs->transfer_done >= runtime->period_size) { - subs->transfer_done -= runtime->period_size; - spin_unlock_irqrestore(&subs->lock, flags); - snd_pcm_period_elapsed(subs->pcm_substream); - } else - spin_unlock_irqrestore(&subs->lock, flags); return 0; } +/* + * Delay the snd_pcm_period_elapsed() call until after the start trigger + * callback so that we're no longer in the substream's lock.
+ */ +static void start_period_elapsed(unsigned long data) +{ + snd_usb_substream_t *subs = (snd_usb_substream_t *)data; + snd_pcm_period_elapsed(subs->pcm_substream); +} + /* */ @@ -848,11 +844,10 @@ static int snd_usb_pcm_trigger(snd_pcm_substream_t *substream, int cmd) static void release_urb_ctx(snd_urb_ctx_t *u) { if (u->urb) { + kfree(u->urb->transfer_buffer); usb_free_urb(u->urb); u->urb = NULL; } - kfree(u->buf); - u->buf = NULL; } /* @@ -870,8 +865,6 @@ static void release_substream_urbs(snd_usb_substream_t *subs, int force) release_urb_ctx(&subs->dataurb[i]); for (i = 0; i < SYNC_URBS; i++) release_urb_ctx(&subs->syncurb[i]); - kfree(subs->tmpbuf); - subs->tmpbuf = NULL; subs->nurbs = 0; } @@ -923,24 +916,15 @@ static int init_substream_urbs(snd_usb_substream_t *subs, unsigned int period_by urb_packs = 1; urb_packs *= packs_per_ms; - /* allocate a temporary buffer for playback */ - if (is_playback) { - subs->tmpbuf = kmalloc(maxsize * urb_packs, GFP_KERNEL); - if (! subs->tmpbuf) { - snd_printk(KERN_ERR "cannot malloc tmpbuf\n"); - return -ENOMEM; - } - } - /* decide how many packets to be used */ if (is_playback) { unsigned int minsize; /* determine how small a packet can be */ minsize = (subs->freqn >> (16 - subs->datainterval)) * (frame_bits >> 3); - /* with sync from device, assume it can be 25% lower */ + /* with sync from device, assume it can be 12% lower */ if (subs->syncpipe) - minsize -= minsize >> 2; + minsize -= minsize >> 3; minsize = max(minsize, 1u); total_packs = (period_bytes + minsize - 1) / minsize; /* round up to multiple of packs_per_ms */ @@ -989,27 +973,22 @@ static int init_substream_urbs(snd_usb_substream_t *subs, unsigned int period_by snd_urb_ctx_t *u = &subs->dataurb[i]; u->index = i; u->subs = subs; - u->transfer = 0; u->packets = npacks[i]; if (subs->fmt_type == USB_FORMAT_TYPE_II) u->packets++; /* for transfer delimiter */ - if (! 
is_playback) { - /* allocate a capture buffer per urb */ - u->buf = kmalloc(maxsize * u->packets, GFP_KERNEL); - if (! u->buf) { - release_substream_urbs(subs, 0); - return -ENOMEM; - } - } u->urb = usb_alloc_urb(u->packets, GFP_KERNEL); if (! u->urb) { release_substream_urbs(subs, 0); return -ENOMEM; } - u->urb->dev = subs->dev; + u->urb->transfer_buffer = kmalloc(maxsize * u->packets, + GFP_KERNEL); + if (! u->urb->transfer_buffer) { + release_substream_urbs(subs, 0); + return -ENOMEM; + } u->urb->pipe = subs->datapipe; u->urb->transfer_flags = URB_ISO_ASAP; - u->urb->number_of_packets = u->packets; u->urb->interval = 1 << subs->datainterval; u->urb->context = u; u->urb->complete = snd_usb_complete_callback(snd_complete_urb); @@ -1029,7 +1008,6 @@ static int init_substream_urbs(snd_usb_substream_t *subs, unsigned int period_by } u->urb->transfer_buffer = subs->syncbuf + i * 4; u->urb->transfer_buffer_length = 4; - u->urb->dev = subs->dev; u->urb->pipe = subs->syncpipe; u->urb->transfer_flags = URB_ISO_ASAP; u->urb->number_of_packets = 1; @@ -1386,9 +1364,7 @@ static int snd_usb_pcm_prepare(snd_pcm_substream_t *substream) subs->curframesize = bytes_to_frames(runtime, subs->curpacksize); /* reset the pointer */ - subs->hwptr = 0; subs->hwptr_done = 0; - subs->transfer_sched = 0; subs->transfer_done = 0; subs->phase = 0; @@ -2035,6 +2011,9 @@ static void init_substream(snd_usb_stream_t *as, int stream, struct audioformat INIT_LIST_HEAD(&subs->fmt_list); spin_lock_init(&subs->lock); + if (stream == SNDRV_PCM_STREAM_PLAYBACK) + tasklet_init(&subs->start_period_elapsed, start_period_elapsed, + (unsigned long)subs); subs->stream = as; subs->direction = stream; From 9624ea812c7afd2e403c56366cadddb9ecfb88c6 Mon Sep 17 00:00:00 2001 From: Clemens Ladisch Date: Mon, 15 Aug 2005 08:25:24 +0200 Subject: [PATCH 542/584] [ALSA] usb-audio: actually schedule playback URBs at frame boundaries USB generic driver Change prepare_playback_urb() not to stop preparing packets before a 
frame boundary has been reached. Signed-off-by: Clemens Ladisch --- sound/usb/usbaudio.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/sound/usb/usbaudio.c b/sound/usb/usbaudio.c index 2b4f916a0a9a..3f7930c0b616 100644 --- a/sound/usb/usbaudio.c +++ b/sound/usb/usbaudio.c @@ -164,6 +164,7 @@ struct snd_usb_substream { unsigned int curframesize; /* current packet size in frames (for capture) */ unsigned int fill_max: 1; /* fill max packet size always */ unsigned int fmt_type; /* USB audio format type (1-3) */ + unsigned int packs_per_ms; /* packets per millisecond (for playback) */ unsigned int running: 1; /* running status */ @@ -537,9 +538,13 @@ static int prepare_playback_urb(snd_usb_substream_t *subs, urb->iso_frame_desc[i].length = 0; urb->number_of_packets++; } + break; } - break; } + /* finish at the frame boundary at/after the period boundary */ + if (period_elapsed && + (i & (subs->packs_per_ms - 1)) == subs->packs_per_ms - 1) + break; } if (subs->hwptr_done + offs > runtime->buffer_size) { /* err, the transferred area goes over buffer boundary. */ @@ -907,6 +912,7 @@ static int init_substream_urbs(snd_usb_substream_t *subs, unsigned int period_by packs_per_ms = 8 >> subs->datainterval; else packs_per_ms = 1; + subs->packs_per_ms = packs_per_ms; if (is_playback) { urb_packs = nrpacks; From daa150ef7d437d17973210f47a1c58623415df94 Mon Sep 17 00:00:00 2001 From: Clemens Ladisch Date: Mon, 15 Aug 2005 08:25:50 +0200 Subject: [PATCH 543/584] [ALSA] usb-audio: properly lock hwptr_done accesses USB generic driver Take the substream lock when reading hwptr_done to avoid a race condition with the updates in the URB callbacks. 
Signed-off-by: Clemens Ladisch --- sound/usb/usbaudio.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/sound/usb/usbaudio.c b/sound/usb/usbaudio.c index 3f7930c0b616..8d4a085f642a 100644 --- a/sound/usb/usbaudio.c +++ b/sound/usb/usbaudio.c @@ -815,8 +815,14 @@ static int wait_clear_urbs(snd_usb_substream_t *subs) */ static snd_pcm_uframes_t snd_usb_pcm_pointer(snd_pcm_substream_t *substream) { - snd_usb_substream_t *subs = (snd_usb_substream_t *)substream->runtime->private_data; - return subs->hwptr_done; + snd_usb_substream_t *subs; + snd_pcm_uframes_t hwptr_done; + + subs = (snd_usb_substream_t *)substream->runtime->private_data; + spin_lock(&subs->lock); + hwptr_done = subs->hwptr_done; + spin_unlock(&subs->lock); + return hwptr_done; } From 55851f734d7af7e5362d43b60331c95c32a3b027 Mon Sep 17 00:00:00 2001 From: Clemens Ladisch Date: Mon, 15 Aug 2005 08:34:16 +0200 Subject: [PATCH 544/584] [ALSA] usb-audio: use usb_buffer_alloc/free USB generic driver Use the USB buffer allocation functions to avoid repeated DMA mappings of our buffers, which are re-used quite a lot. 
Signed-off-by: Clemens Ladisch --- sound/usb/usbaudio.c | 52 ++++++++++++++++++++++++++++---------------- sound/usb/usbmidi.c | 17 +++++++++++---- 2 files changed, 46 insertions(+), 23 deletions(-) diff --git a/sound/usb/usbaudio.c b/sound/usb/usbaudio.c index 8d4a085f642a..13ff66b0a532 100644 --- a/sound/usb/usbaudio.c +++ b/sound/usb/usbaudio.c @@ -127,6 +127,7 @@ struct audioformat { struct snd_urb_ctx { struct urb *urb; + unsigned int buffer_size; /* size of data buffer, if data URB */ snd_usb_substream_t *subs; int index; /* index for urb array */ int packets; /* number of packets per urb */ @@ -176,7 +177,8 @@ struct snd_usb_substream { unsigned int nurbs; /* # urbs */ snd_urb_ctx_t dataurb[MAX_URBS]; /* data urb table */ snd_urb_ctx_t syncurb[SYNC_URBS]; /* sync urb table */ - char syncbuf[SYNC_URBS * 4]; /* sync buffer; it's so small - let's get static */ + char *syncbuf; /* sync buffer for all sync URBs */ + dma_addr_t sync_dma; /* DMA address of syncbuf */ u64 formats; /* format bitmasks (all or'ed) */ unsigned int num_formats; /* number of supported audio formats (list) */ @@ -855,7 +857,10 @@ static int snd_usb_pcm_trigger(snd_pcm_substream_t *substream, int cmd) static void release_urb_ctx(snd_urb_ctx_t *u) { if (u->urb) { - kfree(u->urb->transfer_buffer); + if (u->buffer_size) + usb_buffer_free(u->subs->dev, u->buffer_size, + u->urb->transfer_buffer, + u->urb->transfer_dma); usb_free_urb(u->urb); u->urb = NULL; } @@ -876,6 +881,9 @@ static void release_substream_urbs(snd_usb_substream_t *subs, int force) release_urb_ctx(&subs->dataurb[i]); for (i = 0; i < SYNC_URBS; i++) release_urb_ctx(&subs->syncurb[i]); + usb_buffer_free(subs->dev, SYNC_URBS * 4, + subs->syncbuf, subs->sync_dma); + subs->syncbuf = NULL; subs->nurbs = 0; } @@ -986,21 +994,19 @@ static int init_substream_urbs(snd_usb_substream_t *subs, unsigned int period_by u->index = i; u->subs = subs; u->packets = npacks[i]; + u->buffer_size = maxsize * u->packets; if (subs->fmt_type == 
USB_FORMAT_TYPE_II) u->packets++; /* for transfer delimiter */ u->urb = usb_alloc_urb(u->packets, GFP_KERNEL); - if (! u->urb) { - release_substream_urbs(subs, 0); - return -ENOMEM; - } - u->urb->transfer_buffer = kmalloc(maxsize * u->packets, - GFP_KERNEL); - if (! u->urb->transfer_buffer) { - release_substream_urbs(subs, 0); - return -ENOMEM; - } + if (! u->urb) + goto out_of_memory; + u->urb->transfer_buffer = + usb_buffer_alloc(subs->dev, u->buffer_size, GFP_KERNEL, + &u->urb->transfer_dma); + if (! u->urb->transfer_buffer) + goto out_of_memory; u->urb->pipe = subs->datapipe; - u->urb->transfer_flags = URB_ISO_ASAP; + u->urb->transfer_flags = URB_ISO_ASAP | URB_NO_TRANSFER_DMA_MAP; u->urb->interval = 1 << subs->datainterval; u->urb->context = u; u->urb->complete = snd_usb_complete_callback(snd_complete_urb); @@ -1008,20 +1014,24 @@ static int init_substream_urbs(snd_usb_substream_t *subs, unsigned int period_by if (subs->syncpipe) { /* allocate and initialize sync urbs */ + subs->syncbuf = usb_buffer_alloc(subs->dev, SYNC_URBS * 4, + GFP_KERNEL, &subs->sync_dma); + if (! subs->syncbuf) + goto out_of_memory; for (i = 0; i < SYNC_URBS; i++) { snd_urb_ctx_t *u = &subs->syncurb[i]; u->index = i; u->subs = subs; u->packets = 1; u->urb = usb_alloc_urb(1, GFP_KERNEL); - if (! u->urb) { - release_substream_urbs(subs, 0); - return -ENOMEM; - } + if (! 
u->urb) + goto out_of_memory; u->urb->transfer_buffer = subs->syncbuf + i * 4; + u->urb->transfer_dma = subs->sync_dma + i * 4; u->urb->transfer_buffer_length = 4; u->urb->pipe = subs->syncpipe; - u->urb->transfer_flags = URB_ISO_ASAP; + u->urb->transfer_flags = URB_ISO_ASAP | + URB_NO_TRANSFER_DMA_MAP; u->urb->number_of_packets = 1; u->urb->interval = 1 << subs->syncinterval; u->urb->context = u; @@ -1029,6 +1039,10 @@ static int init_substream_urbs(snd_usb_substream_t *subs, unsigned int period_by } } return 0; + +out_of_memory: + release_substream_urbs(subs, 0); + return -ENOMEM; } @@ -2036,7 +2050,7 @@ static void init_substream(snd_usb_stream_t *as, int stream, struct audioformat subs->ops = audio_urb_ops_high_speed[stream]; snd_pcm_lib_preallocate_pages(as->pcm->streams[stream].substream, SNDRV_DMA_TYPE_CONTINUOUS, - snd_dma_continuous_data(GFP_KERNEL), + snd_dma_continuous_data(GFP_NOIO), 64 * 1024, 128 * 1024); snd_pcm_set_ops(as->pcm, stream, stream == SNDRV_PCM_STREAM_PLAYBACK ? 
diff --git a/sound/usb/usbmidi.c b/sound/usb/usbmidi.c index 5778a9b725ec..d0d895df5375 100644 --- a/sound/usb/usbmidi.c +++ b/sound/usb/usbmidi.c @@ -765,7 +765,10 @@ static snd_rawmidi_ops_t snd_usbmidi_input_ops = { static void snd_usbmidi_in_endpoint_delete(snd_usb_midi_in_endpoint_t* ep) { if (ep->urb) { - kfree(ep->urb->transfer_buffer); + usb_buffer_free(ep->umidi->chip->dev, + ep->urb->transfer_buffer_length, + ep->urb->transfer_buffer, + ep->urb->transfer_dma); usb_free_urb(ep->urb); } kfree(ep); @@ -799,7 +802,8 @@ static int snd_usbmidi_in_endpoint_create(snd_usb_midi_t* umidi, else pipe = usb_rcvbulkpipe(umidi->chip->dev, ep_info->in_ep); length = usb_maxpacket(umidi->chip->dev, pipe, 0); - buffer = kmalloc(length, GFP_KERNEL); + buffer = usb_buffer_alloc(umidi->chip->dev, length, GFP_KERNEL, + &ep->urb->transfer_dma); if (!buffer) { snd_usbmidi_in_endpoint_delete(ep); return -ENOMEM; @@ -812,6 +816,7 @@ static int snd_usbmidi_in_endpoint_create(snd_usb_midi_t* umidi, usb_fill_bulk_urb(ep->urb, umidi->chip->dev, pipe, buffer, length, snd_usb_complete_callback(snd_usbmidi_in_urb_complete), ep); + ep->urb->transfer_flags = URB_NO_TRANSFER_DMA_MAP; rep->in = ep; return 0; @@ -835,7 +840,9 @@ static void snd_usbmidi_out_endpoint_delete(snd_usb_midi_out_endpoint_t* ep) if (ep->tasklet.func) tasklet_kill(&ep->tasklet); if (ep->urb) { - kfree(ep->urb->transfer_buffer); + usb_buffer_free(ep->umidi->chip->dev, ep->max_transfer, + ep->urb->transfer_buffer, + ep->urb->transfer_dma); usb_free_urb(ep->urb); } kfree(ep); @@ -867,7 +874,8 @@ static int snd_usbmidi_out_endpoint_create(snd_usb_midi_t* umidi, /* we never use interrupt output pipes */ pipe = usb_sndbulkpipe(umidi->chip->dev, ep_info->out_ep); ep->max_transfer = usb_maxpacket(umidi->chip->dev, pipe, 1); - buffer = kmalloc(ep->max_transfer, GFP_KERNEL); + buffer = usb_buffer_alloc(umidi->chip->dev, ep->max_transfer, + GFP_KERNEL, &ep->urb->transfer_dma); if (!buffer) { snd_usbmidi_out_endpoint_delete(ep); 
return -ENOMEM; @@ -875,6 +883,7 @@ static int snd_usbmidi_out_endpoint_create(snd_usb_midi_t* umidi, usb_fill_bulk_urb(ep->urb, umidi->chip->dev, pipe, buffer, ep->max_transfer, snd_usb_complete_callback(snd_usbmidi_out_urb_complete), ep); + ep->urb->transfer_flags = URB_NO_TRANSFER_DMA_MAP; spin_lock_init(&ep->buffer_lock); tasklet_init(&ep->tasklet, snd_usbmidi_out_tasklet, (unsigned long)ep); From 6207e51b79f89352a8623f806df5bfafb012e489 Mon Sep 17 00:00:00 2001 From: Clemens Ladisch Date: Mon, 15 Aug 2005 08:35:25 +0200 Subject: [PATCH 545/584] [ALSA] usb-audio: use vmalloc for the PCM buffer USB generic driver With the double buffering, we no longer need contiguous memory for the PCM buffer, so we can use vmalloc() instead of the preallocation functions, and increase the maximum size to 256 KB. Signed-off-by: Clemens Ladisch --- sound/usb/usbaudio.c | 53 ++++++++++++++++++++++++++++++++++++-------- 1 file changed, 44 insertions(+), 9 deletions(-) diff --git a/sound/usb/usbaudio.c b/sound/usb/usbaudio.c index 13ff66b0a532..5aa5fe651a8a 100644 --- a/sound/usb/usbaudio.c +++ b/sound/usb/usbaudio.c @@ -46,6 +46,7 @@ #include #include #include +#include #include #include #include @@ -676,6 +677,42 @@ static void snd_complete_sync_urb(struct urb *urb, struct pt_regs *regs) } +/* get the physical page pointer at the given offset */ +static struct page *snd_pcm_get_vmalloc_page(snd_pcm_substream_t *subs, + unsigned long offset) +{ + void *pageptr = subs->runtime->dma_area + offset; + return vmalloc_to_page(pageptr); +} + +/* allocate virtual buffer; may be called more than once */ +static int snd_pcm_alloc_vmalloc_buffer(snd_pcm_substream_t *subs, size_t size) +{ + snd_pcm_runtime_t *runtime = subs->runtime; + if (runtime->dma_area) { + if (runtime->dma_bytes >= size) + return 0; /* already large enough */ + vfree_nocheck(runtime->dma_area); + } + runtime->dma_area = vmalloc_nocheck(size); + if (! 
runtime->dma_area) + return -ENOMEM; + runtime->dma_bytes = size; + return 0; +} + +/* free virtual buffer; may be called more than once */ +static int snd_pcm_free_vmalloc_buffer(snd_pcm_substream_t *subs) +{ + snd_pcm_runtime_t *runtime = subs->runtime; + if (runtime->dma_area) { + vfree_nocheck(runtime->dma_area); + runtime->dma_area = NULL; + } + return 0; +} + + /* * unlink active urbs. */ @@ -1311,7 +1348,8 @@ static int snd_usb_hw_params(snd_pcm_substream_t *substream, unsigned int channels, rate, format; int ret, changed; - ret = snd_pcm_lib_malloc_pages(substream, params_buffer_bytes(hw_params)); + ret = snd_pcm_alloc_vmalloc_buffer(substream, + params_buffer_bytes(hw_params)); if (ret < 0) return ret; @@ -1367,7 +1405,7 @@ static int snd_usb_hw_free(snd_pcm_substream_t *substream) subs->cur_rate = 0; subs->period_bytes = 0; release_substream_urbs(subs, 0); - return snd_pcm_lib_free_pages(substream); + return snd_pcm_free_vmalloc_buffer(substream); } /* @@ -1406,7 +1444,7 @@ static snd_pcm_hardware_t snd_usb_playback = .info = (SNDRV_PCM_INFO_MMAP | SNDRV_PCM_INFO_INTERLEAVED | SNDRV_PCM_INFO_BLOCK_TRANSFER | SNDRV_PCM_INFO_MMAP_VALID), - .buffer_bytes_max = (128*1024), + .buffer_bytes_max = (256*1024), .period_bytes_min = 64, .period_bytes_max = (128*1024), .periods_min = 2, @@ -1418,7 +1456,7 @@ static snd_pcm_hardware_t snd_usb_capture = .info = (SNDRV_PCM_INFO_MMAP | SNDRV_PCM_INFO_INTERLEAVED | SNDRV_PCM_INFO_BLOCK_TRANSFER | SNDRV_PCM_INFO_MMAP_VALID), - .buffer_bytes_max = (128*1024), + .buffer_bytes_max = (256*1024), .period_bytes_min = 64, .period_bytes_max = (128*1024), .periods_min = 2, @@ -1810,6 +1848,7 @@ static snd_pcm_ops_t snd_usb_playback_ops = { .prepare = snd_usb_pcm_prepare, .trigger = snd_usb_pcm_trigger, .pointer = snd_usb_pcm_pointer, + .page = snd_pcm_get_vmalloc_page, }; static snd_pcm_ops_t snd_usb_capture_ops = { @@ -1821,6 +1860,7 @@ static snd_pcm_ops_t snd_usb_capture_ops = { .prepare = snd_usb_pcm_prepare, .trigger = 
snd_usb_pcm_trigger, .pointer = snd_usb_pcm_pointer, + .page = snd_pcm_get_vmalloc_page, }; @@ -2048,10 +2088,6 @@ static void init_substream(snd_usb_stream_t *as, int stream, struct audioformat subs->ops = audio_urb_ops[stream]; else subs->ops = audio_urb_ops_high_speed[stream]; - snd_pcm_lib_preallocate_pages(as->pcm->streams[stream].substream, - SNDRV_DMA_TYPE_CONTINUOUS, - snd_dma_continuous_data(GFP_NOIO), - 64 * 1024, 128 * 1024); snd_pcm_set_ops(as->pcm, stream, stream == SNDRV_PCM_STREAM_PLAYBACK ? &snd_usb_playback_ops : &snd_usb_capture_ops); @@ -2097,7 +2133,6 @@ static void snd_usb_audio_pcm_free(snd_pcm_t *pcm) snd_usb_stream_t *stream = pcm->private_data; if (stream) { stream->pcm = NULL; - snd_pcm_lib_preallocate_free_for_all(pcm); snd_usb_audio_stream_free(stream); } } From 3e6c6139f0cf9c196c3fd700055f2e22ca22e726 Mon Sep 17 00:00:00 2001 From: Clemens Ladisch Date: Mon, 15 Aug 2005 09:13:32 +0200 Subject: [PATCH 546/584] [ALSA] ac97: make patch_wolfson_wm97??_specific() functions static AC97 Codec Make the patch_wolfson_wm97??_specific() functions static again. This patch was accidentally reverted in rev. 1.92. 
Signed-off-by: Clemens Ladisch --- sound/pci/ac97/ac97_patch.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/sound/pci/ac97/ac97_patch.c b/sound/pci/ac97/ac97_patch.c index a0d515536177..616504ef3702 100644 --- a/sound/pci/ac97/ac97_patch.c +++ b/sound/pci/ac97/ac97_patch.c @@ -375,7 +375,7 @@ AC97_DOUBLE("Front Playback Volume", AC97_WM97XX_FMIXER_VOL, 8, 0, 31, 1), AC97_SINGLE("Front Playback Switch", AC97_WM97XX_FMIXER_VOL, 15, 1, 1), }; -int patch_wolfson_wm9703_specific(ac97_t * ac97) +static int patch_wolfson_wm9703_specific(ac97_t * ac97) { /* This is known to work for the ViewSonic ViewPad 1000 * Randolph Bentson @@ -410,7 +410,7 @@ AC97_DOUBLE("Rear DAC Volume", AC97_WM9704_RPCM_VOL, 8, 0, 31, 1), AC97_DOUBLE("Surround Volume", AC97_SURROUND_MASTER, 8, 0, 31, 1), }; -int patch_wolfson_wm9704_specific(ac97_t * ac97) +static int patch_wolfson_wm9704_specific(ac97_t * ac97) { int err, i; for (i = 0; i < ARRAY_SIZE(wm9704_snd_ac97_controls); i++) { @@ -433,7 +433,7 @@ int patch_wolfson04(ac97_t * ac97) return 0; } -int patch_wolfson_wm9705_specific(ac97_t * ac97) +static int patch_wolfson_wm9705_specific(ac97_t * ac97) { int err, i; for (i = 0; i < ARRAY_SIZE(wm97xx_snd_ac97_controls); i++) { @@ -558,7 +558,7 @@ AC97_SINGLE("Headphone ZC Switch", AC97_HEADPHONE, 7, 1, 0), AC97_SINGLE("Mono ZC Switch", AC97_MASTER_MONO, 7, 1, 0), }; -int patch_wolfson_wm9711_specific(ac97_t * ac97) +static int patch_wolfson_wm9711_specific(ac97_t * ac97) { int err, i; From cff79742fc9e80242a9147a348621e4373c76287 Mon Sep 17 00:00:00 2001 From: Jaroslav Kysela Date: Mon, 15 Aug 2005 13:08:04 +0200 Subject: [PATCH 547/584] [ALSA] opl3sa2 - use different name for PnP BIOS driver OPL3SA2 driver - use different name (opl3sa2-pnpbios) for PnP BIOS driver - don't print warning when manual config of resources fails (PnP BIOS) Signed-off-by: Jaroslav Kysela --- sound/isa/opl3sa2.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git 
a/sound/isa/opl3sa2.c b/sound/isa/opl3sa2.c index bb5406cf0a19..75bd6eca63e7 100644 --- a/sound/isa/opl3sa2.c +++ b/sound/isa/opl3sa2.c @@ -607,7 +607,7 @@ static int __init snd_opl3sa2_pnp(int dev, opl3sa2_t *chip, if (irq[dev] != SNDRV_AUTO_IRQ) pnp_resource_change(&cfg->irq_resource[0], irq[dev], 1); err = pnp_manual_config_dev(pdev, cfg, 0); - if (err < 0) + if (err < 0 && isapnp) snd_printk(KERN_ERR "PnP manual resources are invalid, using auto config\n"); err = pnp_activate_dev(pdev); if (err < 0) { @@ -839,7 +839,7 @@ static void __devexit snd_opl3sa2_pnp_remove(struct pnp_dev * pdev) } static struct pnp_driver opl3sa2_pnp_driver = { - .name = "opl3sa2", + .name = "opl3sa2-pnpbios", .id_table = snd_opl3sa2_pnpbiosids, .probe = snd_opl3sa2_pnp_detect, .remove = __devexit_p(snd_opl3sa2_pnp_remove), From b27113102f576092cd8f5d6ce8365aa6e2f58134 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Mon, 15 Aug 2005 15:01:10 +0200 Subject: [PATCH 548/584] [ALSA] Fix PCM 32bit compat layer PCM Midlevel Fixed the handling of boundary in PCM 32bit compat layer. Positions in hwsync are bound in the 32bit boundary size. Signed-off-by: Takashi Iwai --- sound/core/pcm_compat.c | 42 +++++++++++++++++++++++++++-------------- 1 file changed, 28 insertions(+), 14 deletions(-) diff --git a/sound/core/pcm_compat.c b/sound/core/pcm_compat.c index 3920bf0eebbf..eef94a15f50a 100644 --- a/sound/core/pcm_compat.c +++ b/sound/core/pcm_compat.c @@ -103,10 +103,24 @@ struct sndrv_pcm_sw_params32 { unsigned char reserved[64]; }; +/* recalcuate the boundary within 32bit */ +static snd_pcm_uframes_t recalculate_boundary(snd_pcm_runtime_t *runtime) +{ + snd_pcm_uframes_t boundary; + + if (! 
runtime->buffer_size) + return 0; + boundary = runtime->buffer_size; + while (boundary * 2 <= 0x7fffffffUL - runtime->buffer_size) + boundary *= 2; + return boundary; +} + static int snd_pcm_ioctl_sw_params_compat(snd_pcm_substream_t *substream, struct sndrv_pcm_sw_params32 __user *src) { snd_pcm_sw_params_t params; + snd_pcm_uframes_t boundary; int err; memset(&params, 0, sizeof(params)); @@ -120,10 +134,17 @@ static int snd_pcm_ioctl_sw_params_compat(snd_pcm_substream_t *substream, get_user(params.silence_threshold, &src->silence_threshold) || get_user(params.silence_size, &src->silence_size)) return -EFAULT; + /* + * Check silent_size parameter. Since we have 64bit boundary, + * silence_size must be compared with the 32bit boundary. + */ + boundary = recalculate_boundary(substream->runtime); + if (boundary && params.silence_size >= boundary) + params.silence_size = substream->runtime->boundary; err = snd_pcm_sw_params(substream, &params); if (err < 0) return err; - if (put_user(params.boundary, &src->boundary)) + if (put_user(boundary, &src->boundary)) return -EFAULT; return err; } @@ -199,16 +220,6 @@ static int snd_pcm_status_user_compat(snd_pcm_substream_t *substream, return err; } -/* recalcuate the boundary within 32bit */ -static void recalculate_boundary(snd_pcm_runtime_t *runtime) -{ - if (! runtime->buffer_size) - return; - runtime->boundary = runtime->buffer_size; - while (runtime->boundary * 2 <= 0x7fffffffUL - runtime->buffer_size) - runtime->boundary *= 2; -} - /* both for HW_PARAMS and HW_REFINE */ static int snd_pcm_ioctl_hw_params_compat(snd_pcm_substream_t *substream, int refine, @@ -242,7 +253,7 @@ static int snd_pcm_ioctl_hw_params_compat(snd_pcm_substream_t *substream, } if (! 
refine) - recalculate_boundary(runtime); + runtime->boundary = recalculate_boundary(runtime); error: kfree(data); return err; @@ -380,6 +391,7 @@ static int snd_pcm_ioctl_sync_ptr_compat(snd_pcm_substream_t *substream, u32 sflags; struct sndrv_pcm_mmap_control scontrol; struct sndrv_pcm_mmap_status sstatus; + snd_pcm_uframes_t boundary; int err; snd_assert(runtime, return -EINVAL); @@ -395,17 +407,19 @@ static int snd_pcm_ioctl_sync_ptr_compat(snd_pcm_substream_t *substream, } status = runtime->status; control = runtime->control; + boundary = recalculate_boundary(runtime); snd_pcm_stream_lock_irq(substream); + /* FIXME: we should consider the boundary for the sync from app */ if (!(sflags & SNDRV_PCM_SYNC_PTR_APPL)) control->appl_ptr = scontrol.appl_ptr; else - scontrol.appl_ptr = control->appl_ptr; + scontrol.appl_ptr = control->appl_ptr % boundary; if (!(sflags & SNDRV_PCM_SYNC_PTR_AVAIL_MIN)) control->avail_min = scontrol.avail_min; else scontrol.avail_min = control->avail_min; sstatus.state = status->state; - sstatus.hw_ptr = status->hw_ptr; + sstatus.hw_ptr = status->hw_ptr % boundary; sstatus.tstamp = status->tstamp; sstatus.suspended_state = status->suspended_state; snd_pcm_stream_unlock_irq(substream); From 16d3f140fc265c9b9c0f8975e0b36fe15912508f Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Mon, 15 Aug 2005 15:02:28 +0200 Subject: [PATCH 549/584] [ALSA] via82xx - Fix SPDIF sample rates VIA82xx driver Fixed the sample rates set in the fourth DXS channel with Non-VRA mode. 
Signed-off-by: Takashi Iwai --- sound/pci/via82xx.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/sound/pci/via82xx.c b/sound/pci/via82xx.c index 38b96eabea60..819f27b2762b 100644 --- a/sound/pci/via82xx.c +++ b/sound/pci/via82xx.c @@ -929,12 +929,12 @@ static int snd_via8233_playback_prepare(snd_pcm_substream_t *substream) if ((rate_changed = via_lock_rate(&chip->rates[0], ac97_rate)) < 0) return rate_changed; - if (rate_changed) { + if (rate_changed) snd_ac97_set_rate(chip->ac97, AC97_PCM_FRONT_DAC_RATE, chip->no_vra ? 48000 : runtime->rate); - snd_ac97_set_rate(chip->ac97, AC97_SPDIF, - chip->no_vra ? 48000 : runtime->rate); - } + if (chip->spdif_on && viadev->reg_offset == 0x30) + snd_ac97_set_rate(chip->ac97, AC97_SPDIF, runtime->rate); + if (runtime->rate == 48000) rbits = 0xfffff; else From 5a47fe3c5e8f394fb1578bbe8117f3dcc250b6fd Mon Sep 17 00:00:00 2001 From: Jaroslav Kysela Date: Mon, 15 Aug 2005 20:01:40 +0200 Subject: [PATCH 550/584] [ALSA] hda-patch-realtek - added pci subdevice ID for Acer TravelMate 8100 (3 stack model + digital out) HDA Codec driver Signed-off-by: Jaroslav Kysela --- sound/pci/hda/patch_realtek.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 9b8569900787..d19da2bae663 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -1524,6 +1524,7 @@ static struct hda_board_config alc880_cfg_tbl[] = { /* Back 3 jack plus 1 SPDIF out jack, front 2 jack */ { .modelname = "3stack-digout", .config = ALC880_3ST_DIG }, { .pci_subvendor = 0x8086, .pci_subdevice = 0xe308, .config = ALC880_3ST_DIG }, + { .pci_subvendor = 0x1025, .pci_subdevice = 0x0070, .config = ALC880_3ST_DIG }, /* Back 3 jack plus 1 SPDIF out jack, front 2 jack (Internal add Aux-In)*/ { .pci_subvendor = 0x8086, .pci_subdevice = 0xe305, .config = ALC880_3ST_DIG }, From 47123197c5522f4ae3dc5914e7832dd047f9ddc8 Mon Sep 17 00:00:00 2001 From: Jaroslav Kysela 
Date: Mon, 15 Aug 2005 20:53:07 +0200 Subject: [PATCH 551/584] [ALSA] hda-intel: Suspend/resume fixes for PCM devices HDA Intel driver - removed SNDRV_PCM_INFO_RESUME (the driver cannot do PCM resume at the time) - fixed chip->pcm_devs initialization Signed-off-by: Jaroslav Kysela --- sound/pci/hda/hda_intel.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/sound/pci/hda/hda_intel.c b/sound/pci/hda/hda_intel.c index 2b6bd3139b86..58e15b8896ac 100644 --- a/sound/pci/hda/hda_intel.c +++ b/sound/pci/hda/hda_intel.c @@ -900,8 +900,8 @@ static snd_pcm_hardware_t azx_pcm_hw = { .info = (SNDRV_PCM_INFO_MMAP | SNDRV_PCM_INFO_INTERLEAVED | SNDRV_PCM_INFO_BLOCK_TRANSFER | SNDRV_PCM_INFO_MMAP_VALID | - SNDRV_PCM_INFO_PAUSE | - SNDRV_PCM_INFO_RESUME), + SNDRV_PCM_INFO_PAUSE /*|*/ + /*SNDRV_PCM_INFO_RESUME*/), .formats = SNDRV_PCM_FMTBIT_S16_LE, .rates = SNDRV_PCM_RATE_48000, .rate_min = 48000, @@ -1050,6 +1050,7 @@ static int azx_pcm_trigger(snd_pcm_substream_t *substream, int cmd) azx_dev->running = 1; break; case SNDRV_PCM_TRIGGER_PAUSE_PUSH: + case SNDRV_PCM_TRIGGER_SUSPEND: case SNDRV_PCM_TRIGGER_STOP: azx_stream_stop(chip, azx_dev); azx_dev->running = 0; @@ -1059,6 +1060,7 @@ static int azx_pcm_trigger(snd_pcm_substream_t *substream, int cmd) } spin_unlock(&chip->reg_lock); if (cmd == SNDRV_PCM_TRIGGER_PAUSE_PUSH || + cmd == SNDRV_PCM_TRIGGER_SUSPEND || cmd == SNDRV_PCM_TRIGGER_STOP) { int timeout = 5000; while (azx_sd_readb(azx_dev, SD_CTL) & SD_CTL_DMA_START && --timeout) @@ -1137,6 +1139,7 @@ static int __devinit create_codec_pcm(azx_t *chip, struct hda_codec *codec, snd_dma_pci_data(chip->pci), 1024 * 64, 1024 * 128); chip->pcm[pcm_dev] = pcm; + chip->pcm_devs = pcm_dev + 1; return 0; } From a501dfa3a763451dedd583eb90a6c9e90d0e3a3c Mon Sep 17 00:00:00 2001 From: Jaroslav Kysela Date: Tue, 16 Aug 2005 11:09:05 +0200 Subject: [PATCH 552/584] [ALSA] Timer API - added SUSPEND/RESUME events PCM Midlevel,Timer Midlevel,ALSA Core - added 
SNDRV_TIMER_EVENT_SUSPEND / RESUME events - changed timer events from PAUSE / CONTINUE in PCM midlevel to SUSPEND / RESUME Signed-off-by: Jaroslav Kysela --- include/sound/asound.h | 6 +++++- sound/core/pcm_native.c | 4 ++-- sound/core/timer.c | 12 +++++++++--- 3 files changed, 16 insertions(+), 6 deletions(-) diff --git a/include/sound/asound.h b/include/sound/asound.h index 9974f83cca44..694b6e66efcb 100644 --- a/include/sound/asound.h +++ b/include/sound/asound.h @@ -560,7 +560,7 @@ enum { * Timer section - /dev/snd/timer */ -#define SNDRV_TIMER_VERSION SNDRV_PROTOCOL_VERSION(2, 0, 4) +#define SNDRV_TIMER_VERSION SNDRV_PROTOCOL_VERSION(2, 0, 5) enum sndrv_timer_class { SNDRV_TIMER_CLASS_NONE = -1, @@ -693,11 +693,15 @@ enum sndrv_timer_event { SNDRV_TIMER_EVENT_CONTINUE, /* val = resolution in ns */ SNDRV_TIMER_EVENT_PAUSE, /* val = 0 */ SNDRV_TIMER_EVENT_EARLY, /* val = 0, early event */ + SNDRV_TIMER_EVENT_SUSPEND, /* val = 0 */ + SNDRV_TIMER_EVENT_RESUME, /* val = 0 */ /* master timer events for slave timer instances */ SNDRV_TIMER_EVENT_MSTART = SNDRV_TIMER_EVENT_START + 10, SNDRV_TIMER_EVENT_MSTOP = SNDRV_TIMER_EVENT_STOP + 10, SNDRV_TIMER_EVENT_MCONTINUE = SNDRV_TIMER_EVENT_CONTINUE + 10, SNDRV_TIMER_EVENT_MPAUSE = SNDRV_TIMER_EVENT_PAUSE + 10, + SNDRV_TIMER_EVENT_MSUSPEND = SNDRV_TIMER_EVENT_SUSPEND + 10, + SNDRV_TIMER_EVENT_MRESUME = SNDRV_TIMER_EVENT_RESUME + 10, }; struct sndrv_timer_tread { diff --git a/sound/core/pcm_native.c b/sound/core/pcm_native.c index 5041be25d75b..03c17159dd8e 100644 --- a/sound/core/pcm_native.c +++ b/sound/core/pcm_native.c @@ -1025,7 +1025,7 @@ static void snd_pcm_post_suspend(snd_pcm_substream_t *substream, int state) snd_pcm_runtime_t *runtime = substream->runtime; snd_pcm_trigger_tstamp(substream); if (substream->timer) - snd_timer_notify(substream->timer, SNDRV_TIMER_EVENT_MPAUSE, &runtime->trigger_tstamp); + snd_timer_notify(substream->timer, SNDRV_TIMER_EVENT_MSUSPEND, &runtime->trigger_tstamp); 
runtime->status->suspended_state = runtime->status->state; runtime->status->state = SNDRV_PCM_STATE_SUSPENDED; snd_pcm_tick_set(substream, 0); @@ -1115,7 +1115,7 @@ static void snd_pcm_post_resume(snd_pcm_substream_t *substream, int state) snd_pcm_runtime_t *runtime = substream->runtime; snd_pcm_trigger_tstamp(substream); if (substream->timer) - snd_timer_notify(substream->timer, SNDRV_TIMER_EVENT_MCONTINUE, &runtime->trigger_tstamp); + snd_timer_notify(substream->timer, SNDRV_TIMER_EVENT_MRESUME, &runtime->trigger_tstamp); runtime->status->state = runtime->status->suspended_state; if (runtime->sleep_min) snd_pcm_tick_prepare(substream); diff --git a/sound/core/timer.c b/sound/core/timer.c index c3997e047214..56bd383a06c2 100644 --- a/sound/core/timer.c +++ b/sound/core/timer.c @@ -880,9 +880,11 @@ void snd_timer_notify(snd_timer_t *timer, enum sndrv_timer_event event, struct t struct list_head *p, *n; snd_runtime_check(timer->hw.flags & SNDRV_TIMER_HW_SLAVE, return); - snd_assert(event >= SNDRV_TIMER_EVENT_MSTART && event <= SNDRV_TIMER_EVENT_MPAUSE, return); + snd_assert(event >= SNDRV_TIMER_EVENT_MSTART && event <= SNDRV_TIMER_EVENT_MRESUME, return); spin_lock_irqsave(&timer->lock, flags); - if (event == SNDRV_TIMER_EVENT_MSTART || event == SNDRV_TIMER_EVENT_MCONTINUE) { + if (event == SNDRV_TIMER_EVENT_MSTART || + event == SNDRV_TIMER_EVENT_MCONTINUE || + event == SNDRV_TIMER_EVENT_MRESUME) { if (timer->hw.c_resolution) resolution = timer->hw.c_resolution(timer); else @@ -1555,10 +1557,14 @@ static int snd_timer_user_params(struct file *file, snd_timer_params_t __user *_ (1< Date: Tue, 16 Aug 2005 11:32:04 +0200 Subject: [PATCH 553/584] [ALSA] Timer API - SNDRV_TIMER_EVENT_RESUME - val is resolution in ns ALSA Core Signed-off-by: Jaroslav Kysela --- include/sound/asound.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/sound/asound.h b/include/sound/asound.h index 694b6e66efcb..8e552d627fa5 100644 --- a/include/sound/asound.h +++ 
b/include/sound/asound.h @@ -694,7 +694,7 @@ enum sndrv_timer_event { SNDRV_TIMER_EVENT_PAUSE, /* val = 0 */ SNDRV_TIMER_EVENT_EARLY, /* val = 0, early event */ SNDRV_TIMER_EVENT_SUSPEND, /* val = 0 */ - SNDRV_TIMER_EVENT_RESUME, /* val = 0 */ + SNDRV_TIMER_EVENT_RESUME, /* val = resolution in ns */ /* master timer events for slave timer instances */ SNDRV_TIMER_EVENT_MSTART = SNDRV_TIMER_EVENT_START + 10, SNDRV_TIMER_EVENT_MSTOP = SNDRV_TIMER_EVENT_STOP + 10, From 65d11d95515db3efb629202104cfc672476618b5 Mon Sep 17 00:00:00 2001 From: Jaroslav Kysela Date: Tue, 16 Aug 2005 13:05:43 +0200 Subject: [PATCH 554/584] [ALSA] ALSA timer - fixed compilation Timer Midlevel Signed-off-by: Jaroslav Kysela --- sound/core/timer.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sound/core/timer.c b/sound/core/timer.c index 56bd383a06c2..4104f6e292e9 100644 --- a/sound/core/timer.c +++ b/sound/core/timer.c @@ -1562,8 +1562,8 @@ static int snd_timer_user_params(struct file *file, snd_timer_params_t __user *_ (1< Date: Tue, 16 Aug 2005 16:52:24 +0200 Subject: [PATCH 555/584] [ALSA] intel8x0 - Fix PM Intel8x0 driver Fixed the PCM resume of intel8x0. Restores the requested register setting. 
Signed-off-by: Takashi Iwai --- sound/pci/intel8x0.c | 39 +++++++++++++++++++++++++++++++-------- 1 file changed, 31 insertions(+), 8 deletions(-) diff --git a/sound/pci/intel8x0.c b/sound/pci/intel8x0.c index d7af3e474432..390b6c8f9a10 100644 --- a/sound/pci/intel8x0.c +++ b/sound/pci/intel8x0.c @@ -389,6 +389,7 @@ typedef struct { struct ac97_pcm *pcm; int pcm_open_flag; unsigned int page_attr_changed: 1; + unsigned int suspended: 1; } ichdev_t; typedef struct _snd_intel8x0 intel8x0_t; @@ -862,12 +863,16 @@ static int snd_intel8x0_pcm_trigger(snd_pcm_substream_t *substream, int cmd) unsigned long port = ichdev->reg_offset; switch (cmd) { - case SNDRV_PCM_TRIGGER_START: case SNDRV_PCM_TRIGGER_RESUME: + ichdev->suspended = 0; + /* fallthru */ + case SNDRV_PCM_TRIGGER_START: val = ICH_IOCE | ICH_STARTBM; break; - case SNDRV_PCM_TRIGGER_STOP: case SNDRV_PCM_TRIGGER_SUSPEND: + ichdev->suspended = 1; + /* fallthru */ + case SNDRV_PCM_TRIGGER_STOP: val = 0; break; case SNDRV_PCM_TRIGGER_PAUSE_PUSH: @@ -899,9 +904,11 @@ static int snd_intel8x0_ali_trigger(snd_pcm_substream_t *substream, int cmd) val = igetdword(chip, ICHREG(ALI_DMACR)); switch (cmd) { + case SNDRV_PCM_TRIGGER_RESUME: + ichdev->suspended = 0; + /* fallthru */ case SNDRV_PCM_TRIGGER_START: case SNDRV_PCM_TRIGGER_PAUSE_RELEASE: - case SNDRV_PCM_TRIGGER_RESUME: if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK) { /* clear FIFO for synchronization of channels */ fifo = igetdword(chip, fiforeg[ichdev->ali_slot / 4]); @@ -913,9 +920,11 @@ static int snd_intel8x0_ali_trigger(snd_pcm_substream_t *substream, int cmd) val &= ~(1 << (ichdev->ali_slot + 16)); /* clear PAUSE flag */ iputdword(chip, ICHREG(ALI_DMACR), val | (1 << ichdev->ali_slot)); /* start DMA */ break; + case SNDRV_PCM_TRIGGER_SUSPEND: + ichdev->suspended = 1; + /* fallthru */ case SNDRV_PCM_TRIGGER_STOP: case SNDRV_PCM_TRIGGER_PAUSE_PUSH: - case SNDRV_PCM_TRIGGER_SUSPEND: iputdword(chip, ICHREG(ALI_DMACR), val | (1 << (ichdev->ali_slot + 16))); /* 
pause */ iputbyte(chip, port + ICH_REG_OFF_CR, 0); while (igetbyte(chip, port + ICH_REG_OFF_CR)) @@ -994,6 +1003,8 @@ static void snd_intel8x0_setup_pcm_out(intel8x0_t *chip, { unsigned int cnt; int dbl = runtime->rate > 48000; + + spin_lock_irq(&chip->reg_lock); switch (chip->device_type) { case DEVICE_ALI: cnt = igetdword(chip, ICHREG(ALI_SCR)); @@ -1037,6 +1048,7 @@ static void snd_intel8x0_setup_pcm_out(intel8x0_t *chip, iputdword(chip, ICHREG(GLOB_CNT), cnt); break; } + spin_unlock_irq(&chip->reg_lock); } static int snd_intel8x0_pcm_prepare(snd_pcm_substream_t * substream) @@ -1048,15 +1060,12 @@ static int snd_intel8x0_pcm_prepare(snd_pcm_substream_t * substream) ichdev->physbuf = runtime->dma_addr; ichdev->size = snd_pcm_lib_buffer_bytes(substream); ichdev->fragsize = snd_pcm_lib_period_bytes(substream); - spin_lock_irq(&chip->reg_lock); if (ichdev->ichd == ICHD_PCMOUT) { snd_intel8x0_setup_pcm_out(chip, runtime); - if (chip->device_type == DEVICE_INTEL_ICH4) { + if (chip->device_type == DEVICE_INTEL_ICH4) ichdev->pos_shift = (runtime->sample_bits > 16) ? 2 : 1; - } } snd_intel8x0_setup_periods(chip, ichdev); - spin_unlock_irq(&chip->reg_lock); return 0; } @@ -2424,6 +2433,20 @@ static int intel8x0_resume(snd_card_t *card) } } + /* resume status */ + for (i = 0; i < chip->bdbars_count; i++) { + ichdev_t *ichdev = &chip->ichd[i]; + unsigned long port = ichdev->reg_offset; + if (! ichdev->substream || ! 
ichdev->suspended) + continue; + if (ichdev->ichd == ICHD_PCMOUT) + snd_intel8x0_setup_pcm_out(chip, ichdev->substream->runtime); + iputdword(chip, port + ICH_REG_OFF_BDBAR, ichdev->bdbar_addr); + iputbyte(chip, port + ICH_REG_OFF_LVI, ichdev->lvi); + iputbyte(chip, port + ICH_REG_OFF_CIV, ichdev->civ); + iputbyte(chip, port + ichdev->roff_sr, ICH_FIFOE | ICH_BCIS | ICH_LVBCI); + } + return 0; } #endif /* CONFIG_PM */ From 1204de32d0df87892e56062042e25c775ca0e08c Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 16 Aug 2005 16:54:12 +0200 Subject: [PATCH 556/584] [ALSA] nm256 - Fix PM and irq handling NM256 driver - Fixed the PCM resume - restoring the rate setting - Fixed the handling of buggy irqs - Dynamically acquire/release irq handler to make the driver more robust to unknown irq storms (as OSS driver does). Signed-off-by: Takashi Iwai --- sound/pci/nm256/nm256.c | 93 +++++++++++++++++++++++++++++++---------- 1 file changed, 70 insertions(+), 23 deletions(-) diff --git a/sound/pci/nm256/nm256.c b/sound/pci/nm256/nm256.c index 7eb20b8f89f6..2bbeb10ff7c4 100644 --- a/sound/pci/nm256/nm256.c +++ b/sound/pci/nm256/nm256.c @@ -189,6 +189,7 @@ struct snd_nm256_stream { nm256_t *chip; snd_pcm_substream_t *substream; int running; + int suspended; u32 buf; /* offset from chip->buffer */ int bufsize; /* buffer size in bytes */ @@ -231,8 +232,10 @@ struct snd_nm256 { int mixer_status_mask; /* bit mask to test the mixer status */ int irq; + int irq_acks; irqreturn_t (*interrupt)(int, void *, struct pt_regs *); int badintrcount; /* counter to check bogus interrupts */ + struct semaphore irq_mutex; nm256_stream_t streams[2]; @@ -464,6 +467,37 @@ snd_nm256_set_format(nm256_t *chip, nm256_stream_t *s, snd_pcm_substream_t *subs } } +/* acquire interrupt */ +static int snd_nm256_acquire_irq(nm256_t *chip) +{ + down(&chip->irq_mutex); + if (chip->irq < 0) { + if (request_irq(chip->pci->irq, chip->interrupt, SA_INTERRUPT|SA_SHIRQ, + chip->card->driver, (void*)chip)) { + 
snd_printk("unable to grab IRQ %d\n", chip->pci->irq); + up(&chip->irq_mutex); + return -EBUSY; + } + chip->irq = chip->pci->irq; + } + chip->irq_acks++; + up(&chip->irq_mutex); + return 0; +} + +/* release interrupt */ +static void snd_nm256_release_irq(nm256_t *chip) +{ + down(&chip->irq_mutex); + if (chip->irq_acks > 0) + chip->irq_acks--; + if (chip->irq_acks == 0 && chip->irq >= 0) { + free_irq(chip->irq, (void*)chip); + chip->irq = -1; + } + up(&chip->irq_mutex); +} + /* * start / stop */ @@ -538,15 +572,19 @@ snd_nm256_playback_trigger(snd_pcm_substream_t *substream, int cmd) spin_lock(&chip->reg_lock); switch (cmd) { - case SNDRV_PCM_TRIGGER_START: case SNDRV_PCM_TRIGGER_RESUME: + s->suspended = 0; + /* fallthru */ + case SNDRV_PCM_TRIGGER_START: if (! s->running) { snd_nm256_playback_start(chip, s, substream); s->running = 1; } break; - case SNDRV_PCM_TRIGGER_STOP: case SNDRV_PCM_TRIGGER_SUSPEND: + s->suspended = 1; + /* fallthru */ + case SNDRV_PCM_TRIGGER_STOP: if (s->running) { snd_nm256_playback_stop(chip); s->running = 0; @@ -818,6 +856,8 @@ snd_nm256_playback_open(snd_pcm_substream_t *substream) { nm256_t *chip = snd_pcm_substream_chip(substream); + if (snd_nm256_acquire_irq(chip) < 0) + return -EBUSY; snd_nm256_setup_stream(chip, &chip->streams[SNDRV_PCM_STREAM_PLAYBACK], substream, &snd_nm256_playback); return 0; @@ -828,6 +868,8 @@ snd_nm256_capture_open(snd_pcm_substream_t *substream) { nm256_t *chip = snd_pcm_substream_chip(substream); + if (snd_nm256_acquire_irq(chip) < 0) + return -EBUSY; snd_nm256_setup_stream(chip, &chip->streams[SNDRV_PCM_STREAM_CAPTURE], substream, &snd_nm256_capture); return 0; @@ -839,6 +881,9 @@ snd_nm256_capture_open(snd_pcm_substream_t *substream) static int snd_nm256_playback_close(snd_pcm_substream_t *substream) { + nm256_t *chip = snd_pcm_substream_chip(substream); + + snd_nm256_release_irq(chip); return 0; } @@ -846,6 +891,9 @@ snd_nm256_playback_close(snd_pcm_substream_t *substream) static int 
snd_nm256_capture_close(snd_pcm_substream_t *substream) { + nm256_t *chip = snd_pcm_substream_chip(substream); + + snd_nm256_release_irq(chip); return 0; } @@ -915,18 +963,16 @@ snd_nm256_pcm(nm256_t *chip, int device) static void snd_nm256_init_chip(nm256_t *chip) { - spin_lock_irq(&chip->reg_lock); /* Reset everything. */ snd_nm256_writeb(chip, 0x0, 0x11); snd_nm256_writew(chip, 0x214, 0); /* stop sounds.. */ //snd_nm256_playback_stop(chip); //snd_nm256_capture_stop(chip); - spin_unlock_irq(&chip->reg_lock); } -static inline void +static irqreturn_t snd_nm256_intr_check(nm256_t *chip) { if (chip->badintrcount++ > 1000) { @@ -947,7 +993,9 @@ snd_nm256_intr_check(nm256_t *chip) if (chip->streams[SNDRV_PCM_STREAM_CAPTURE].running) snd_nm256_capture_stop(chip); chip->badintrcount = 0; + return IRQ_HANDLED; } + return IRQ_NONE; } /* @@ -969,10 +1017,8 @@ snd_nm256_interrupt(int irq, void *dev_id, struct pt_regs *dummy) status = snd_nm256_readw(chip, NM_INT_REG); /* Not ours. */ - if (status == 0) { - snd_nm256_intr_check(chip); - return IRQ_NONE; - } + if (status == 0) + return snd_nm256_intr_check(chip); chip->badintrcount = 0; @@ -1036,10 +1082,8 @@ snd_nm256_interrupt_zx(int irq, void *dev_id, struct pt_regs *dummy) status = snd_nm256_readl(chip, NM_INT_REG); /* Not ours. 
*/ - if (status == 0) { - snd_nm256_intr_check(chip); - return IRQ_NONE; - } + if (status == 0) + return snd_nm256_intr_check(chip); chip->badintrcount = 0; @@ -1192,7 +1236,7 @@ snd_nm256_mixer(nm256_t *chip) AC97_PC_BEEP, AC97_PHONE, AC97_MIC, AC97_LINE, AC97_CD, AC97_VIDEO, AC97_AUX, AC97_PCM, AC97_REC_SEL, AC97_REC_GAIN, AC97_GENERAL_PURPOSE, AC97_3D_CONTROL, - AC97_EXTENDED_ID, + /*AC97_EXTENDED_ID,*/ AC97_VENDOR_ID1, AC97_VENDOR_ID2, -1 }; @@ -1206,6 +1250,7 @@ snd_nm256_mixer(nm256_t *chip) for (i = 0; mixer_regs[i] >= 0; i++) set_bit(mixer_regs[i], ac97.reg_accessed); ac97.private_data = chip; + pbus->no_vra = 1; err = snd_ac97_mixer(pbus, &ac97, &chip->ac97); if (err < 0) return err; @@ -1281,6 +1326,7 @@ static int nm256_suspend(snd_card_t *card, pm_message_t state) static int nm256_resume(snd_card_t *card) { nm256_t *chip = card->pm_private_data; + int i; /* Perform a full reset on the hardware */ pci_enable_device(chip->pci); @@ -1289,6 +1335,15 @@ static int nm256_resume(snd_card_t *card) /* restore ac97 */ snd_ac97_resume(chip->ac97); + for (i = 0; i < 2; i++) { + nm256_stream_t *s = &chip->streams[i]; + if (s->substream && s->suspended) { + spin_lock_irq(&chip->reg_lock); + snd_nm256_set_format(chip, s, s->substream); + spin_unlock_irq(&chip->reg_lock); + } + } + return 0; } #endif /* CONFIG_PM */ @@ -1360,6 +1415,7 @@ snd_nm256_create(snd_card_t *card, struct pci_dev *pci, chip->use_cache = usecache; spin_lock_init(&chip->reg_lock); chip->irq = -1; + init_MUTEX(&chip->irq_mutex); chip->streams[SNDRV_PCM_STREAM_PLAYBACK].bufsize = play_bufsize; chip->streams[SNDRV_PCM_STREAM_CAPTURE].bufsize = capt_bufsize; @@ -1470,15 +1526,6 @@ snd_nm256_create(snd_card_t *card, struct pci_dev *pci, chip->coeff_buf[SNDRV_PCM_STREAM_CAPTURE] = addr; } - /* acquire interrupt */ - if (request_irq(pci->irq, chip->interrupt, SA_INTERRUPT|SA_SHIRQ, - card->driver, (void*)chip)) { - err = -EBUSY; - snd_printk("unable to grab IRQ %d\n", pci->irq); - goto __error; - } - 
chip->irq = pci->irq; - /* Fixed setting. */ chip->mixer_base = NM_MIXER_OFFSET; From e8da2fbc2dd1fb4d603442f7220d23a2192955fa Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 16 Aug 2005 16:55:13 +0200 Subject: [PATCH 557/584] [ALSA] hdspm - Fix module parameter description RME9652 driver Fix the module parameter description after proofreading. Signed-off-by: Takashi Iwai --- sound/pci/rme9652/hdspm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/pci/rme9652/hdspm.c b/sound/pci/rme9652/hdspm.c index d4a0c2c56cdb..5d786d113b25 100644 --- a/sound/pci/rme9652/hdspm.c +++ b/sound/pci/rme9652/hdspm.c @@ -65,7 +65,7 @@ module_param_array(enable, bool, NULL, 0444); MODULE_PARM_DESC(enable, "Enable/disable specific HDSPM soundcards."); module_param_array(precise_ptr, bool, NULL, 0444); -MODULE_PARM_DESC(precise_ptr, "Enable precise pointer, or disable."); +MODULE_PARM_DESC(precise_ptr, "Enable or disable precise pointer."); module_param_array(line_outs_monitor, bool, NULL, 0444); MODULE_PARM_DESC(line_outs_monitor, From 61be3ce0f2d9d80bc271e58c42cb9b021b3d48d8 Mon Sep 17 00:00:00 2001 From: Bjorge Dijkstra Date: Tue, 16 Aug 2005 17:05:05 +0200 Subject: [PATCH 558/584] [ALSA] ac97 - make ac97 codec device name unique AC97 Codec The patch fixes the bus_id conflict error when registering two codecs of the same type (ALSA bug#1334). 
Signed-off-by: Bjorge Dijkstra Signed-off-by: Takashi Iwai --- sound/pci/ac97/ac97_codec.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/pci/ac97/ac97_codec.c b/sound/pci/ac97/ac97_codec.c index 600e053dfd35..5501f4440c92 100644 --- a/sound/pci/ac97/ac97_codec.c +++ b/sound/pci/ac97/ac97_codec.c @@ -1829,7 +1829,7 @@ static int snd_ac97_dev_register(snd_device_t *device) ac97->dev.parent = ac97->bus->card->dev; ac97->dev.platform_data = ac97; ac97->dev.release = ac97_device_release; - strncpy(ac97->dev.bus_id, snd_ac97_get_short_name(ac97), BUS_ID_SIZE); + snprintf(ac97->dev.bus_id, BUS_ID_SIZE, "card%d-%d", ac97->bus->card->number, ac97->num); if ((err = device_register(&ac97->dev)) < 0) { snd_printk(KERN_ERR "Can't register ac97 bus\n"); ac97->dev.bus = NULL; From 41e4845c426024ec41724326397a36b3aa2cb6a3 Mon Sep 17 00:00:00 2001 From: Jaroslav Kysela Date: Thu, 18 Aug 2005 13:43:12 +0200 Subject: [PATCH 559/584] [ALSA] PCM resume cleanups Digigram VX core,CS4231 driver,ATIIXP driver,VIA82xx driver VIA82xx-modem driver,au88x0 driver,CS46xx driver,Trident driver This patch disables the SNDRV_PCM_INFO_RESUME flag for drivers which do not support the full resume. 
Signed-off-by: Jaroslav Kysela --- sound/drivers/vx/vx_pcm.c | 8 ++++---- sound/isa/cs423x/cs4231_lib.c | 2 ++ sound/pci/atiixp.c | 10 +++++++++- sound/pci/au88x0/au88x0_pcm.c | 6 +++--- sound/pci/cs46xx/cs46xx_lib.c | 8 ++++---- sound/pci/trident/trident_main.c | 10 +++++----- sound/pci/via82xx.c | 4 +++- sound/pci/via82xx_modem.c | 3 ++- 8 files changed, 32 insertions(+), 19 deletions(-) diff --git a/sound/drivers/vx/vx_pcm.c b/sound/drivers/vx/vx_pcm.c index af381b15fe5c..d4becf44e247 100644 --- a/sound/drivers/vx/vx_pcm.c +++ b/sound/drivers/vx/vx_pcm.c @@ -549,8 +549,8 @@ static int vx_stop_stream(vx_core_t *chip, vx_pipe_t *pipe) static snd_pcm_hardware_t vx_pcm_playback_hw = { .info = (SNDRV_PCM_INFO_MMAP | SNDRV_PCM_INFO_INTERLEAVED | - SNDRV_PCM_INFO_PAUSE | SNDRV_PCM_INFO_MMAP_VALID | - SNDRV_PCM_INFO_RESUME), + SNDRV_PCM_INFO_PAUSE | SNDRV_PCM_INFO_MMAP_VALID /*|*/ + /*SNDRV_PCM_INFO_RESUME*/), .formats = /*SNDRV_PCM_FMTBIT_U8 |*/ SNDRV_PCM_FMTBIT_S16_LE | SNDRV_PCM_FMTBIT_S24_3LE, .rates = SNDRV_PCM_RATE_CONTINUOUS | SNDRV_PCM_RATE_8000_48000, .rate_min = 5000, @@ -949,8 +949,8 @@ static snd_pcm_ops_t vx_pcm_playback_ops = { static snd_pcm_hardware_t vx_pcm_capture_hw = { .info = (SNDRV_PCM_INFO_MMAP | SNDRV_PCM_INFO_INTERLEAVED | - SNDRV_PCM_INFO_PAUSE | SNDRV_PCM_INFO_MMAP_VALID | - SNDRV_PCM_INFO_RESUME), + SNDRV_PCM_INFO_PAUSE | SNDRV_PCM_INFO_MMAP_VALID /*|*/ + /*SNDRV_PCM_INFO_RESUME*/), .formats = /*SNDRV_PCM_FMTBIT_U8 |*/ SNDRV_PCM_FMTBIT_S16_LE | SNDRV_PCM_FMTBIT_S24_3LE, .rates = SNDRV_PCM_RATE_CONTINUOUS | SNDRV_PCM_RATE_8000_48000, .rate_min = 5000, diff --git a/sound/isa/cs423x/cs4231_lib.c b/sound/isa/cs423x/cs4231_lib.c index 3e7a2a33a5ca..3199941edd9b 100644 --- a/sound/isa/cs423x/cs4231_lib.c +++ b/sound/isa/cs423x/cs4231_lib.c @@ -1346,6 +1346,8 @@ static void snd_cs4231_suspend(cs4231_t *chip) int reg; unsigned long flags; + if (chip->pcm) + snd_pcm_suspend_all(chip->pcm); spin_lock_irqsave(&chip->reg_lock, flags); for (reg = 0; reg < 
32; reg++) chip->image[reg] = snd_cs4231_in(chip, reg); diff --git a/sound/pci/atiixp.c b/sound/pci/atiixp.c index cafab4af5c57..904d17394e1c 100644 --- a/sound/pci/atiixp.c +++ b/sound/pci/atiixp.c @@ -248,6 +248,7 @@ struct snd_atiixp_dma { unsigned int period_bytes, periods; int opened; int running; + int suspended; int pcm_open_flag; int ac97_pcm_type; /* index # of ac97_pcm to access, -1 = not used */ unsigned int saved_curptr; @@ -699,12 +700,18 @@ static int snd_atiixp_pcm_trigger(snd_pcm_substream_t *substream, int cmd) spin_lock(&chip->reg_lock); switch (cmd) { case SNDRV_PCM_TRIGGER_START: + case SNDRV_PCM_TRIGGER_PAUSE_RELEASE: + case SNDRV_PCM_TRIGGER_RESUME: dma->ops->enable_transfer(chip, 1); dma->running = 1; + dma->suspended = 0; break; case SNDRV_PCM_TRIGGER_STOP: + case SNDRV_PCM_TRIGGER_PAUSE_PUSH: + case SNDRV_PCM_TRIGGER_SUSPEND: dma->ops->enable_transfer(chip, 0); dma->running = 0; + dma->suspended = cmd == SNDRV_PCM_TRIGGER_SUSPEND; break; default: err = -EINVAL; @@ -975,6 +982,7 @@ static snd_pcm_hardware_t snd_atiixp_pcm_hw = { .info = (SNDRV_PCM_INFO_MMAP | SNDRV_PCM_INFO_INTERLEAVED | SNDRV_PCM_INFO_BLOCK_TRANSFER | + SNDRV_PCM_INFO_PAUSE | SNDRV_PCM_INFO_RESUME | SNDRV_PCM_INFO_MMAP_VALID), .formats = SNDRV_PCM_FMTBIT_S16_LE | SNDRV_PCM_FMTBIT_S32_LE, @@ -1443,7 +1451,7 @@ static int snd_atiixp_resume(snd_card_t *card) for (i = 0; i < NUM_ATI_PCMDEVS; i++) if (chip->pcmdevs[i]) { atiixp_dma_t *dma = &chip->dmas[i]; - if (dma->substream && dma->running) { + if (dma->substream && dma->suspended) { dma->ops->enable_dma(chip, 1); writel((u32)dma->desc_buf.addr | ATI_REG_LINKPTR_EN, chip->remap_addr + dma->ops->llp_offset); diff --git a/sound/pci/au88x0/au88x0_pcm.c b/sound/pci/au88x0/au88x0_pcm.c index 04dcefd8b8ff..de95bd6d1ee1 100644 --- a/sound/pci/au88x0/au88x0_pcm.c +++ b/sound/pci/au88x0/au88x0_pcm.c @@ -33,7 +33,7 @@ /* hardware definition */ static snd_pcm_hardware_t snd_vortex_playback_hw_adb = { .info = - (SNDRV_PCM_INFO_MMAP | 
SNDRV_PCM_INFO_RESUME | + (SNDRV_PCM_INFO_MMAP | /* SNDRV_PCM_INFO_RESUME | */ SNDRV_PCM_INFO_PAUSE | SNDRV_PCM_INFO_INTERLEAVED | SNDRV_PCM_INFO_MMAP_VALID), .formats = @@ -58,7 +58,7 @@ static snd_pcm_hardware_t snd_vortex_playback_hw_adb = { #ifndef CHIP_AU8820 static snd_pcm_hardware_t snd_vortex_playback_hw_a3d = { .info = - (SNDRV_PCM_INFO_MMAP | SNDRV_PCM_INFO_RESUME | + (SNDRV_PCM_INFO_MMAP | /* SNDRV_PCM_INFO_RESUME | */ SNDRV_PCM_INFO_PAUSE | SNDRV_PCM_INFO_INTERLEAVED | SNDRV_PCM_INFO_MMAP_VALID), .formats = @@ -78,7 +78,7 @@ static snd_pcm_hardware_t snd_vortex_playback_hw_a3d = { #endif static snd_pcm_hardware_t snd_vortex_playback_hw_spdif = { .info = - (SNDRV_PCM_INFO_MMAP | SNDRV_PCM_INFO_RESUME | + (SNDRV_PCM_INFO_MMAP | /* SNDRV_PCM_INFO_RESUME | */ SNDRV_PCM_INFO_PAUSE | SNDRV_PCM_INFO_INTERLEAVED | SNDRV_PCM_INFO_MMAP_VALID), .formats = diff --git a/sound/pci/cs46xx/cs46xx_lib.c b/sound/pci/cs46xx/cs46xx_lib.c index 5eb9e0bb9100..4b052158ee33 100644 --- a/sound/pci/cs46xx/cs46xx_lib.c +++ b/sound/pci/cs46xx/cs46xx_lib.c @@ -1243,8 +1243,8 @@ static snd_pcm_hardware_t snd_cs46xx_playback = { .info = (SNDRV_PCM_INFO_MMAP | SNDRV_PCM_INFO_INTERLEAVED | - SNDRV_PCM_INFO_BLOCK_TRANSFER | - SNDRV_PCM_INFO_RESUME), + SNDRV_PCM_INFO_BLOCK_TRANSFER /*|*/ + /*SNDRV_PCM_INFO_RESUME*/), .formats = (SNDRV_PCM_FMTBIT_S8 | SNDRV_PCM_FMTBIT_U8 | SNDRV_PCM_FMTBIT_S16_LE | SNDRV_PCM_FMTBIT_S16_BE | SNDRV_PCM_FMTBIT_U16_LE | SNDRV_PCM_FMTBIT_U16_BE), @@ -1265,8 +1265,8 @@ static snd_pcm_hardware_t snd_cs46xx_capture = { .info = (SNDRV_PCM_INFO_MMAP | SNDRV_PCM_INFO_INTERLEAVED | - SNDRV_PCM_INFO_BLOCK_TRANSFER | - SNDRV_PCM_INFO_RESUME), + SNDRV_PCM_INFO_BLOCK_TRANSFER /*|*/ + /*SNDRV_PCM_INFO_RESUME*/), .formats = SNDRV_PCM_FMTBIT_S16_LE, .rates = SNDRV_PCM_RATE_CONTINUOUS | SNDRV_PCM_RATE_8000_48000, .rate_min = 5500, diff --git a/sound/pci/trident/trident_main.c b/sound/pci/trident/trident_main.c index 29d89bfba0a4..f30d9d947862 100644 --- 
a/sound/pci/trident/trident_main.c +++ b/sound/pci/trident/trident_main.c @@ -1689,7 +1689,7 @@ static snd_pcm_hardware_t snd_trident_playback = .info = (SNDRV_PCM_INFO_MMAP | SNDRV_PCM_INFO_INTERLEAVED | SNDRV_PCM_INFO_BLOCK_TRANSFER | SNDRV_PCM_INFO_MMAP_VALID | SNDRV_PCM_INFO_SYNC_START | - SNDRV_PCM_INFO_PAUSE | SNDRV_PCM_INFO_RESUME), + SNDRV_PCM_INFO_PAUSE /* | SNDRV_PCM_INFO_RESUME */), .formats = (SNDRV_PCM_FMTBIT_U8 | SNDRV_PCM_FMTBIT_S16_LE | SNDRV_PCM_FMTBIT_S8 | SNDRV_PCM_FMTBIT_U16_LE), .rates = SNDRV_PCM_RATE_CONTINUOUS | SNDRV_PCM_RATE_8000_48000, @@ -1714,7 +1714,7 @@ static snd_pcm_hardware_t snd_trident_capture = .info = (SNDRV_PCM_INFO_MMAP | SNDRV_PCM_INFO_INTERLEAVED | SNDRV_PCM_INFO_BLOCK_TRANSFER | SNDRV_PCM_INFO_MMAP_VALID | SNDRV_PCM_INFO_SYNC_START | - SNDRV_PCM_INFO_PAUSE | SNDRV_PCM_INFO_RESUME), + SNDRV_PCM_INFO_PAUSE /* | SNDRV_PCM_INFO_RESUME */), .formats = (SNDRV_PCM_FMTBIT_U8 | SNDRV_PCM_FMTBIT_S16_LE | SNDRV_PCM_FMTBIT_S8 | SNDRV_PCM_FMTBIT_U16_LE), .rates = SNDRV_PCM_RATE_CONTINUOUS | SNDRV_PCM_RATE_8000_48000, @@ -1739,7 +1739,7 @@ static snd_pcm_hardware_t snd_trident_foldback = .info = (SNDRV_PCM_INFO_MMAP | SNDRV_PCM_INFO_INTERLEAVED | SNDRV_PCM_INFO_BLOCK_TRANSFER | SNDRV_PCM_INFO_MMAP_VALID | SNDRV_PCM_INFO_SYNC_START | - SNDRV_PCM_INFO_PAUSE | SNDRV_PCM_INFO_RESUME), + SNDRV_PCM_INFO_PAUSE /* | SNDRV_PCM_INFO_RESUME */), .formats = SNDRV_PCM_FMTBIT_S16_LE, .rates = SNDRV_PCM_RATE_48000, .rate_min = 48000, @@ -1763,7 +1763,7 @@ static snd_pcm_hardware_t snd_trident_spdif = .info = (SNDRV_PCM_INFO_MMAP | SNDRV_PCM_INFO_INTERLEAVED | SNDRV_PCM_INFO_BLOCK_TRANSFER | SNDRV_PCM_INFO_MMAP_VALID | SNDRV_PCM_INFO_SYNC_START | - SNDRV_PCM_INFO_PAUSE | SNDRV_PCM_INFO_RESUME), + SNDRV_PCM_INFO_PAUSE /* | SNDRV_PCM_INFO_RESUME */), .formats = SNDRV_PCM_FMTBIT_S16_LE, .rates = (SNDRV_PCM_RATE_32000 | SNDRV_PCM_RATE_44100 | SNDRV_PCM_RATE_48000), @@ -1784,7 +1784,7 @@ static snd_pcm_hardware_t snd_trident_spdif_7018 = .info = 
(SNDRV_PCM_INFO_MMAP | SNDRV_PCM_INFO_INTERLEAVED | SNDRV_PCM_INFO_BLOCK_TRANSFER | SNDRV_PCM_INFO_MMAP_VALID | SNDRV_PCM_INFO_SYNC_START | - SNDRV_PCM_INFO_PAUSE | SNDRV_PCM_INFO_RESUME), + SNDRV_PCM_INFO_PAUSE /* | SNDRV_PCM_INFO_RESUME */), .formats = SNDRV_PCM_FMTBIT_S16_LE, .rates = SNDRV_PCM_RATE_48000, .rate_min = 48000, diff --git a/sound/pci/via82xx.c b/sound/pci/via82xx.c index 819f27b2762b..52ec785c550c 100644 --- a/sound/pci/via82xx.c +++ b/sound/pci/via82xx.c @@ -663,10 +663,12 @@ static int snd_via82xx_pcm_trigger(snd_pcm_substream_t * substream, int cmd) val = 0; switch (cmd) { case SNDRV_PCM_TRIGGER_START: + case SNDRV_PCM_TRIGGER_RESUME: val |= VIA_REG_CTRL_START; viadev->running = 1; break; case SNDRV_PCM_TRIGGER_STOP: + case SNDRV_PCM_TRIGGER_SUSPEND: val = VIA_REG_CTRL_TERMINATE; viadev->running = 0; break; @@ -1035,7 +1037,7 @@ static snd_pcm_hardware_t snd_via82xx_hw = .info = (SNDRV_PCM_INFO_MMAP | SNDRV_PCM_INFO_INTERLEAVED | SNDRV_PCM_INFO_BLOCK_TRANSFER | SNDRV_PCM_INFO_MMAP_VALID | - SNDRV_PCM_INFO_RESUME | + /* SNDRV_PCM_INFO_RESUME | */ SNDRV_PCM_INFO_PAUSE), .formats = SNDRV_PCM_FMTBIT_U8 | SNDRV_PCM_FMTBIT_S16_LE, .rates = SNDRV_PCM_RATE_48000, diff --git a/sound/pci/via82xx_modem.c b/sound/pci/via82xx_modem.c index 4a9779cc9733..5872d438a04a 100644 --- a/sound/pci/via82xx_modem.c +++ b/sound/pci/via82xx_modem.c @@ -521,6 +521,7 @@ static int snd_via82xx_pcm_trigger(snd_pcm_substream_t * substream, int cmd) switch (cmd) { case SNDRV_PCM_TRIGGER_START: + case SNDRV_PCM_TRIGGER_SUSPEND: val |= VIA_REG_CTRL_START; viadev->running = 1; break; @@ -697,7 +698,7 @@ static snd_pcm_hardware_t snd_via82xx_hw = .info = (SNDRV_PCM_INFO_MMAP | SNDRV_PCM_INFO_INTERLEAVED | SNDRV_PCM_INFO_BLOCK_TRANSFER | SNDRV_PCM_INFO_MMAP_VALID | - SNDRV_PCM_INFO_RESUME | + /* SNDRV_PCM_INFO_RESUME | */ SNDRV_PCM_INFO_PAUSE), .formats = SNDRV_PCM_FMTBIT_U8 | SNDRV_PCM_FMTBIT_S16_LE, .rates = SNDRV_PCM_RATE_8000 | SNDRV_PCM_RATE_16000 | SNDRV_PCM_RATE_KNOT, From 
352dbfd0e6eb7733bcca79c14f8711bb86b55e4e Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Fri, 19 Aug 2005 17:49:10 +0200 Subject: [PATCH 560/584] [ALSA] via82xx - Add DXS entry for MSI 7142 VIA82xx driver Added the DXS entry for MSI 7142 (K8MM-V). Signed-off-by: Takashi Iwai --- sound/pci/via82xx.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/via82xx.c b/sound/pci/via82xx.c index 52ec785c550c..56c6e52d7264 100644 --- a/sound/pci/via82xx.c +++ b/sound/pci/via82xx.c @@ -2171,6 +2171,7 @@ static int __devinit check_dxs_list(struct pci_dev *pci) { .subvendor = 0x1297, .subdevice = 0xc160, .action = VIA_DXS_ENABLE }, /* Shuttle SK41G */ { .subvendor = 0x1458, .subdevice = 0xa002, .action = VIA_DXS_ENABLE }, /* Gigabyte GA-7VAXP */ { .subvendor = 0x1462, .subdevice = 0x0080, .action = VIA_DXS_SRC }, /* MSI K8T Neo-FIS2R */ + { .subvendor = 0x1462, .subdevice = 0x0430, .action = VIA_DXS_SRC }, /* MSI 7142 (K8MM-V) */ { .subvendor = 0x1462, .subdevice = 0x3800, .action = VIA_DXS_ENABLE }, /* MSI KT266 */ { .subvendor = 0x1462, .subdevice = 0x5901, .action = VIA_DXS_NO_VRA }, /* MSI KT6 Delta-SR */ { .subvendor = 0x1462, .subdevice = 0x7023, .action = VIA_DXS_NO_VRA }, /* MSI K8T Neo2-FI */ From 34ad73b9c5b5b25b795136c23f65b9a3ac21130b Mon Sep 17 00:00:00 2001 From: Jaroslav Kysela Date: Mon, 22 Aug 2005 11:30:37 +0200 Subject: [PATCH 561/584] [ALSA] ac97_bus - replace with for consistency AC97 Codec Signed-off-by: Jaroslav Kysela --- sound/pci/ac97/ac97_bus.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/pci/ac97/ac97_bus.c b/sound/pci/ac97/ac97_bus.c index 227f8b9f67ce..013a919c2544 100644 --- a/sound/pci/ac97/ac97_bus.c +++ b/sound/pci/ac97/ac97_bus.c @@ -11,7 +11,7 @@ * (at your option) any later version. 
*/ -#include +#include #include #include #include From 5049c35b1389097a1c0c7686519928eaaf31e40b Mon Sep 17 00:00:00 2001 From: Jaroslav Kysela Date: Mon, 22 Aug 2005 12:19:14 +0200 Subject: [PATCH 562/584] [ALSA] ac97_bus - revert last change and do it only in the ALSA tree AC97 Codec Signed-off-by: Jaroslav Kysela --- sound/pci/ac97/ac97_bus.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/pci/ac97/ac97_bus.c b/sound/pci/ac97/ac97_bus.c index 013a919c2544..227f8b9f67ce 100644 --- a/sound/pci/ac97/ac97_bus.c +++ b/sound/pci/ac97/ac97_bus.c @@ -11,7 +11,7 @@ * (at your option) any later version. */ -#include +#include #include #include #include From 66d10647feb32178e1204d5b5c7126b1731efe9c Mon Sep 17 00:00:00 2001 From: Sergey Vlasov Date: Mon, 22 Aug 2005 13:43:39 +0200 Subject: [PATCH 563/584] [ALSA] HP nx6110 quirks Intel8x0 driver,AC97 Codec The HP nx6110 laptop needs to have Headphone Jack Sense enabled so that the internal speakers will be turned off when headphones are plugged in. Also ac97_quirk=hp_only is needed to make a single Master volume in the mixer instead of separate volumes for internal speakers and headphones, which just confuses the user. 
Signed-off-by: Sergey Vlasov Signed-off-by: Takashi Iwai --- sound/pci/ac97/ac97_patch.c | 1 + sound/pci/intel8x0.c | 6 ++++++ 2 files changed, 7 insertions(+) diff --git a/sound/pci/ac97/ac97_patch.c b/sound/pci/ac97/ac97_patch.c index 616504ef3702..b584172c1104 100644 --- a/sound/pci/ac97/ac97_patch.c +++ b/sound/pci/ac97/ac97_patch.c @@ -1629,6 +1629,7 @@ static void check_ad1981_hp_jack_sense(ac97_t *ac97) u32 subid = ((u32)ac97->subsystem_vendor << 16) | ac97->subsystem_device; switch (subid) { case 0x103c0890: /* HP nc6000 */ + case 0x103c099c: /* HP nx6110 */ case 0x103c006d: /* HP nx9105 */ case 0x17340088: /* FSC Scenic-W */ /* enable headphone jack sense */ diff --git a/sound/pci/intel8x0.c b/sound/pci/intel8x0.c index 390b6c8f9a10..e38984dc4d95 100644 --- a/sound/pci/intel8x0.c +++ b/sound/pci/intel8x0.c @@ -1824,6 +1824,12 @@ static struct ac97_quirk ac97_quirks[] __devinitdata = { .name = "HP nc6000", .type = AC97_TUNE_MUTE_LED }, + { + .subvendor = 0x103c, + .subdevice = 0x099c, + .name = "HP nx6110", /* AD1981B */ + .type = AC97_TUNE_HP_ONLY + }, { .subvendor = 0x103c, .subdevice = 0x129d, From 9d8f53f2bba3c2c06e1e78126222aecf91f8ecdd Mon Sep 17 00:00:00 2001 From: Nicolas Graziano Date: Mon, 22 Aug 2005 13:47:16 +0200 Subject: [PATCH 564/584] [ALSA] hda-intel - correct a bug in detection of rate supported HDA Codec driver The insertion of the rate 9600 causes a shift in the detection of supported rates, so put this rate at the end of the list.
Signed-off-by: Nicolas Graziano Signed-off-by: Takashi Iwai --- sound/pci/hda/hda_codec.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/sound/pci/hda/hda_codec.c b/sound/pci/hda/hda_codec.c index 6bfb081d12dd..e067a14a2d9e 100644 --- a/sound/pci/hda/hda_codec.c +++ b/sound/pci/hda/hda_codec.c @@ -1165,8 +1165,9 @@ int snd_hda_build_controls(struct hda_bus *bus) */ static unsigned int rate_bits[][3] = { /* rate in Hz, ALSA rate bitmask, HDA format value */ + + /* autodetected value used in snd_hda_query_supported_pcm */ { 8000, SNDRV_PCM_RATE_8000, 0x0500 }, /* 1/6 x 48 */ - { 9600, SNDRV_PCM_RATE_KNOT, 0x0400 }, /* 1/5 x 48 */ { 11025, SNDRV_PCM_RATE_11025, 0x4300 }, /* 1/4 x 44 */ { 16000, SNDRV_PCM_RATE_16000, 0x0200 }, /* 1/3 x 48 */ { 22050, SNDRV_PCM_RATE_22050, 0x4100 }, /* 1/2 x 44 */ @@ -1177,6 +1178,9 @@ static unsigned int rate_bits[][3] = { { 96000, SNDRV_PCM_RATE_96000, 0x0800 }, /* 2 x 48 */ { 176400, SNDRV_PCM_RATE_176400, 0x5800 },/* 4 x 44 */ { 192000, SNDRV_PCM_RATE_192000, 0x1800 }, /* 4 x 48 */ + + /* not autodetected value */ + { 9600, SNDRV_PCM_RATE_KNOT, 0x0400 }, /* 1/5 x 48 */ { 0 } }; From befdf316eaba02ed52284fb78a8027ff35c6a736 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Mon, 22 Aug 2005 13:57:55 +0200 Subject: [PATCH 565/584] [ALSA] hda-codec - Code clean up HDA Codec driver Use struct instead of array to improve the readability of hda_codec.c. 
Signed-off-by: Takashi Iwai --- sound/pci/hda/hda_codec.c | 29 ++++++++++++++++++----------- 1 file changed, 18 insertions(+), 11 deletions(-) diff --git a/sound/pci/hda/hda_codec.c b/sound/pci/hda/hda_codec.c index e067a14a2d9e..20f7762f7144 100644 --- a/sound/pci/hda/hda_codec.c +++ b/sound/pci/hda/hda_codec.c @@ -1163,7 +1163,13 @@ int snd_hda_build_controls(struct hda_bus *bus) /* * stream formats */ -static unsigned int rate_bits[][3] = { +struct hda_rate_tbl { + unsigned int hz; + unsigned int alsa_bits; + unsigned int hda_fmt; +}; + +static struct hda_rate_tbl rate_bits[] = { /* rate in Hz, ALSA rate bitmask, HDA format value */ /* autodetected value used in snd_hda_query_supported_pcm */ @@ -1181,7 +1187,8 @@ static unsigned int rate_bits[][3] = { /* not autodetected value */ { 9600, SNDRV_PCM_RATE_KNOT, 0x0400 }, /* 1/5 x 48 */ - { 0 } + + { 0 } /* terminator */ }; /** @@ -1203,12 +1210,12 @@ unsigned int snd_hda_calc_stream_format(unsigned int rate, int i; unsigned int val = 0; - for (i = 0; rate_bits[i][0]; i++) - if (rate_bits[i][0] == rate) { - val = rate_bits[i][2]; + for (i = 0; rate_bits[i].hz; i++) + if (rate_bits[i].hz == rate) { + val = rate_bits[i].hda_fmt; break; } - if (! rate_bits[i][0]) { + if (! rate_bits[i].hz) { snd_printdd("invalid rate %d\n", rate); return 0; } @@ -1271,9 +1278,9 @@ int snd_hda_query_supported_pcm(struct hda_codec *codec, hda_nid_t nid, if (ratesp) { u32 rates = 0; - for (i = 0; rate_bits[i][0]; i++) { + for (i = 0; rate_bits[i].hz; i++) { if (val & (1 << i)) - rates |= rate_bits[i][1]; + rates |= rate_bits[i].alsa_bits; } *ratesp = rates; } @@ -1365,13 +1372,13 @@ int snd_hda_is_supported_format(struct hda_codec *codec, hda_nid_t nid, } rate = format & 0xff00; - for (i = 0; rate_bits[i][0]; i++) - if (rate_bits[i][2] == rate) { + for (i = 0; rate_bits[i].hz; i++) + if (rate_bits[i].hda_fmt == rate) { if (val & (1 << i)) break; return 0; } - if (! rate_bits[i][0]) + if (! 
rate_bits[i].hz) return 0; stream = snd_hda_param_read(codec, nid, AC_PAR_STREAM); From e0be4d32bdae5cebc4e6d9dc65886e279aa69d08 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 23 Aug 2005 11:11:03 +0200 Subject: [PATCH 566/584] [ALSA] Fix compilation without CONFIG_PROC_FS Memalloc module Fix an error when built without CONFIG_PROC_FS. Signed-off-by: Takashi Iwai --- sound/core/memalloc.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/sound/core/memalloc.c b/sound/core/memalloc.c index 371215cd9e8f..39a54a415528 100644 --- a/sound/core/memalloc.c +++ b/sound/core/memalloc.c @@ -655,8 +655,7 @@ static int __init snd_mem_init(void) static void __exit snd_mem_exit(void) { - if (snd_mem_proc) - remove_proc_entry(SND_MEM_PROC_FILE, NULL); + remove_proc_entry(SND_MEM_PROC_FILE, NULL); free_all_reserved_pages(); if (snd_allocated_pages > 0) printk(KERN_ERR "snd-malloc: Memory leak? pages not freed = %li\n", snd_allocated_pages); From 328ac7da6f49f2c8b8af349fef525ad79b4a3987 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 23 Aug 2005 15:50:41 +0200 Subject: [PATCH 567/584] [ALSA] vxpocket - Add missing event callback Digigram VX Pocket driver Added the missing event callback. 
Signed-off-by: Takashi Iwai --- sound/pcmcia/vx/vxpocket.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/sound/pcmcia/vx/vxpocket.c b/sound/pcmcia/vx/vxpocket.c index 3a82161d3b24..fcb952f94ef8 100644 --- a/sound/pcmcia/vx/vxpocket.c +++ b/sound/pcmcia/vx/vxpocket.c @@ -376,7 +376,7 @@ static int vxpocket_event(event_t event, int priority, event_callback_args_t *ar /* */ -static dev_link_t *vxp_attach(void) +static dev_link_t *vxpocket_attach(void) { snd_card_t *card; struct snd_vxpocket *vxp; @@ -417,7 +417,7 @@ static dev_link_t *vxp_attach(void) return &vxp->link; } -static void vxp_detach(dev_link_t *link) +static void vxpocket_detach(dev_link_t *link) { struct snd_vxpocket *vxp; vx_core_t *chip; @@ -458,8 +458,9 @@ static struct pcmcia_driver vxp_cs_driver = { .drv = { .name = "snd-vxpocket", }, - .attach = vxp_attach, - .detach = vxp_detach, + .attach = vxpocket_attach, + .detach = vxpocket_detach, + .event = vxpocket_event, .id_table = vxp_ids, }; From adf111e6ff1674b81cae3ff7cdd5a5d1edf003f0 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 23 Aug 2005 19:01:38 +0200 Subject: [PATCH 568/584] [ALSA] vxpocket - Fix wrong index assignment Digigram VX Pocket driver Fixed the wrong index number assignment. Signed-off-by: Takashi Iwai --- sound/pcmcia/vx/vxpocket.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/pcmcia/vx/vxpocket.c b/sound/pcmcia/vx/vxpocket.c index fcb952f94ef8..de5bb9c26b77 100644 --- a/sound/pcmcia/vx/vxpocket.c +++ b/sound/pcmcia/vx/vxpocket.c @@ -407,7 +407,7 @@ static dev_link_t *vxpocket_attach(void) return NULL; } - vxp->index = index[i]; + vxp->index = i; card_alloc |= 1 << i; /* Chain drivers */ From 5ba094dbf14828858cdff27c8201df33b7a0bc96 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 23 Aug 2005 19:27:31 +0200 Subject: [PATCH 569/584] [ALSA] vxpocket - Register device pointer Digigram VX Pocket driver Add snd_card_set_dev() to register the device pointer. 
Signed-off-by: Takashi Iwai --- sound/pcmcia/vx/vxpocket.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pcmcia/vx/vxpocket.c b/sound/pcmcia/vx/vxpocket.c index de5bb9c26b77..1e8f16b4c073 100644 --- a/sound/pcmcia/vx/vxpocket.c +++ b/sound/pcmcia/vx/vxpocket.c @@ -297,6 +297,7 @@ static void vxpocket_config(dev_link_t *link) CS_CHECK(RequestConfiguration, pcmcia_request_configuration(link->handle, &link->conf)); chip->dev = &handle_to_dev(link->handle); + snd_card_set_dev(chip->card, chip->dev); if (snd_vxpocket_assign_resources(chip, link->io.BasePort1, link->irq.AssignedIRQ) < 0) goto failed; From 3c10a9d9f8c76fd87c92a14c201ae211d0b81288 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 23 Aug 2005 20:02:27 +0200 Subject: [PATCH 570/584] [ALSA] hda-codec - Add beep support for Uniwill HDA Codec driver Added the missing beep support for Uniwill laptop (ALSA bug#1358). Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index d19da2bae663..eeb900ab79af 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -687,6 +687,12 @@ static snd_kcontrol_new_t alc880_asus_w1v_mixer[] = { { } /* end */ }; +/* additional mixers to alc880_asus_mixer */ +static snd_kcontrol_new_t alc880_pcbeep_mixer[] = { + HDA_CODEC_VOLUME("PC Speaker Playback Volume", 0x0b, 0x05, HDA_INPUT), + HDA_CODEC_MUTE("PC Speaker Playback Switch", 0x0b, 0x05, HDA_INPUT), + { } /* end */ +}; /* * build control elements @@ -1735,7 +1741,7 @@ static struct alc_config_preset alc880_presets[] = { .input_mux = &alc880_capture_source, }, [ALC880_UNIWILL_DIG] = { - .mixers = { alc880_asus_mixer }, + .mixers = { alc880_asus_mixer, alc880_pcbeep_mixer }, .init_verbs = { alc880_volume_init_verbs, alc880_pin_asus_init_verbs }, .num_dacs = ARRAY_SIZE(alc880_asus_dac_nids), .dac_nids = alc880_asus_dac_nids, From 
07e4ca50a5f82aa6eab52e348059579b250c63db Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Wed, 24 Aug 2005 14:14:57 +0200 Subject: [PATCH 571/584] [ALSA] hda-intel - Fix ULI M5461 support HDA Intel driver Fix and clean up for the support of ULI M5461 - set CORB/RIRB sizes explicitly - add workarounds for ULI on ia32 - max number of streams depends on the chip type now - increase the size of BDL Signed-off-by: Takashi Iwai --- sound/pci/hda/hda_intel.c | 150 +++++++++++++++++++++++++++++--------- 1 file changed, 117 insertions(+), 33 deletions(-) diff --git a/sound/pci/hda/hda_intel.c b/sound/pci/hda/hda_intel.c index 58e15b8896ac..15107df1f490 100644 --- a/sound/pci/hda/hda_intel.c +++ b/sound/pci/hda/hda_intel.c @@ -72,7 +72,8 @@ MODULE_SUPPORTED_DEVICE("{{Intel, ICH6}," "{ATI, SB450}," "{VIA, VT8251}," "{VIA, VT8237A}," - "{SiS, SIS966}}"); + "{SiS, SIS966}," + "{ULI, M5461}}"); MODULE_DESCRIPTION("Intel HDA driver"); #define SFX "hda-intel: " @@ -142,9 +143,24 @@ enum { SDI0, SDI1, SDI2, SDI3, SDO0, SDO1, SDO2, SDO3 }; */ /* max number of SDs */ -#define MAX_ICH6_DEV 8 +/* ICH, ATI and VIA have 4 playback and 4 capture */ +#define ICH6_CAPTURE_INDEX 0 +#define ICH6_NUM_CAPTURE 4 +#define ICH6_PLAYBACK_INDEX 4 +#define ICH6_NUM_PLAYBACK 4 + +/* ULI has 6 playback and 5 capture */ +#define ULI_CAPTURE_INDEX 0 +#define ULI_NUM_CAPTURE 5 +#define ULI_PLAYBACK_INDEX 5 +#define ULI_NUM_PLAYBACK 6 + +/* this number is statically defined for simplicity */ +#define MAX_AZX_DEV 16 + /* max number of fragments - we may use more if allocating more pages for BDL */ -#define AZX_MAX_FRAG (PAGE_SIZE / (MAX_ICH6_DEV * 16)) +#define BDL_SIZE PAGE_ALIGN(8192) +#define AZX_MAX_FRAG (BDL_SIZE / (MAX_AZX_DEV * 16)) /* max buffer size - no h/w limit, you can increase as you like */ #define AZX_MAX_BUF_SIZE (1024*1024*1024) /* max number of PCM devics per card */ @@ -201,7 +217,6 @@ enum { }; /* Defines for ATI HD Audio support in SB450 south bridge */ -#define 
ATI_SB450_HDAUDIO_PCI_DEVICE_ID 0x437b #define ATI_SB450_HDAUDIO_MISC_CNTR2_ADDR 0x42 #define ATI_SB450_HDAUDIO_ENABLE_SNOOP 0x02 @@ -259,6 +274,14 @@ struct snd_azx { snd_card_t *card; struct pci_dev *pci; + /* chip type specific */ + int driver_type; + int playback_streams; + int playback_index_offset; + int capture_streams; + int capture_index_offset; + int num_streams; + /* pci resources */ unsigned long addr; void __iomem *remap_addr; @@ -268,8 +291,8 @@ struct snd_azx { spinlock_t reg_lock; struct semaphore open_mutex; - /* streams */ - azx_dev_t azx_dev[MAX_ICH6_DEV]; + /* streams (x num_streams) */ + azx_dev_t *azx_dev; /* PCM */ unsigned int pcm_devs; @@ -293,6 +316,23 @@ struct snd_azx { unsigned int initialized: 1; }; +/* driver types */ +enum { + AZX_DRIVER_ICH, + AZX_DRIVER_ATI, + AZX_DRIVER_VIA, + AZX_DRIVER_SIS, + AZX_DRIVER_ULI, +}; + +static char *driver_short_names[] __devinitdata = { + [AZX_DRIVER_ICH] = "HDA Intel", + [AZX_DRIVER_ATI] = "HDA ATI SB", + [AZX_DRIVER_VIA] = "HDA VIA VT82xx", + [AZX_DRIVER_SIS] = "HDA SIS966", + [AZX_DRIVER_ULI] = "HDA ULI M5461" +}; + /* * macros for easy use */ @@ -361,6 +401,8 @@ static void azx_init_cmd_io(azx_t *chip) azx_writel(chip, CORBLBASE, (u32)chip->corb.addr); azx_writel(chip, CORBUBASE, upper_32bit(chip->corb.addr)); + /* set the corb size to 256 entries (ULI requires explicitly) */ + azx_writeb(chip, CORBSIZE, 0x02); /* set the corb write pointer to 0 */ azx_writew(chip, CORBWP, 0); /* reset the corb hw read pointer */ @@ -374,6 +416,8 @@ static void azx_init_cmd_io(azx_t *chip) azx_writel(chip, RIRBLBASE, (u32)chip->rirb.addr); azx_writel(chip, RIRBUBASE, upper_32bit(chip->rirb.addr)); + /* set the rirb size to 256 entries (ULI requires explicitly) */ + azx_writeb(chip, RIRBSIZE, 0x02); /* reset the rirb hw write pointer */ azx_writew(chip, RIRBWP, ICH6_RBRWP_CLR); /* set N=1, get RIRB response interrupt for new entry */ @@ -597,7 +641,7 @@ static void azx_int_disable(azx_t *chip) int i; /* disable 
interrupts in stream descriptor */ - for (i = 0; i < MAX_ICH6_DEV; i++) { + for (i = 0; i < chip->num_streams; i++) { azx_dev_t *azx_dev = &chip->azx_dev[i]; azx_sd_writeb(azx_dev, SD_CTL, azx_sd_readb(azx_dev, SD_CTL) & ~SD_INT_MASK); @@ -617,7 +661,7 @@ static void azx_int_clear(azx_t *chip) int i; /* clear stream status */ - for (i = 0; i < MAX_ICH6_DEV; i++) { + for (i = 0; i < chip->num_streams; i++) { azx_dev_t *azx_dev = &chip->azx_dev[i]; azx_sd_writeb(azx_dev, SD_STS, SD_INT_MASK); } @@ -687,8 +731,7 @@ static void azx_init_chip(azx_t *chip) } /* For ATI SB450 azalia HD audio, we need to enable snoop */ - if (chip->pci->vendor == PCI_VENDOR_ID_ATI && - chip->pci->device == ATI_SB450_HDAUDIO_PCI_DEVICE_ID) { + if (chip->driver_type == AZX_DRIVER_ATI) { pci_read_config_byte(chip->pci, ATI_SB450_HDAUDIO_MISC_CNTR2_ADDR, &ati_misc_cntl2); pci_write_config_byte(chip->pci, ATI_SB450_HDAUDIO_MISC_CNTR2_ADDR, @@ -715,7 +758,7 @@ static irqreturn_t azx_interrupt(int irq, void* dev_id, struct pt_regs *regs) return IRQ_NONE; } - for (i = 0; i < MAX_ICH6_DEV; i++) { + for (i = 0; i < chip->num_streams; i++) { azx_dev = &chip->azx_dev[i]; if (status & azx_dev->sd_int_sta_mask) { azx_sd_writeb(azx_dev, SD_STS, SD_INT_MASK); @@ -880,9 +923,15 @@ static int __devinit azx_codec_create(azx_t *chip, const char *model) /* assign a stream for the PCM */ static inline azx_dev_t *azx_assign_device(azx_t *chip, int stream) { - int dev, i; - dev = stream == SNDRV_PCM_STREAM_PLAYBACK ? 4 : 0; - for (i = 0; i < 4; i++, dev++) + int dev, i, nums; + if (stream == SNDRV_PCM_STREAM_PLAYBACK) { + dev = chip->playback_index_offset; + nums = chip->playback_streams; + } else { + dev = chip->capture_index_offset; + nums = chip->capture_streams; + } + for (i = 0; i < nums; i++, dev++) if (! 
chip->azx_dev[dev].opened) { chip->azx_dev[dev].opened = 1; return &chip->azx_dev[dev]; @@ -1190,7 +1239,7 @@ static int __devinit azx_init_stream(azx_t *chip) /* initialize each stream (aka device) * assign the starting bdl address to each stream (device) and initialize */ - for (i = 0; i < MAX_ICH6_DEV; i++) { + for (i = 0; i < chip->num_streams; i++) { unsigned int off = sizeof(u32) * (i * AZX_MAX_FRAG * 4); azx_dev_t *azx_dev = &chip->azx_dev[i]; azx_dev->bdl = (u32 *)(chip->bdl.area + off); @@ -1249,7 +1298,7 @@ static int azx_free(azx_t *chip) if (chip->initialized) { int i; - for (i = 0; i < MAX_ICH6_DEV; i++) + for (i = 0; i < chip->num_streams; i++) azx_stream_stop(chip, &chip->azx_dev[i]); /* disable interrupts */ @@ -1265,10 +1314,10 @@ static int azx_free(azx_t *chip) /* wait a little for interrupts to finish */ msleep(1); - - iounmap(chip->remap_addr); } + if (chip->remap_addr) + iounmap(chip->remap_addr); if (chip->irq >= 0) free_irq(chip->irq, (void*)chip); @@ -1280,6 +1329,7 @@ static int azx_free(azx_t *chip) snd_dma_free_pages(&chip->posbuf); pci_release_regions(chip->pci); pci_disable_device(chip->pci); + kfree(chip->azx_dev); kfree(chip); return 0; @@ -1294,7 +1344,8 @@ static int azx_dev_free(snd_device_t *device) * constructor */ static int __devinit azx_create(snd_card_t *card, struct pci_dev *pci, - int posfix, azx_t **rchip) + int posfix, int driver_type, + azx_t **rchip) { azx_t *chip; int err = 0; @@ -1320,9 +1371,20 @@ static int __devinit azx_create(snd_card_t *card, struct pci_dev *pci, chip->card = card; chip->pci = pci; chip->irq = -1; + chip->driver_type = driver_type; chip->position_fix = posfix; +#if BITS_PER_LONG != 64 + /* Fix up base address on ULI M5461 */ + if (chip->driver_type == AZX_DRIVER_ULI) { + u16 tmp3; + pci_read_config_word(pci, 0x40, &tmp3); + pci_write_config_word(pci, 0x40, tmp3 | 0x10); + pci_write_config_dword(pci, PCI_BASE_ADDRESS_1, 0); + } +#endif + if ((err = pci_request_regions(pci, "ICH HD audio")) < 0) { 
kfree(chip); pci_disable_device(pci); @@ -1348,16 +1410,37 @@ static int __devinit azx_create(snd_card_t *card, struct pci_dev *pci, pci_set_master(pci); synchronize_irq(chip->irq); + switch (chip->driver_type) { + case AZX_DRIVER_ULI: + chip->playback_streams = ULI_NUM_PLAYBACK; + chip->capture_streams = ULI_NUM_CAPTURE; + chip->playback_index_offset = ULI_PLAYBACK_INDEX; + chip->capture_index_offset = ULI_CAPTURE_INDEX; + break; + default: + chip->playback_streams = ICH6_NUM_PLAYBACK; + chip->capture_streams = ICH6_NUM_CAPTURE; + chip->playback_index_offset = ICH6_PLAYBACK_INDEX; + chip->capture_index_offset = ICH6_CAPTURE_INDEX; + break; + } + chip->num_streams = chip->playback_streams + chip->capture_streams; + chip->azx_dev = kcalloc(chip->num_streams, sizeof(*chip->azx_dev), GFP_KERNEL); + if (! chip->azx_dev) { + snd_printk(KERN_ERR "cannot malloc azx_dev\n"); + goto errout; + } + /* allocate memory for the BDL for each stream */ if ((err = snd_dma_alloc_pages(SNDRV_DMA_TYPE_DEV, snd_dma_pci_data(chip->pci), - PAGE_SIZE, &chip->bdl)) < 0) { + BDL_SIZE, &chip->bdl)) < 0) { snd_printk(KERN_ERR SFX "cannot allocate BDL\n"); goto errout; } if (chip->position_fix == POS_FIX_POSBUF) { /* allocate memory for the position buffer */ if ((err = snd_dma_alloc_pages(SNDRV_DMA_TYPE_DEV, snd_dma_pci_data(chip->pci), - MAX_ICH6_DEV * 8, &chip->posbuf)) < 0) { + chip->num_streams * 8, &chip->posbuf)) < 0) { snd_printk(KERN_ERR SFX "cannot allocate posbuf\n"); goto errout; } @@ -1386,6 +1469,10 @@ static int __devinit azx_create(snd_card_t *card, struct pci_dev *pci, goto errout; } + strcpy(card->driver, "HDA-Intel"); + strcpy(card->shortname, driver_short_names[chip->driver_type]); + sprintf(card->longname, "%s at 0x%lx irq %i", card->shortname, chip->addr, chip->irq); + *rchip = chip; return 0; @@ -1414,15 +1501,12 @@ static int __devinit azx_probe(struct pci_dev *pci, const struct pci_device_id * return -ENOMEM; } - if ((err = azx_create(card, pci, position_fix[dev], 
&chip)) < 0) { + if ((err = azx_create(card, pci, position_fix[dev], pci_id->driver_data, + &chip)) < 0) { snd_card_free(card); return err; } - strcpy(card->driver, "HDA-Intel"); - strcpy(card->shortname, "HDA Intel"); - sprintf(card->longname, "%s at 0x%lx irq %i", card->shortname, chip->addr, chip->irq); - /* create codec instances */ if ((err = azx_codec_create(chip, model[dev])) < 0) { snd_card_free(card); @@ -1463,13 +1547,13 @@ static void __devexit azx_remove(struct pci_dev *pci) /* PCI IDs */ static struct pci_device_id azx_ids[] = { - { 0x8086, 0x2668, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0 }, /* ICH6 */ - { 0x8086, 0x27d8, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0 }, /* ICH7 */ - { 0x8086, 0x269a, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0 }, /* ESB2 */ - { 0x1002, 0x437b, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0 }, /* ATI SB450 */ - { 0x1106, 0x3288, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0 }, /* VIA VT8251/VT8237A */ - { 0x1039, 0x7502, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0 }, /* SIS966 */ - { 0x10b9, 0x5461, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0 }, /* ALI 5461? */ + { 0x8086, 0x2668, PCI_ANY_ID, PCI_ANY_ID, 0, 0, AZX_DRIVER_ICH }, /* ICH6 */ + { 0x8086, 0x27d8, PCI_ANY_ID, PCI_ANY_ID, 0, 0, AZX_DRIVER_ICH }, /* ICH7 */ + { 0x8086, 0x269a, PCI_ANY_ID, PCI_ANY_ID, 0, 0, AZX_DRIVER_ICH }, /* ESB2 */ + { 0x1002, 0x437b, PCI_ANY_ID, PCI_ANY_ID, 0, 0, AZX_DRIVER_ATI }, /* ATI SB450 */ + { 0x1106, 0x3288, PCI_ANY_ID, PCI_ANY_ID, 0, 0, AZX_DRIVER_VIA }, /* VIA VT8251/VT8237A */ + { 0x1039, 0x7502, PCI_ANY_ID, PCI_ANY_ID, 0, 0, AZX_DRIVER_SIS }, /* SIS966 */ + { 0x10b9, 0x5461, PCI_ANY_ID, PCI_ANY_ID, 0, 0, AZX_DRIVER_ULI }, /* ULI M5461 */ { 0, } }; MODULE_DEVICE_TABLE(pci, azx_ids); From f442e8b0eae4fbb6bbf9b8b005002b51a59e23bf Mon Sep 17 00:00:00 2001 From: Adrian Bunk Date: Wed, 24 Aug 2005 17:01:02 +0200 Subject: [PATCH 572/584] [ALSA] include/sound/gus.h: 'extern inline' -> 'static inline' GUS Library 'extern inline' doesn't make much sense. 
Signed-off-by: Adrian Bunk Signed-off-by: Takashi Iwai --- include/sound/gus.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/include/sound/gus.h b/include/sound/gus.h index b4b461ca173d..7000d9d9199d 100644 --- a/include/sound/gus.h +++ b/include/sound/gus.h @@ -512,13 +512,13 @@ extern void snd_gf1_ctrl_stop(snd_gus_card_t * gus, unsigned char reg); extern void snd_gf1_write8(snd_gus_card_t * gus, unsigned char reg, unsigned char data); extern unsigned char snd_gf1_look8(snd_gus_card_t * gus, unsigned char reg); -extern inline unsigned char snd_gf1_read8(snd_gus_card_t * gus, unsigned char reg) +static inline unsigned char snd_gf1_read8(snd_gus_card_t * gus, unsigned char reg) { return snd_gf1_look8(gus, reg | 0x80); } extern void snd_gf1_write16(snd_gus_card_t * gus, unsigned char reg, unsigned int data); extern unsigned short snd_gf1_look16(snd_gus_card_t * gus, unsigned char reg); -extern inline unsigned short snd_gf1_read16(snd_gus_card_t * gus, unsigned char reg) +static inline unsigned short snd_gf1_read16(snd_gus_card_t * gus, unsigned char reg) { return snd_gf1_look16(gus, reg | 0x80); } @@ -532,12 +532,12 @@ extern void snd_gf1_i_ctrl_stop(snd_gus_card_t * gus, unsigned char reg); extern void snd_gf1_i_write8(snd_gus_card_t * gus, unsigned char reg, unsigned char data); extern unsigned char snd_gf1_i_look8(snd_gus_card_t * gus, unsigned char reg); extern void snd_gf1_i_write16(snd_gus_card_t * gus, unsigned char reg, unsigned int data); -extern inline unsigned char snd_gf1_i_read8(snd_gus_card_t * gus, unsigned char reg) +static inline unsigned char snd_gf1_i_read8(snd_gus_card_t * gus, unsigned char reg) { return snd_gf1_i_look8(gus, reg | 0x80); } extern unsigned short snd_gf1_i_look16(snd_gus_card_t * gus, unsigned char reg); -extern inline unsigned short snd_gf1_i_read16(snd_gus_card_t * gus, unsigned char reg) +static inline unsigned short snd_gf1_i_read16(snd_gus_card_t * gus, unsigned char reg) { return 
snd_gf1_i_look16(gus, reg | 0x80); } From a278655ff5d0c9d5eb34cf99f3a4c20da09eb09e Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Wed, 24 Aug 2005 17:57:25 +0200 Subject: [PATCH 573/584] [ALSA] Fix missing spin_unlock au88x0 driver,Common EMU synth Fixed missing spin_unlock. Signed-off-by: Takashi Iwai --- sound/pci/au88x0/au88x0_pcm.c | 4 +++- sound/synth/emux/emux_synth.c | 1 - 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/sound/pci/au88x0/au88x0_pcm.c b/sound/pci/au88x0/au88x0_pcm.c index de95bd6d1ee1..38bd2b5dd434 100644 --- a/sound/pci/au88x0/au88x0_pcm.c +++ b/sound/pci/au88x0/au88x0_pcm.c @@ -220,8 +220,10 @@ snd_vortex_pcm_hw_params(snd_pcm_substream_t * substream, vortex_adb_allocroute(chip, -1, params_channels(hw_params), substream->stream, type); - if (dma < 0) + if (dma < 0) { + spin_unlock_irq(&chip->lock); return dma; + } stream = substream->runtime->private_data = &chip->dma_adb[dma]; stream->substream = substream; /* Setup Buffers. */ diff --git a/sound/synth/emux/emux_synth.c b/sound/synth/emux/emux_synth.c index f13b038329eb..751bf1272af3 100644 --- a/sound/synth/emux/emux_synth.c +++ b/sound/synth/emux/emux_synth.c @@ -98,7 +98,6 @@ snd_emux_note_on(void *p, int note, int vel, snd_midi_channel_t *chan) vp = emu->ops.get_voice(emu, port); if (vp == NULL || vp->ch < 0) continue; - snd_assert(vp->emu != NULL && vp->hw != NULL, return); if (STATE_IS_PLAYING(vp->state)) emu->ops.terminate(vp); From c347e9fca710551f0def6a4d58505a6f4c0d87f6 Mon Sep 17 00:00:00 2001 From: Clemens Ladisch Date: Thu, 25 Aug 2005 11:10:05 +0200 Subject: [PATCH 574/584] [ALSA] usb-audio: fix Emagic MIDI protocol handling USB generic driver Emagic devices pad their packets not with 0xff bytes but with a 0xff byte followed by garbage, so we have to stop at the first such byte. 
Signed-off-by: Clemens Ladisch --- sound/usb/usbmidi.c | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/sound/usb/usbmidi.c b/sound/usb/usbmidi.c index d0d895df5375..5f19b494923e 100644 --- a/sound/usb/usbmidi.c +++ b/sound/usb/usbmidi.c @@ -594,17 +594,20 @@ static void snd_usbmidi_emagic_finish_out(snd_usb_midi_out_endpoint_t* ep) static void snd_usbmidi_emagic_input(snd_usb_midi_in_endpoint_t* ep, uint8_t* buffer, int buffer_length) { - /* ignore padding bytes at end of buffer */ - while (buffer_length > 0 && buffer[buffer_length - 1] == 0xff) - --buffer_length; + int i; + + /* FF indicates end of valid data */ + for (i = 0; i < buffer_length; ++i) + if (buffer[i] == 0xff) { + buffer_length = i; + break; + } /* handle F5 at end of last buffer */ if (ep->seen_f5) goto switch_port; while (buffer_length > 0) { - int i; - /* determine size of data until next F5 */ for (i = 0; i < buffer_length; ++i) if (buffer[i] == 0xf5) @@ -671,6 +674,10 @@ static void snd_usbmidi_emagic_output(snd_usb_midi_out_endpoint_t* ep) break; } } + if (buf_free < ep->max_transfer && buf_free > 0) { + *buf = 0xff; + --buf_free; + } ep->urb->transfer_buffer_length = ep->max_transfer - buf_free; } From e0c93cf311b3eaa04bfb5769cff38861d4e748c8 Mon Sep 17 00:00:00 2001 From: Daniel Horchner Date: Thu, 25 Aug 2005 13:02:24 +0200 Subject: [PATCH 575/584] [ALSA] intel8x0 - add AC97_TUNE_MUTE_LED quirk for HP nx8220 laptop 103c:0934 Intel8x0 driver Signed-off-by: Jaroslav Kysela --- sound/pci/intel8x0.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/sound/pci/intel8x0.c b/sound/pci/intel8x0.c index e38984dc4d95..05c7994a8d5e 100644 --- a/sound/pci/intel8x0.c +++ b/sound/pci/intel8x0.c @@ -1824,6 +1824,12 @@ static struct ac97_quirk ac97_quirks[] __devinitdata = { .name = "HP nc6000", .type = AC97_TUNE_MUTE_LED }, + { + .subvendor = 0x103c, + .subdevice = 0x0934, + .name = "HP nx8220", + .type = AC97_TUNE_MUTE_LED + }, { .subvendor = 0x103c, .subdevice = 
0x099c, From 7153a558ad598ff521f1d9430982e2a4a6865126 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Thu, 25 Aug 2005 19:51:47 +0200 Subject: [PATCH 576/584] [ALSA] pcm - Fix zero-division in 32bit compat layer PCM Midlevel Fixed zero-division bug in PCM 32bit compat layer. Signed-off-by: Takashi Iwai --- sound/core/pcm_compat.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/sound/core/pcm_compat.c b/sound/core/pcm_compat.c index eef94a15f50a..4b6307df846d 100644 --- a/sound/core/pcm_compat.c +++ b/sound/core/pcm_compat.c @@ -144,7 +144,7 @@ static int snd_pcm_ioctl_sw_params_compat(snd_pcm_substream_t *substream, err = snd_pcm_sw_params(substream, &params); if (err < 0) return err; - if (put_user(boundary, &src->boundary)) + if (boundary && put_user(boundary, &src->boundary)) return -EFAULT; return err; } @@ -252,8 +252,11 @@ static int snd_pcm_ioctl_hw_params_compat(snd_pcm_substream_t *substream, goto error; } - if (! refine) - runtime->boundary = recalculate_boundary(runtime); + if (! refine) { + unsigned int new_boundary = recalculate_boundary(runtime); + if (new_boundary) + runtime->boundary = new_boundary; + } error: kfree(data); return err; @@ -408,6 +411,8 @@ static int snd_pcm_ioctl_sync_ptr_compat(snd_pcm_substream_t *substream, status = runtime->status; control = runtime->control; boundary = recalculate_boundary(runtime); + if (! boundary) + boundary = 0x7fffffff; snd_pcm_stream_lock_irq(substream); /* FIXME: we should consider the boundary for the sync from app */ if (!(sflags & SNDRV_PCM_SYNC_PTR_APPL)) From 9970dce56686d7b71310388025d8925d3d29e6ec Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Thu, 25 Aug 2005 20:30:27 +0200 Subject: [PATCH 577/584] [ALSA] intel8x0 - Add quirk for FSC 4010 Intel8x0 driver Added ac97_quirk for FSC 4010.
Signed-off-by: Takashi Iwai --- sound/pci/intel8x0.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/sound/pci/intel8x0.c b/sound/pci/intel8x0.c index 05c7994a8d5e..7b548416dcef 100644 --- a/sound/pci/intel8x0.c +++ b/sound/pci/intel8x0.c @@ -1890,6 +1890,12 @@ static struct ac97_quirk ac97_quirks[] __devinitdata = { .name = "Fujitsu S6210", /* STAC9750/51 */ .type = AC97_TUNE_HP_ONLY }, + { + .subvendor = 0x10cf, + .subdevice = 0x12ec, + .name = "Fujitsu-Siemens 4010", + .type = AC97_TUNE_HP_ONLY + }, { .subvendor = 0x10f1, .subdevice = 0x2665, From 6a293552ba0d104ef9d35b48972a01540fbd181b Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Fri, 26 Aug 2005 17:26:40 +0200 Subject: [PATCH 578/584] [ALSA] emu10k1 - Add missing ac97 support on SBLive! Player 5.1 EMU10K1/EMU10K2 driver Added the missing ac97 support on SBLive! Player 5.1. Signed-off-by: Takashi Iwai --- sound/pci/emu10k1/emu10k1_main.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/emu10k1/emu10k1_main.c b/sound/pci/emu10k1/emu10k1_main.c index c0b67b70e345..83597c694596 100644 --- a/sound/pci/emu10k1/emu10k1_main.c +++ b/sound/pci/emu10k1/emu10k1_main.c @@ -752,6 +752,7 @@ static emu_chip_details_t emu_chip_details[] = { .driver = "EMU10K1", .name = "SBLive! Player 5.1 [SB0060]", .id = "Live", .emu10k1_chip = 1, + .ac97_chip = 1, .sblive51 = 1} , {.vendor = 0x1102, .device = 0x0002, .subsystem = 0x80511102, .driver = "EMU10K1", .name = "SBLive! 
Value [CT4850]", From c6c0b841dd85c878fe9794862b10ce8eb4e90761 Mon Sep 17 00:00:00 2001 From: Lee Revell Date: Mon, 29 Aug 2005 17:42:00 +0200 Subject: [PATCH 579/584] [ALSA] Add identification for Live 5.1 [SB0220] EMU10K1/EMU10K2 driver Add identification for Live 5.1 [SB0220] Signed-off-by: Lee Revell Signed-off-by: Takashi Iwai --- sound/pci/emu10k1/emu10k1_main.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/sound/pci/emu10k1/emu10k1_main.c b/sound/pci/emu10k1/emu10k1_main.c index 83597c694596..e69d5b739e80 100644 --- a/sound/pci/emu10k1/emu10k1_main.c +++ b/sound/pci/emu10k1/emu10k1_main.c @@ -741,6 +741,13 @@ static emu_chip_details_t emu_chip_details[] = { .emu10k1_chip = 1, .ac97_chip = 1, .sblive51 = 1} , + /* Tested by Thomas Zehetbauer 27th Aug 2005 */ + {.vendor = 0x1102, .device = 0x0002, .subsystem = 0x80651102, + .driver = "EMU10K1", .name = "SB Live 5.1 [SB0220]", + .id = "Live", + .emu10k1_chip = 1, + .ac97_chip = 1, + .sblive51 = 1} , {.vendor = 0x1102, .device = 0x0002, .subsystem = 0x80641102, .driver = "EMU10K1", .name = "SB Live 5.1", .id = "Live", From 68c339d9068502347015ce462530ec54c4d841e4 Mon Sep 17 00:00:00 2001 From: Jaroslav Kysela Date: Tue, 30 Aug 2005 08:48:35 +0200 Subject: [PATCH 580/584] [ALSA] version 1.0.10rc1 --- include/sound/version.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/sound/version.h b/include/sound/version.h index c085136f391f..8d19bfabb7e0 100644 --- a/include/sound/version.h +++ b/include/sound/version.h @@ -1,3 +1,3 @@ /* include/version.h. Generated by configure. 
*/ -#define CONFIG_SND_VERSION "1.0.9b" -#define CONFIG_SND_DATE " (Thu Jul 28 12:20:13 2005 UTC)" +#define CONFIG_SND_VERSION "1.0.10rc1" +#define CONFIG_SND_DATE " (Tue Aug 30 05:31:08 2005 UTC)" From d568121ce3151c36cc4718dd4e977f217c6144c2 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 30 Aug 2005 08:58:37 +0200 Subject: [PATCH 581/584] [PATCH] Assign device pointer to OSS devices Add register_sound_special_device() function to allow assignment of device pointer to a specific OSS device for HAL. Signed-off-by: Takashi Iwai --- include/linux/sound.h | 2 ++ sound/core/sound_oss.c | 7 +++++-- sound/sound_core.c | 27 ++++++++++++++++++--------- 3 files changed, 25 insertions(+), 11 deletions(-) diff --git a/include/linux/sound.h b/include/linux/sound.h index 428f59794f48..72b9af4c3fd4 100644 --- a/include/linux/sound.h +++ b/include/linux/sound.h @@ -29,7 +29,9 @@ * Sound core interface functions */ +struct device; extern int register_sound_special(struct file_operations *fops, int unit); +extern int register_sound_special_device(struct file_operations *fops, int unit, struct device *dev); extern int register_sound_mixer(struct file_operations *fops, int dev); extern int register_sound_midi(struct file_operations *fops, int dev); extern int register_sound_dsp(struct file_operations *fops, int dev); diff --git a/sound/core/sound_oss.c b/sound/core/sound_oss.c index de39d212bc15..e401c6703297 100644 --- a/sound/core/sound_oss.c +++ b/sound/core/sound_oss.c @@ -98,6 +98,7 @@ int snd_register_oss_device(int type, snd_card_t * card, int dev, snd_minor_t * int cidx = SNDRV_MINOR_OSS_CARD(minor); int track2 = -1; int register1 = -1, register2 = -1; + struct device *carddev = NULL; if (minor < 0) return minor; @@ -121,11 +122,13 @@ int snd_register_oss_device(int type, snd_card_t * card, int dev, snd_minor_t * track2 = SNDRV_MINOR_OSS(cidx, SNDRV_MINOR_OSS_DMMIDI1); break; } - register1 = register_sound_special(reg->f_ops, minor); + if (card) + carddev = card->dev; 
+ register1 = register_sound_special_device(reg->f_ops, minor, carddev); if (register1 != minor) goto __end; if (track2 >= 0) { - register2 = register_sound_special(reg->f_ops, track2); + register2 = register_sound_special_device(reg->f_ops, track2, carddev); if (register2 != track2) goto __end; } diff --git a/sound/sound_core.c b/sound/sound_core.c index 21a69e096225..954f994592ab 100644 --- a/sound/sound_core.c +++ b/sound/sound_core.c @@ -153,7 +153,7 @@ static DEFINE_SPINLOCK(sound_loader_lock); * list. Acquires locks as needed */ -static int sound_insert_unit(struct sound_unit **list, struct file_operations *fops, int index, int low, int top, const char *name, umode_t mode) +static int sound_insert_unit(struct sound_unit **list, struct file_operations *fops, int index, int low, int top, const char *name, umode_t mode, struct device *dev) { struct sound_unit *s = kmalloc(sizeof(*s), GFP_KERNEL); int r; @@ -175,7 +175,7 @@ static int sound_insert_unit(struct sound_unit **list, struct file_operations *f devfs_mk_cdev(MKDEV(SOUND_MAJOR, s->unit_minor), S_IFCHR | mode, s->name); class_device_create(sound_class, MKDEV(SOUND_MAJOR, s->unit_minor), - NULL, s->name+6); + dev, s->name+6); return r; fail: @@ -227,16 +227,18 @@ static void sound_remove_unit(struct sound_unit **list, int unit) static struct sound_unit *chains[SOUND_STEP]; /** - * register_sound_special - register a special sound node + * register_sound_special_device - register a special sound node * @fops: File operations for the driver * @unit: Unit number to allocate + * @dev: device pointer * * Allocate a special sound device by minor number from the sound * subsystem. The allocated number is returned on succes. On failure * a negative error code is returned. 
*/ -int register_sound_special(struct file_operations *fops, int unit) +int register_sound_special_device(struct file_operations *fops, int unit, + struct device *dev) { const int chain = unit % SOUND_STEP; int max_unit = 128 + chain; @@ -294,9 +296,16 @@ int register_sound_special(struct file_operations *fops, int unit) break; } return sound_insert_unit(&chains[chain], fops, -1, unit, max_unit, - name, S_IRUSR | S_IWUSR); + name, S_IRUSR | S_IWUSR, dev); } +EXPORT_SYMBOL(register_sound_special_device); + +int register_sound_special(struct file_operations *fops, int unit) +{ + return register_sound_special_device(fops, unit, NULL); +} + EXPORT_SYMBOL(register_sound_special); /** @@ -312,7 +321,7 @@ EXPORT_SYMBOL(register_sound_special); int register_sound_mixer(struct file_operations *fops, int dev) { return sound_insert_unit(&chains[0], fops, dev, 0, 128, - "mixer", S_IRUSR | S_IWUSR); + "mixer", S_IRUSR | S_IWUSR, NULL); } EXPORT_SYMBOL(register_sound_mixer); @@ -330,7 +339,7 @@ EXPORT_SYMBOL(register_sound_mixer); int register_sound_midi(struct file_operations *fops, int dev) { return sound_insert_unit(&chains[2], fops, dev, 2, 130, - "midi", S_IRUSR | S_IWUSR); + "midi", S_IRUSR | S_IWUSR, NULL); } EXPORT_SYMBOL(register_sound_midi); @@ -356,7 +365,7 @@ EXPORT_SYMBOL(register_sound_midi); int register_sound_dsp(struct file_operations *fops, int dev) { return sound_insert_unit(&chains[3], fops, dev, 3, 131, - "dsp", S_IWUSR | S_IRUSR); + "dsp", S_IWUSR | S_IRUSR, NULL); } EXPORT_SYMBOL(register_sound_dsp); @@ -375,7 +384,7 @@ EXPORT_SYMBOL(register_sound_dsp); int register_sound_synth(struct file_operations *fops, int dev) { return sound_insert_unit(&chains[9], fops, dev, 9, 137, - "synth", S_IRUSR | S_IWUSR); + "synth", S_IRUSR | S_IWUSR, NULL); } EXPORT_SYMBOL(register_sound_synth); From c88469704d63787e8d44ca5ea1c1bd0adc29572d Mon Sep 17 00:00:00 2001 From: Clemens Ladisch Date: Tue, 2 Aug 2005 15:26:52 +0200 Subject: [PATCH 582/584] [ALSA] usb-audio: 
throttle MIDI URB resubmits on USB errors USB generic driver When a USB error occurs that might indicate that the device has been unplugged, don't resubmit the URB immediately to prevent flooding the log with error messages before khubd has us disconnect()ed. Signed-off-by: Clemens Ladisch --- sound/usb/usbmidi.c | 77 +++++++++++++++++++++++++++++++++++++-------- 1 file changed, 64 insertions(+), 13 deletions(-) diff --git a/sound/usb/usbmidi.c b/sound/usb/usbmidi.c index 5f19b494923e..93dedde3c428 100644 --- a/sound/usb/usbmidi.c +++ b/sound/usb/usbmidi.c @@ -44,6 +44,7 @@ #include #include #include +#include #include #include #include @@ -56,6 +57,12 @@ */ /* #define DUMP_PACKETS */ +/* + * how long to wait after some USB errors, so that khubd can disconnect() us + * without too many spurious errors + */ +#define ERROR_DELAY_JIFFIES (HZ / 10) + MODULE_AUTHOR("Clemens Ladisch "); MODULE_DESCRIPTION("USB Audio/MIDI helper module"); @@ -100,6 +107,7 @@ struct snd_usb_midi { snd_rawmidi_t* rmidi; struct usb_protocol_ops* usb_protocol_ops; struct list_head list; + struct timer_list error_timer; struct snd_usb_midi_endpoint { snd_usb_midi_out_endpoint_t *out; @@ -141,7 +149,8 @@ struct snd_usb_midi_in_endpoint { struct usbmidi_in_port { snd_rawmidi_substream_t* substream; } ports[0x10]; - int seen_f5; + u8 seen_f5; + u8 error_resubmit; int current_port; }; @@ -167,14 +176,22 @@ static int snd_usbmidi_submit_urb(struct urb* urb, int flags) */ static int snd_usbmidi_urb_error(int status) { - if (status == -ENOENT) - return status; /* killed */ - if (status == -EILSEQ || - status == -ECONNRESET || - status == -ETIMEDOUT) - return -ENODEV; /* device removed/shutdown */ - snd_printk(KERN_ERR "urb status %d\n", status); - return 0; /* continue */ + switch (status) { + /* manually unlinked, or device gone */ + case -ENOENT: + case -ECONNRESET: + case -ESHUTDOWN: + case -ENODEV: + return -ENODEV; + /* errors that might occur during unplugging */ + case -EPROTO: /* EHCI */ + 
case -ETIMEDOUT: /* OHCI */ + case -EILSEQ: /* UHCI */ + return -EIO; + default: + snd_printk(KERN_ERR "urb status %d\n", status); + return 0; /* continue */ + } } /* @@ -218,8 +235,15 @@ static void snd_usbmidi_in_urb_complete(struct urb* urb, struct pt_regs *regs) ep->umidi->usb_protocol_ops->input(ep, urb->transfer_buffer, urb->actual_length); } else { - if (snd_usbmidi_urb_error(urb->status) < 0) + int err = snd_usbmidi_urb_error(urb->status); + if (err < 0) { + if (err != -ENODEV) { + ep->error_resubmit = 1; + mod_timer(&ep->umidi->error_timer, + jiffies + ERROR_DELAY_JIFFIES); + } return; + } } if (usb_pipe_needs_resubmit(urb->pipe)) { @@ -236,8 +260,13 @@ static void snd_usbmidi_out_urb_complete(struct urb* urb, struct pt_regs *regs) ep->urb_active = 0; spin_unlock(&ep->buffer_lock); if (urb->status < 0) { - if (snd_usbmidi_urb_error(urb->status) < 0) + int err = snd_usbmidi_urb_error(urb->status); + if (err < 0) { + if (err != -ENODEV) + mod_timer(&ep->umidi->error_timer, + jiffies + ERROR_DELAY_JIFFIES); return; + } } snd_usbmidi_do_output(ep); } @@ -276,6 +305,24 @@ static void snd_usbmidi_out_tasklet(unsigned long data) snd_usbmidi_do_output(ep); } +/* called after transfers had been interrupted due to some USB error */ +static void snd_usbmidi_error_timer(unsigned long data) +{ + snd_usb_midi_t *umidi = (snd_usb_midi_t *)data; + int i; + + for (i = 0; i < MIDI_MAX_ENDPOINTS; ++i) { + snd_usb_midi_in_endpoint_t *in = umidi->endpoints[i].in; + if (in && in->error_resubmit) { + in->error_resubmit = 0; + in->urb->dev = umidi->chip->dev; + snd_usbmidi_submit_urb(in->urb, GFP_ATOMIC); + } + if (umidi->endpoints[i].out) + snd_usbmidi_do_output(umidi->endpoints[i].out); + } +} + /* helper function to send static data that may not DMA-able */ static int send_bulk_static_data(snd_usb_midi_out_endpoint_t* ep, const void *data, int len) @@ -844,8 +891,6 @@ static unsigned int snd_usbmidi_count_bits(unsigned int x) */ static void 
snd_usbmidi_out_endpoint_delete(snd_usb_midi_out_endpoint_t* ep) { - if (ep->tasklet.func) - tasklet_kill(&ep->tasklet); if (ep->urb) { usb_buffer_free(ep->umidi->chip->dev, ep->max_transfer, ep->urb->transfer_buffer, @@ -934,8 +979,11 @@ void snd_usbmidi_disconnect(struct list_head* p) int i; umidi = list_entry(p, snd_usb_midi_t, list); + del_timer_sync(&umidi->error_timer); for (i = 0; i < MIDI_MAX_ENDPOINTS; ++i) { snd_usb_midi_endpoint_t* ep = &umidi->endpoints[i]; + if (ep->out) + tasklet_kill(&ep->out->tasklet); if (ep->out && ep->out->urb) { usb_kill_urb(ep->out->urb); if (umidi->usb_protocol_ops->finish_out_endpoint) @@ -1496,6 +1544,9 @@ int snd_usb_create_midi_interface(snd_usb_audio_t* chip, umidi->iface = iface; umidi->quirk = quirk; umidi->usb_protocol_ops = &snd_usbmidi_standard_ops; + init_timer(&umidi->error_timer); + umidi->error_timer.function = snd_usbmidi_error_timer; + umidi->error_timer.data = (unsigned long)umidi; /* detect the endpoint(s) to use */ memset(endpoints, 0, sizeof(endpoints)); From 81d4af1340badcd2100c84fbd1bfd13156de41aa Mon Sep 17 00:00:00 2001 From: Ivan Kokshaysky Date: Tue, 30 Aug 2005 18:48:52 +0400 Subject: [PATCH 583/584] [PATCH] x86: pci_assign_unassigned_resources() update I had some time to think about PCI assign issues in 2.6.13-rc series. The major problem here is that we call pci_assign_unassigned_resources() way too early - at subsys_initcall level. Therefore we give no chances to ACPI and PnP routines (called at fs_initcall level) to reserve their respective resources properly, as the comments in drivers/pnp/system.c and drivers/acpi/motherboard.c suggest: /** * Reserve motherboard resources after PCI claim BARs, * but before PCI assign resources for uninitialized PCI devices */ So I moved the pci_assign_unassigned_resources() call to pcibios_assign_resources() (fs_initcall), which should hopefully fix a lot of problems and make PCIBIOS_MIN_IO tweaks unnecessary. 
Other changes: - remove resource assignment code from pcibios_assign_resources(), since it duplicates pci_assign_unassigned_resources() functionality and actually does nothing in 2.6.13; - modify ROM assignment code as per Ben's suggestion: try to use firmware settings by default (if PCI_ASSIGN_ROMS is not set); - set CARDBUS_IO_SIZE back to 4K as it's a wonderful stress test for various setups. Confirmed by Tero Roponen (who had problems with the 4kB CardBus IO size previously). Signed-off-by: Linus Torvalds --- arch/i386/pci/common.c | 1 - arch/i386/pci/i386.c | 49 ++++++++++++++--------------------------- drivers/pci/setup-bus.c | 2 +- 3 files changed, 17 insertions(+), 35 deletions(-) diff --git a/arch/i386/pci/common.c b/arch/i386/pci/common.c index ade5bc57c34c..c96bea14b98f 100644 --- a/arch/i386/pci/common.c +++ b/arch/i386/pci/common.c @@ -165,7 +165,6 @@ static int __init pcibios_init(void) if ((pci_probe & PCI_BIOS_SORT) && !(pci_probe & PCI_NO_SORT)) pcibios_sort(); #endif - pci_assign_unassigned_resources(); return 0; } diff --git a/arch/i386/pci/i386.c b/arch/i386/pci/i386.c index 93a364c82150..3cc480998a47 100644 --- a/arch/i386/pci/i386.c +++ b/arch/i386/pci/i386.c @@ -170,43 +170,26 @@ static void __init pcibios_allocate_resources(int pass) static int __init pcibios_assign_resources(void) { struct pci_dev *dev = NULL; - int idx; - struct resource *r; + struct resource *r, *pr; - for_each_pci_dev(dev) { - int class = dev->class >> 8; - - /* Don't touch classless devices and host bridges */ - if (!class || class == PCI_CLASS_BRIDGE_HOST) - continue; - - for(idx=0; idx<6; idx++) { - r = &dev->resource[idx]; - - /* - * Don't touch IDE controllers and I/O ports of video cards! 
- */ - if ((class == PCI_CLASS_STORAGE_IDE && idx < 4) || - (class == PCI_CLASS_DISPLAY_VGA && (r->flags & IORESOURCE_IO))) - continue; - - /* - * We shall assign a new address to this resource, either because - * the BIOS forgot to do so or because we have decided the old - * address was unusable for some reason. - */ - if (!r->start && r->end) - pci_assign_resource(dev, idx); - } - - if (pci_probe & PCI_ASSIGN_ROMS) { + if (!(pci_probe & PCI_ASSIGN_ROMS)) { + /* Try to use BIOS settings for ROMs, otherwise let + pci_assign_unassigned_resources() allocate the new + addresses. */ + for_each_pci_dev(dev) { r = &dev->resource[PCI_ROM_RESOURCE]; - r->end -= r->start; - r->start = 0; - if (r->end) - pci_assign_resource(dev, PCI_ROM_RESOURCE); + if (!r->flags || !r->start) + continue; + pr = pci_find_parent_resource(dev, r); + if (!pr || request_resource(pr, r) < 0) { + r->end -= r->start; + r->start = 0; + } } } + + pci_assign_unassigned_resources(); + return 0; } diff --git a/drivers/pci/setup-bus.c b/drivers/pci/setup-bus.c index 6d864c502a1f..6b0e6464eb39 100644 --- a/drivers/pci/setup-bus.c +++ b/drivers/pci/setup-bus.c @@ -40,7 +40,7 @@ * FIXME: IO should be max 256 bytes. However, since we may * have a P2P bridge below a cardbus bridge, we need 4K. 
*/ -#define CARDBUS_IO_SIZE (256) +#define CARDBUS_IO_SIZE (4*1024) #define CARDBUS_MEM_SIZE (32*1024*1024) static void __devinit From 62c592edead3c3a045662595f7ade3c12f133373 Mon Sep 17 00:00:00 2001 From: Marcelo Tosatti Date: Tue, 30 Aug 2005 13:40:22 -0300 Subject: [PATCH 584/584] [PATCH] ppc32 8xx: fix warnings in m8xx_setup.c The following patch fixes two warnings in arch/ppc/syslib/m8xx_setup.c Signed-off-by: Marcelo Tosatti Signed-off-by: Linus Torvalds --- arch/ppc/syslib/m8xx_setup.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/ppc/syslib/m8xx_setup.c b/arch/ppc/syslib/m8xx_setup.c index a3702cfe8f7c..4c888da89b3c 100644 --- a/arch/ppc/syslib/m8xx_setup.c +++ b/arch/ppc/syslib/m8xx_setup.c @@ -57,7 +57,7 @@ unsigned char __res[sizeof(bd_t)]; extern void m8xx_ide_init(void); extern unsigned long find_available_memory(void); -extern void m8xx_cpm_reset(); +extern void m8xx_cpm_reset(void); extern void m8xx_wdt_handler_install(bd_t *bp); extern void rpxfb_alloc_pages(void); extern void cpm_interrupt_init(void); @@ -266,8 +266,8 @@ m8xx_show_percpuinfo(struct seq_file *m, int i) bp = (bd_t *)__res; - seq_printf(m, "clock\t\t: %ldMHz\n" - "bus clock\t: %ldMHz\n", + seq_printf(m, "clock\t\t: %uMHz\n" + "bus clock\t: %uMHz\n", bp->bi_intfreq / 1000000, bp->bi_busfreq / 1000000);